Code Example #1
File: test_opt.py  Project: HapeMask/Theano
def test_no_complex():
    width_var = tensor.cscalar()
    freq_var = tensor.fscalar()
    signal_var = tensor.fscalar()
    stft_out = tensor.exp(width_var * freq_var) * signal_var
    theano.function([width_var, freq_var, signal_var], stft_out,
                    mode=mode_with_gpu)
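
For context, tensor.fscalar() builds a 0-dimensional float32 symbolic variable (above it is mixed with a complex64 cscalar to exercise the GPU optimizer). A minimal, self-contained sketch of compiling a Theano function over fscalar inputs, independent of the GPU mode used in the example:

import theano
import theano.tensor as tensor

a = tensor.fscalar('a')   # 0-d float32 symbolic scalar
b = tensor.fscalar('b')
# allow_input_downcast lets plain Python floats be cast to float32
f = theano.function([a, b], a * b + 1, allow_input_downcast=True)
print(f(2.0, 3.0))        # prints 7.0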
Code Example #2
    def test_default_dtype(self):
        random = RandomStreams(utt.fetch_seed())
        low = tensor.dscalar()
        high = tensor.dscalar()

        # Should not silently downcast from low and high
        out0 = random.uniform(low=low, high=high, size=(42,))
        assert out0.dtype == 'float64'
        f0 = function([low, high], out0)
        val0 = f0(-2.1, 3.1)
        assert val0.dtype == 'float64'

        # Should downcast, since asked explicitly
        out1 = random.uniform(low=low, high=high, size=(42,), dtype='float32')
        assert out1.dtype == 'float32'
        f1 = function([low, high], out1)
        val1 = f1(-1.1, 1.1)
        assert val1.dtype == 'float32'

        # Should use floatX
        lowf = tensor.fscalar()
        highf = tensor.fscalar()
        outf = random.uniform(low=lowf, high=highf, size=(42,))
        assert outf.dtype == config.floatX
        ff = function([lowf, highf], outf)
        valf = ff(numpy.float32(-0.1), numpy.float32(0.3))
        assert valf.dtype == config.floatX
Code Example #3
File: learning_rate.py  Project: bentzinir/Buffe
def create_learning_rate_func(solver_params):
    base = tt.fscalar('base')
    gamma = tt.fscalar('gamma')
    power = tt.fscalar('power')
    itrvl = tt.fscalar('itrvl')
    iter = tt.scalar('iter')

    if solver_params['lr_type']=='inv':
        lr_ = base * tt.pow(1 + gamma * iter, -power)

        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base']), t.Param(gamma, default=solver_params['gamma']), t.Param(power, default=solver_params['power'])],
            outputs=lr_)

    elif solver_params['lr_type']=='fixed':
        lr_ = base

        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base'])],
            outputs=lr_,
            on_unused_input='ignore')

    elif solver_params['lr_type']=='episodic':
        lr_ = base / (tt.floor(iter/itrvl) + 1)

        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base']), t.Param(itrvl, default=solver_params['interval'])],
            outputs=lr_,
            on_unused_input='ignore')
    return lr
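
The 'inv' branch above implements the decay lr(iter) = base * (1 + gamma * iter) ** (-power). A plain NumPy sketch of the same schedule; the default values here are illustrative assumptions, not the project's settings:

import numpy as np

def inv_lr(iteration, base=0.01, gamma=1e-4, power=0.75):
    # Same expression as the Theano graph: base * (1 + gamma*iter)**(-power)
    return base * (1.0 + gamma * iteration) ** (-power)

print(inv_lr(0), inv_lr(100000))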
Code Example #4
File: train_emb.py  Project: dongx-duan/crf
  def __init__(self, vocabulary_size, hidden_size, output_size):
    X = tensor.ivector()
    Y = tensor.ivector()
    keep_prob = tensor.fscalar()
    learning_rate = tensor.fscalar()

    emb_layer = Embedding(vocabulary_size, hidden_size)
    lstm_layer = BiLSTM(hidden_size, hidden_size)
    dropout_layer = Dropout(keep_prob)
    fc_layer = FullConnect(2*hidden_size, output_size)
    crf = CRF(output_size)
    # graph definition
    X_emb = emb_layer(X)
    scores = fc_layer(tensor.tanh(lstm_layer(dropout_layer(X_emb))))
    
    loss, predict = crf(scores, Y, isTraining=True)
    # loss, predict and accuracy
    accuracy = tensor.sum(tensor.eq(predict, Y)) * 1.0 / Y.shape[0]

    params = emb_layer.params + lstm_layer.params + fc_layer.params + crf.params
    updates = MomentumSGD(loss, params, lr=learning_rate)

    print("Compiling train function: ")
    train = theano.function(inputs=[X, Y, keep_prob, learning_rate], outputs=[predict, accuracy, loss], 
      updates=updates, allow_input_downcast=True)

    print("Compiling evaluate function: ")
    evaluate = theano.function(inputs=[X_emb, Y, keep_prob], outputs=[predict, accuracy, loss], 
      allow_input_downcast=True)

    self.embedding_tensor = emb_layer.params[0]
    self.train = train
    self.evaluate = evaluate
    self.params = params
Code Example #5
File: ica_gpu.py  Project: edamaraju/ica
    def __init__(self, n_comp=10, verbose=False):

        # Theano initialization
        self.T_weights = shared(np.eye(n_comp, dtype=np.float32))
        self.T_bias = shared(np.ones((n_comp, 1), dtype=np.float32))

        T_p_x_white = T.fmatrix()
        T_lrate = T.fscalar()
        T_block = T.fscalar()
        T_unmixed = T.dot(self.T_weights,T_p_x_white) + T.addbroadcast(self.T_bias,1)
        T_logit = 1 - 2 / (1 + T.exp(-T_unmixed))

        T_out =  self.T_weights +  T_lrate * T.dot(T_block * T.identity_like(self.T_weights) + T.dot(T_logit, T.transpose(T_unmixed)), self.T_weights)
        T_bias_out = self.T_bias + T_lrate * T.reshape(T_logit.sum(axis=1), (-1,1))
        T_max_w = T.max(self.T_weights)
        T_isnan = T.any(T.isnan(self.T_weights))

        self.w_up_fun = theano.function([T_p_x_white, T_lrate, T_block],
                                        [T_max_w, T_isnan],
                                        updates=[(self.T_weights, T_out),
                                                 (self.T_bias, T_bias_out)],
                                        allow_input_downcast=True)

        T_matrix = T.fmatrix()
        T_cov = T.dot(T_matrix,T.transpose(T_matrix))/T_block
        self.cov_fun = theano.function([T_matrix, T_block], T_cov, allow_input_downcast=True)
        
        self.loading = None
        self.sources = None
        self.weights = None
        self.n_comp = n_comp
        self.verbose = verbose
Code Example #6
File: neural_net.py  Project: grant/algo-rhythm
    def generate_theano_functions(self, next_layer):
        '''Compile necessary theano functions'''
        exp = tensor.fmatrix('expected')
        rate = tensor.fscalar('rate')
        momentum = tensor.fscalar('momentum')

        ##Compute outputs given inputs
        self.get_output = theano.function([],
                                          updates = [(self.outputs,
                                                        tensor.nnet.sigmoid(
                                                            tensor.dot(
                                                               self.inputs,
                                                               self.weights)))],
                                          name='get_output')

        ##Compute error values given errors of previous layer
        if self.output:
            self.find_errors = theano.function([exp],
                                               updates = [(self.errors,
                                                           self.outputs *
                                                           (1 - self.outputs)
                                                           * exp)],
                                               name='find_errors',
                                               allow_input_downcast=True)
        else:
            self.find_errors = theano.function([],
                                               updates = [(self.errors,
                                                          self.outputs *
                                                          (1 - self.outputs) *
                                             tensor.dot(next_layer.errors,
                                                        next_layer.weights.T))],
                                           name='find_errors')

        ##Compute the change to the weight vector using stochastic gradient
        ##descent with momentum
        self.train_compute = theano.function([rate, momentum],
                                      updates = [(self.delta_weights,
                                                  self.delta_weights *
                                                  momentum +
                                            theano.tensor.dot(self.inputs.T,
                                                        (rate * self.errors)))],
                                     name='train_compute',
                                     allow_input_downcast=True)

        ##Adjust weights using the delta_w computed in train_compute
        self.adjust = theano.function([], updates=[(self.weights, self.weights +
                                                    self.delta_weights)],
                                      name='adjust')

        ##Drop a number of nodes roughly equal to rate/output_size
        self.dropout = theano.function([rate], updates = [(self.outputs,
                                            tensor.switch(
                                                self.random.binomial(size=(1,
                                                    self.output_size),
                                                    p=rate), self.outputs /
                                                             rate, 0))],
                                       name='dropout',
                                       allow_input_downcast=True)
Code Example #7
    def __build_iterative_functions(self):

        def states_dot(lambda_x, lambda_y, x_data, y_data):
            [x_dot, h_dot, y_dot] = T.grad(-self.energy_sum, self.states)
            x_dot_final = lambda_x * (x_data - self.x) + (1. - lambda_x) * x_dot
            y_dot_final = lambda_y * (y_data - self.y) + (1. - lambda_y) * y_dot
            return [x_dot_final, h_dot, y_dot_final]

        lambda_x = T.fscalar('lambda_x')
        lambda_y = T.fscalar('lambda_y')

        x_data = self.outside_world.x_data
        y_data = self.outside_world.y_data_one_hot

        states_dot = [x_dot, h_dot, y_dot] = states_dot(lambda_x, lambda_y, x_data, y_data)

        kinetic_energy = T.mean( sum( [(state_dot ** 2).sum(axis=1) for state_dot in states_dot] ) )
        params_dot = T.grad(kinetic_energy, self.params)

        # UPDATES
        epsilon  = T.fscalar('epsilon')
        alpha_W1 = T.fscalar('alpha_W1')
        alpha_W2 = T.fscalar('alpha_W2')
        learning_rates = [alpha_W1,alpha_W1,alpha_W1,alpha_W2,alpha_W2]

        Delta_states = [epsilon * state_dot for state_dot in states_dot]
        Delta_params = [alpha * param_dot for alpha,param_dot in zip(learning_rates,params_dot)]
        states_new = [state+Delta for state,Delta in zip(self.states,Delta_states)]
        params_new = [param+Delta for param,Delta in zip(self.params,Delta_params)]
        updates_states = zip(self.states,states_new)
        updates_params = zip(self.params,params_new)

        # OUTPUTS FOR MONITORING
        error_rate   = T.mean(T.neq(self.prediction, self.outside_world.y_data))
        mse          = T.mean(((self.y - self.outside_world.y_data_one_hot) ** 2).sum(axis=1))
        norm_grad_hy = T.sqrt( (h_dot ** 2).mean(axis=0).sum() + (y_dot ** 2).mean(axis=0).sum() )
        Delta_W1 = Delta_params[1]
        Delta_W2 = Delta_params[3]
        Delta_logW1  = T.sqrt( (Delta_W1 ** 2).mean() ) / T.sqrt( (self.W1 ** 2).mean() )
        Delta_logW2  = T.sqrt( (Delta_W2 ** 2).mean() ) / T.sqrt( (self.W2 ** 2).mean() )

        # THEANO FUNCTIONS
        iterative_function = theano.function(
            inputs=[lambda_x, lambda_y, epsilon, alpha_W1, alpha_W2],
            outputs=[self.energy, norm_grad_hy, self.prediction, error_rate, mse, Delta_logW1, Delta_logW2],
            updates=updates_params+updates_states
        )

        relaxation_function = theano.function(
            inputs=[epsilon],
            outputs=[self.energy, norm_grad_hy, self.prediction, error_rate, mse],
            givens={
            lambda_y: T.constant(0.)
            },
            updates=updates_states[1:3]
        )

        return iterative_function, relaxation_function
Code Example #8
File: tsne.py  Project: paulorauber/thesne
def find_Y(X_shared, Y_shared, sigma_shared, N, output_dims, n_epochs,
           initial_lr, final_lr, lr_switch, init_stdev, initial_momentum,
           final_momentum, momentum_switch, metric, verbose=0):
    """Optimize cost wrt Y"""
    # Optimization hyperparameters
    initial_lr = np.array(initial_lr, dtype=floath)
    final_lr = np.array(final_lr, dtype=floath)
    initial_momentum = np.array(initial_momentum, dtype=floath)
    final_momentum = np.array(final_momentum, dtype=floath)

    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)

    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)

    # Y velocities
    Yv = T.fmatrix('Yv')
    Yv_shared = theano.shared(np.zeros((N, output_dims), dtype=floath))

    # Cost
    X = T.fmatrix('X')
    sigma = T.fvector('sigma')
    Y = T.fmatrix('Y')

    cost = cost_var(X, Y, sigma, metric)

    # Setting update for Y velocities
    grad_Y = T.grad(cost, Y)

    updates = [(Yv_shared, momentum*Yv - lr*grad_Y)]
    givens = {X: X_shared, sigma: sigma_shared, Y: Y_shared, Yv: Yv_shared,
              lr: lr_shared, momentum: momentum_shared}

    update_Yv = theano.function([], cost, givens=givens, updates=updates)

    # Setting update for Y
    givens = {Y: Y_shared, Yv: Yv_shared}
    updates = [(Y_shared, Y + Yv)]

    update_Y = theano.function([], [], givens=givens, updates=updates)

    # Momentum-based gradient descent
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        c = update_Yv()
        update_Y()
        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c)))

    return np.array(Y_shared.get_value())
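
The two update steps above implement classical momentum gradient descent: the velocity becomes momentum*Yv - lr*grad_Y, and Y then moves by that velocity. The same rule in plain NumPy (step sizes here are illustrative assumptions):

import numpy as np

def momentum_step(Y, Yv, grad_Y, lr=0.1, momentum=0.5):
    # Yv <- momentum*Yv - lr*grad_Y ; Y <- Y + Yv
    Yv = momentum * Yv - lr * grad_Y
    return Y + Yv, Yv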
Code Example #9
File: similarity.py  Project: valadhi/AttachmentDBN
  def train(self, data1, data2, similarities, miniBatchSize=20, epochs=200):
    nrMiniBatches = len(data1) / miniBatchSize
    miniBatchIndex = T.lscalar()
    momentum = T.fscalar()
    learningRate = T.fscalar()

    learningRateMiniBatch = np.float32(self.learningRate / miniBatchSize)
    print "learningRateMiniBatch in similarity net"
    print learningRateMiniBatch

    net = self._trainRBM(data1, data2)

    data1  = theano.shared(np.asarray(data1,dtype=theanoFloat))
    data2  = theano.shared(np.asarray(data2,dtype=theanoFloat))
    similarities = theano.shared(np.asarray(similarities,dtype=theanoFloat))

    # The mini-batch data is a matrix
    x = T.matrix('x', dtype=theanoFloat)
    y = T.matrix('y', dtype=theanoFloat)
    self.x = x
    self.y = y

    z = T.vector('z', dtype=theanoFloat)

    trainer = Trainer(x, y, net)
    self.trainer = trainer

    # error = T.sum(T.sqr(trainer.output-z))
    error = T.sum(T.nnet.binary_crossentropy(trainer.output, z))

    updates = self.buildUpdates(trainer, error, learningRate, momentum)

    # Now you have to define the theano function
    discriminativeTraining = theano.function(
      inputs=[miniBatchIndex, learningRate, momentum],
      outputs=[trainer.output, trainer.cos],
      updates=updates,
      givens={
            x: data1[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize],
            y: data2[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize],
            z: similarities[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize],
            })

    for epoch in xrange(epochs):
      print "epoch", epoch
      momentum = np.float32(min(np.float32(0.5) + epoch * np.float32(0.1),
                       np.float32(0.95)))

      for miniBatch in xrange(nrMiniBatches):
        output, cos = discriminativeTraining(miniBatch, learningRateMiniBatch, momentum)

    print trainer.w.get_value()
    print trainer.b.get_value()
Code Example #10
File: dnn.py  Project: mclaughlin6464/pdnn
    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        #print len(self.layers)
        #print [T.shape(l.W)[0] for l in self.layers]

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        #print T.shape(train_set_x), T.shape(train_set_y)

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
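
theano.Param (renamed theano.In in later Theano releases) is what allows learning_rate and momentum above to fall back to defaults when the caller omits them. A minimal standalone sketch of that behaviour:

import theano
import theano.tensor as T

x = T.fscalar('x')
lr = T.fscalar('lr')
f = theano.function([x, theano.Param(lr, default=0.1)], x * lr,
                    allow_input_downcast=True)
print(f(2.0))        # uses the default lr=0.1 -> 0.2
print(f(2.0, 0.5))   # overrides the default  -> 1.0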
Code Example #11
File: bimu_expectation.py  Project: MorLong/bimu
    def __init__(self, input_dim, emb_dim, n_senses, W_w_f, lambdaH, lambdaL2, adjust, lambdaF):
        super().__init__(input_dim, emb_dim, n_senses, W_w_f, lambdaF)
        self.Wb = zeros((input_dim+1, n_senses), name="Wb")  # sense- and word-specific bias
        self.H = TT.fscalar()  # entropy
        self.L2 = TT.fscalar()
        self.lambdaH = lambdaH  # weight for entropy regularizer
        self.lambdaL2 = lambdaL2  # weight for L2 regularizer

        if lambdaL2 == 0.:
            self.L2 = 0.
        else:
            self.L2 = TT.sum(TT.sqr(self.W_w)) + TT.sum(TT.sqr(self.W_c))
        self.adjust = adjust
Code Example #12
File: ctrlr_optimizer.py  Project: bentzinir/Buffe
    def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):

        params=[None, None]

        if trained_model[0]:
            params[0] = common.load_params(trained_model[0])

        if trained_model[1]:
            params[1] = common.load_params(trained_model[1])

        self.lr_func = []
        self.lr_func.append(create_learning_rate_func(solver_params['controler_0']))
        self.lr_func.append(create_learning_rate_func(solver_params['controler_1']))

        self.x_host_0 = tt.fvector('x_host_0')
        self.v_host_0 = tt.fvector('v_host_0')
        self.x_target_0 = tt.fvector('x_target_0')
        self.v_target_0 = tt.fvector('v_target_0')
        self.x_mines_0 = tt.fmatrix('x_mines_0')
        self.mines_map = tt.fmatrix('mines_map')
        self.time_steps = tt.fvector('time_steps')
        self.force = tt.fmatrix('force')
        self.n_steps_0 = tt.iscalar('n_steps_0')
        self.n_steps_1 = tt.iscalar('n_steps_1')
        self.lr = tt.fscalar('lr')
        self.goal_1 = tt.fvector('goal_1')
        self.trnsprnt = tt.fscalar('trnsprnt')
        self.rand_goals = tt.fmatrix('rand_goals')
        self.game_params = game_params
        self.arch_params = arch_params
        self.solver_params = solver_params
        self.sn_dir = sn_dir

        self.model = CONTROLLER(self.x_host_0,
                                self.v_host_0,
                                self.x_target_0,
                                self.v_target_0,
                                self.x_mines_0,
                                self.mines_map,
                                self.time_steps,
                                self.force,
                                self.n_steps_0,
                                self.n_steps_1,
                                self.lr,
                                self.goal_1,
                                self.trnsprnt,
                                self.rand_goals,
                                self.game_params,
                                self.arch_params,
                                self.solver_params,
                                params)
Code Example #13
File: net.py  Project: yueranyuan/vector_edu
    def compile(self):
        """ compile theano functions
        """
        self.t_L1_reg = T.fscalar('L1_reg')
        self.t_L2_reg = T.fscalar('L2_reg')
        self.t_learning_rate = T.fscalar('learning_rate')
        cost = self.loss + self.t_L1_reg * self.L1 + self.t_L2_reg * self.L2_sqr

        self.parameter_updates = [(param, param - self.t_learning_rate * T.grad(cost, param)) for param in self.params]

        self._tf_train = theano.function(inputs=[self.input, self.true_output, self.t_L1_reg, self.t_L2_reg, self.t_learning_rate],
                                         outputs=[self.loss], allow_input_downcast=True, updates=self.parameter_updates)
        self._tf_infer = theano.function(inputs=[self.input], outputs=[self.output], allow_input_downcast=True)
        self._tf_evaluate = theano.function(inputs=[self.input, self.true_output], outputs=[self.loss],
                                            allow_input_downcast=True)
Code Example #14
File: modelbase.py  Project: osdf/Theano-Lights
    def compile(self, cost, error_map_pyx, add_updates=[]):
        batch_idx = T.iscalar()
        learning_rate = T.fscalar()

        updates, norm_grad = self.hp.optimizer(cost, self.params.values(), lr=learning_rate)

        updates += add_updates

        self.outidx = {'cost':0, 'error_map_pyx':1, 'norm_grad':2}
        outputs = [cost, error_map_pyx]

        self.train = theano.function(inputs=[batch_idx, learning_rate], updates=updates,
                                     givens={
                                         self.X:self.data['tr_X'][batch_idx * self.hp.batch_size : 
                                                                  (batch_idx+1) * self.hp.batch_size],
                                         self.Y:self.data['tr_Y'][batch_idx * self.hp.batch_size : 
                                                                  (batch_idx+1) * self.hp.batch_size]},
                                     outputs=outputs + [norm_grad])
        
        self.validate = theano.function(inputs=[batch_idx], 
                                        givens={
                                         self.X:self.data['va_X'][batch_idx * self.hp.test_batch_size : 
                                                                  (batch_idx+1) * self.hp.test_batch_size],
                                         self.Y:self.data['va_Y'][batch_idx * self.hp.test_batch_size : 
                                                                  (batch_idx+1) * self.hp.test_batch_size]},
                                    outputs=outputs)
        
        self.test = theano.function(inputs=[batch_idx], 
                                    givens={
                                         self.X:self.data['te_X'][batch_idx * self.hp.test_batch_size : 
                                                                  (batch_idx+1) * self.hp.test_batch_size],
                                         self.Y:self.data['te_Y'][batch_idx * self.hp.test_batch_size : 
                                                                  (batch_idx+1) * self.hp.test_batch_size]},
                                    outputs=outputs)
Code Example #15
File: nnet_archs.py  Project: Verderey/timit_tools
    def get_SGD_trainer(self, debug=False):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x1 = T.fmatrix('batch_x1')
        batch_x2 = T.fmatrix('batch_x2')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        # using mean_cost so that the learning rate is not too dependent on the batch size
        cost = self.mean_cos_sim_cost
        gparams = T.grad(cost, self.params)

        # compute list of weights updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate 

        outputs = cost
        if debug:
            outputs = [cost] + self.params + gparams +\
                    [updates[param] for param in self.params]

        train_fn = theano.function(inputs=[theano.Param(batch_x1), 
            theano.Param(batch_x2), theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=outputs,
            updates=updates,
            givens={self.x1: batch_x1, self.x2: batch_x2, self.y: batch_y})

        return train_fn
Code Example #16
File: test_pfunc.py  Project: gwtaylor/Theano
    def test_allow_downcast_floatX(self):
        a = tensor.fscalar('a')
        b = tensor.fvector('b')

        f = pfunc([a, b], (a + b), allow_input_downcast=True)
        g = pfunc([a, b], (a + b), allow_input_downcast=False)
        h = pfunc([a, b], (a + b), allow_input_downcast=None)

        # If the values can be accurately represented, OK
        assert numpy.all(f(0, [0]) == 0)
        assert numpy.all(g(0, [0]) == 0)
        assert numpy.all(h(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert numpy.allclose(f(0, [0.1]), 0.1)
        self.assertRaises(TypeError, g, 0, [0.1])
        self.assertRaises(TypeError, h, 0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert numpy.allclose(f(0.1, [0]), 0.1)
        self.assertRaises(TypeError, g, 0.1, [0])
        if config.floatX == 'float32':
            assert numpy.allclose(h(0.1, [0]), 0.1)
        else:
            self.assertRaises(TypeError, h, 0.1, [0])
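
A condensed sketch of the same allow_input_downcast behaviour outside the test class (pfunc above behaves like theano.function here; a standard Theano install is assumed):

import theano
import theano.tensor as tensor

a = tensor.fscalar('a')
f = theano.function([a], a * 2, allow_input_downcast=True)
g = theano.function([a], a * 2, allow_input_downcast=False)

print(f(0.1))                 # Python float is downcast to float32
try:
    g(0.1)                    # refused: the cast would lose precision
except TypeError as e:
    print('TypeError:', e)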
Code Example #17
    def ADAMopt(self, tVars, loss, lr, momentum=0):

        i = T.iscalar('i'); lr = T.fscalar('lr');
        grads = T.grad(loss, tVars)
        '''ADAM Code from
            https://github.com/danfischetti/deep-recurrent-attentive-writer/blob/master/DRAW/adam.py
        '''
        # NOTE: the original snippet referenced undefined names (model.params, gparams,
        # beta1, beta2, epsilon, l). Here the tVars/grads computed above are used, and
        # defaults consistent with this Adam formulation are assumed for the constants.
        beta1, beta2, epsilon, l = 0.1, 0.001, 1e-8, 1. - 1e-8

        self.m = [theano.shared(name='m',
                value=np.zeros(param.get_value().shape, dtype=theano.config.floatX)) for param in tVars]
        self.v = [theano.shared(name='v',
                value=np.zeros(param.get_value().shape, dtype=theano.config.floatX)) for param in tVars]
        self.t = theano.shared(name='t', value=np.asarray(1).astype(theano.config.floatX))
        updates = [(self.t, self.t + 1)]

        for param, gparam, m, v in zip(tVars, grads, self.m, self.v):

            b1_t = 1 - (1 - beta1) * (l ** (self.t - 1))
            m_t = b1_t * gparam + (1 - b1_t) * m
            updates.append((m, m_t))
            v_t = beta2 * (gparam ** 2) + (1 - beta2) * v
            updates.append((v, v_t))
            m_t_bias = m_t / (1 - (1 - beta1) ** self.t)
            v_t_bias = v_t / (1 - (1 - beta2) ** self.t)
            if param.get_value().ndim == 1:
                # larger step for 1-d parameters (biases), as in the original
                updates.append((param, param - 5 * lr * m_t_bias / (T.sqrt(v_t_bias) + epsilon)))
            else:
                updates.append((param, param - lr * m_t_bias / (T.sqrt(v_t_bias) + epsilon)))

        # lr appears in the updates, so the compiled function must take it as an input
        return theano.function([lr], loss, updates=updates)
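
For reference, the update the snippet above is aiming at is the standard Adam rule (Kingma & Ba). One Adam step in plain NumPy, with the usual default hyperparameters (these are assumptions, not values from the project):

import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # Biased moment estimates, bias correction, then the parameter step.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    return param - lr * m_hat / (np.sqrt(v_hat) + eps), m, v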
Code Example #18
File: test_blas.py  Project: gyenney/Tools
    def cmp(a_shp, b_shp):
        a = tensor.fmatrix()
        b = tensor.fmatrix()
        scalar = tensor.fscalar()
        av = my_rand(*a_shp)
        bv = my_rand(*b_shp)

        f = theano.function(
                [a, b],
                tensor.dot(a, b) * numpy.asarray(4, 'float32'),
                mode=mode_with_gpu)
        f2 = theano.function(
                [a, b],
                tensor.dot(a, b) * numpy.asarray(4, 'float32'))
        t = f.maker.fgraph.toposort()
        assert len(t) == 4
        assert isinstance(t[0].op, tcn.GpuFromHost)
        assert isinstance(t[1].op, tcn.GpuFromHost)
        assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
        assert isinstance(t[3].op, tcn.HostFromGpu)
        assert numpy.allclose(f(av, bv), f2(av, bv))

        f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
                mode=mode_with_gpu)
        f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
        t = f.maker.fgraph.toposort()
        assert len(t) == 4
        assert isinstance(t[0].op, tcn.GpuFromHost)
        assert isinstance(t[1].op, tcn.GpuFromHost)
        assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
        assert isinstance(t[3].op, tcn.HostFromGpu)
        assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
Code Example #19
File: dnn.py  Project: jortizcs/machine-learning
    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        # using mean_cost so that the learning rate is not too dependent
        # on the batch size
        gparams = T.grad(self.mean_cost, self.params)

        # compute list of weights updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            if self.max_norm:
                W = param - gparam * learning_rate
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})

        return train_fn
Code Example #20
File: __init__.py  Project: Tinrry/anna
 def __init__(self, name, path, learning_rate=0.001):
     self.r_symbol = T.fvector('r')
     self.gamma_symbol = T.fscalar('gamma')
     self.action_symbol = T.fmatrix('action')
     self.y_symbol = T.fvector('y')
     super(ReinforcementModel, self).__init__(
         name, path, learning_rate=learning_rate)
Code Example #21
 def __init__(self, n_in, n_classes, l2 = None):
     # Model
     W = 0.01*np.random.randn(n_in, n_classes).astype(dtype)
     b = 0.01*np.random.randn(n_classes).astype(dtype)
     self.W = theano.shared(W, name='W')
     self.b = theano.shared(b, name='b')
     self.params = [self.W, self.b]
     
     self.input = T.fmatrix('input')
     self.y_true = T.ivector('y_true')
     self.y_hat = T.nnet.softmax(T.dot(self.input, self.W) + self.b)
     
     # Train
     self.loglikelihood = -T.log(self.y_hat[T.arange(self.y_hat.shape[0]),self.y_true])
     self.cost = T.mean(self.loglikelihood)
     if l2:
         for p in self.params:
             self.cost += l2*T.sum(p**2)
     self.gradients = T.grad(self.cost, self.params)
     self.lr = T.fscalar('lr')
     updates = [(p,p-self.lr*g) for p,g in zip(self.params, self.gradients)]
     
     self.train = theano.function(inputs=[self.input, self.y_true, self.lr], 
                                  outputs=self.cost, 
                                  updates = updates,
                                  allow_input_downcast=True)
                                  
     # Predict
     self.y_predict = T.argmax(self.y_hat, axis=1)
     self.predict = theano.function(inputs=[self.input],
                                    outputs=self.y_predict,
                                    allow_input_downcast=True)
Code Example #22
    def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.mean_cost, self.params)

        # compute list of weights updates
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=self.mean_cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn
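
The loop above is the Adagrad rule: accumulate squared gradients per parameter and divide the learning rate by the square root of that accumulator. The same step in plain NumPy (eps plays the role of self._eps; the default values are illustrative assumptions):

import numpy as np

def adagrad_step(param, grad, accugrad, lr=0.01, eps=1e-6):
    # accugrad <- accugrad + grad^2 ; param <- param - lr/sqrt(accugrad+eps) * grad
    accugrad = accugrad + grad ** 2
    return param - (lr / np.sqrt(accugrad + eps)) * grad, accugrad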
Code Example #23
File: modelbase.py  Project: ronvohra/Theano-Lights
    def compile(self, log_pxz, log_qpz, cost, a_pxz):
        batch_idx = T.iscalar()
        learning_rate = T.fscalar()

        updates, norm_grad = self.hp.optimizer(cost, self.params.values(), lr=learning_rate)

        self.outidx = {'cost':0, 'cost_p':1, 'cost_q':2, 'norm_grad':3}
        outputs = [cost, log_pxz, log_qpz]

        self.train = theano.function(inputs=[batch_idx, learning_rate], 
                                     givens={self.X:self.data['tr_X'][batch_idx * self.hp.batch_size : 
                                                                      (batch_idx+1) * self.hp.batch_size]},
                                     outputs=outputs + [norm_grad], updates=updates)
        
        self.validate = theano.function(inputs=[batch_idx], 
                                        givens={self.X:self.data['tr_X'][batch_idx * self.hp.test_batch_size : 
                                                                      (batch_idx+1) * self.hp.test_batch_size]},
                                        outputs=outputs)
        
        self.test = theano.function(inputs=[batch_idx], 
                                    givens={self.X:self.data['te_X'][batch_idx * self.hp.test_batch_size : 
                                                                      (batch_idx+1) * self.hp.test_batch_size]},
                                    outputs=outputs)
        
        n_samples = T.iscalar()

        if self.resample_z:
            self.data['ge_Z'] = srnd.normal((self.max_gen_samples, self.n_z), dtype=theano.config.floatX)
        else:
            self.data['ge_Z'] = shared(np.random.randn(self.max_gen_samples, self.n_z))

        self.decode = theano.function(inputs=[n_samples], 
                                      givens={self.Z:self.data['ge_Z'][:n_samples]}, 
                                      outputs=a_pxz)
Code Example #24
File: model.py  Project: philippmuller/hackmusic
    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):

        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80

        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())

        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        p_input_size = t_layer_sizes[-1] + 2
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))

        self.dropout = dropout

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

        print "model-setup::Trace-1"
        self.setup_train()
        print "model-setup::Trace-2"
        self.setup_predict()
        print "model-setup::Trace-3"
        self.setup_slow_walk()
Code Example #25
    def test_copy_delete_updates(self):
        w = T.iscalar('w')
        x = T.fscalar('x')
        # SharedVariable for tests, one of them has update
        y = theano.shared(value=1, name='y')
        z = theano.shared(value=2, name='z')
        out = x + y + z

        # Test for different linkers
        # for mode in ["FAST_RUN","FAST_COMPILE"]:
        # second_time = False
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = theano.function([x], out, mode=mode, updates={z: z * 2})
            cpy = ori.copy(delete_updates=True)

            assert cpy(1)[0] == 4
            assert cpy(1)[0] == 4
            assert cpy(1)[0] == 4

        # Test if unused implicit and explicit inputs from delete_updates
        # are ignored as intended.
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = theano.function([x], x, mode=mode, updates={z: z * 2})
            cpy = ori.copy(delete_updates=True)

            ori = theano.function([x, w], x, mode=mode, updates={z: z + w})
            cpy = ori.copy(delete_updates=True)
Code Example #26
    def test_copy_share_memory(self):
        x = T.fscalar('x')
        # SharedVariable for tests, one of them has update
        y = theano.shared(value=1)
        z = theano.shared(value=2)
        out = T.tanh((x + y + 2) / (x + z - 0.2)**2)

        # Test for different linkers
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = theano.function([x], [out], mode=mode, updates={z: z + 1})
            cpy = ori.copy(share_memory=True)

            # Test if memories shared
            storage_map_ori = ori.fn.storage_map
            storage_map_cpy = cpy.fn.storage_map
            fgraph_cpy = cpy.maker.fgraph

            # Assert intermediate and constant storages are shared,
            # and output storages are not shared.
            i_o_variables = fgraph_cpy.inputs + fgraph_cpy.outputs
            ori_storages = storage_map_ori.values()
            l = [val for key, val in storage_map_cpy.items()
                 if key not in i_o_variables or isinstance(key, theano.tensor.Constant)]
            for storage in l:
                self.assertTrue(any([storage is s for s in ori_storages]))

            # Assert storages of SharedVariable without updates are shared
            for (input, _1, _2), here, there in zip(ori.indices,
                                                    ori.input_storage,
                                                    cpy.input_storage):
                self.assertTrue(here.data is there.data)
Code Example #27
    def __build_backprop(self):

        y_init = self.outside_world.y_data_one_hot                    # initialize y=y_data
        h_init = my_op(2 * (T.dot(rho(y_init), self.W2.T) + self.bh)) # initialize h by backward propagation
        x_init = my_op(T.dot(rho(h_init), self.W1.T) + self.bx)       # initialize x by backward propagation

        Delta_y = y_init - self.y
        Delta_h = h_init - self.h
        Delta_x = x_init - self.x

        by_dot = T.mean(Delta_y, axis=0)
        W2_dot = T.dot(self.rho_h.T, Delta_y) / T.cast(self.x.shape[0], dtype=theano.config.floatX)
        bh_dot = T.mean(Delta_h, axis=0)
        W1_dot = T.dot(self.rho_x.T, Delta_h) / T.cast(self.x.shape[0], dtype=theano.config.floatX)
        bx_dot = T.mean(Delta_x, axis=0)

        alpha  = T.fscalar('alpha')
        by_new = self.by + alpha * by_dot
        W2_new = self.W2 + alpha * W2_dot
        bh_new = self.bh + alpha * bh_dot
        W1_new = self.W1 + alpha * W1_dot
        bx_new = self.bx + alpha * bx_dot
        
        updates_states = [(self.x, x_init), (self.h, h_init), (self.y, y_init)]
        updates_params = [(self.by, by_new), (self.W2, W2_new), (self.bh, bh_new), (self.W1, W1_new)]

        backprop = theano.function(
            inputs=[alpha],
            outputs=[],
            updates=updates_states+updates_params
        )

        return backprop
Code Example #28
File: rnns.py  Project: KayneWest/Stuff
	def get_SAG_trainer(self, R=1., alpha=0., debug=False):  # alpha for reg. TODO
		batch_x = T.fmatrix('batch_x')
		batch_y = T.ivector('batch_y')
		ind_minibatch = T.iscalar('ind_minibatch')
		n_seen = T.fscalar('n_seen')
		# compute the gradients with respect to the model parameters
		cost = self.mean_cost
		gparams = T.grad(cost, self.params)
		#sparams = T.grad(cost, self.pre_activations)  # SAG specific

		scaling = numpy.float32(1. / (R / 4. + alpha))

		updates = OrderedDict()
		for accugrad, gradient_memory, param, gparam in zip(
				self._accugrads, self._sag_gradient_memory,
				#self._accugrads, self._sag_gradient_memory[ind_minibatch.eval()],
				self.params, gparams):
			new = gparam + alpha * param
			agrad = accugrad + new - gradient_memory[ind_minibatch]
			# updates[gradient_memory[ind_minibatch]] = new
			updates[gradient_memory] = T.set_subtensor(gradient_memory[ind_minibatch], new)

			updates[param] = param - (scaling / n_seen) * agrad
			updates[accugrad] = agrad

		train_fn = theano.function(inputs=[theano.Param(batch_x), 
			theano.Param(batch_y), theano.Param(ind_minibatch),
			theano.Param(n_seen)],
			outputs=cost,
			updates=updates,
			givens={self.x: batch_x, self.y: batch_y})

		return train_fn
Code Example #29
    def __init__(self, input_dimensionality, output_dimensionality, params=None, learning_rate=0.0001, momentum=.25):
        self.input_dimensionality = input_dimensionality
        self.output_dimensionality = output_dimensionality
        self.learning_rate = learning_rate
        srng = theano.tensor.shared_randomstreams.RandomStreams(seed=1234)

        input_seq = T.fmatrix('input_seq')
        dropoutRate = T.fscalar('dropoutRate')

        if params is None:
            self.ff1 = FeedForwardLayer(input_seq, self.input_dimensionality, 2000, rng=srng, dropout_rate=dropoutRate)
            self.ff2 = FeedForwardLayer(self.ff1.output, 2000, 1000, rng=srng, dropout_rate=dropoutRate)
            self.ff3 = FeedForwardLayer(self.ff2.output, 1000, 800, rng=srng, dropout_rate=dropoutRate)
            self.rf = RecurrentLayer(self.ff3.output, 800, 500, False)     # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 800, 500, True)      # Backward layer

            # REVERSE THE BACKWARDS RECURRENT OUTPUTS IN TIME (from [T-1, 0] ===> [0, T-1])
            self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output[::-1, :]), axis=1), 2*500, self.output_dimensionality)

        else:
            self.ff1 = FeedForwardLayer(input_seq, self.input_dimensionality, 2000, parameters=params[0], rng=srng, dropout_rate=dropoutRate)
            self.ff2 = FeedForwardLayer(self.ff1.output, 2000, 1000, parameters=params[1], rng=srng, dropout_rate=dropoutRate)
            self.ff3 = FeedForwardLayer(self.ff2.output, 1000, 800, parameters=params[2], rng=srng, dropout_rate=dropoutRate)
            self.rf = RecurrentLayer(self.ff3.output, 800, 500, False, parameters=params[3])     # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 800, 500, True, parameters=params[4])      # Backward layer

            # REVERSE THE BACKWARDS RECURRENT OUTPUTS IN TIME (from [T-1, 0] ===> [0, T-1])
            self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output[::-1, :]), axis=1), 2*500, self.output_dimensionality, parameters=params[5])


        self.probabilities = theano.function(
            inputs=[input_seq, dropoutRate],
            outputs=[self.s.output],
            allow_input_downcast=True
        )
Code Example #30
def train_linreg(X_train, y_train, eta, epochs):

    costs = []
    # Initialize arrays
    eta0 = T.fscalar('eta0')
    y = T.fvector(name='y')
    X = T.fmatrix(name='X')
    w = theano.shared(np.zeros(
                      shape=(X_train.shape[1] + 1),
                      dtype=theano.config.floatX),
                      name='w')

    # calculate cost
    net_input = T.dot(X, w[1:]) + w[0]
    errors = y - net_input
    cost = T.sum(T.pow(errors, 2))

    # perform gradient update
    gradient = T.grad(cost, wrt=w)
    update = [(w, w - eta0 * gradient)]

    # compile model
    train = theano.function(inputs=[eta0],
                            outputs=cost,
                            updates=update,
                            givens={X: X_train,
                                    y: y_train})

    for _ in range(epochs):
        costs.append(train(eta))

    return costs, w
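
A hedged usage sketch for train_linreg above, fitting a toy line y = 2x + 1. The data and hyperparameters are illustrative assumptions; eta is cast to float32 so the fscalar input accepts it regardless of floatX:

import numpy as np

X_train = np.arange(10, dtype=np.float32).reshape(-1, 1)
y_train = (2 * np.arange(10) + 1).astype(np.float32)

costs, w = train_linreg(X_train, y_train, eta=np.float32(0.001), epochs=20)
print(costs[-1], w.get_value())   # cost decreases; w moves toward [1., 2.]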
Code Example #31
    def __init__(self, wordMatrix, shape, filters, rfilter, features, poolSize,
                 time, categories, static, dropoutRate, learningRate, useVal,
                 name):
        '''
        >>>initialize the model
        
        >>>type wordMatrix: matrix
        >>>para wordMatrix: input tensor
        >>>type shape: tuple or list of length 4
        >>>para shape: (batchSize,feature maps,sentenceLen,dimension)
        >>>type filters: tuple or list of 2-len tuple or list
        >>>para filters: the size of filters in each layer
        >>>type rfilter: tuple or list of 2-len tuple or list
        >>>para rfilter: the size of recurrent connection in each layer
        >>>type features: tuple or list of int
        >>>para features: num of feature maps in each layer
        >>>type poolSize: tuple or list of 2-len tuple or list
        >>>para poolSize: pooling size of each layer
        >>>type time: int
        >>>para time: the iteration times of recurrent connection
        >>>type categories: int
        >>>para categories: target categories
        >>>type static: boolean
        >>>para static: static wordVec or not
        >>>type dropoutRate: tuple or list of float
        >>>para dropoutRate: dropout rate of each layer
        >>>type learningRate: float
        >>>para learningRate: learning rate
        >>>type useVal: bool
        >>>para useVal: whether or not to use validation set
        >>>type name: str
        >>>para name: the name of the model
        '''
        self.learningRate = learningRate
        self.static = static
        self.name = name
        self.useVal = useVal
        self.batchSize, self.featureMaps, self.sentenceLen, self.wdim = shape
        self.categories = categories

        rng = np.random.RandomState(2011010539)

        self.x = T.matrix('x')
        self.y = T.ivector('y')
        self.lr = T.fscalar('lr')

        self.wordVec = theano.shared(wordMatrix, name='wordVec')
        input = self.wordVec[T.cast(self.x.flatten(),
                                    dtype='int32')].reshape(shape)

        self.deep = min(len(features), len(filters), len(poolSize))
        self.layers = []
        print 'This is a network of %i layer(s)' % self.deep

        for i in xrange(self.deep):
            if i == 0:
                layerSize = shape
                layerInput = input
                fmapIn = self.featureMaps
            else:
                layerSize = [
                    self.batchSize, features[i - 1],
                    (self.layers[-1].shape[2] - filters[i - 1][0] + 1) /
                    poolSize[i - 1][0],
                    (self.layers[-1].shape[3] - filters[i - 1][1] + 1) /
                    poolSize[i - 1][1]
                ]
                layerInput = self.layers[-1].output
                fmapIn = features[i - 1]
            newlayer = DropoutConvPool(
                rng=rng,
                input=layerInput,
                shape=layerSize,
                filters=[features[i], fmapIn, filters[i][0], filters[i][1]],
                pool=poolSize[i],
                dropout=dropoutRate[i])
            self.layers.append(newlayer)

        classifierInputShape = [
            self.batchSize, features[self.deep - 1],
            (self.layers[-1].shape[2] - filters[self.deep - 1][0] + 1) /
            poolSize[self.deep - 1][0],
            (self.layers[-1].shape[3] - filters[self.deep - 1][1] + 1) /
            poolSize[self.deep - 1][1]
        ]
        self.classifier = LogisticRegression(
            input=self.layers[-1].output.flatten(2),
            n_in=np.prod(classifierInputShape[1:]),
            n_out=categories)

        self.params = self.classifier.param
        for i in xrange(self.deep):
            self.params += self.layers[i].param
        if static == False:
            self.params += [self.wordVec]

        weights = 0
        for item in self.classifier.param:
            weights += T.sum(T.sqr(item))

        self.cost = self.classifier.negative_log_likelyhood(self.y)
        self.errors = self.classifier.errors(self.y)

        self.sgdUpdate = sgd(self.params, self.cost, self.lr)
        self.sgdMomentumUpdate = sgdMomentum(self.params, self.cost, self.lr)
        self.adadeltaUpdate = AdadeltaUpdate(self.params, self.cost, self.lr)
        self.adadeltaMomentumUpdate = AdadeltaMomentumUpdate(
            params=self.params, cost=self.cost, stepSize=self.lr)

        self.sgdDelta = self.plotUpdate(self.sgdUpdate)
        self.sgdMomentumDelta = self.plotUpdate(self.sgdMomentumUpdate)
        self.adadeltaDelta = self.plotUpdate(self.adadeltaUpdate)
        self.adadeltaMomentumDelta = self.plotUpdate(
            self.adadeltaMomentumUpdate)

        print 'model %s constructed!' % name
Code Example #32
    def train_lasagne(self,
                      learning_rate_value=0.2,
                      learning_rate_decay=0.9999,
                      num_epochs=4000):
        # Load the dataset

        self.saved_params = []

        print "Loading data..."

        learning_rate = T.fscalar('learning_rate')
        epoch = T.fscalar('epoch')

        # Create neural network model (depending on first command line parameter)
        print "Building model and compiling functions..."

        self.network = self.build_network(self.x)

        # Create a loss expression for training, i.e., a scalar objective we want
        # to minimize (for our multi-class problem, it is the cross-entropy loss):
        prediction = lasagne.layers.get_output(self.network['prob'],
                                               deterministic=False)

        loss = lasagne.objectives.categorical_crossentropy(prediction, self.y)
        loss = loss.mean()
        # We could add some weight decay as well here, see lasagne.regularization.

        # Create update expressions for training, i.e., how to modify the
        # parameters at each training step. Here, we'll use Stochastic Gradient
        # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.

        params = lasagne.layers.get_all_params(self.network['prob'],
                                               trainable=True)

        updates = lasagne.updates.nesterov_momentum(loss,
                                                    params,
                                                    learning_rate,
                                                    momentum=0.8)

        #updates = lasagne.updates.adam(loss,params,learning_rate=learning_rate_value, beta1=0.9, beta2=0.999, epsilon=1e-08)

        # Create a loss expression for validation/testing. The crucial difference
        # here is that we do a deterministic forward pass through the network,
        # disabling dropout layers.

        test_prediction = lasagne.layers.get_output(self.network['prob'],
                                                    deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(
            test_prediction, self.y)
        test_loss = test_loss.mean()
        # As a bonus, also create an expression for the classification accuracy:
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), self.y),
                          dtype=theano.config.floatX)

        ## Masking the gradients if masks are defined
        #if not self.mask_weights is None:
        #for param in self.params[-4:-2]:
        #if param.name == 'W':
        #updates[param] *= self.mask_weights
        #elif param.name == 'b':
        #updates[param] *= self.mask_biases

        # Compile a function performing a training step on a mini-batch (by giving
        # the updates dictionary) and returning the corresponding training loss:
        train_fn = theano.function([self.x, self.y, learning_rate],
                                   loss,
                                   updates=updates,
                                   on_unused_input='ignore')

        val_fn = theano.function([self.x, self.y], [test_loss, test_acc],
                                 on_unused_input='ignore')

        # Loading the training and validation set
        # Loading the validation set
        self.prepare('valid')

        n_train_batches = self.nclasses * self.seq_per_class / self.batch_size
        n_valid_batches = self.nclasses * self.seq_per_class / self.batch_size

        # Finally, launch the training loop.
        print "Starting training..."
        # We iterate over epochs:

        self.best_validation_acc = -numpy.inf
        done_looping = False

        for epoch in range(num_epochs):
            self.prepare('train')
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in self.iterate_minibatches(self.mocap_train,
                                                  self.labels_train,
                                                  self.batch_size,
                                                  shuffle=True):
                inputs, targets = batch
                train_err += train_fn(inputs,
                                      targets,
                                      learning_rate=learning_rate_value)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in self.iterate_minibatches(self.mocap_valid,
                                                  self.labels_valid,
                                                  self.batch_size,
                                                  shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            this_validation_acc = val_acc

            #self._print_results(this_validation_loss, ts, iter_,
            #learning_rate_value)

            if this_validation_acc > self.best_validation_acc:
                self.best_validation_acc = this_validation_acc

                f = open(self.filters_file, 'w')
                numpy.savez(
                    f,
                    *lasagne.layers.get_all_param_values(self.network['prob']))
                f.close()

            learning_rate_value = learning_rate_value * learning_rate_decay

            print "Epoch= %d Learning rate = %3.4f Validation Accuracy= %3.3f" % (
                epoch + 1, learning_rate_value, val_acc / val_batches * 100)
Code Example #33
def test_ModelC_AllCNN(learning_rate=0.05,
                       n_epochs=350,
                       batch_size=200,
                       L2_reg=0.001,
                       input_ndo_p=0.8,
                       layer_ndo_p=0.5,
                       save_model=True,
                       save_freq=50):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    
    :type batch_size: int
    :param batch_size: the number of training examples per batch
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    print 'n_train_batches: ', n_train_batches
    print 'n_valid_batches: ', n_valid_batches
    print 'n_test_batches: ', n_test_batches

    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    learning_rate = numpy.asarray(learning_rate, dtype=numpy.float32)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    lr = T.fscalar()
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # drop the input only while training, don't drop while testing
    dropout_input = T.switch(T.neq(training_enabled, 0),
                             drop(layer0_input, p=input_ndo_p),
                             input_ndo_p * layer0_input)
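    # Hedged aside (not part of the original source): scaling by the keep
    # probability input_ndo_p at test time matches the expected value of the
    # training-time dropout mask. A quick numpy check with hypothetical values:
    #   import numpy as np
    #   rng_chk = np.random.RandomState(0)
    #   a = rng_chk.rand(200).astype('float32')
    #   masks = rng_chk.binomial(1, 0.8, size=(10000, 200))
    #   print(np.allclose((masks * a).mean(axis=0), 0.8 * a, atol=0.02))  # ~True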

    layer0 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=dropout_input,
                         filter_shape=(96, 3, 5, 5),
                         image_shape=(batch_size, 3, 32, 32),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer1 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer0.output,
                         filter_shape=(96, 96, 3, 3),
                         image_shape=(batch_size, 96, 32, 32),
                         ssample=(2, 2),
                         bordermode='half',
                         p=0.5)

    layer2 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer1.output,
                         filter_shape=(192, 96, 5, 5),
                         image_shape=(batch_size, 96, 16, 16),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer3 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer2.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 16, 16),
                         ssample=(2, 2),
                         bordermode='half',
                         p=0.5)

    layer4 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer3.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer5 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer4.output,
                         filter_shape=(192, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer6 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer5.output,
                         filter_shape=(10, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    # make sure this is what global averaging does
    global_average = layer6.output.mean(axis=(2, 3))
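    # Hedged check (not part of the original source): the mean over axes (2, 3)
    # of a (batch, channels, H, W) tensor yields one value per channel, i.e.
    # global average pooling. With hypothetical shapes:
    #   import numpy as np
    #   fm = np.random.rand(200, 10, 8, 8).astype('float32')
    #   print(fm.mean(axis=(2, 3)).shape)  # (200, 10)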
    softmax_layer = SoftmaxWrapper(input_data=global_average,
                                   n_in=10,
                                   n_out=10)

    L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() + (layer2.W**2).sum() +
              (layer3.W**2).sum() + (layer4.W**2).sum() + (layer5.W**2).sum() +
              (layer6.W**2).sum())

    # the cost we minimize during training is the NLL of the model
    cost = (softmax_layer.negative_log_likelihood(y) + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer6.params + layer5.params + layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.

    momentum = theano.shared(numpy.cast[theano.config.floatX](0.9),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(param.get_value() *
                                     numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(cost, param)))
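    # Hedged note (not part of the original source): Theano applies all updates
    # of a single function call simultaneously, so each parameter is moved with
    # the previous velocity while the velocity is refreshed with the new
    # gradient; roughly the per-step numpy equivalent is:
    #   p_new = p - lr * v_old
    #   v_new = momentum * v_old + (1.0 - momentum) * grad_of_cost_wrt_p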

    train_model = theano.function(
        [index, lr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    #     patience = 10000  # look as this many examples regardless
    #     patience_increase = 2  # wait this much longer when a new best is found

    #     improvement_threshold = 0.995  # a relative improvement of this much is considered significant

    #    validation_frequency = min(n_train_batches, patience // 2)

    validation_frequency = n_train_batches // 2

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    updateLRAfter = 200

    while (epoch < n_epochs) and (not done_looping):

        # shuffle data before starting the epoch

        epoch = epoch + 1
        if (epoch > updateLRAfter):
            learning_rate *= 0.1
            updateLRAfter += 50
            print 'epoch: ', epoch
            print 'updateLRAfter: ', updateLRAfter
            print 'learning_rate: ', learning_rate

        for minibatch_index in range(n_train_batches):
            #print 'epoch: {0}, minibatch: {1}'.format(epoch, minibatch_index)

            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 50 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, learning_rate)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    #                     if this_validation_loss < best_validation_loss *  \
                    #                        improvement_threshold:
                    #                         patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))


#             if patience <= iter:
#                 done_looping = True
#                 break

        if save_model and epoch % save_freq == 0:
            # add model name to the file to differentiate different models
            with gzip.open('parameters_epoch_{0}.pklz'.format(epoch),
                           'wb') as fp:
                cPickle.dump([param.get_value() for param in params],
                             fp,
                             protocol=2)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Code example #34
0
    def __init__(self, We_initial, params):
        self.textfile = open(params.outfile, 'w')
        We = theano.shared(We_initial)
        embsize = We_initial.shape[1]
        hidden = params.hidden
        self.en_hidden_size = params.hidden_inf
        self.num_labels = params.num_labels
        self.de_hidden_size = params.de_hidden_size

        self.lstm_layers_num = 1

        input_var = T.imatrix(name='inputs')
        target_var = T.imatrix(name='targets')
        target_var_in = T.imatrix(name='in_targets')
        mask_var = T.fmatrix(name='masks')
        mask_var1 = T.fmatrix(name='masks1')
        length = T.iscalar()
        length0 = T.iscalar()
        t_t = T.fscalar()
        t_t0 = T.fscalar()

        Wyy0 = np.random.uniform(
            -0.02, 0.02,
            (self.num_labels + 1, self.num_labels + 1)).astype('float32')
        Wyy = theano.shared(Wyy0)

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

        if params.emb == 1:
            l_emb_word = lasagne.layers.EmbeddingLayer(
                l_in_word,
                input_size=We_initial.shape[0],
                output_size=embsize,
                W=We)
        else:
            l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word,
                                                backwards=True)

        concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2)

        l_reshape_concat = lasagne.layers.ReshapeLayer(concat,
                                                       (-1, 2 * hidden))

        l_local = lasagne.layers.DenseLayer(
            l_reshape_concat,
            num_units=self.num_labels,
            nonlinearity=lasagne.nonlinearities.linear)

        network_params = lasagne.layers.get_all_params(l_local, trainable=True)
        network_params.append(Wyy)

        print len(network_params)
        f = open(
            'POS_CRF_lstm_pretrain.Batchsize_10_dropout_0_LearningRate_0.1_1e-050_emb_0.pickle',
            'r')
        data = pickle.load(f)
        f.close()

        for idx, p in enumerate(network_params):

            p.set_value(data[idx])

        self.params = []
        self.hos = []
        self.Cos = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []

        ei, di, dt = T.imatrices(3)  #place holders
        decoderInputs0, em, em1, dm, tf, di0 = T.fmatrices(6)

        #### the last entry is for the start symbol
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        self.linear = theano.shared(
            name="Linear",
            value=init_xavier_uniform(
                self.de_hidden_size + 2 * self.en_hidden_size,
                self.num_labels),
            borrow=True)
        self.linear_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.num_labels, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)
        #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*hidden, self.de_hidden_size), borrow = True)

        #self.hidden_bias = theano.shared(
        #        name="Hidden to Bias",
        #        value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) ,
        #        borrow=True
        #        )

        input_var_shuffle = input_var.dimshuffle(1, 0)
        mask_var_shuffle = mask_var.dimshuffle(1, 0)
        target_var_in_shuffle = target_var_in.dimshuffle(1, 0)
        target_var_shuffle = target_var.dimshuffle(1, 0)

        self.params += [self.linear, self.linear_bias,
                        self.de_lookuptable]  #concatenate
        state_below = We[input_var_shuffle.flatten()].reshape(
            (input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize))
        enclstm_f = LSTM(embsize, self.en_hidden_size)
        enclstm_b = LSTM(embsize, self.en_hidden_size, True)
        self.encoder_lstm_layers.append(enclstm_f)  #append
        self.encoder_lstm_layers.append(enclstm_b)  #append
        self.params += enclstm_f.params + enclstm_b.params  #concatenate

        hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle)
        hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle)

        hs = T.concatenate([hs_f, hs_b], axis=2)
        Cs = T.concatenate([Cs_f, Cs_b], axis=2)

        hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1)
        Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
        #self.hos += T.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias),
        #self.Cos += T.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias),
        self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),
        self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),

        Encoder = hs

        ei, di, dt = T.imatrices(3)  #place holders
        em, dm, tf, di0 = T.fmatrices(4)
        self.encoder_function = theano.function(inputs=[ei, em],
                                                outputs=Encoder,
                                                givens={
                                                    input_var: ei,
                                                    mask_var: em
                                                })

        state_below = self.de_lookuptable[
            target_var_in_shuffle.flatten()].reshape(
                (target_var_in_shuffle.shape[0],
                 target_var_in_shuffle.shape[1], self.de_hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size, self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, mask_var_shuffle,
                                              ho, Co)

        decoder_lstm_outputs = T.concatenate([state_below, Encoder], axis=2)
        linear_outputs = T.dot(decoder_lstm_outputs,
                               self.linear) + self.linear_bias[None, :]
        softmax_outputs, updates = theano.scan(
            fn=lambda x: T.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            return -m * T.log(pred[T.arange(input_var.shape[0]), y])

        def _step2(ctx_, state_, hs_, Cs_):

            hs, Cs = [], []
            token_idxs = T.cast(state_.argmax(axis=-1), "int32")
            msk_ = T.fill((T.zeros_like(token_idxs, dtype="float32")), 1)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, ctx_.shape[0], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs)
            state_below0 = state_below0.reshape(
                (ctx_.shape[0], self.de_hidden_size))
            state_below0 = T.concatenate([ctx_, state_below0], axis=1)

            newpred = T.dot(state_below0,
                            self.linear) + self.linear_bias[None, :]
            state_below = T.nnet.softmax(newpred)

            extra_p = T.zeros_like(hs[:, :, 0])
            state_below = T.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs

        ctx_0, state_0 = T.fmatrices(2)
        hs_0 = T.ftensor3()
        Cs_0 = T.ftensor3()
        state_below_tmp, hs_tmp, Cs_tmp = _step2(ctx_0, state_0, hs_0, Cs_0)
        self.f_next = theano.function([ctx_0, state_0, hs_0, Cs_0],
                                      [state_below_tmp, hs_tmp, Cs_tmp],
                                      name='f_next')
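        # Hedged usage sketch (not part of the original source; B is a
        # hypothetical batch size): f_next performs one greedy decoding step,
        # embedding the argmax of the previous output distribution, running the
        # decoder LSTM, and returning a new (num_labels + 1)-wide distribution
        # plus the updated hidden/cell states.
        #   B = 16
        #   ctx = np.zeros((B, 2 * self.en_hidden_size), dtype='float32')
        #   prev = np.zeros((B, self.num_labels + 1), dtype='float32')
        #   hs_c = np.zeros((self.lstm_layers_num, B, self.de_hidden_size), dtype='float32')
        #   Cs_c = np.zeros_like(hs_c)
        #   prev, hs_c, Cs_c = self.f_next(ctx, prev, hs_c, Cs_c)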

        hs0, Cs0 = T.as_tensor_variable(
            self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0")
        train_outputs, _ = theano.scan(fn=_step2,
                                       sequences=[Encoder],
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=input_var_shuffle.shape[0])

        predy = train_outputs[0].dimshuffle(1, 0, 2)
        predy = predy[:, :, :-1] * mask_var[:, :, None]
        predy0 = predy.reshape((-1, self.num_labels))

        def inner_function(targets_one_step, mask_one_step, prev_label,
                           tg_energy):
            """
                        :param targets_one_step: [batch_size, t]
                        :param prev_label: [batch_size, t]
                        :param tg_energy: [batch_size]
                        :return:
                        """
            new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
            new_ta_energy_t = tg_energy + T.sum(
                new_ta_energy * targets_one_step, axis=1)
            tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy)

            return [targets_one_step, tg_energy_t]
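        # Hedged illustration (not part of the original source): for one-hot
        # label vectors, dot(prev_label, Wyy[:-1, :-1]) selects the row of
        # transition scores leaving the previous label, and multiplying by
        # targets_one_step and summing picks the score of the current label, so
        # the scans below accumulate the sequence's transition energy; the mask
        # keeps the energy unchanged on padded steps. With hypothetical values
        # (W_np being a hypothetical 3x3 transition matrix):
        #   prev = np.array([[0., 1., 0.]]); cur = np.array([[0., 0., 1.]])
        #   np.sum(np.dot(prev, W_np) * cur, axis=1)  # == W_np[1, 2]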

        local_energy = lasagne.layers.get_output(l_local, {
            l_in_word: input_var,
            l_mask_word: mask_var
        })
        local_energy = local_energy.reshape((-1, length, self.num_labels))
        local_energy = local_energy * mask_var[:, :, None]

        #####################
        # for the end symbole of a sequence
        ####################

        end_term = Wyy[:-1, -1]
        local_energy = local_energy + end_term.dimshuffle(
            'x', 'x', 0) * mask_var1[:, :, None]

        #predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a:input_var, l_mask_word_a:mask_var})

        predy_in = T.argmax(predy0, axis=1)
        A = T.extra_ops.to_one_hot(predy_in, self.num_labels)
        A = A.reshape((-1, length, self.num_labels))

        #predy = predy0.reshape((-1, length, 25))
        #predy = predy*mask_var[:,:,None]

        targets_shuffled = predy.dimshuffle(1, 0, 2)
        target_time0 = targets_shuffled[0]

        masks_shuffled = mask_var.dimshuffle(1, 0)

        initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

        initials = [target_time0, initial_energy0]
        [_, target_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials,
            sequences=[targets_shuffled[1:], masks_shuffled[1:]])
        cost11 = target_energies[-1] + T.sum(
            T.sum(local_energy * predy, axis=2) * mask_var, axis=1)

        # compute the ground-truth energy

        targets_shuffled0 = A.dimshuffle(1, 0, 2)
        target_time00 = targets_shuffled0[0]

        initial_energy00 = T.dot(target_time00, Wyy[-1, :-1])

        initials0 = [target_time00, initial_energy00]
        [_, target_energies0], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials0,
            sequences=[targets_shuffled0[1:], masks_shuffled[1:]])
        cost110 = target_energies0[-1] + T.sum(
            T.sum(local_energy * A, axis=2) * mask_var, axis=1)

        #predy_f =  predy.reshape((-1, 25))
        y_f = target_var.flatten()

        if (params.annealing == 0):
            lamb = params.L3
        elif (params.annealing == 1):
            lamb = params.L3 * (1 - 0.01 * t_t)

        if (params.regutype == 0):
            ce_hinge = lasagne.objectives.categorical_crossentropy(
                predy0 + eps, y_f)
            ce_hinge = ce_hinge.reshape((-1, length))
            ce_hinge = T.sum(ce_hinge * mask_var, axis=1)
            cost = T.mean(-cost11) + lamb * T.mean(ce_hinge)
        else:

            entropy_term = -T.sum(predy0 * T.log(predy0 + eps), axis=1)
            entropy_term = entropy_term.reshape((-1, length))
            entropy_term = T.sum(entropy_term * mask_var, axis=1)
            cost = T.mean(-cost11) - lamb * T.mean(entropy_term)
        """
		f = open('F0_simple.pickle')
                PARA = pickle.load(f)
                f.close()
                l2_term = sum(lasagne.regularization.l2(x-PARA[index]) for index, x in enumerate(a_params))


                cost = T.mean(-cost11) + params.L2*l2_term
		"""

        #from adam import adam
        #updates_a = adam(cost, self.params, params.eta)
        #updates_a = lasagne.updates.sgd(cost, self.params, params.eta)
        #updates_a = lasagne.updates.apply_momentum(updates_a, self.params, momentum=0.9)
        from momentum import momentum
        updates_a = momentum(cost, self.params, params.eta, momentum=0.9)

        if (params.regutype == 0):
            self.train_fn = theano.function(
                inputs=[ei, dt, em, em1, length0, t_t0, di0],
                outputs=[cost, ce_hinge],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, ce_hinge], updates = updates_a, on_unused_input='ignore')
        else:

            self.train_fn = theano.function(
                inputs=[ei, dt, em, em1, length0, t_t0, di0],
                outputs=[cost, entropy_term],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, entropy_term], updates = updates_a, on_unused_input='ignore')

        prediction = T.argmax(predy, axis=2)
        corr = T.eq(prediction, target_var)
        corr_train = (corr * mask_var).sum(dtype=theano.config.floatX)
        num_tokens = mask_var.sum(dtype=theano.config.floatX)

        self.eval_fn = theano.function(
            inputs=[ei, dt, em, em1, length0, di0],
            outputs=[cost11, cost110, corr_train, num_tokens, prediction],
            on_unused_input='ignore',
            givens={
                input_var: ei,
                target_var: dt,
                mask_var: em,
                mask_var1: em1,
                length: length0,
                decoderInputs0: di0
            })
Code example #35
0
File: mdn.py Project: myorm00000000/world_merlin
    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')         

        layer_size = len(self.params)
        lr_list = []
        for i in xrange(layer_size):
            lr_list.append(learning_rate)

        ##top 2 layers use a smaller learning rate
        if layer_size > 4:
            for i in range(layer_size-4, layer_size):
                lr_list[i] = learning_rate * 0.5

        # compute list of fine-tuning updates
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        if self.use_rprop == 0:
        
            updates = OrderedDict()
            layer_index = 0
            for dparam, gparam in zip(self.delta_params, gparams):
                updates[dparam] = momentum * dparam - gparam * lr_list[layer_index]
                layer_index += 1

            for dparam, param in zip(self.delta_params, self.params):
                updates[param] = param + updates[dparam]
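            # Hedged note (not part of the original source): updates[param]
            # reuses the symbolic expression already stored in updates[dparam],
            # so the parameter moves with the freshly computed delta, roughly:
            #   d_new = momentum * d_old - lr * grad
            #   p_new = p + d_new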

            train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
                  theano.Param(momentum, default = 0.5)],
                  outputs=self.errors,
                  updates=updates,
                  on_unused_input='ignore',
                  givens={self.x: train_set_x[index * batch_size:
                                              (index + 1) * batch_size],
                          self.y: train_set_y[index * batch_size:
                                              (index + 1) * batch_size]})

        elif self.use_rprop:        
            updates = compile_RPROP_train_function(self, gparams)
            
            ## retain learning rate and momentum to make interface backwards compatible,
            ## but we won't use them, means we have to use on_unused_input='warn'.
            ## Otherwise same function for RPROP or otherwise -- can move this block outside if clause.              
            train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
                  theano.Param(momentum, default = 0.5)],
                  outputs=self.errors,
                  updates=updates,
                  on_unused_input='warn',
                  givens={self.x: train_set_x[index * batch_size:
                                              (index + 1) * batch_size],
                          self.y: train_set_y[index * batch_size:
                                              (index + 1) * batch_size]})   
                                                                                
        valid_fn = theano.function([], 
              outputs=self.errors,
              on_unused_input='ignore',              
              givens={self.x: valid_set_x,
                      self.y: valid_set_y})

        valid_score_i = theano.function([index], 
              outputs=self.errors,
              on_unused_input='ignore',              
              givens={self.x: valid_set_x[index * batch_size:
                                          (index + 1) * batch_size],
                      self.y: valid_set_y[index * batch_size:
                                          (index + 1) * batch_size]})
        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        return train_fn, valid_fn
Code example #36
0
    l1_6 = ConvKeepLayer(rng, l1_5a, (2 * OUT + 8, 24 + 2, 5, 5))
    l1_7 = ConvKeepLayer(rng,
                         l1_6, (OUT + 1, 2 * OUT + 8, 9, 9),
                         Nonlinear=False)
    l1_7sm = LogSoftmaxLayer(l1_7)

    lout = LabelLoss(l1_7sm, l1)

    model = Model(l1, l2, l4, l8, l1_2, l2_2, l4_2, l8_2, l1_2s, l2_2s, l4_2s,
                  l2_2e, l4_2e, l8_2e, l1_2a, l2_2a, l4_2a, l8_2a, l1_3, l2_3,
                  l4_3, l8_3, l1_3s, l2_3s, l4_3s, l2_3e, l4_3e, l8_3e, l1_3a,
                  l2_3a, l4_3a, l8_3a, l1_4, l2_4, l4_4, l8_4, l1_4s, l2_4s,
                  l4_4s, l2_4e, l4_4e, l8_4e, l1_4a, l2_4a, l4_4a, l8_4a, l1_5,
                  l2_5, l4_5, l8_5, l8_5e, l4_5a, l4_6, l4_6e, l2_5a, l2_6,
                  l2_6e, l1_5a, l1_6, l1_7, l1_7sm, lout)
    a, b = T.fscalar(), T.fscalar()
    obinary, _tp, _fp, _tn, _fn, _F = binaryloss_label(l1_7.output,
                                                       lout.output, 0, a,
                                                       b)  #4.6, 1.41)

    cost = lout.loss

    params = model.params()
    momentums = model.pmomentum()

    grads = T.grad(cost, params)
    updates = []
    updating = 0.0
    for grad, momentum in zip(grads, momentums):
        updates.append((momentum, MOMENTUM * momentum - LEARN_RATE * grad))
        updating = updating + T.sum(abs(momentum))
Code example #37
0
File: ConvVAE.py Project: KyriacosShiarli/SingNet
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        ####################################### SETTINGS ###################################
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name='input')
        self.df = T.fmatrix(name='differential')
        self.dim_z = dim_z
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        #self.y = T.matrix(name="y")
        self.in_filters = [5, 5, 5]
        self.filter_lengths = [10., 10., 10.]
        self.params = []
        #magic = 73888.
        self.magic = magic

        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))
        ####################################### LAYERS ######################################
        # LAYER 1 ##############################
        self.conv1 = one_d_conv_layer(self.inpt,
                                      self.in_filters[0],
                                      1,
                                      self.filter_lengths[0],
                                      param_names=["W1", 'b1'])
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
        #self.layer1_out = self.maxpool1
        # LAYER2 ################################
        self.flattened = T.flatten(self.layer1_out, outdim=2)
        # Variational Layer #####################
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic,
                                                    dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output
        # Hidden Layer #########################
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1],
                 int(self.magic / self.in_filters[-1]), 1)),
            self.dropout_symbolic)
        # Devonvolutional 1 ######################
        self.deconv1 = one_d_deconv_layer(self.hid_out,
                                          1,
                                          self.in_filters[2],
                                          self.filter_lengths[2],
                                          pool=2.,
                                          param_names=["W3", 'b3'],
                                          distribution=False)
        self.params += self.deconv1.params
        #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        if self.out_distribution == True:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.
                                                         shape[2], :]
        self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

        ################################### FUNCTIONS ######################################################
        self.get_latent_states = theano.function(
            [self.inpt],
            self.latent_out,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
        #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
        #self.convolve1 = theano.function([self.inpt],self.layer1_out)
        #self.convolve2 = theano.function([self.inpt],self.layer2_out)
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        self.get_flattened = theano.function(
            [self.inpt],
            self.flattened,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
        #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
        #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob],
                    [self.latent_out, self.generative_z]])

        self.cost = self.MSE()
        self.mse = self.MSE()
        #self.likelihood = self.log_px_z()
        #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

        #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
        self.derivatives = T.grad(self.cost, self.params)
        #self.get_gradients = theano.function([self.inpt],self.derivatives)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        #self.updates =momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
Code example #38
0
File: SpringSliders.py Project: JamesUnicomb/DySyTh
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import theano
import theano.tensor as T

kappa = T.fscalar()
rho = T.fscalar()
beta1 = T.fscalar()
beta2 = T.fscalar()

h = T.fscalar()
N = T.iscalar()
x = T.fvector()


def f(X):
    X_ = T.zeros_like(X)
    X_ = T.set_subtensor(X_[1], (1.0 - T.exp(X[0])) * kappa)
    X_ = T.set_subtensor(X_[2], -T.exp(X[0]) * rho * (beta2 * X[0] + X[2]))
    X_ = T.set_subtensor(
        X_[0],
        T.exp(X[0]) * ((beta1 - 1.0) * X[0] + X[1] - X[2]) + X_[1] - X_[2])
    return X_


def step(X):
    k1 = h * f(X)
    k2 = h * f(X + 0.5 * k1)
    k3 = h * f(X + 0.5 * k2)
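    # The listing is truncated here; a classical fourth-order Runge-Kutta step
    # would presumably continue along these lines (a hedged sketch, not taken
    # from the project):
    #   k4 = h * f(X + k3)
    #   return X + (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0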
Code example #39
0
File: theano.py Project: antalcides/python
#!/usr/bin/python
 
from matplotlib import rc
from pylab import *
from theano import *
import theano.tensor as T
import numpy
 
rc('text', usetex=True)
rc('font', family='serif')
 
x = T.fvector('x')
x1 = T.fscalar('x1')
y = 1/(1 + T.exp(-x))
y1 = 1/(1 + T.exp(-x1))
logistic = function([x], y)
logistic1 = function([x1], y1)
grady = T.grad(y1, x1)
derivada = function([x1], grady)
 
a = float(input('Introduce el extremo izqdo. \n'))
b = float(input('Introduce el extremo drcho. \n'))
particion = float(input('Introduce la longitud de particion del intervalo. \n'))
pderiv = float(input('Introduce el punto donde hallar su recta tangente. \n'))
 
xval = arange(a,b,particion, dtype='float32')
z,w,w1=T.fscalars('z', 'w', 'w1')
rectatg2 = (x-z)*w+w1
rectatg3 = function([x, Param(z, default=pderiv), Param(w, default=derivada(pderiv)), Param(w1, default=logistic1(pderiv))], rectatg2)
 
figure(1)
Code example #40
0
    #tempens model variables:
    z_target_var = T.matrix('z_targets')
    mask_train = T.vector('mask_train')
    unsup_weight_var = T.scalar('unsup_weight')
    
    learning_rate_var = T.scalar('learning_rate')
    adam_beta1_var = T.scalar('adam_beta1')
    
#    #Left sdp length
#    left_sdp_length=T.imatrix('left_sdp_length')
#    #Sentences length
#    sen_length=T.imatrix('sen_length')
    
    #negative loss
    negative_loss_alpha=T.fvector("negative_loss_alpha")
    negative_loss_lamda=T.fscalar("negative_loss_lamda") 
    
    """
    2.
    Bulit GRU network
    ADAM
    """
    gru_network,l_in,l_mask,l_gru_forward,l_split_cnn=model.bulit_gru(input_var,mask_var)
    
    #mask_train_input: entries with "1" are passed through; entries with "0" are not.
    mask_train_input=pro_data.mask_train_input(training_label,num_labels=model.num_labels)
    
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(gru_network)
    l_gru = lasagne.layers.get_output(l_gru_forward)
Code example #41
0
    def __init__(self, x_dim, hidden_dim, y_dim, w_spread, p_drop):

        # parameters of the model
        self.wx = theano.shared(
            name="wx",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (x_dim + hidden_dim + 1, hidden_dim)).astype(
                                  theano.config.floatX),
            borrow=True)
        self.hx_0 = theano.shared(name="hx_0",
                                  value=np.zeros(hidden_dim,
                                                 dtype=theano.config.floatX),
                                  borrow=True)

        self.w1 = theano.shared(
            name="w1",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (hidden_dim + hidden_dim + 1,
                               hidden_dim)).astype(theano.config.floatX),
            borrow=True)
        self.h1_0 = theano.shared(name="h1_0",
                                  value=np.zeros(hidden_dim,
                                                 dtype=theano.config.floatX),
                                  borrow=True)

        self.wy = theano.shared(
            name="wy",
            value=w_spread * np.random.uniform(
                -1., 1., (hidden_dim + 1, y_dim)).astype(theano.config.floatX),
            borrow=True)

        # bundle
        #self.params = [self.wx, self.hx_0, self.w1, self.h1_0, self.wy]
        self.params = [self.wx, self.w1, self.wy]

        # define recurrent neural network
        # (for each input word predict all output tags)
        x = T.fmatrix("x")
        y = T.fmatrix("y")
        learn_rate = T.fscalar('learn_rate')

        activation = T.tanh

        #activation = T.nnet.sigmoid
        #activation = lambda x: x * (x > 0)  # reLU
        #activation = lambda x: x * ((x > 0) + 0.01)
        #activation = lambda x: T.minimum(x * (x > 0), 6)  # capped reLU

        def model(x, wx, hx_0, w1, h1_0, wy, p_drop):
            def recurrence(x_cur, hx_prev, h1_prev, masks):
                one = np.float32(1.)
                hx = activation(
                    T.dot(T.concatenate([x_cur, hx_prev, [one]]), wx))
                hx_ = dropout_apply(hx, masks[0], p_drop)
                h1 = activation(T.dot(T.concatenate([hx_, h1_prev, [one]]),
                                      w1))
                h1_ = dropout_apply(h1, masks[1], p_drop)
                y_pred = activation(T.dot(T.concatenate([h1_, [one]]), wy))
                return (hx, h1, y_pred)

            if p_drop > 0.:
                masks = dropout_masks(p_drop, [hx_0.shape, h1_0.shape])
            else:
                masks = []
            (_, _, y_pred), _ = theano.scan(fn=recurrence,
                                            sequences=x,
                                            non_sequences=[masks],
                                            outputs_info=[hx_0, h1_0, None],
                                            n_steps=x.shape[0])
            return y_pred

        y_pred = model(x, self.wx, self.hx_0, self.w1, self.h1_0, self.wy, 0.)
        y_noise = model(x, self.wx, self.hx_0, self.w1, self.h1_0, self.wy,
                        p_drop)

        #loss = lambda y_pred, y: T.mean((y_pred - y) ** 2)  # MSE
        #loss = lambda y_pred, y: T.sum((y_pred - y) ** 16) ** (1./16)
        #loss = lambda y_pred, y: T.max((y_pred - y) ** 2)
        loss = lambda y_pred, y: T.max(abs(y - y_pred)) + T.mean(
            (y - y_pred)**2)
        #loss = lambda y_pred, y: T.sum((y_pred - y) ** 16) ** (1./16) + T.mean((y - y_pred) ** 2)
        l1_reg = 0.001
        l1 = T.mean(self.wx) + T.mean(self.w1) + T.mean(self.wy)
        l2_reg = 0.001
        l2 = T.mean(self.wx**2) + T.mean(self.w1**2) + T.mean(self.wy**2)

        # define gradients and updates
        cost = loss(y_noise, y) + l1_reg * l1 + l2_reg * l2
        #updates = sgd(cost, self.params, learn_rate)
        #updates = rmsprop(cost, self.params, learn_rate)
        updates = adam(cost, self.params, learn_rate)

        # compile theano functions
        self.predict = theano.function(inputs=[x], outputs=y_pred)
        self.train = theano.function(
            inputs=[x, y, learn_rate],
            outputs=[cost,
                     T.min(y_noise),
                     T.max(y_noise),
                     T.mean(y_noise)],
            updates=updates)
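        # Hedged usage sketch (class name, shapes and values are assumptions,
        # not part of the original source): x and y are (n_steps, dim) float32
        # matrices holding a single sequence.
        #   net = RNNRegressor(x_dim=3, hidden_dim=16, y_dim=2, w_spread=0.1, p_drop=0.2)
        #   xs = np.random.rand(50, 3).astype('float32')
        #   ys = np.random.rand(50, 2).astype('float32')
        #   cost_val, y_min, y_max, y_mean = net.train(xs, ys, np.float32(0.001))
        #   preds = net.predict(xs)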
Code example #42
0
    def train_validate_test(self, trainSet, validateSet, testSet, nEpoch):
        '''
        >>>train and test the model

        >>>type trainSet/validateSet/testSet: dict
        >>>para trainSet/validateSet/testSet: train/validate/test set
        >>>type nEpoch: int
        >>>para nEpoch: maximum number of iteration epochs
        '''
        trainSize = trainSet['x'].shape[0]
        validateSize = validateSet['x'].shape[0]
        testSize = testSet['x'].shape[0]

        trainX = theano.shared(trainSet['x'], borrow=True)
        trainY = theano.shared(trainSet['y'], borrow=True)
        trainY = T.cast(trainY, 'int32')
        validateX = theano.shared(validateSet['x'], borrow=True)
        validateY = theano.shared(validateSet['y'], borrow=True)
        validateY = T.cast(validateY, 'int32')
        testX = testSet['x']
        testY = np.asarray(testSet['y'], 'int32')
        trainBatches = trainSize / self.batchSize
        validateBatches = validateSize / self.batchSize

        index = T.iscalar('index')
        learnRate = T.fscalar('lr')
        stepSize = T.fscalar('lr')

        sgdTrainModel = theano.function(
            [index, learnRate],
            [self.cost, self.sgdDelta[0], self.sgdDelta[1], self.sgdDelta[2]],
            updates=self.sgdUpdate,
            givens={
                self.x:
                trainX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                trainY[index * self.batchSize:(index + 1) * self.batchSize],
                self.lr: learnRate
            })
        print 'SGD TrainModel Constructed!'

        sgdMomentumTrainModel = theano.function(
            [index, learnRate], [
                self.cost, self.sgdMomentumDelta[0], self.sgdMomentumDelta[1],
                self.sgdMomentumDelta[2]
            ],
            updates=self.sgdMomentumUpdate,
            givens={
                self.x:
                trainX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                trainY[index * self.batchSize:(index + 1) * self.batchSize],
                self.lr: learnRate
            })
        print 'SGD-Momentum TrainModel Constructed!'

        adadeltaTrainModel = theano.function(
            [index, stepSize], [
                self.cost, self.adadeltaDelta[0], self.adadeltaDelta[1],
                self.adadeltaDelta[2]
            ],
            updates=self.adadeltaUpdate,
            givens={
                self.x:
                trainX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                trainY[index * self.batchSize:(index + 1) * self.batchSize],
                self.lr: stepSize
            })
        print 'Adadelta TrainModel Constructed!'

        adadeltaMomentumTrainModel = theano.function(
            [index, stepSize], [
                self.cost, self.adadeltaMomentumDelta[0],
                self.adadeltaMomentumDelta[1], self.adadeltaMomentumDelta[2]
            ],
            updates=self.adadeltaMomentumUpdate,
            givens={
                self.x:
                trainX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                trainY[index * self.batchSize:(index + 1) * self.batchSize],
                self.lr: stepSize
            })
        print 'Adadelta(with momentum) TrainModel Constructed!'

        validateModel = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                validateX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                validateY[index * self.batchSize:(index + 1) * self.batchSize]
            })
        print 'Validation Model Constructed!'

        testTrain = theano.function(
            [index], [self.cost, self.errors],
            givens={
                self.x:
                trainX[index * self.batchSize:(index + 1) * self.batchSize],
                self.y:
                trainY[index * self.batchSize:(index + 1) * self.batchSize]
            })
        print 'Test Model on Training Set Constructed!'

        testInput = self.wordVec[T.cast(self.x.flatten(),
                                        dtype='int32')].reshape(
                                            (testSize, self.featureMaps,
                                             self.sentenceLen, self.wdim))
        testOutput = 0
        for i in xrange(self.deep):
            testOutput = self.layers[i].process(testInput, testSize)
            testInput = testOutput
        testClassifierInput = testInput.flatten(2)
        testPredict = self.classifier.predictInstance(testClassifierInput)
        testError = T.mean(T.neq(testPredict, self.y))
        testModel = theano.function([self.x, self.y], [testPredict, testError])
        print 'Testing Model Constructed!'

        epoch = 0
        maxEpoch = 5.0
        learningRate = self.learningRate
        steppingSize = 1.0
        localOpt = 0
        bestTestAcc = 0.0
        bestValAcc = 0.0
        finalAcc = 0.0
        self.trainAccs = []
        self.validateAccs = []
        self.testAccs = []
        self.costValues = []
        self.result = {'minError': 1.00, 'finalAcc': 0.00, 'bestValAcc': 0.00}
        testPredict = np.zeros(shape=(testSize, ), dtype='int32')
        testMatrix = np.zeros(shape=(self.categories, self.categories),
                              dtype='int32')

        while epoch < nEpoch:
            epoch += 1
            num = 0

            for minBatch in np.random.permutation(range(trainBatches)):
                cost, dmax, dmin, dmean = adadeltaTrainModel(
                    minBatch, learningRate)
                #cost=sgdMomentumTrainModel(minBatch,self.learningRate)
                #adadeltaMomentumTrainModel(minBatch,self.learningRate)
                x = float(epoch) + float(num + 1) / float(trainBatches) - 1
                if num % 50 == 0:
                    trainResult = [testTrain(i) for i in xrange(trainBatches)]
                    trainCost, trainError = np.mean(trainResult, axis=0)
                    trainAcc = 1 - trainError
                    self.costValues.append({'x': x, 'value': trainCost})
                    self.trainAccs.append({'x': x, 'acc': trainAcc})
                    if self.useVal:
                        validateError = [
                            validateModel(i) for i in xrange(validateBatches)
                        ]
                        validateAcc = 1 - np.mean(validateError)
                        self.validateAccs.append({'x': x, 'acc': validateAcc})
                        print 'Epoch=%i,Num=%i,TrainAcc=%f%%,ValidateAcc=%f%%' % (
                            epoch, num, trainAcc * 100., validateAcc * 100.)
                    else:
                        print 'Epoch=%i,Num=%i,TrainAcc=%f%%' % (
                            epoch, num, trainAcc * 100.)
                    print 'costValue=%f, learningRate=%f' % (trainCost,
                                                             self.learningRate)

                    testPredict, testError = testModel(testX, testY)
                    assert len(testPredict) == len(testY)
                    testMatrix = np.zeros(shape=(self.categories,
                                                 self.categories),
                                          dtype='int32')
                    for case in xrange(len(testY)):
                        testMatrix[testY[case], testPredict[case]] += 1
                    testAcc = 1 - testError
                    self.testAccs.append({'x': x, 'acc': testAcc})
                    print 'TestAcc=%f%%' % (testAcc * 100.)

                    if self.useVal and validateAcc > bestValAcc:
                        bestValAcc = validateAcc
                        bestTestAcc = max(bestTestAcc, testAcc)
                        finalAcc = testAcc
                        localOpt = 0
                        maxEpoch = max(maxEpoch, epoch * 1.5)
                        self.result = {
                            'minError': 1 - bestTestAcc,
                            'finalAcc': finalAcc,
                            'bestValAcc': bestValAcc
                        }
                    elif not self.useVal:
                        bestTestAcc = max(bestTestAcc, testAcc)
                        finalAcc = testAcc
                        self.result = {
                            'minError': 1 - bestTestAcc,
                            'finalAcc': finalAcc,
                            'bestValAcc': bestValAcc
                        }
                    print 'BestValAcc=%f%%,BestTestAcc=%f%%,FinalAcc=%f%%' % (
                        bestValAcc * 100., bestTestAcc * 100., finalAcc * 100.)
                num += 1

            x = float(epoch)
            trainResult = [testTrain(i) for i in xrange(trainBatches)]
            trainCost, trainError = np.mean(trainResult, axis=0)
            trainAcc = 1 - trainError
            self.costValues.append({'x': x, 'value': trainCost})
            self.trainAccs.append({'x': x, 'acc': trainAcc})
            if self.useVal:
                validateError = [
                    validateModel(i) for i in xrange(validateBatches)
                ]
                validateAcc = 1 - np.mean(validateError)
                self.validateAccs.append({'x': x, 'acc': validateAcc})
                print 'Epoch=%i,TrainAcc=%f%%,ValidateAcc=%f%%' % (
                    epoch, trainAcc * 100., validateAcc * 100.)
            else:
                print 'Epoch=%i,TrainAcc=%f%%' % (epoch, trainAcc * 100.)
            print 'costValue=%f, learningRate=%f' % (trainCost,
                                                     self.learningRate)

            testPredict, testError = testModel(testX, testY)
            assert len(testY) == len(testPredict)
            testMatrix = np.zeros(shape=(self.categories, self.categories),
                                  dtype='int32')
            for case in xrange(len(testY)):
                testMatrix[testY[case], testPredict[case]] += 1
            testAcc = 1 - testError
            self.testAccs.append({'x': x, 'acc': testAcc})
            print 'TestAcc=%f%%' % (testAcc * 100.)

            if self.useVal and validateAcc > bestValAcc:
                bestValAcc = validateAcc
                bestTestAcc = max(bestTestAcc, testAcc)
                finalAcc = testAcc
                localOpt = 0
                maxEpoch = max(maxEpoch, epoch * 1.5)
                self.result = {
                    'minError': 1 - bestTestAcc,
                    'finalAcc': finalAcc,
                    'bestValAcc': bestValAcc
                }
            elif not self.useVal:
                bestTestAcc = max(bestTestAcc, testAcc)
                finalAcc = testAcc
                self.result = {
                    'minError': 1 - bestTestAcc,
                    'finalAcc': finalAcc,
                    'bestValAcc': bestValAcc
                }
            print 'BestValAcc=%f%%,BestTestAcc=%f%%,FinalAcc=%f%%' % (
                bestValAcc * 100., bestTestAcc * 100., finalAcc * 100.)

        testPredictInfo = {
            'testPredict': testPredict,
            'predictMatrix': testMatrix
        }
        return testPredictInfo, finalAcc
Code example #43
0
File: mlc_model.py Project: lifu-tu/INFNET
    def __init__(self, params, num_lables, num_features):
        self.textfile = open(params.outfile, 'w')

        hidden1 = params.hidden1
        hidden2 = params.hidden2
        hidden1_a = params.hidden1_a
        hidden2_a = params.hidden2_a
        eta = params.eta
        L2 = params.L2
        C1 = params.C1

        ## for the local energy function
        l_in = lasagne.layers.InputLayer((None, num_features))
        l_y1 = lasagne.layers.DenseLayer(l_in, hidden1)
        l_y2 = lasagne.layers.DenseLayer(l_y1, hidden2)
        l_local = lasagne.layers.DenseLayer(
            l_y2,
            num_lables,
            b=None,
            nonlinearity=lasagne.nonlinearities.linear)

        g1 = T.fmatrix()
        y1 = T.fmatrix()

        c_params0 = lasagne.layers.get_all_params(l_y2, trainable=True)
        c_params1 = lasagne.layers.get_all_params(l_local, trainable=True)
        f = open(params.FeatureNet, 'rb')
        para = pickle.load(f)
        f.close()

        for idx, p in enumerate(c_params1):
            if idx < (len(c_params1) - 1):
                p.set_value(para[idx])
            else:
                p.set_value(-para[idx])
        local_cost = lasagne.layers.get_output(l_local, {l_in: g1})
        local_cost = T.sum(local_cost * y1, axis=1)

        ## for the global energy function
        l_in1 = lasagne.layers.InputLayer((None, num_lables))
        l_label1 = lasagne.layers.DenseLayer(
            l_in1, C1, nonlinearity=lasagne.nonlinearities.softplus)
        l_label2 = lasagne.layers.DenseLayer(
            l_label1, 1, b=None, nonlinearity=lasagne.nonlinearities.linear)
        global_cost = lasagne.layers.get_output(l_label2, {l_in1: y1})
        global_cost = T.sum(global_cost, axis=1)
        d_params = lasagne.layers.get_all_params(l_label2)
        d_params.append(l_local.W)

        self.d_params = d_params
        energy_cost = local_cost + global_cost
        self.cost_function = theano.function([g1, y1], energy_cost)
        """
                for the inference network
                """
        g2 = T.fmatrix()
        l_in_a = lasagne.layers.InputLayer((None, num_features))
        l_y1_a = lasagne.layers.DenseLayer(l_in_a, hidden1_a)
        l_y2_a = lasagne.layers.DenseLayer(l_y1_a, hidden2_a)
        l_local_a = lasagne.layers.DenseLayer(
            l_y2_a,
            num_lables,
            b=None,
            nonlinearity=lasagne.nonlinearities.sigmoid)
        a_params = lasagne.layers.get_all_params(l_local_a, trainable=True)
        self.a_params = a_params
        f = open(params.infNet, 'rb')
        PARA = pickle.load(f)
        f.close()
        for idx, p in enumerate(a_params):
            p.set_value(PARA[idx])

        train_y = lasagne.layers.get_output(l_local_a, {l_in_a: g2})
        self.a_function = theano.function([g2], train_y)

        g = T.fmatrix()
        y = T.fmatrix()
        predy = lasagne.layers.get_output(l_local_a, {l_in_a: g})
        local_cost = lasagne.layers.get_output(l_local, {l_in: g})
        pos_local_cost = T.sum(local_cost * y, axis=1)
        neg_local_cost = T.sum(local_cost * predy, axis=1)
        pos_global_cost = lasagne.layers.get_output(l_label2, {l_in1: y})
        neg_global_cost = lasagne.layers.get_output(l_label2, {l_in1: predy})

        yy = T.cast(y, 'int32')
        delta0 = T.sum((y - predy)**2, axis=1)

        margin_type = params.margin_type
        if (margin_type == 0):
            hinge_cost = delta0 - (neg_local_cost + T.sum(
                neg_global_cost, axis=1)) + (pos_local_cost +
                                             T.sum(pos_global_cost, axis=1))
        elif (margin_type == 1):
            hinge_cost = 1 - (neg_local_cost + T.sum(
                neg_global_cost, axis=1)) + (pos_local_cost +
                                             T.sum(pos_global_cost, axis=1))
        elif (margin_type == 2):
            hinge_cost = -(neg_local_cost + T.sum(neg_global_cost, axis=1)) + (
                pos_local_cost + T.sum(pos_global_cost, axis=1))
        elif (margin_type == 3):
            hinge_cost = delta0 * (
                1 - (neg_local_cost + T.sum(neg_global_cost, axis=1)) +
                (pos_local_cost + T.sum(pos_global_cost, axis=1)))

        hinge_cost = hinge_cost * T.gt(hinge_cost, 0)
        d_cost = T.mean(hinge_cost)
        d_cost0 = d_cost
        margin_pred_y_loss = -T.mean(predy * T.log(predy) +
                                     (1 - predy) * T.log(1 - predy))
        g_cost = -d_cost + L2 * sum(
            lasagne.regularization.l2(x)
            for x in a_params) + params.regu_pretrain * sum(
                lasagne.regularization.l2(x - PARA[index])
                for index, x in enumerate(a_params)) + margin_pred_y_loss
        d_cost = d_cost + L2 * sum(
            lasagne.regularization.l2(x) for x in d_params)

        self.a_params = a_params
        updates_g = lasagne.updates.adam(g_cost, a_params, eta)
        self.train_g = theano.function([g, y],
                                       [g_cost, d_cost0, margin_pred_y_loss],
                                       updates=updates_g)

        updates_d = lasagne.updates.adam(d_cost, d_params, eta)
        self.train_d = theano.function([g, y], [d_cost, d_cost0],
                                       updates=updates_d)

        t0 = T.fscalar()
        g0 = T.fmatrix()
        y00 = T.imatrix()
        local_cost0 = lasagne.layers.get_output(l_local, {l_in: g0})
        predy0 = lasagne.layers.get_output(l_local_a, {l_in_a: g0},
                                           deterministic=True)
        pred_test = T.gt(predy0, t0)

        neg_local_cost0 = T.sum(local_cost0 * predy0, axis=1)
        neg_global_cost0 = lasagne.layers.get_output(l_label2, {l_in1: predy0})

        energy_cost20 = T.mean(neg_local_cost0 +
                               T.sum(neg_global_cost0, axis=1))
        energy_cost2 = energy_cost20 - T.mean(predy0 * T.log(predy0) +
                                              (1 - predy0) * T.log(1 - predy0))
        #############
        ## optimizer for final returning of the inference network
        updates_test = lasagne.updates.adam(energy_cost2, a_params, 0.00001)
        y0 = T.imatrix()
        neg_local_cost_test = T.sum(local_cost0 * pred_test, axis=1)
        neg_global_cost_test = lasagne.layers.get_output(
            l_label2, {l_in1: pred_test})
        energy_cost_test = T.mean(neg_local_cost_test +
                                  T.sum(neg_global_cost_test, axis=1))
        pg = T.eq(pred_test, y0)
        prec = 1.0 * (T.sum(pg * y0, axis=1) +
                      eps) / (T.sum(pred_test, axis=1) + eps)
        recall = 1.0 * (T.sum(pg * y0, axis=1) + eps) / (T.sum(y0, axis=1) +
                                                         eps)
        f1 = 2 * prec * recall / (prec + recall)
        f1 = T.mean(f1)

        prec = T.mean(prec)
        recall = T.mean(recall)

        self.test = theano.function([g0], [energy_cost20, energy_cost2],
                                    updates=updates_test)
        self.test_a = theano.function([g0, y0, t0], [
            energy_cost20, prec, recall, f1,
            T.sum(pred_test, axis=1), energy_cost_test
        ])
        self.test_time = theano.function([g0], predy0)
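For reference, the margin-rescaled hinge used above (the margin_type == 0 branch) can be reproduced on its own. This is a self-contained sketch with made-up energies, not code from the INFNET repository.

import numpy as np
import theano
import theano.tensor as T

y = T.fmatrix('y')            # gold labels, shape (batch, num_labels), values in {0, 1}
predy = T.fmatrix('predy')    # inference-network outputs in (0, 1)
e_gold = T.fvector('e_gold')  # energy assigned to the gold labelling, shape (batch,)
e_pred = T.fvector('e_pred')  # energy assigned to the predicted labelling

delta0 = T.sum((y - predy) ** 2, axis=1)   # margin: squared label distance
hinge = delta0 - e_pred + e_gold           # delta - E(pred) + E(gold), as in margin_type == 0
hinge = hinge * T.gt(hinge, 0)             # keep only positive values, i.e. max(., 0)
loss_fn = theano.function([y, predy, e_gold, e_pred], T.mean(hinge))

batch, labels = 4, 6
print(loss_fn(np.random.randint(0, 2, (batch, labels)).astype('float32'),
              np.random.rand(batch, labels).astype('float32'),
              np.random.randn(batch).astype('float32'),
              np.random.randn(batch).astype('float32')))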
コード例 #44
0
import numpy as np
import theano
import theano.tensor as T

x = T.fscalar('x')
y = T.fscalar('y')
f = x**10 + y
f_fn = theano.function([x, y], f)

print "f(x, y) =", theano.printing.pprint(f)

print "f(2, 3) = ", f_fn(2, 3)

print "=" * 30

X = T.fmatrix('X')
y = T.fvector('y')
f = X.dot(y)
f_fn = theano.function([X, y], f)
X0 = np.array(range(90), dtype=np.float32).reshape(9, 10)
y0 = np.array(range(10), dtype=np.float32)

print "f(X, y) =", theano.printing.pprint(f)
print "X ="
print X0
print "y ="
print y0
print "Xy ="
print f_fn(X0, y0)
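As a small follow-up (not part of the original snippet, but continuing it with the imports already in place), the same fscalar graph can be differentiated symbolically with T.grad:

x = T.fscalar('x')
y = T.fscalar('y')
f = x ** 10 + y

df_dx, df_dy = T.grad(f, [x, y])   # d f/d x = 10 * x**9, d f/d y = 1
grad_fn = theano.function([x, y], [df_dx, df_dy])

print "grad f(2, 3) =", grad_fn(2, 3)   # approximately [5120.0, 1.0]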
コード例 #45
0
def PSD_conv_linear_combination():

        Decoder_size = (1, 64, 11, 11)
        Encoder_size = (64, 1, 11, 11)

        phi = T.tanh
        Data_type = 1

        if Data_type == 1:

 
		print "... Loading Cat_vs_Dog data"
		size =70
		m = size
		n = size
		Input_size = [size,size] 
	 
		
		train, valid, test = load_gray()  
		x_test =test[0].astype('float32')  
		y_test = test[1].astype('int32') 
		x_valid = valid[0].astype('float32') 
		y_valid = valid[1].astype('int32')  
		x_train = train[0].astype('float32') 
		y_train = train[1].astype('int32')  	  
 	  
 
	        
		meanstd_train = x_train.std()
		mean = x_train.mean(1).reshape(( x_train.shape[0],1))
		var = x_train.std(1).reshape(( x_train.shape[0],1))+ 0.1 * meanstd_train
		mean2 = x_test.mean(1).reshape(( x_test.shape[0],1))
		meanstd_test = x_test.std()
		var2 = x_test.std(1).reshape(( x_test.shape[0],1)) + 0.1 * meanstd_test 
		x_train -=  mean 
		x_train /= var
		
		x_test -= mean2
		x_test /= var2
 
		train_set_x= theano.shared(np.array(x_train.reshape((x_train.shape[0],1,size,size)), dtype='float32'))
		train_set_y= theano.shared(np.array(y_train, dtype='int32'))

		test_set_y= theano.shared(np.array(y_test, dtype='int32'))
		test_set_x= theano.shared(np.array(x_test.reshape((x_test.shape[0],1,size,size)), dtype='float32'))  
	   
		
		train_set_x = T.reshape(lecun_lcn(train_set_x, kernel_size=7, threshold = 1e-4, use_divisor=False),
		          ((x_train.shape[0],size*size)))
		test_set_x = T.reshape(lecun_lcn(	test_set_x, kernel_size=7, threshold = 1e-4, use_divisor=False), 
		           ((x_test.shape[0],size*size)))

		train_set_x /= T.reshape( T.max(train_set_x,axis=1)*1.0,(x_train.shape[0],1))
		test_set_x /=T.reshape(  T.max(test_set_x , axis=1)*1.0,(x_test.shape[0],1))
 
		dispims(np.array(f()).reshape((x_train.shape[0], size*size))[:100].transpose(), size,size, 0, layout=(10,10),
				    name='data.png' ) 
 
 
	 
        batch_size = 200


        Lambda = 10.0**(-3)
        w = Input_size[0]
        s = Decoder_size[2]
        h = Input_size[1]
        M = Decoder_size[1]
        Sparse_Matrix_size = ( batch_size, M, w+s-1,h+s-1)
        rng = np.random.RandomState()
        index = T.lscalar()
        x = T.cast(T.fmatrix('x'), 'float32') 
        y = T.ivector('y')
        I = T.cast(x.reshape((batch_size, Input_size[0],Input_size[1])), 'float32')
        I  = I.dimshuffle(0,'x',1, 2)
 

        fan_in = np.prod(Decoder_size[1:])
 
        fan_out = (Decoder_size[0] * np.prod(Decoder_size[2:]) )/(np.prod((2.,2.)))
 
        D_bound = 0.01 
        Decoder_Matrix =theano.shared( np.asarray(
                                rng.uniform(low=-D_bound,
                                high=D_bound,
                                size = Decoder_size),
                                dtype='float32'), borrow=True) 
      
        En_bound = 0.01
        Encoder_Matrix =theano.shared( np.asarray(
                                rng.uniform(low=-D_bound,
                                high=En_bound,
                                size = Encoder_size),
                                dtype='float32'), borrow=True)
 
        E_bound = 0.01  
        Esparse_Matrix = theano.shared( np.asarray(
                                rng.uniform(low=-E_bound,
                                high=E_bound,
                                size =Sparse_Matrix_size ),
                                dtype='float32' ), borrow=True)
 
        P1 =T.reshape(  theano.sandbox.cuda.dnn.dnn_conv(
                         Esparse_Matrix, 
                        Decoder_Matrix),(batch_size, Input_size[0],Input_size[1])) 
 

        Sum = T.reshape(T.mean(P1, axis=0),(1, Input_size[0],Input_size[1]))
 
        E1 =0.5*T.sum(T.mean(( I-Sum)**2, axis=0))
 
        Encoder_mapp = T.reshape (T.tanh(  theano.sandbox.cuda.dnn.dnn_conv(
            	I, 
            	  Encoder_Matrix,'full' )),Sparse_Matrix_size  ) 
	 
 	 
        Reconstruction =     theano.sandbox.cuda.dnn.dnn_conv(
            	img= Encoder_mapp,
            	kerns=   Decoder_Matrix )  
        


 

	get_Reconstruction = theano.function (
                     [index],
                      Reconstruction,
		    givens={
		    x:   train_set_x[index * batch_size: (index + 1) * batch_size]
		    }
		    )

 
        P2 = T.sum(T.mean( ( Esparse_Matrix  -Encoder_mapp  )**2,  axis=0)  )
  
 
        cost = E1 +    P2  + Lambda*T.sum(T.mean(abs(Esparse_Matrix), axis =0))
  
        test_model = theano.function(
                        [index],
                        cost,
                        givens={
                        x:  test_set_x[index * batch_size: (index + 1) * batch_size]
                        }
                        )
        test_traint_model = theano.function(
                  [index],
                  cost,
                  givens={
                  x:  train_set_x[index * batch_size: (index + 1) * batch_size]
                  }
                  )
        Esparse_Matrix_grad = T.grad(cost,  [Esparse_Matrix])
   
        Esparse_lr =  T.fscalar()
        Esparse_Update  = gradient_updates_momentum(cost, params =[Esparse_Matrix] , learning_rate = Esparse_lr, momentum=0.9)
 
 
        train_Esparse_model = theano.function(
                [index, Esparse_lr ],
                cost,
                updates= Esparse_Update,
                givens={
                x:  train_set_x[index * batch_size: (index + 1) * batch_size]
                }
                )
 

        get_Z_grad = theano.function(
		[index],
		 Esparse_Matrix_grad ,
		givens={
		x:   train_set_x[index * batch_size: (index + 1) * batch_size]
		})
 
	U_d = [  Decoder_Matrix]
	U_e =   [ Encoder_Matrix]  
	
	U_grads_d = T.grad( cost, U_d)
	U_grads_e = T.grad( cost, U_e)
	
	L_rate_encoder  = T.fscalar()
	L_rate_decoder = T.fscalar()
	
	U_updates = [
		(param_i,( param_i - L_rate_decoder  * grad_i)/(1E-3 + T.sqrt( T.sum(( param_i - L_rate_decoder  * grad_i)**2 , axis=0))))
		for param_i, grad_i in zip(U_d, U_grads_d)
		] +   [
		(param_i, param_i - L_rate_encoder * grad_i)
		for param_i, grad_i in zip(U_e, U_grads_e)
		] 
        
        
 
        train_ED_model = theano.function(
                [index, L_rate_encoder,L_rate_decoder],
                cost,
                updates=U_updates,
                givens={
                x:   train_set_x[index * batch_size: (index + 1) * batch_size]
                }
                )
 
  
 
        F = T.reshape( abs(Encoder_mapp), (batch_size, M*(( w+s-1)**2)))
        meanstd_F = F.std()
        F -= F.mean(1)[:, None]
        F /= F.std(1)[:, None] + 0.1 * meanstd_F
コード例 #46
0
import theano
import theano.tensor as T

# define some symbolic variables
theano_matrix1 = T.matrix(name='theano_matrix1')
theano_matrix2 = T.matrix(name='theano_matrix2')

# define some functions

# dot product/matrix product
theano_dot = theano.function([theano_matrix1, theano_matrix2],
                             T.dot(theano_matrix1, theano_matrix2),
                             name='theano_dot')

theano_scalar = T.fscalar(name='theano_scalar')
theano_scale = theano.function([theano_matrix1, theano_scalar],
                               theano_matrix1 * theano_scalar,
                               name='scale')

# elementwise product
theano_multiply = theano.function([theano_matrix1, theano_matrix2],
                                  theano_matrix1 * theano_matrix2,
                                  name='theano_multiply')

theano_row_vector = T.row(name='theano_row_vector')
theano_col_vector = T.col(name='theano_col_vector')

theano_subtract_row = theano.function([theano_matrix1, theano_row_vector],
                                      theano_matrix1 - theano_row_vector,
                                      name='theano_subtract_row')
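A short usage check for the functions defined above, added for illustration; it continues the snippet and assumes the default floatX configuration.

import numpy as np

A = np.arange(6).reshape(2, 3).astype(theano.config.floatX)
B = np.arange(12).reshape(3, 4).astype(theano.config.floatX)
row = np.arange(3).reshape(1, 3).astype(theano.config.floatX)

print(theano_dot(A, B))                   # (2, 4) matrix product
print(theano_scale(A, np.float32(2.0)))   # every entry doubled
print(theano_multiply(A, A))              # elementwise square
print(theano_subtract_row(A, row))        # row vector broadcast over each row of A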
コード例 #47
0
    def __init__(self,
                 q,
                 p,
                 prior=None,
                 n_batch=100,
                 optimizer=lasagne.updates.adam,
                 optimizer_params={},
                 clip_grad=None,
                 max_norm_constraint=None,
                 train_iw=False,
                 test_iw=True,
                 iw_alpha=0,
                 seed=1234):
        super(VAE, self).__init__(n_batch=n_batch, seed=seed)

        self.q = q
        self.p = p
        if prior:
            self.prior = prior
        else:
            self.prior = get_prior(self.q)

        # set prior distribution mode
        if self.prior.__class__.__name__ == "MultiPriorDistributions":
            self.prior.prior = get_prior(self.q.distributions[-1])
            self.prior_mode = "MultiPrior"
        else:
            self.prior_mode = "Normal"

        self.train_iw = train_iw
        self.test_iw = test_iw

        # set inputs
        x = self.q.inputs
        l = T.iscalar("l")
        k = T.iscalar("k")
        annealing_beta = T.fscalar("beta")

        # training
        if self.train_iw:
            inputs = x + [l, k]
            lower_bound, loss, params = self._vr_bound(x, l, k, iw_alpha,
                                                       False)
        else:
            inputs = x + [l, annealing_beta]
            lower_bound, loss, params = self._elbo(x, l, annealing_beta, False)
        lower_bound = T.mean(lower_bound, axis=0)
        updates = self._get_updates(loss, params, optimizer, optimizer_params,
                                    clip_grad, max_norm_constraint)

        self.lower_bound_train = theano.function(inputs=inputs,
                                                 outputs=lower_bound,
                                                 updates=updates,
                                                 on_unused_input='ignore')

        # test
        if self.test_iw:
            inputs = x + [l, k]
            lower_bound, _, _ = self._vr_bound(x, l, k, 0, True)
        else:
            inputs = x + [l]
            lower_bound, _, _ = self._elbo(x, l, 1, True)
            lower_bound = T.sum(lower_bound, axis=1)

        self.lower_bound_test = theano.function(inputs=inputs,
                                                outputs=lower_bound,
                                                on_unused_input='ignore')
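The annealing_beta input above is typically driven by a warm-up schedule during training. The following is only a sketch of such a schedule; the schedule shape and the commented call are assumptions, not taken from the source.

import numpy as np

def annealing_beta_schedule(epoch, warmup_epochs=100):
    # linearly increase the KL weight from 0 to 1 over the warm-up period, then hold at 1
    return np.float32(min(1.0, float(epoch) / warmup_epochs))

# hypothetical usage: model.lower_bound_train(x_batch, l, annealing_beta_schedule(epoch))
for epoch in (0, 25, 50, 100, 200):
    print("epoch %3d -> annealing beta %.2f" % (epoch, annealing_beta_schedule(epoch)))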
コード例 #48
0
ファイル: ciresan2012.py プロジェクト: yesyu/ciresan
    def __init__(self,
                 datasets,
                 nkerns=[32, 48],
                 batch_size=1000,
                 normalized_width=20,
                 distortion=0,
                 cuda_convnet=1,
                 params=[None, None, None, None, None, None, None, None]):
        """ Demonstrates Ciresan 2012 on MNIST dataset

        Some minor differences here:
        ---
        - Ciresan initializes Conv layers with: "uniform random distribution
            in the range [−0.05, 0.05]." (Ciresan IJCAI 2011)
        - Ciresan uses a sigma of 6
        - Ciresan uses nkerns=[20, 40] which were increased here to be nkerns=[32, 48]
            in order to be compatible with cuda_convnet

        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
                              gradient)

        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer

        :type dataset: string
        :param dataset: path to the dataset used for training /testing (MNIST here)

        :type nkerns: list of ints
        :param nkerns: number of kernels on each layer

        :type params: list of None or Numpy matrices/arrays
        :param params: W/b weights in the order: layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b
        """

        layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b = params
        rng = numpy.random.RandomState(23455)

        # TODO: could make this a theano sym variable to abstract
        # loaded data from column instantiation
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]

        # TODO: could move this to train method
        # compute number of minibatches for training, validation and testing
        self.n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        self.n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches /= batch_size
        self.n_valid_batches /= batch_size
        self.n_test_batches /= batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        learning_rate = T.fscalar()

        # start-snippet-1
        x = T.matrix('x')  # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the column'

        if distortion:
            distortion_layer = ElasticLayer(x.reshape((batch_size, 29, 29)),
                                            29,
                                            magnitude=ALPHA,
                                            sigma=SIGMA)

            network_input = distortion_layer.output.reshape(
                (batch_size, 1, 29, 29))
        else:
            network_input = x.reshape((batch_size, 1, 29, 29))

        if cuda_convnet:
            layer0_input = network_input.dimshuffle(1, 2, 3, 0)
        else:
            layer0_input = network_input

        layer0_imageshape = (1, 29, 29,
                             batch_size) if cuda_convnet else (batch_size, 1,
                                                               29, 29)
        layer0_filtershape = (1, 4, 4,
                              nkerns[0]) if cuda_convnet else (nkerns[0], 1, 4,
                                                               4)

        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=layer0_imageshape,
                                    filter_shape=layer0_filtershape,
                                    poolsize=(2, 2),
                                    cuda_convnet=cuda_convnet,
                                    W=layer0W,
                                    b=layer0b)

        layer1_imageshape = (nkerns[0], 13, 13,
                             batch_size) if cuda_convnet else (batch_size,
                                                               nkerns[0], 13,
                                                               13)
        layer1_filtershape = (nkerns[0], 5, 5,
                              nkerns[1]) if cuda_convnet else (nkerns[1],
                                                               nkerns[0], 5, 5)

        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=layer1_imageshape,
                                    filter_shape=layer1_filtershape,
                                    poolsize=(3, 3),
                                    cuda_convnet=cuda_convnet,
                                    W=layer1W,
                                    b=layer1b)

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
        # or (500, 50 * 4 * 4) = (500, 800) with the default values.
        if cuda_convnet:
            layer2_input = layer1.output.dimshuffle(3, 0, 1, 2).flatten(2)
        else:
            layer2_input = layer1.output.flatten(2)

        layer2 = HiddenLayer(rng,
                             input=layer2_input,
                             n_in=nkerns[1] * 3 * 3,
                             n_out=150,
                             W=layer2W,
                             b=layer2b,
                             activation=T.tanh)

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output,
                                    n_in=150,
                                    n_out=10,
                                    W=layer3W,
                                    b=layer3b)

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # create a function to compute the mistakes that are made by the model
        self.test_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # create a function to compute probabilities of all output classes
        self.test_output_batch = theano.function(
            [index],
            layer3.p_y_given_x,
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size]
            })

        self.validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # create a list of all model parameters to be fit by gradient descent
        self.params = layer3.params + layer2.params + layer1.params + layer0.params
        self.column_params = [
            nkerns, batch_size, normalized_width, distortion, cuda_convnet
        ]

        # create a list of gradients for all model parameters
        grads = T.grad(cost, self.params)

        # train_model is a function that updates the model parameters by
        # SGD Since this model has many parameters, it would be tedious to
        # manually create an update rule for each model parameter. We thus
        # create the updates list by automatically looping over all
        # (params[i], grads[i]) pairs.
        updates = [(param_i, param_i - (learning_rate) * grad_i)
                   for param_i, grad_i in zip(self.params, grads)]

        # Suggested by Alex Krizhevsky, found on:
        # http://yyue.blogspot.com/2015/01/a-brief-overview-of-deep-learning.html
        optimal_ratio = 0.001
        # should show what multiple current learning rate is of optimal learning rate
        grads_L1 = sum([abs(grad).sum() for grad in grads])
        params_L1 = sum([abs(param).sum() for param in self.params])
        update_ratio = (learning_rate /
                        (optimal_ratio)) * (grads_L1 / params_L1)

        self.train_model = theano.function(
            [index, learning_rate], [cost, update_ratio],
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            })
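The learning-rate-as-a-symbolic-input pattern used by train_model above, shown in isolation. This is a minimal toy sketch added for illustration, not code from the project.

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.zeros(3, dtype='float32'), name='w')
x = T.fvector('x')
t = T.fscalar('t')
learning_rate = T.fscalar('learning_rate')

pred = T.dot(w, x)
cost = (pred - t) ** 2
gw = T.grad(cost, w)

# because the learning rate is an input, it can be annealed between calls without recompiling
step = theano.function([x, t, learning_rate], cost,
                       updates=[(w, w - learning_rate * gw)])

print(step(np.ones(3, dtype='float32'), np.float32(1.0), np.float32(0.1)))
print(w.get_value())   # moved in the direction that reduces the squared error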
コード例 #49
0
ファイル: thesne.py プロジェクト: waleking/tsnetwork
def find_Y(X_shared,
           Y_shared,
           sigma_shared,
           N,
           output_dims,
           n_epochs,
           initial_lr,
           final_lr,
           lr_switch,
           init_stdev,
           initial_momentum,
           final_momentum,
           momentum_switch,
           initial_l_kl,
           final_l_kl,
           l_kl_switch,
           initial_l_e,
           final_l_e,
           l_e_switch,
           initial_l_c,
           final_l_c,
           l_c_switch,
           initial_l_r,
           final_l_r,
           l_r_switch,
           r_eps,
           Adj_shared,
           g=None,
           save_every=None,
           output_folder=None,
           verbose=0):
    # Optimization hyperparameters
    initial_lr = np.array(initial_lr, dtype=floath)
    final_lr = np.array(final_lr, dtype=floath)
    initial_momentum = np.array(initial_momentum, dtype=floath)
    final_momentum = np.array(final_momentum, dtype=floath)

    # Hyperparameters used within Theano
    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)
    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)

    # Cost parameters
    initial_l_kl = np.array(initial_l_kl, dtype=floath)
    final_l_kl = np.array(final_l_kl, dtype=floath)
    initial_l_e = np.array(initial_l_e, dtype=floath)
    final_l_e = np.array(final_l_e, dtype=floath)
    initial_l_c = np.array(initial_l_c, dtype=floath)
    final_l_c = np.array(final_l_c, dtype=floath)
    initial_l_r = np.array(initial_l_r, dtype=floath)
    final_l_r = np.array(final_l_r, dtype=floath)

    # Cost parameters used within Theano
    l_kl = T.fscalar('l_kl')
    l_kl_shared = theano.shared(initial_l_kl)
    l_e = T.fscalar('l_e')
    l_e_shared = theano.shared(initial_l_e)
    l_c = T.fscalar('l_c')
    l_c_shared = theano.shared(initial_l_c)
    l_r = T.fscalar('l_r')
    l_r_shared = theano.shared(initial_l_r)

    # High-dimensional observations (connectivities of vertices)
    X = T.fmatrix('X')
    # 2D projection (coordinates of vertices)
    Y = T.fmatrix('Y')

    # Adjacency matrix
    Adj = T.fmatrix('Adj')

    # Standard deviations used for Gaussians to attain perplexity
    sigma = T.fvector('sigma')

    # Y velocities (for momentum-based descent)
    Yv = T.fmatrix('Yv')
    Yv_shared = theano.shared(np.zeros((N, output_dims), dtype=floath))

    # Function for retrieving cost for all individual data points
    costs = cost_var(X, Y, sigma, Adj, l_kl, l_e, l_c, l_r, r_eps)

    # Sum of all costs (scalar)
    cost = T.sum(costs)

    # Gradient of the cost w.r.t. Y
    grad_Y = T.grad(cost, Y)

    # Update step for velocity
    update_Yv = theano.function(
        [],
        None,
        givens={
            X: X_shared,
            sigma: sigma_shared,
            Y: Y_shared,
            Yv: Yv_shared,
            Adj: Adj_shared,
            lr: lr_shared,
            momentum: momentum_shared,
            l_kl: l_kl_shared,
            l_e: l_e_shared,
            l_c: l_c_shared,
            l_r: l_r_shared
        },
        updates=[(Yv_shared, momentum * Yv - lr * grad_Y)])

    # Gradient descent step
    update_Y = theano.function([], [],
                               givens={
                                   Y: Y_shared,
                                   Yv: Yv_shared
                               },
                               updates=[(Y_shared, Y + Yv)])

    # Build function to retrieve cost
    get_cost = theano.function(
        [],
        cost,
        givens={
            X: X_shared,
            sigma: sigma_shared,
            Y: Y_shared,
            Adj: Adj_shared,
            l_kl: l_kl_shared,
            l_e: l_e_shared,
            l_c: l_c_shared,
            l_r: l_r_shared
        })

    # Build function to retrieve per-vertex cost
    get_costs = theano.function(
        [],
        costs,
        givens={
            X: X_shared,
            sigma: sigma_shared,
            Y: Y_shared,
            Adj: Adj_shared,
            l_kl: l_kl_shared,
            l_e: l_e_shared,
            l_c: l_c_shared,
            l_r: l_r_shared
        })

    # Optimization loop
    for epoch in range(n_epochs):

        # Switch parameter if a switching point is reached.
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)
        if epoch == l_kl_switch:
            l_kl_shared.set_value(final_l_kl)
        if epoch == l_e_switch:
            l_e_shared.set_value(final_l_e)
        if epoch == l_c_switch:
            l_c_shared.set_value(final_l_c)
        if epoch == l_r_switch:
            l_r_shared.set_value(final_l_r)
            if final_l_r != 0:
                # Give a nudge to co-located vertices in the epoch before the
                # repulsion kicks in (otherwise they don't feel any).
                Y_shared.set_value(switch_shake(Y_shared.get_value()))

        # Do update step for velocity
        update_Yv()
        # Do a gradient descent step
        update_Y()

        c = get_cost()
        if np.isnan(float(c)):
            raise NaNException('Encountered NaN for cost.')

        if verbose:
            print('[tsne] Epoch: {0}. Cost: {1:.6f}.'.format(
                epoch + 1, float(c)),
                  end='\r')

        if output_folder is not None and g is not None and save_every is not None and epoch % save_every == 0:
            # Get per-vertex cost for colour-coding
            cs = get_costs()

            # Save a snapshot
            save_drawing(output_folder,
                         g,
                         Y_shared.get_value().T,
                         'tsne_snap_' + str(epoch).zfill(5),
                         formats=['jpg'],
                         verbose=False,
                         edge_colors="rgb",
                         draw_vertices=False,
                         opacity=0.3)

    # Get per-vertex cost
    cs = get_costs()

    if verbose:
        print('\n[tsne] Done! ')

    return np.array(Y_shared.get_value()), cs
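The hyperparameter pattern used above (an fscalar placeholder bound to a shared variable via givens, so schedules only need set_value) is easy to isolate. A minimal sketch, not taken from thesne.py:

import numpy as np
import theano
import theano.tensor as T

lr = T.fscalar('lr')
lr_shared = theano.shared(np.float32(0.1))

w = theano.shared(np.float32(0.0))
cost = (w - np.float32(1.0)) ** 2

# lr appears only inside the update expression and is supplied through `givens`
step = theano.function([], cost,
                       givens={lr: lr_shared},
                       updates=[(w, w - lr * T.grad(cost, w))])

for epoch in range(20):
    if epoch == 10:
        lr_shared.set_value(np.float32(0.01))   # "switch" the hyperparameter mid-run
    step()
print(w.get_value())   # moves toward the optimum at 1.0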
コード例 #50
0
def run_conv_nnet2(use_gpu):  # pretend we are training LeNet for MNIST
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    # cumulative rounding error affects this comparison of results, so we lower the tolerance.
    # TODO: why do the last two examples see lower error? Are we converging?
    # n_train=10, n_batch=3, n_kern=1, n_kern1=1, error see of 1e-9
    # n_train=10, n_batch=3, n_kern=10, n_kern1=1, error see of -1.27777e-06
    # n_train=10, n_batch=3, n_kern=10, n_kern1=10, error see of -6.91377e-05
    # n_train=10, n_batch=30, n_kern=10, n_kern1=10, error see of -0.00185963
    # n_train=10, n_batch=60, n_kern=10, n_kern1=10, error see of -5.26905e-05
    # n_train=30, n_batch=60, n_kern=10, n_kern1=10, error see of -3.8147e-06

    # n_train=30, n_batch=60, n_kern=20, n_kern1=10, error see of 6.82771e-05
    # n_train=30, n_batch=60, n_kern=20, n_kern1=30, error see of 0.000231534
    n_batch = 60
    shape_img = (n_batch, 1, 32, 32)

    n_kern = 20
    shape_kern = (n_kern, 1, 5, 5)

    n_kern1 = 10
    shape_kern1 = (n_kern1, n_kern, 5, 5)

    n_train = 30
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        tuple(shape_img[2:]), tuple(shape_kern[2:]), 'valid')
    logical_hid_shape1 = tcn.blas.GpuConv.logical_output_shape_2d(
        (logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
        tuple(shape_kern1[2:]), 'valid')
    n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1]
    n_out = 10

    w0 = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w0')
    b0 = shared_fn(my_zeros((n_kern, )), 'b0')
    w1 = shared_fn(0.01 * (my_rand(*shape_kern1) - 0.5), 'w1')
    b1 = shared_fn(my_zeros((n_kern1, )), 'b1')
    v = shared_fn(my_zeros((n_hid, n_out)), 'c')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
    conv_op1 = conv.ConvOp(
        (n_kern, logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
        shape_kern1[2:], n_kern1, n_batch, 1, 1)

    hid = tensor.tanh(conv_op(x, w0) + b0.dimshuffle((0, 'x', 'x')))
    hid1 = tensor.tanh(
        conv_op1(hid[:, :, ::2, ::2], w1) + b1.dimshuffle((0, 'x', 'x')))
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v) + c)
    loss = tensor.sum(0.5 * (out - y)**2 * lr)
    # print 'loss type', loss.type

    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss],
                  mode=mode,
                  updates=[(p, p - g) for p, g in zip(params, gparams)])

    #    for i, n in enumerate(train.maker.fgraph.toposort()):
    #        print i, n

    xval = my_rand(*shape_img)
    yval = my_rand(n_batch, n_out)  # int32 make all 0...
    lr = theano._asarray(0.01, dtype='float32')
    for i in xrange(n_train):
        rval = train(xval, yval, lr)

    print_mode(mode)
    return rval
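For reference, the 'valid' convolution output shapes that determine n_hid above can be checked with plain arithmetic; a small sketch added here:

def valid_conv_shape(img_hw, kern_hw):
    # 'valid' convolution output size: img - kern + 1 along each spatial dimension
    return (img_hw[0] - kern_hw[0] + 1, img_hw[1] - kern_hw[1] + 1)

h1 = valid_conv_shape((32, 32), (5, 5))                   # (28, 28) after the first conv
h2 = valid_conv_shape((h1[0] // 2, h1[1] // 2), (5, 5))   # (10, 10) after 2x2 subsampling + conv
n_hid = 10 * h2[0] * h2[1]                                # n_kern1 * 10 * 10 = 1000
print(h1, h2, n_hid)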
コード例 #51
0
ファイル: RNN_clip.py プロジェクト: jukaradayi/OpenSAT
    def __init__(
            self,
            Nlayers=1,  # number of layers
            Ndirs=1,  # unidirectional or bidirectional
            Nx=100,  # input size
            Nh=100,  # hidden layer size
            Ny=100,  # output size
            Ah="relu",  # hidden unit activation (e.g. relu, tanh, lstm)
            Ay="linear",  # output unit activation (e.g. linear, sigmoid, softmax)
            predictPer="frame",  # frame or sequence
            loss=None,  # loss function (e.g. mse, ce, ce_group, hinge, squared_hinge)
            L1reg=0.0,  # L1 regularization
            L2reg=0.0,  # L2 regularization
            momentum=0.0,  # SGD momentum
            seed=15213,  # random seed for initializing the weights
            frontEnd=None,  # a lambda function for transforming the input
            filename=None,  # initialize from file
            initParams=None,  # initialize from given dict
    ):

        if filename is not None:  # load parameters from file
            with smart_open(filename, "rb") as f:
                initParams = dill.load(f)
        if initParams is not None:  # load parameters from given dict
            self.paramNames = []
            self.params = []
            for k, v in initParams.iteritems():
                if type(v) is numpy.ndarray:
                    self.addParam(k, v)
                else:
                    setattr(self, k, v)
                    self.paramNames.append(k)
            # F*ck, locals()[k] = v doesn't work; I have to do this statically
            Nlayers, Ndirs, Nx, Nh, Ny, Ah, Ay, predictPer, loss, L1reg, L2reg, momentum, frontEnd \
                = self.Nlayers, self.Ndirs, self.Nx, self.Nh, self.Ny, self.Ah, self.Ay, self.predictPer, self.loss, self.L1reg, self.L2reg, self.momentum, self.frontEnd
        else:  # Initialize parameters randomly
            # Names of parameters to save to file
            self.paramNames = [
                "Nlayers", "Ndirs", "Nx", "Nh", "Ny", "Ah", "Ay", "predictPer",
                "loss", "L1reg", "L2reg", "momentum", "frontEnd"
            ]
            for name in self.paramNames:
                value = locals()[name]
                setattr(self, name, value)

            # Values of parameters for building the computational graph
            self.params = []

            # Initialize random number generators
            global rng
            rng = numpy.random.RandomState(seed)

            # Construct parameter matrices
            Nlstm = 4 if Ah == 'lstm' else 1
            self.addParam("Win", rand_init((Nx, Nh * Ndirs * Nlstm), Ah))
            self.addParam("Wrec",
                          rand_init((Nlayers, Ndirs, Nh, Nh * Nlstm), Ah))
            self.addParam(
                "Wup",
                rand_init((Nlayers - 1, Nh * Ndirs, Nh * Ndirs * Nlstm), Ah))
            self.addParam("Wout", rand_init((Nh * Ndirs, Ny), Ay))
            if Ah != "lstm":
                self.addParam("Bhid", zeros((Nlayers, Nh * Ndirs)))
            else:
                self.addParam(
                    "Bhid",
                    numpy.tile(
                        numpy.hstack([
                            full((Nlayers, Nh), 1.0),
                            zeros((Nlayers, Nh * 3))
                        ]), (1, Ndirs)))
            self.addParam("Bout", zeros(Ny))
            self.addParam("h0", zeros((Nlayers, Ndirs, Nh)))
            if Ah == "lstm":
                self.addParam("c0", zeros((Nlayers, Ndirs, Nh)))

        # Compute total number of parameters
        self.nParams = sum(x.get_value().size for x in self.params)

        # Initialize gradient tensors when using momentum
        if momentum > 0:
            self.dparams = [
                theano.shared(zeros(x.get_value().shape)) for x in self.params
            ]

        # Build computation graph
        input = T.ftensor3()
        mask = T.imatrix()
        mask_int = [(mask % 2).nonzero(), (mask >= 2).nonzero()]
        mask_float = [
            T.cast((mask % 2).dimshuffle((1, 0)).reshape(
                (mask.shape[1], mask.shape[0], 1)), theano.config.floatX),
            T.cast((mask >= 2).dimshuffle((1, 0)).reshape(
                (mask.shape[1], mask.shape[0], 1)), theano.config.floatX)
        ]

        # mask_int = [(mask & 1).nonzero(), (mask & 2).nonzero()]
        # mask_float = [T.cast((mask & 1).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX),
        #               T.cast(((mask & 2) / 2).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX)]

        def step_rnn(x_t, mask, h_tm1, W, h0):
            h_tm1 = T.switch(mask, h0, h_tm1)
            return [ACTIVATION[Ah](x_t + h_tm1.dot(W))]

        def step_lstm(x_t, mask, c_tm1, h_tm1, W, c0, h0):
            c_tm1 = T.switch(mask, c0, c_tm1)
            h_tm1 = T.switch(mask, h0, h_tm1)
            a = x_t + h_tm1.dot(W)
            f_t = T.nnet.sigmoid(a[:, :Nh])
            i_t = T.nnet.sigmoid(a[:, Nh:Nh * 2])
            o_t = T.nnet.sigmoid(a[:, Nh * 2:Nh * 3])
            c_t = T.tanh(a[:, Nh * 3:]) * i_t + c_tm1 * f_t
            h_t = T.tanh(c_t) * o_t
            return [c_t, h_t]

        x = input if frontEnd is None else frontEnd(input)
        for i in range(Nlayers):
            h = (x.dimshuffle((1, 0, 2)).dot(self.Win)
                 if i == 0 else h.dot(self.Wup[i - 1])) + self.Bhid[i]
            rep = lambda x: T.extra_ops.repeat(
                x.reshape((1, -1)), h.shape[1], axis=0)
            if Ah != "lstm":
                h = T.concatenate([
                    theano.scan(
                        fn=step_rnn,
                        sequences=[
                            h[:, :, Nh * d:Nh * (d + 1)], mask_float[d]
                        ],
                        outputs_info=[rep(self.h0[i, d])],
                        non_sequences=[self.Wrec[i, d],
                                       rep(self.h0[i, d])],
                        go_backwards=(d == 1),
                    )[0][::(1 if d == 0 else -1)] for d in range(Ndirs)
                ],
                                  axis=2)
            else:
                h = T.concatenate([
                    theano.scan(
                        fn=step_lstm,
                        sequences=[
                            h[:, :, Nh * 4 * d:Nh * 4 * (d + 1)], mask_float[d]
                        ],
                        outputs_info=[rep(self.c0[i, d]),
                                      rep(self.h0[i, d])],
                        non_sequences=[
                            self.Wrec[i, d],
                            rep(self.c0[i, d]),
                            rep(self.h0[i, d])
                        ],
                        go_backwards=(d == 1),
                    )[0][1][::(1 if d == 0 else -1)] for d in range(Ndirs)
                ],
                                  axis=2)
        h = h.dimshuffle((1, 0, 2))
        if predictPer == "sequence":
            h = T.concatenate([
                h[mask_int[1 - d]][:, Nh * d:Nh * (d + 1)]
                for d in range(Ndirs)
            ],
                              axis=1)
        output = ACTIVATION[Ay](h.dot(self.Wout) + self.Bout)

        # Compute loss function
        if loss is None:
            loss = {
                "linear": "mse",
                "sigmoid": "ce",
                "softmax": "ce_group"
            }[self.Ay]
        if loss == "ctc":
            label = T.imatrix()
            cost = ctc_cost(output, mask, label)
        else:
            if predictPer == "sequence":
                label = T.fmatrix()
                y = output
                t = label
            elif predictPer == "frame":
                label = T.ftensor3()
                indices = (mask >= 0).nonzero()
                y = output[indices]
                t = label[indices]
            cost = T.mean({
                "ce":
                -T.mean(T.log(y) * t + T.log(1 - y) * (1 - t), axis=1),
                "ce_group":
                -T.log((y * t).sum(axis=1)),
                "mse":
                T.mean((y - t)**2, axis=1),
                "hinge":
                T.mean(relu(1 - y * (t * 2 - 1)), axis=1),
                "squared_hinge":
                T.mean(relu(1 - y * (t * 2 - 1))**2, axis=1),
            }[loss])

        # Add regularization
        cost += sum(abs(x).sum() for x in self.params) / self.nParams * L1reg
        cost += sum(T.sqr(x).sum() for x in self.params) / self.nParams * L2reg

        # Compute updates for network parameters
        updates = []
        lrate = T.fscalar()
        clip = T.fscalar()
        grad = T.grad(cost, self.params)
        grad_clipped = [T.maximum(T.minimum(g, clip), -clip) for g in grad]
        if momentum > 0:
            for w, d, g in zip(self.params, self.dparams, grad_clipped):
                updates.append(
                    (w,
                     w + momentum * momentum * d - (1 + momentum) * lrate * g))
                updates.append((d, momentum * d - lrate * g))
        else:
            for w, g in zip(self.params, grad_clipped):
                updates.append((w, w - lrate * g))

        # Create functions to be called from outside
        self.train = theano.function(
            inputs=[input, mask, label, lrate, clip],
            outputs=cost,
            updates=updates,
        )

        self.predict = theano.function(inputs=[input, mask], outputs=output)
コード例 #52
0
def get_options(batchsize, nepochs, plotevery, learningrate, normalizegrads,
                clipgrads, enabledebug, optimizer, yzeromean, yunitvar,
                datadir, outputdir):

    global batch_size
    batch_size = batchsize
    global epochs
    epochs = nepochs

    print("Changing pwd to {}".format(outputdir))
    os.chdir(outputdir)

    mydir = os.path.join(os.getcwd(),
                         datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(mydir)
    os.chdir(mydir)

    app_name = sys.argv[0]
    global logger
    logger = get_logger(app_name=app_name, logfolder=mydir)

    # Load dataset
    X, Y = load_data(datadir + os.sep + "coulomb.txt",
                     datadir + os.sep + "energies.txt")
    Y, Y_mean, Y_std, Y_binarized = preprocess_targets(Y,
                                                       zero_mean=yzeromean,
                                                       unit_var=yunitvar)
    [X_train, X_test], [Y_train,
                        Y_test], splits = get_data_splits(X,
                                                          Y,
                                                          splits=[90, 10])
    [Y_binarized_train, Y_binarized_test] = np.split(Y_binarized, splits)[:-1]

    np.savez('Y_vals.npz',
             Y_train=Y_train,
             Y_test=Y_test,
             Y_binarized_test=Y_binarized_test,
             Y_binarized_train=Y_binarized_train,
             Y_mean=Y_mean,
             Y_std=Y_std)
    np.savez('X_vals.npz', X_train=X_train, X_test=X_test)

    dataDim = X.shape[1:]
    outputDim = Y.shape[1]
    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    # # making the datapoints shared variables
    # X_train           = make_shared(X_train)
    # X_test            = make_shared(X_test)
    # Y_train           = make_shared(Y_train)
    # Y_test            = make_shared(Y_test)
    # Y_binarized_train = make_shared(Y_binarized_train)
    # Y_binarized_test  = make_shared(Y_binarized_test)

    # TODO !!!!I am here
    # print("Train set size {}, Train set (labelled) size {}, Test set size {}," +
    #         "Validation set size {}".format(
    #             train_set[0].size,train_set_labeled[0].size,
    #             test_set[0].size, valid_set[0].size))

    eigen_value_count = 20

    # Defining the model now.
    th_coulomb = T.ftensor4()
    th_energies = T.fmatrix()
    th_energies_bin = T.fmatrix()
    th_learningrate = T.fscalar()

    l_input = InputLayer(shape=(None, 1, 29, 29),
                         input_var=th_coulomb,
                         name="Input")
    l_input = FlattenLayer(l_input, name="FlattenInput")
    l_pseudo_bin = DenseLayer(l_input,
                              num_units=2000,
                              nonlinearity=sigmoid,
                              name="PseudoBinarized")

    l_h1 = []
    l_h2 = []
    l_realOut = []
    l_binOut = []

    for branch_num in range(eigen_value_count):
        l_h1.append(
            DenseLayer(l_pseudo_bin,
                       num_units=1000,
                       nonlinearity=rectify,
                       name="hidden_1_%d" % branch_num))
        l_h2.append(
            DenseLayer(l_h1[-1],
                       num_units=400,
                       nonlinearity=rectify,
                       name="hidden_2_%d" % branch_num))
        l_realOut.append(
            DenseLayer(l_h2[-1],
                       num_units=1,
                       nonlinearity=linear,
                       name="realOut_%d" % branch_num))
        l_binOut.append(
            DenseLayer(l_h2[-1],
                       num_units=1,
                       nonlinearity=sigmoid,
                       name="binOut"))

    l_realOut_cat = ConcatLayer(l_realOut, name="real_concat")
    l_binOut_cat = ConcatLayer(l_binOut, name="bin_concat")
    l_output = ElemwiseMergeLayer([l_binOut_cat, l_realOut_cat],
                                  T.mul,
                                  name="final_output")

    energy_output = get_output(l_output)
    binary_output = get_output(l_binOut_cat)

    loss_real = T.mean(abs(energy_output - th_energies))
    loss_binary = T.mean(binary_crossentropy(binary_output, th_energies_bin))
    loss = loss_real + loss_binary

    params = get_all_params(l_output)
    grad = T.grad(loss, params)

    if normalizegrads is not None:
        grad = lasagne.updates.total_norm_constraint(grad,
                                                     max_norm=normalizegrads)

    if clipgrads is not None:
        grad = [T.clip(g, -clipgrads, clipgrads) for g in grad]

    optimization_algo = get_optimizer[optimizer]
    # updates = optimization_algo(grad, params, learning_rate=learningrate)
    updates = optimization_algo(grad, params, learning_rate=th_learningrate)

    train_fn = theano.function(
        [th_coulomb, th_energies, th_energies_bin, th_learningrate],
        [loss, energy_output],
        updates=updates,
        allow_input_downcast=True)
    get_grad = theano.function([th_coulomb, th_energies, th_energies_bin],
                               grad)
    # get_updates = theano.function([th_data, th_labl], [updates.values()])
    # val_fn    = theano.function([th_coulomb, th_energies, th_energies_bin], [loss, energy_output], updates=updates, allow_input_downcast=True)
    val_fn = theano.function([th_coulomb, th_energies, th_energies_bin],
                             [loss, energy_output],
                             allow_input_downcast=True)

    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    with open(os.path.join(mydir, "data.txt"), "w") as f:
        script = app_name
        for elem in [
                "meta_seed", "dataDim", "batch_size", "epochs", "learningrate",
                "normalizegrads", "clipgrads", "enabledebug", "optimizer",
                "plotevery", "script"
        ]:
            f.write("{} : {}\n".format(elem, eval(elem)))

    train_loss_lowest = np.inf
    test_loss_lowest = np.inf

    for epoch in range(epochs):
        batch_start = 0
        train_loss = []

        if learningrate == None:
            if epoch < 50:
                learning_rate = 0.0001
            elif epoch < 100:
                learning_rate = 0.00001
            elif epoch < 500:
                learning_rate = 0.000001
            else:
                learning_rate = 0.0000001
        else:
            learning_rate = learningrate

        indices = np.random.permutation(datapoints)
        minibatches = int(datapoints / batch_size)
        for minibatch in range(minibatches):
            train_idxs = indices[batch_start:batch_start + batch_size]
            X_train_batch = X_train[train_idxs, :]
            Yr_train_batch = Y_train[train_idxs, :]
            Yb_train_batch = Y_binarized_train[train_idxs, :]

            train_output = train_fn(X_train_batch, Yr_train_batch,
                                    Yb_train_batch, learning_rate)
            batch_start = batch_start + batch_size

            train_loss.append(train_output[0])

            if enabledebug:
                # Debugging information
                batchIdx = epoch * minibatches + minibatch
                fn = 'params_{:>010d}'.format(batchIdx)  # saving params
                param_values = get_all_param_values(l_output)
                param_norm = np.linalg.norm(
                    np.hstack([param.flatten() for param in param_values]))
                gradients = get_grad(X_train_batch, Yr_train_batch,
                                     Yb_train_batch)
                gradient_norm = np.linalg.norm(
                    np.hstack([gradient.flatten() for gradient in gradients]))
                logger.debug(
                    "Epoch : {:0>4}  minibatch {:0>3} Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} (Values from Prev. Minibatch) Train loss {}"
                    .format(epoch, minibatch, gradient_norm, param_norm,
                            gradient_norm / param_norm, train_loss[-1]))
                param_names = [
                    param.__str__() for param in get_all_params(l_output)
                ]
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(batchIdx),
                         Y_train_pred=train_output[1])
                if train_loss[-1] < train_loss_lowest:
                    train_loss_lowest = train_loss[-1]
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                    logger.debug(
                        "Found the best training prediction (Y_train_pred_best) at %d epoch %d minibatch"
                        % (epoch, minibatch))
                if np.isnan(gradient_norm):
                    pdb.set_trace()

        if (epoch % plotevery == 0):
            logger.info("Epoch {} of {}".format(epoch, epochs))

            fn = 'params_{:>03d}'.format(epoch)  # saving params
            param_values = get_all_param_values(l_output)
            param_norm = np.linalg.norm(
                np.hstack([param.flatten() for param in param_values]))
            param_names = [
                param.__str__() for param in get_all_params(l_output)
            ]
            if not enabledebug:
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(epoch),
                         Y_train_pred=train_output[1])
                mean_train_loss = np.mean(train_loss)
                if mean_train_loss < train_loss_lowest:
                    train_loss_lowest = mean_train_loss
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                    logger.info(
                        "Found the best training prediction (Y_train_pred_best) at %d epoch"
                        % epoch)

            gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
            gradient_norm = np.linalg.norm(
                np.hstack([gradient.flatten() for gradient in gradients]))
            logger.info(
                "  Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} "
                .format(gradient_norm, param_norm, gradient_norm / param_norm))
            logger.info("  Train loss {:>0.4}".format(np.mean(train_loss)))

            test_loss, test_prediction = val_fn(X_test, Y_test,
                                                Y_binarized_test)
            np.savez('Y_test_pred_{}.npz'.format(epoch),
                     Y_test_pred=test_prediction)
            logger.info("  Test loss {}".format(test_loss))
            if test_loss < test_loss_lowest:
                test_loss_lowest = test_loss
                np.savez('Y_test_pred_best.npz', Y_test_pred=test_prediction)
                logger.info(
                    "Found the best test prediction (Y_test_pred_best) at %d epoch"
                    % epoch)
コード例 #53
0
    def __init__(self, We_initial, char_embedd_table_initial, params):

        We = theano.shared(We_initial)
        We_inf = theano.shared(We_initial)

        embsize = We_initial.shape[1]
        hidden = params.hidden

        hidden_inf = params.hidden_inf

        input_var = T.imatrix(name='inputs')
        target_var = T.imatrix(name='targets')
        mask_var = T.fmatrix(name='masks')
        mask_var1 = T.fmatrix(name='masks1')
        length = T.iscalar()
        t_t = T.fscalar()

        Wyy0 = np.random.uniform(
            -0.02, 0.02,
            (params.num_labels + 1, params.num_labels)).astype('float32')
        Wyy = theano.shared(Wyy0)

        char_input_var = T.itensor3()

        char_embedd_dim = params.char_embedd_dim
        char_dic_size = len(params.char_dic)
        char_embedd_table = theano.shared(char_embedd_table_initial)
        char_embedd_table_inf = theano.shared(char_embedd_table_initial)

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

        if params.emb == 1:
            l_emb_word = lasagne.layers.EmbeddingLayer(
                l_in_word,
                input_size=We_initial.shape[0],
                output_size=embsize,
                W=We)
        else:
            l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

        layer_char_input = lasagne.layers.InputLayer(shape=(None, None,
                                                            Max_Char_Length),
                                                     input_var=char_input_var,
                                                     name='char-input')

        layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(
            layer_char,
            input_size=char_dic_size,
            output_size=char_embedd_dim,
            W=char_embedd_table,
            name='char_embedding')

        layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding,
                                                    pattern=(0, 2, 1))

        # first get some necessary dimensions or parameters
        conv_window = 3
        num_filters = params.num_filters

        # construct convolution layer
        cnn_layer = lasagne.layers.Conv1DLayer(
            layer_char,
            num_filters=num_filters,
            filter_size=conv_window,
            pad='full',
            nonlinearity=lasagne.nonlinearities.tanh,
            name='cnn')
        # infer the pool size for pooling (pool size should go through all time step of cnn)
        _, _, pool_size = cnn_layer.output_shape

        # construct max pool layer
        pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer,
                                                   pool_size=pool_size)
        # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
        output_cnn_layer = lasagne.layers.reshape(pool_layer,
                                                  (-1, length, [1]))

        # finally, concatenate the two incoming layers together.
        l_emb_word = lasagne.layers.concat([output_cnn_layer, l_emb_word],
                                           axis=2)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word,
                                                backwards=True)

        concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2)

        l_reshape_concat = lasagne.layers.ReshapeLayer(concat,
                                                       (-1, 2 * hidden))

        l_local = lasagne.layers.DenseLayer(
            l_reshape_concat,
            num_units=params.num_labels,
            nonlinearity=lasagne.nonlinearities.linear)

        network_params = lasagne.layers.get_all_params(l_local, trainable=True)
        network_params.append(Wyy)
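        # Wyy (the CRF transition matrix) is appended so that the pickle of
        # pretrained weights loaded below also restores its value.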

        print len(network_params)
        f = open(
            'ccctag_BiLSTM_CNN_CRF_num_filters_30_dropout_1_LearningRate_0.01_0.0_400_emb_1_tagversoin_2.pickle',
            'r')
        data = pickle.load(f)
        f.close()

        for idx, p in enumerate(network_params):

            p.set_value(data[idx])

        l_in_word_a = lasagne.layers.InputLayer((None, None))
        l_mask_word_a = lasagne.layers.InputLayer(shape=(None, None))

        l_emb_word_a = lasagne.layers.EmbeddingLayer(
            l_in_word_a,
            input_size=We_initial.shape[0],
            output_size=embsize,
            W=We_inf,
            name='inf_word_embedding')

        layer_char_input_a = lasagne.layers.InputLayer(
            shape=(None, None, Max_Char_Length),
            input_var=char_input_var,
            name='char-input')

        layer_char_a = lasagne.layers.reshape(layer_char_input_a, (-1, [2]))
        layer_char_embedding_a = lasagne.layers.EmbeddingLayer(
            layer_char_a,
            input_size=char_dic_size,
            output_size=char_embedd_dim,
            W=char_embedd_table_inf,
            name='char_embedding')

        layer_char_a = lasagne.layers.DimshuffleLayer(layer_char_embedding_a,
                                                      pattern=(0, 2, 1))

        # first get some necessary dimensions or parameters
        conv_window = 3
        num_filters = params.num_filters
        #_, sent_length, _ = incoming2.output_shape

        # dropout before cnn?
        if params.dropout:
            layer_char_a = lasagne.layers.DropoutLayer(layer_char_a, p=0.5)

        # construct convolution layer
        cnn_layer_a = lasagne.layers.Conv1DLayer(
            layer_char_a,
            num_filters=num_filters,
            filter_size=conv_window,
            pad='full',
            nonlinearity=lasagne.nonlinearities.tanh,
            name='cnn')
        # infer the pool size for pooling (pool size should go through all time step of cnn)
        #_, _, pool_size = cnn_layer.output_shape

        # construct max pool layer
        pool_layer_a = lasagne.layers.MaxPool1DLayer(cnn_layer_a,
                                                     pool_size=pool_size)
        # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
        output_cnn_layer_a = lasagne.layers.reshape(pool_layer_a,
                                                    (-1, length, [1]))

        # finally, concatenate the two incoming layers together.
        l_emb_word_a = lasagne.layers.concat(
            [output_cnn_layer_a, l_emb_word_a], axis=2)

        if params.dropout:
            l_emb_word_a = lasagne.layers.DropoutLayer(l_emb_word_a, p=0.5)

        l_lstm_wordf_a = lasagne.layers.LSTMLayer(l_emb_word_a,
                                                  hidden_inf,
                                                  mask_input=l_mask_word_a)
        l_lstm_wordb_a = lasagne.layers.LSTMLayer(l_emb_word_a,
                                                  hidden_inf,
                                                  mask_input=l_mask_word_a,
                                                  backwards=True)

        l_reshapef_a = lasagne.layers.ReshapeLayer(l_lstm_wordf_a,
                                                   (-1, hidden_inf))
        l_reshapeb_a = lasagne.layers.ReshapeLayer(l_lstm_wordb_a,
                                                   (-1, hidden_inf))
        concat2_a = lasagne.layers.ConcatLayer([l_reshapef_a, l_reshapeb_a])

        if params.dropout:
            concat2_a = lasagne.layers.DropoutLayer(concat2_a, p=0.5)

        l_local_a = lasagne.layers.DenseLayer(
            concat2_a,
            num_units=params.num_labels,
            nonlinearity=lasagne.nonlinearities.softmax)

        a_params = lasagne.layers.get_all_params(l_local_a, trainable=True)
        self.a_params = a_params

        def inner_function(targets_one_step, mask_one_step, prev_label,
                           tg_energy):
            """
                        :param targets_one_step: [batch_size, t]
                        :param prev_label: [batch_size, t]
                        :param tg_energy: [batch_size]
                        :return:
                        """
            new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
            new_ta_energy_t = tg_energy + T.sum(
                new_ta_energy * targets_one_step, axis=1)
            tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy)

            return [targets_one_step, tg_energy_t]

        local_energy = lasagne.layers.get_output(
            l_local, {
                l_in_word: input_var,
                l_mask_word: mask_var,
                layer_char_input: char_input_var
            })
        local_energy = local_energy.reshape((-1, length, params.num_labels))
        local_energy = local_energy * mask_var[:, :, None]

        #####################
        # for the end symbol of a sequence
        ####################

        end_term = Wyy[:-1, -1]
        local_energy = local_energy + end_term.dimshuffle(
            'x', 'x', 0) * mask_var1[:, :, None]

        predy0 = lasagne.layers.get_output(
            l_local_a, {
                l_in_word_a: input_var,
                l_mask_word_a: mask_var,
                layer_char_input_a: char_input_var
            })
        predy_inf = lasagne.layers.get_output(
            l_local_a, {
                l_in_word_a: input_var,
                l_mask_word_a: mask_var,
                layer_char_input_a: char_input_var
            },
            deterministic=True)
        predy_inf = predy_inf.reshape((-1, length, params.num_labels))

        predy_in = T.argmax(predy0, axis=1)
        A = T.extra_ops.to_one_hot(predy_in, params.num_labels)
        A = A.reshape((-1, length, params.num_labels))

        predy = predy0.reshape((-1, length, params.num_labels))
        predy = predy * mask_var[:, :, None]
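        # The scan below accumulates the CRF transition energy of the soft
        # prediction predy over time; cost11 then adds the per-token emission
        # scores from local_energy, and the inference network is trained to make
        # this energy large (cost = mean(-cost11)).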

        targets_shuffled = predy.dimshuffle(1, 0, 2)
        target_time0 = targets_shuffled[0]

        masks_shuffled = mask_var.dimshuffle(1, 0)

        initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

        initials = [target_time0, initial_energy0]
        [_, target_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials,
            sequences=[targets_shuffled[1:], masks_shuffled[1:]])
        cost11 = target_energies[-1] + T.sum(
            T.sum(local_energy * predy, axis=2) * mask_var, axis=1)

        cost = T.mean(-cost11)

        ### compute the energy for inference step
        predy_inf = predy_inf * mask_var[:, :, None]

        targets_inf_shuffled = predy_inf.dimshuffle(1, 0, 2)
        target_inf_time0 = targets_inf_shuffled[0]

        initial_inf_energy0 = T.dot(target_inf_time0, Wyy[-1, :-1])

        initials_inf = [target_inf_time0, initial_inf_energy0]
        [_, target_inf_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials_inf,
            sequences=[targets_inf_shuffled[1:], masks_shuffled[1:]])
        cost_inf = target_inf_energies[-1] + T.sum(
            T.sum(local_energy * predy_inf, axis=2) * mask_var, axis=1)

        #from adam import adam
        #updates_a = adam(cost, a_params, params.eta)

        updates_a = lasagne.updates.sgd(cost, a_params, params.eta)
        updates_a = lasagne.updates.apply_momentum(updates_a,
                                                   a_params,
                                                   momentum=0.9)
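        # Only the inference-network parameters (a_params) are updated; the
        # pretrained BiLSTM-CNN-CRF weights loaded from the pickle stay fixed.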

        self.train_fn = theano.function(
            [input_var, char_input_var, mask_var, mask_var1, length],
            cost,
            updates=updates_a,
            on_unused_input='ignore')

        prediction = T.argmax(predy_inf, axis=2)
        corr = T.eq(prediction, target_var)
        corr_train = (corr * mask_var).sum(dtype=theano.config.floatX)
        num_tokens = mask_var.sum(dtype=theano.config.floatX)

        self.eval_fn = theano.function([
            input_var, char_input_var, target_var, mask_var, mask_var1, length
        ], [corr_train, num_tokens, prediction, -cost_inf],
                                       on_unused_input='ignore')
コード例 #54
def train():
    global logfile_path
    global trainfile
    global train0file
    global test1file

    batch_size = int(256)
    filter_sizes = [1,2,3]
    num_filters = 1000
    words_num_dim = 50
    embedding_size = 300
    learning_rate = 0.001
    n_epochs = 9050
    validation_freq = 50
    keep_prob_value = 0.7
    margin_size = 0.05

    logfile_path = os.path.join(logfile_path, 'CNN-' + GetNowTime() + '-' \
                   + 'batch_size-' + str(batch_size) + '-' \
                   + 'num_filters-' + str(num_filters) + '-' \
                   + 'embedding_size-' + str(embedding_size) + '-' \
                   + 'n_epochs-' + str(n_epochs) + '-' \
                   + 'freq-' + str(validation_freq) + '-' \
                   + '-log.txt')

    log("New start ...", logfile_path)
    log(str(time.asctime(time.localtime(time.time()))), logfile_path)
    log("batch_size = " + str(batch_size), logfile_path)
    log("filter_sizes = " + str(filter_sizes), logfile_path)
    log("num_filters = " + str(num_filters), logfile_path)
    log("embedding_size = " + str(embedding_size), logfile_path)
    log("learning_rate = " + str(learning_rate), logfile_path)
    log("n_epochs = " + str(n_epochs), logfile_path)
    log("margin_size = " + str(margin_size), logfile_path)
    log("words_num_dim = " + str(words_num_dim), logfile_path)
    log("validation_freq = " + str(validation_freq), logfile_path)
    log("keep_prob_value = " + str(keep_prob_value), logfile_path)
    log("train_1_file = " + str(trainfile.split('/')[-1]), logfile_path)
    log("train_0_file = " + str(train0file.split('/')[-1]), logfile_path)
    log("test_file = " + str(test1file.split('/')[-1]), logfile_path)
    log("vector_file = " + str(vectorsfile.split('/')[-1]), logfile_path)

    vocab = build_vocab()
    #word_embeddings is list, shape = numOfWords*100
    word_embeddings = load_word_embeddings(vocab, embedding_size)
    trainList = load_train_list()
    testList, qa_raw_testList = load_test_list()
    train0Dict = load_train0_dict()
    #train_x1.shape = 256*100
    #train_x1, train_x2, train_x3 = load_train_data(trainList, vocab, batch_size, words_num_dim)
    train_x1, train_x2, train_x3 = load_train_data_from_2files(train0Dict, trainList, vocab, batch_size, words_num_dim)

    x1, x2, x3 = T.matrix('x1'), T.matrix('x2'), T.matrix('x3')
    keep_prob = T.fscalar('keep_prob')
    model = QACnn(
        input1=x1, input2=x2, input3=x3, keep_prob=keep_prob,
        word_embeddings=word_embeddings,
        batch_size=batch_size,
        sequence_len=train_x1.shape[1],
        embedding_size=embedding_size,
        filter_sizes=filter_sizes,
        num_filters=num_filters,
        margin_size=margin_size)
    dbg_x1 = model.dbg_x1
    dbg_outputs_1 = model.dbg_outputs_1

    cost, cos12, cos13 = model.cost, model.cos12, model.cos13
    params, accuracy = model.params, model.accuracy
    grads = T.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    p1, p2, p3 = T.matrix('p1'), T.matrix('p2'), T.matrix('p3')
    prob = T.fscalar('prob')
    train_model = theano.function(
        [p1, p2, p3, prob],
        [cost, accuracy, dbg_x1, dbg_outputs_1],
        updates=updates,
        givens={
            x1: p1, x2: p2, x3: p3, keep_prob: prob
        }
    )

    v1, v2, v3 = T.matrix('v1'), T.matrix('v2'), T.matrix('v3')
    validate_model = theano.function(
        inputs=[v1, v2, v3, prob],
        outputs=[cos12, cos13],
        #updates=updates,
        givens={
            x1: v1, x2: v2, x3: v3, keep_prob: prob
        }
    )

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #train_x1, train_x2, train_x3 = load_train_data(trainList, vocab, batch_size)
        train_x1, train_x2, train_x3 = load_train_data_from_2files(train0Dict, trainList, vocab, batch_size, words_num_dim)
        #print train_x3.shape
        cost_ij, acc, dbg_x1, dbg_outputs_1 = train_model(train_x1, train_x2, train_x3, keep_prob_value)
        log('load data done ...... epoch:' + str(epoch) + ' cost:' + str(cost_ij) + ', acc:' + str(acc), logfile_path)
        if epoch % validation_freq == 0:
            log('Evaluation ......', logfile_path)
            validation(validate_model, testList, vocab, batch_size, words_num_dim, qa_raw_testList)
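# Note: the QACnn class used above is not included in this example. As a rough,
# minimal sketch (assuming cos12/cos13 are the cosine similarities between the
# question and the positive/negative answer, as the attribute names suggest, and
# margin plays the role of margin_size), its hinge loss and accuracy could be
# defined along these lines:
import theano
import theano.tensor as T

cos12 = T.fvector('cos12')  # cos(question, positive answer), one value per triple
cos13 = T.fvector('cos13')  # cos(question, negative answer)
margin = 0.05

# hinge loss per triple: push cos12 above cos13 by at least the margin
cost = T.mean(T.maximum(0., margin - cos12 + cos13))
# a triple counts as correct when the positive answer is ranked higher
accuracy = T.mean(T.cast(cos12 > cos13, theano.config.floatX))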
コード例 #55
def build_conv_nnet2_classif(use_gpu,
                             isize,
                             ksize,
                             n_batch,
                             downsample_ops=True,
                             verbose=0,
                             version=-1,
                             check_isfinite=True):
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    isize1 = isize
    isize2 = isize
    if isinstance(isize, (tuple, )):
        isize1 = isize[0]
        isize2 = isize[1]

    shape_img = (n_batch, 1, isize1, isize2)

    n_kern = 20  # 6 were used in LeNet5
    shape_kern = (n_kern, 1, ksize, ksize)

    n_kern1 = 30  # 16 were used in LeNet5
    shape_kern1 = (n_kern1, n_kern, ksize, ksize)

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        (isize1, isize2), (ksize, ksize), 'valid')
    logical_hid_shape1 = tcn.blas.GpuConv.logical_output_shape_2d(
        (logical_hid_shape[0] // 2, logical_hid_shape[1] // 2), (ksize, ksize),
        'valid')
    n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1]
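    # A 'valid' convolution shrinks each spatial dimension by (ksize - 1); the
    # second convolution runs on the 2x2-downsampled maps, so n_hid is the
    # flattened size of its output.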
    n_out = 10

    w0 = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w0')
    b0 = shared_fn(my_zeros((n_kern, )), 'b0')
    w1 = shared_fn(0.01 * (my_rand(*shape_kern1) - 0.5), 'w1')
    b1 = shared_fn(my_zeros((n_kern1, )), 'b1')
    v = shared_fn(0.01 * my_randn(n_hid, n_out), 'v')
    c = shared_fn(my_zeros(n_out), 'c')

    # print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
    # print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
    # print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:],
                          shape_kern[2:],
                          n_kern,
                          n_batch,
                          1,
                          1,
                          verbose=verbose,
                          version=version)
    conv_op1 = conv.ConvOp(
        (n_kern, logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
        shape_kern1[2:],
        n_kern1,
        n_batch,
        1,
        1,
        verbose=verbose,
        version=version)

    ds_op = downsample.DownsampleFactorMax((2, 2), ignore_border=False)
    if downsample_ops:
        hid = tensor.tanh(ds_op(conv_op(x, w0) + b0.dimshuffle((0, 'x', 'x'))))
    else:
        hid = tensor.tanh((conv_op(x, w0) + b0.dimshuffle(
            (0, 'x', 'x')))[:, :, ::2, ::2])
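    # When downsample_ops is False, the ::2 slicing above is plain subsampling
    # (keep every other row/column), a cheaper stand-in for 2x2 max pooling.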
    hid1 = tensor.tanh(conv_op1(hid, w1) + b1.dimshuffle((0, 'x', 'x')))
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.nnet.softmax(tensor.dot(hid_flat, v) + c)
    loss = tensor.sum(
        tensor.nnet.crossentropy_categorical_1hot(out, tensor.argmax(
            y, axis=1)) * lr)
    # print 'loss type', loss.type

    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu, check_isfinite)

    # print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss],
                  mode=mode,
                  updates=[(p, p - g) for p, g in zip(params, gparams)])

    if verbose:
        theano.printing.debugprint(train)
    if use_gpu:
        # Check that GpuConv is used
        topo = train.maker.fgraph.toposort()
        conv_ops = (tcn.blas.GpuConv, tcn.dnn.GpuDnnConv,
                    tcn.dnn.GpuDnnConvGradI, tcn.dnn.GpuDnnConvGradW,
                    tcn.blas.BaseGpuCorrMM)

        assert len([n for n in topo if isinstance(n.op, conv_ops)]) > 0

    shape_target = (n_batch, n_out)
    return train, params, shape_img, shape_target, mode
コード例 #56
def train():
    batch_size = int(256)
    filter_sizes = [2, 3, 5]
    num_filters = 500
    embedding_size = 100
    learning_rate = 0.001
    n_epochs = 2000000
    validation_freq = 1000
    keep_prob_value = 0.25

    vocab = build_vocab()
    word_embeddings = load_word_embeddings(vocab, embedding_size)
    trainList = load_train_list()
    testList = load_test_list()
    train_x1, train_x2, train_x3 = load_data(trainList, vocab, batch_size)

    x1, x2, x3 = T.matrix('x1'), T.matrix('x2'), T.matrix('x3')
    keep_prob = T.fscalar('keep_prob')
    model = QACnn(input1=x1,
                  input2=x2,
                  input3=x3,
                  keep_prob=keep_prob,
                  word_embeddings=word_embeddings,
                  batch_size=batch_size,
                  sequence_len=train_x1.shape[1],
                  embedding_size=embedding_size,
                  filter_sizes=filter_sizes,
                  num_filters=num_filters)
    dbg_x1 = model.dbg_x1
    dbg_outputs_1 = model.dbg_outputs_1

    cost, cos12, cos13 = model.cost, model.cos12, model.cos13
    print 'cost'
    print cost
    params, accuracy = model.params, model.accuracy
    grads = T.grad(cost, params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    p1, p2, p3 = T.matrix('p1'), T.matrix('p2'), T.matrix('p3')
    prob = T.fscalar('prob')
    train_model = theano.function([p1, p2, p3, prob],
                                  [cost, accuracy, dbg_x1, dbg_outputs_1],
                                  updates=updates,
                                  givens={
                                      x1: p1,
                                      x2: p2,
                                      x3: p3,
                                      keep_prob: prob
                                  })

    v1, v2, v3 = T.matrix('v1'), T.matrix('v2'), T.matrix('v3')
    validate_model = theano.function(
        inputs=[v1, v2, v3, prob],
        outputs=[cos12, cos13],
        #updates=updates,
        givens={
            x1: v1,
            x2: v2,
            x3: v3,
            keep_prob: prob
        })

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        train_x1, train_x2, train_x3 = load_data(trainList, vocab, batch_size)
        #print train_x3.shape
        cost_ij, acc, dbg_x1, dbg_outputs_1 = train_model(
            train_x1, train_x2, train_x3, keep_prob_value)
        print 'load data done ...... epoch:' + str(epoch) + ' cost:' + str(
            cost_ij) + ', acc:' + str(acc)
        if epoch % validation_freq == 0:
            print 'Evaluation ......'
            validation(validate_model, testList, vocab, batch_size)
コード例 #57
    def __init__(self, We_initial, char_embedd_table_initial, params):
        self.textfile = open(params.outfile, 'w')
        We = theano.shared(We_initial)

        # initial embedding for the InfNet
        We_inf = theano.shared(We_initial)
        embsize = We_initial.shape[1]
        hidden = params.hidden
        self.en_hidden_size = params.hidden_inf
        self.num_labels = 17
        self.de_hidden_size = params.de_hidden_size

        char_embedd_dim = params.char_embedd_dim
        char_dic_size = len(params.char_dic)
        char_embedd_table = theano.shared(char_embedd_table_initial)
        char_embedd_table_inf = theano.shared(char_embedd_table_initial)

        input_var = T.imatrix(name='inputs')
        target_var = T.imatrix(name='targets')
        target_var_in = T.imatrix(name='targets')
        mask_var = T.fmatrix(name='masks')
        mask_var1 = T.fmatrix(name='masks1')
        char_input_var = T.itensor3(name='char-inputs')

        length = T.iscalar()
        length0 = T.iscalar()
        t_t = T.fscalar()
        t_t0 = T.fscalar()

        use_dropout = T.fscalar()
        use_dropout0 = T.fscalar()

        Wyy0 = np.random.uniform(
            -0.02, 0.02,
            (self.num_labels + 1, self.num_labels + 1)).astype('float32')
        Wyy = theano.shared(Wyy0)

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

        if params.emb == 1:
            l_emb_word = lasagne.layers.EmbeddingLayer(
                l_in_word,
                input_size=We_initial.shape[0],
                output_size=embsize,
                W=We,
                name='word_embedding')
        else:
            l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

        layer_char_input = lasagne.layers.InputLayer(shape=(None, None,
                                                            Max_Char_Length),
                                                     input_var=char_input_var,
                                                     name='char-input')

        layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(
            layer_char,
            input_size=char_dic_size,
            output_size=char_embedd_dim,
            W=char_embedd_table,
            name='char_embedding')

        layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding,
                                                    pattern=(0, 2, 1))

        # first get some necessary dimensions or parameters
        conv_window = 3
        num_filters = params.num_filters

        # construct convolution layer
        cnn_layer = lasagne.layers.Conv1DLayer(
            layer_char,
            num_filters=num_filters,
            filter_size=conv_window,
            pad='full',
            nonlinearity=lasagne.nonlinearities.tanh,
            name='cnn')
        # infer the pool size for pooling (pool size should go through all time step of cnn)
        _, _, pool_size = cnn_layer.output_shape

        # construct max pool layer
        pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer,
                                                   pool_size=pool_size)
        # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
        output_cnn_layer = lasagne.layers.reshape(pool_layer,
                                                  (-1, length, [1]))

        # finally, concatenate the two incoming layers together.
        incoming = lasagne.layers.concat([output_cnn_layer, l_emb_word],
                                         axis=2)

        l_lstm_wordf = lasagne.layers.LSTMLayer(incoming,
                                                hidden,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(incoming,
                                                hidden,
                                                mask_input=l_mask_word,
                                                backwards=True)

        concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2)

        l_reshape_concat = lasagne.layers.ReshapeLayer(concat,
                                                       (-1, 2 * hidden))

        l_local = lasagne.layers.DenseLayer(
            l_reshape_concat,
            num_units=self.num_labels + 1,
            nonlinearity=lasagne.nonlinearities.linear)

        network_params = lasagne.layers.get_all_params(l_local, trainable=True)
        network_params.append(Wyy)

        print len(network_params)
        f = open(
            'NER_BiLSTM_CNN_CRF_.Batchsize_10_dropout_1_LearningRate_0.005_0.0_50_hidden_200.pickle',
            'r')
        data = pickle.load(f)
        f.close()

        for idx, p in enumerate(network_params):

            p.set_value(data[idx])

        self.params = []
        self.hos = []
        self.Cos = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []
        self.lstm_layers_num = 1

        ei, di, dt = T.imatrices(3)  #place holders
        decoderInputs0, em, em1, dm, tf, di0 = T.fmatrices(6)
        ci = T.itensor3()

        #### the last entry is for the start symbol
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        self.linear = theano.shared(
            name="Linear",
            value=init_xavier_uniform(
                self.de_hidden_size + 2 * self.en_hidden_size,
                self.num_labels),
            borrow=True)
        self.linear_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.num_labels, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)
        #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*hidden, self.de_hidden_size), borrow = True)

        #self.hidden_bias = theano.shared(
        #        name="Hidden to Bias",
        #        value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) ,
        #        borrow=True
        #        )

        input_var_shuffle = input_var.dimshuffle(1, 0)
        mask_var_shuffle = mask_var.dimshuffle(1, 0)
        target_var_in_shuffle = target_var_in.dimshuffle(1, 0)
        target_var_shuffle = target_var.dimshuffle(1, 0)

        self.params += [
            We_inf, self.linear, self.de_lookuptable, self.linear_bias
        ]

        ######[batch, sent_length, embsize]
        state_below = We_inf[input_var_shuffle.flatten()].reshape(
            (input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize))

        ###### character word embedding
        layer_char_input_inf = lasagne.layers.InputLayer(
            shape=(None, None, Max_Char_Length),
            input_var=char_input_var,
            name='char-input')
        layer_char_inf = lasagne.layers.reshape(layer_char_input_inf,
                                                (-1, [2]))
        layer_char_embedding_inf = lasagne.layers.EmbeddingLayer(
            layer_char_inf,
            input_size=char_dic_size,
            output_size=char_embedd_dim,
            W=char_embedd_table_inf,
            name='char_embedding_inf')

        layer_char_inf = lasagne.layers.DimshuffleLayer(
            layer_char_embedding_inf, pattern=(0, 2, 1))
        #layer_char_inf = lasagne.layers.DropoutLayer(layer_char_inf, p=0.5)

        cnn_layer_inf = lasagne.layers.Conv1DLayer(
            layer_char_inf,
            num_filters=num_filters,
            filter_size=conv_window,
            pad='full',
            nonlinearity=lasagne.nonlinearities.tanh,
            name='cnn_inf')

        pool_layer_inf = lasagne.layers.MaxPool1DLayer(cnn_layer_inf,
                                                       pool_size=pool_size)
        output_cnn_layer_inf = lasagne.layers.reshape(pool_layer_inf,
                                                      (-1, length, [1]))
        char_params = lasagne.layers.get_all_params(output_cnn_layer_inf,
                                                    trainable=True)
        self.params += char_params

        ###### [batch, sent_length, num_filters]
        #char_state_below = lasagne.layers.get_output(output_cnn_layer_inf, {layer_char_input_inf:char_input_var})
        char_state_below = lasagne.layers.get_output(output_cnn_layer_inf)

        char_state_below = dropout_layer(char_state_below, use_dropout, trng)

        char_state_shuff = char_state_below.dimshuffle(1, 0, 2)
        state_below = T.concatenate([state_below, char_state_shuff], axis=2)

        state_below = dropout_layer(state_below, use_dropout, trng)

        enclstm_f = LSTM(embsize + num_filters, self.en_hidden_size)
        enclstm_b = LSTM(embsize + num_filters, self.en_hidden_size, True)
        self.encoder_lstm_layers.append(enclstm_f)  #append
        self.encoder_lstm_layers.append(enclstm_b)  #append
        self.params += enclstm_f.params + enclstm_b.params  #concatenate

        hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle)
        hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle)

        hs = T.concatenate([hs_f, hs_b], axis=2)
        Cs = T.concatenate([Cs_f, Cs_b], axis=2)

        hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1)
        Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
        #self.hos += T.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias),
        #self.Cos += T.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias),
        self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),
        self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),
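        # The decoder starts from zero hidden/cell states of size de_hidden_size;
        # the commented-out lines above show the alternative of projecting the
        # encoder's final forward/backward states instead.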

        Encoder = hs

        state_below = self.de_lookuptable[
            target_var_in_shuffle.flatten()].reshape(
                (target_var_in_shuffle.shape[0],
                 target_var_in_shuffle.shape[1], self.de_hidden_size))

        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size, self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, mask_var_shuffle,
                                              ho, Co)

        decoder_lstm_outputs = T.concatenate([state_below, Encoder], axis=2)
        linear_outputs = T.dot(decoder_lstm_outputs,
                               self.linear) + self.linear_bias[None, None, :]
        softmax_outputs, updates = theano.scan(
            fn=lambda x: T.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            return -m * T.log(pred[T.arange(input_var.shape[0]), y])

        """
		costs, _ = theano.scan(fn=_NLL, sequences=[softmax_outputs, target_var_shuffle, mask_var_shuffle])
                #loss = costs.sum() / mask_var.sum() + params.L2*sum(lasagne.regularization.l2(x) for x in self.params)
		loss = costs.sum() / mask_var.sum()		

                updates = lasagne.updates.sgd(loss, self.params, self.eta)
                updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9)

		###################################################
                #### using the ground truth when training
                ##################################################
                self._train = theano.function(
                        inputs=[ei, em, di, dm, dt],
                        outputs=[loss, softmax_outputs],
                        updates=updates,
                        givens={input_var:ei, mask_var:em, target_var_in:di, decoderMask:dm, target_var:dt}
                        )
		"""

        def _step2(ctx_, state_, hs_, Cs_):

            hs, Cs = [], []
            token_idxs = T.cast(state_.argmax(axis=-1), "int32")
            msk_ = T.fill((T.zeros_like(token_idxs, dtype="float32")), 1.)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, ctx_.shape[0], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs)
            state_below0 = state_below0.reshape(
                (ctx_.shape[0], self.de_hidden_size))
            state_below0 = T.concatenate([ctx_, state_below0], axis=1)

            newpred = T.dot(state_below0,
                            self.linear) + self.linear_bias[None, :]
            state_below = T.nnet.softmax(newpred)
            ##### the probability of the extra start symbol is 0
            extra_p = T.zeros_like(hs[:, :, 0])
            state_below = T.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs

        hs0, Cs0 = T.as_tensor_variable(
            self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0")

        train_outputs, _ = theano.scan(fn=_step2,
                                       sequences=[Encoder],
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=input_var_shuffle.shape[0])

        predy = train_outputs[0].dimshuffle(1, 0, 2)
        predy = predy[:, :, :-1] * mask_var[:, :, None]
        predy0 = predy.reshape((-1, 17))

        def inner_function(targets_one_step, mask_one_step, prev_label,
                           tg_energy):
            """
                        :param targets_one_step: [batch_size, t]
                        :param prev_label: [batch_size, t]
                        :param tg_energy: [batch_size]
                        :return:
                        """
            new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
            new_ta_energy_t = tg_energy + T.sum(
                new_ta_energy * targets_one_step, axis=1)
            tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy)

            return [targets_one_step, tg_energy_t]

        local_energy = lasagne.layers.get_output(
            l_local, {
                l_in_word: input_var,
                l_mask_word: mask_var,
                layer_char_input: char_input_var
            })
        local_energy = local_energy.reshape((-1, length, 17))
        local_energy = local_energy * mask_var[:, :, None]

        #####################
        # for the end symbol of a sequence
        ####################

        end_term = Wyy[:-1, -1]
        local_energy = local_energy + end_term.dimshuffle(
            'x', 'x', 0) * mask_var1[:, :, None]

        #predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a:input_var, l_mask_word_a:mask_var})

        predy_in = T.argmax(predy0, axis=1)
        A = T.extra_ops.to_one_hot(predy_in, 17)
        A = A.reshape((-1, length, 17))

        #predy = predy0.reshape((-1, length, 25))
        #predy = predy*mask_var[:,:,None]

        targets_shuffled = predy.dimshuffle(1, 0, 2)
        target_time0 = targets_shuffled[0]

        masks_shuffled = mask_var.dimshuffle(1, 0)

        initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

        initials = [target_time0, initial_energy0]
        [_, target_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials,
            sequences=[targets_shuffled[1:], masks_shuffled[1:]])
        cost11 = target_energies[-1] + T.sum(
            T.sum(local_energy * predy, axis=2) * mask_var, axis=1)

        # compute the energy of the discretized (argmax) prediction A

        targets_shuffled0 = A.dimshuffle(1, 0, 2)
        target_time00 = targets_shuffled0[0]

        initial_energy00 = T.dot(target_time00, Wyy[-1, :-1])

        initials0 = [target_time00, initial_energy00]
        [_, target_energies0], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials0,
            sequences=[targets_shuffled0[1:], masks_shuffled[1:]])
        cost110 = target_energies0[-1] + T.sum(
            T.sum(local_energy * A, axis=2) * mask_var, axis=1)

        #predy_f =  predy.reshape((-1, 25))
        y_f = target_var.flatten()

        if (params.annealing == 0):
            lamb = params.L3
        elif (params.annealing == 1):
            lamb = params.L3 * (1 - 0.01 * t_t)

        if (params.regutype == 0):
            ce_hinge = lasagne.objectives.categorical_crossentropy(
                predy0 + eps, y_f)
            ce_hinge = ce_hinge.reshape((-1, length))
            ce_hinge = T.sum(ce_hinge * mask_var, axis=1)
            cost = T.mean(-cost11) + lamb * T.mean(ce_hinge)
        else:

            entropy_term = -T.sum(predy0 * T.log(predy0 + eps), axis=1)
            entropy_term = entropy_term.reshape((-1, length))
            entropy_term = T.sum(entropy_term * mask_var, axis=1)
            cost = T.mean(-cost11) - lamb * T.mean(entropy_term)

        ##from adam import adam
        ##updates_a = adam(cost, self.params, params.eta)

        #updates_a = lasagne.updates.sgd(cost, self.params, params.eta)
        #updates_a = lasagne.updates.apply_momentum(updates_a, self.params, momentum=0.9)
        #norm = T.sqrt(sum(T.sum(updates_a[tensor]**2) for tensor in self.params))
        #target_norm = T.clip(norm, 0, 10.0)
        #multiplier = target_norm / (1e-8 + norm)

        from momentum import momentum
        updates_a = momentum(cost, self.params, params.eta, momentum=0.9)

        if (params.regutype == 0):
            self.train_fn = theano.function(
                inputs=[ei, ci, dt, em, em1, length0, t_t0, di0, use_dropout0],
                outputs=[cost, ce_hinge],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    char_input_var: ci,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0,
                    use_dropout: use_dropout0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, ce_hinge], updates = updates_a, on_unused_input='ignore')
        else:

            self.train_fn = theano.function(
                inputs=[ei, ci, dt, em, em1, length0, t_t0, di0, use_dropout0],
                outputs=[cost, entropy_term],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    char_input_var: ci,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0,
                    use_dropout: use_dropout0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, entropy_term], updates = updates_a, on_unused_input='ignore')

        prediction = T.argmax(predy, axis=2)
        corr = T.eq(prediction, target_var)
        corr_train = (corr * mask_var).sum(dtype=theano.config.floatX)
        num_tokens = mask_var.sum(dtype=theano.config.floatX)

        self.eval_fn = theano.function(
            inputs=[ei, ci, dt, em, em1, length0, di0, use_dropout0],
            outputs=[cost11, cost110, corr_train, num_tokens, prediction],
            on_unused_input='ignore',
            givens={
                input_var: ei,
                char_input_var: ci,
                target_var: dt,
                mask_var: em,
                mask_var1: em1,
                length: length0,
                decoderInputs0: di0,
                use_dropout: use_dropout0
            })
コード例 #58
cost = T.sqr(pred_freq - target_freq).mean()
#lib.load_params('iter_latest_wavenet.p')
# cost = T.nnet.categorical_crossentropy(
#     predicted_sequences,
#     target_sequences.flatten()
# ).mean()

# The commented-out cross-entropy above is reported in nats; uncommenting the
# line below (a multiplication by log2(e)) would convert it to bits:
#cost = cost * lib.floatX(1.44269504089)

params = lib.search(cost, lambda x: hasattr(x, 'param'))
lib.print_params_info(cost, params)
#updates = lib.optimizers.Adam(cost, params, 1e-3,gradClip=True,value=GRAD_CLIP)
grads = T.grad(cost, wrt=params)
lr = T.fscalar()
updates = lasagne.updates.adam(grads, params, learning_rate=lr)
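# The learning rate lr is a symbolic scalar, so it can be changed on every call
# to train_fn without recompiling the function.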

print "Gradients Computed"

train_fn = theano.function([sequences, lr], [cost, pred_freq],
                           updates=updates,
                           on_unused_input='warn')

print "Training!"
DATA_PATH = "/data/lisatmp3/kumarrit/blizzard"
for epoch in xrange(NB_EPOCH):
    costs = []
    times = []
    #data_feeder = list(dataset.feed_epoch(DATA_PATH, N_FILES, BATCH_SIZE, SEQ_LEN, FRAME_SIZE, Q_LEVELS, Q_ZERO,RF))
    data_feeder = list(
コード例 #59
    def __init__(self, x_dim, hidden_dim, y_dim, w_spread, p_drop):

        # parameters of the model
        self.wfx = theano.shared(
            name="wfx",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (x_dim + hidden_dim + 1, hidden_dim)).astype(
                                  theano.config.floatX),
            borrow=True)
        self.wbx = theano.shared(
            name="wbx",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (x_dim + hidden_dim + 1, hidden_dim)).astype(
                                  theano.config.floatX),
            borrow=True)

        self.wf1 = theano.shared(
            name="wf1",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (2 * hidden_dim + hidden_dim + 1,
                               hidden_dim)).astype(theano.config.floatX),
            borrow=True)
        self.wb1 = theano.shared(
            name="wb1",
            value=w_spread *
            np.random.uniform(-1., 1.,
                              (2 * hidden_dim + hidden_dim + 1,
                               hidden_dim)).astype(theano.config.floatX),
            borrow=True)

        self.wy = theano.shared(
            name="wy",
            value=w_spread * np.random.uniform(
                -1., 1.,
                (2 * hidden_dim + 1, y_dim)).astype(theano.config.floatX),
            borrow=True)

        h_zeros = theano.shared(name="hfx_0",
                                value=np.zeros(hidden_dim,
                                               dtype=theano.config.floatX),
                                borrow=True)

        # bundle
        self.params = [self.wfx, self.wbx, self.wf1, self.wb1, self.wy]

        # define recurrent neural network
        # (for each input word predict all output tags)
        x = T.fmatrix("x")
        y = T.fmatrix("y")
        learn_rate = T.fscalar('learn_rate')

        activation = T.tanh

        #activation = T.nnet.sigmoid
        #activation = lambda x: x * (x > 0)  # reLU
        #activation = lambda x: x * ((x > 0) + 0.01)
        #activation = lambda x: T.minimum(x * (x > 0), 6)  # capped reLU

        def model(x, wfx, hfx_0, wbx, hbx_0, wf1, hf1_0, wb1, hb1_0, wy,
                  p_drop):
            def recurrence_x(x_cur, h_prev, w, mask):
                h = activation(T.dot(T.concatenate([x_cur, h_prev, [one]]), w))
                h_ = dropout_apply(h, mask, p_drop)
                return h_

            def recurrence_h(f_cur, b_cur, h_prev, w, mask):
                h = activation(
                    T.dot(T.concatenate([f_cur, b_cur, h_prev, [one]]), w))
                h_ = dropout_apply(h, mask, p_drop)
                return h_

            def recurrence_y(f_cur, b_cur, w):
                y = activation(T.dot(T.concatenate([f_cur, b_cur, [one]]), w))
                return y

            one = np.float32(1.)
            if p_drop > 0.:
                masks = dropout_masks(
                    p_drop,
                    [hfx_0.shape, hbx_0.shape, hf1_0.shape, hb1_0.shape])
            else:
                masks = [[]] * 4

            hfx, _ = theano.scan(fn=recurrence_x,
                                 sequences=x,
                                 non_sequences=[wfx, masks[0]],
                                 outputs_info=[hfx_0],
                                 n_steps=x.shape[0])
            hbx_rev, _ = theano.scan(fn=recurrence_x,
                                     sequences=x,
                                     non_sequences=[wbx, masks[1]],
                                     outputs_info=[hbx_0],
                                     n_steps=x.shape[0],
                                     go_backwards=True)
            hbx, _ = theano.scan(fn=lambda x: x,
                                 sequences=hbx_rev,
                                 n_steps=x.shape[0],
                                 go_backwards=True)
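            # go_backwards=True makes the backward pass emit states in reverse
            # time order; this identity scan flips them back so hbx lines up
            # with hfx step by step.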

            hf1, _ = theano.scan(fn=recurrence_h,
                                 sequences=[hfx, hbx],
                                 non_sequences=[wf1, masks[2]],
                                 outputs_info=[hf1_0],
                                 n_steps=x.shape[0])
            hb1_rev, _ = theano.scan(fn=recurrence_h,
                                     sequences=[hfx, hbx],
                                     non_sequences=[wb1, masks[3]],
                                     outputs_info=[hb1_0],
                                     n_steps=x.shape[0],
                                     go_backwards=True)
            hb1, _ = theano.scan(fn=lambda x: x,
                                 sequences=hb1_rev,
                                 n_steps=x.shape[0],
                                 go_backwards=True)

            y, _ = theano.scan(fn=recurrence_y,
                               sequences=[hf1, hb1],
                               non_sequences=[wy],
                               outputs_info=[None],
                               n_steps=x.shape[0])
            return y

        y_pred = model(x, self.wfx, h_zeros, self.wbx, h_zeros, self.wf1,
                       h_zeros, self.wb1, h_zeros, self.wy, 0.)
        y_noise = model(x, self.wfx, h_zeros, self.wbx, h_zeros, self.wf1,
                        h_zeros, self.wb1, h_zeros, self.wy, p_drop)

        #loss = lambda y_pred, y: T.mean((y_pred - y) ** 2)  # MSE
        #loss = lambda y_pred, y: T.sum((y_pred - y) ** 16) ** (1./16)
        #loss = lambda y_pred, y: T.max((y_pred - y) ** 2)
        loss = lambda y_pred, y: T.max(abs(y - y_pred)) + T.mean(
            (y - y_pred)**2)
        #loss = lambda y_pred, y: T.sum((y_pred - y) ** 16) ** (1./16) + T.mean((y - y_pred) ** 2)
        l1_reg = 0.001
        l1 = sum([
            T.mean(w)
            for w in [self.wfx, self.wbx, self.wf1, self.wb1, self.wy]
        ])
        l2_reg = 0.001
        l2 = sum([
            T.mean(w**2)
            for w in [self.wfx, self.wbx, self.wf1, self.wb1, self.wy]
        ])

        # define gradients and updates
        cost = loss(y_noise, y) + l1_reg * l1 + l2_reg * l2
        #updates = sgd(cost, self.params, learn_rate)
        #updates = rmsprop(cost, self.params, learn_rate)
        updates = adam(cost, self.params, learn_rate)

        # compile theano functions
        self.predict = theano.function(inputs=[x], outputs=y_pred)
        self.train = theano.function(
            inputs=[x, y, learn_rate],
            outputs=[cost,
                     T.min(y_noise),
                     T.max(y_noise),
                     T.mean(y_noise)],
            updates=updates)
コード例 #60
args = setup()
print('all argument: ', args)
temp_lambda = None
loss_change = []
tmp_weights = None
random_seed(args.seed)
    

if args.model == 'convnet':
    x = T.ftensor4('x')
elif args.model == 'mlp':
    x = T.matrix('x')
else:
    raise AttributeError
y = T.matrix('y')
lr_ele = T.fscalar('lr_ele')

mom = args.momEle #momentum
lr_hyper = T.fscalar('lr_hyper')
grad_valid_weight = T.tensor4('grad_valid_weight')


model = DenseNet(x=x, y=y, args=args)


velocities = [theano.shared(np.asarray(param.get_value(borrow=True) * 0.,
                                       dtype=theano.config.floatX),
                            broadcastable=param.broadcastable,
                            name=param.name + '_vel')
              for param in model.params_theta]

#extra lr parameters
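# One log-learning-rate is created per weight element, initialised to
# log(args.lrEle); keeping them in log space ensures exp(llr) stays positive
# when these values are later adjusted.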
log_learning_rates = [theano.shared(np.full_like(param.get_value(borrow=True),
                                                 np.log(args.lrEle),
                                                 dtype=theano.config.floatX),
                                    broadcastable=param.broadcastable,
                                    name=param.name + '_llr')
                      for param in model.params_theta]
temp_llrs = None