def __init__(self, verbose=True):
     if verbose: logger.debug('Build Multilayer Perceptron Ranking model...')
     # Positive input setting
     self.inputPL = T.matrix(name='inputPL', dtype=floatX)
     self.inputPR = T.matrix(name='inputPR', dtype=floatX)
     # Negative input setting
     self.inputNL = T.matrix(name='inputNL', dtype=floatX)
     self.inputNR = T.matrix(name='inputNR', dtype=floatX)
     # Standard input setting
     self.inputL = T.matrix(name='inputL', dtype=floatX)
     self.inputR = T.matrix(name='inputR', dtype=floatX)
     # Build activation function
     self.act = Activation('tanh')
     # Connect input matrices
     self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
     self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
     self.input = T.concatenate([self.inputL, self.inputR], axis=1)
     # Build hidden layer
     self.hidden_layer = HiddenLayer(self.input, (2*edim, args.hidden), act=self.act)
     self.hidden = self.hidden_layer.output
     self.hiddenP = self.hidden_layer.encode(self.inputP)
     self.hiddenN = self.hidden_layer.encode(self.inputN)
     # Dropout parameter
     #srng = T.shared_randomstreams.RandomStreams(args.seed)
     #mask = srng.binomial(n=1, p=1-args.dropout, size=self.hidden.shape)
     #maskP = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenP.shape)
     #maskN = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenN.shape)
     #self.hidden *= T.cast(mask, floatX)
     #self.hiddenP *= T.cast(maskP, floatX)
     #self.hiddenN *= T.cast(maskN, floatX)
     # Build linear output layer
     self.score_layer = ScoreLayer(self.hidden, args.hidden)
     self.output = self.score_layer.output
     self.scoreP = self.score_layer.encode(self.hiddenP)
     self.scoreN = self.score_layer.encode(self.hiddenN)
     # Stack all the parameters
     self.params = []
     self.params += self.hidden_layer.params
     self.params += self.score_layer.params
     # Build cost function
     self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0-self.scoreP+self.scoreN))
     # Construct the gradient of the cost function with respect to the model parameters
     self.gradparams = T.grad(self.cost, self.params)
     # Count the total number of parameters in this model
     self.num_params = 2 * edim * args.hidden + args.hidden + args.hidden + 1
     # Build class method
     self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
     self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                                      outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
     self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                        outputs=[self.scoreP, self.scoreN])
     if verbose:
          logger.debug('Architecture of the MLP Ranker built; summary below:')
         logger.debug('Input dimension: %d' % edim)
         logger.debug('Hidden dimension: %d' % args.hidden)
         logger.debug('Total number of parameters used in the model: %d' % self.num_params)
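
The cost above is a pairwise margin (hinge) ranking loss: it pushes each positive pair's score at least 1 above the corresponding negative pair's score. A minimal NumPy sketch of the same objective, assuming `score_pos` and `score_neg` are plain 1-D score arrays (an illustration, not part of the original code):

import numpy as np

def ranking_hinge_cost(score_pos, score_neg, margin=1.0):
    # a positive pair scored at least `margin` above its negative pair contributes 0
    return np.mean(np.maximum(0.0, margin - score_pos + score_neg))

# e.g. ranking_hinge_cost(np.array([2.0, 0.1]), np.array([0.5, 0.3])) -> mean([0.0, 1.2]) = 0.6
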
    def __init__(self,
                 random_generator,
                 theano_random_generator=None,
                 x_dim=28 * 28,
                 y_dim=10,
                 hidden_layer_sizes=[500, 500],
                 corruption_levels=[0.1, 0.1]):
        """
        """
        # Declare empty sigmoid layer array for MLP
        self.sigmoid_layers = []

        # Declare an empty array of DenoisingAutoEncoder
        self.autoencoder_layers = []

        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        if theano_random_generator is None:
            self.theano_random_generator = RandomStreams(
                random_generator.randint(2**30))
        else:
            self.theano_random_generator = theano_random_generator

        # Inputs using Theano
        self.x = T.matrix("x")
        self.y = T.ivector("y")

        # Initialize all parameters
        for i in range(self.n_layers):
            # Define x and y dimensions
            if i == 0:
                internal_x_dim = x_dim
            else:
                internal_x_dim = hidden_layer_sizes[i - 1]
            internal_y_dim = hidden_layer_sizes[i]

            # Find inputs
            if i == 0:
                internal_input = self.x
            else:
                internal_input = self.sigmoid_layers[i - 1].output

            # Define Sigmoid Layer
            self.sigmoid_layers.append(
                HiddenLayer(internal_input,
                            internal_x_dim,
                            internal_y_dim,
                            random_generator,
                            activation=T.nnet.sigmoid))

            # Define denoising autoencoder that shares weights with the sigmoid layer
            self.autoencoder_layers.append(
                DenoisingAutoEncoder(random_generator,
                                     self.theano_random_generator,
                                     internal_x_dim,
                                     internal_y_dim,
                                     internal_input,
                                     W=self.sigmoid_layers[i].W,
                                     b=self.sigmoid_layers[i].b))

            # Update parameters
            self.params.extend(self.sigmoid_layers[i].params)

        # Finally add logistic layer
        self.logistic_layer = LogisticRegression(
            self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)

        self.params.extend(self.logistic_layer.params)

        # These are two important costs
        # Finetuning after pretraining individual AutoEncoders
        self.finetune_cost = self.logistic_layer.negative_log_likelihood(
            self.y)

        # Error from prediction
        self.error = self.logistic_layer.error(self.y)
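
The class above only defines symbolic costs; a minimal sketch of how `finetune_cost` is typically compiled into a training step, assuming `model` is an instance of this class and `train_x`, `train_y` are Theano shared variables (names here are illustrative, not from the original source):

import theano
import theano.tensor as T

def make_finetune_step(model, train_x, train_y, batch_size, learning_rate=0.1):
    index = T.lscalar('index')
    gparams = T.grad(model.finetune_cost, model.params)
    updates = [(p, p - learning_rate * g) for p, g in zip(model.params, gparams)]
    # one SGD step on the minibatch selected by `index`
    return theano.function(
        [index],
        model.finetune_cost,
        updates=updates,
        givens={
            model.x: train_x[index * batch_size:(index + 1) * batch_size],
            model.y: train_y[index * batch_size:(index + 1) * batch_size],
        },
    )
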
Example #3
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 8, 8),
    filter_shape=(nkerns[1], nkerns[0], 4, 4),
    poolsize=(1, 1),
    stride=(1, 1),
    W=params[4].get_value(),
    b=params[5].get_value(),
)

layer2_input = layer1.output.flatten(2)
layer2 = HiddenLayer(
    rng,
    inputs=layer2_input,
    n_in=nkerns[1] * 5 * 5,
    n_out=500,
    activation=T.tanh,
    W=params[2].get_value(),
    b=params[3].get_value(),
)

layer3 = LogisticRegression(input=layer2.output,
                            n_in=500,
                            n_out=36,
                            W=params[0].get_value(),
                            b=params[1].get_value())

forward = theano.function(
    inputs=[input],
    outputs=layer3.p_y_given_x,
    on_unused_input='warn',
)
def evaluate_lenet5(learning_rate=0.01,
                    n_epochs=4,
                    emb_size=100,
                    batch_size=50,
                    describ_max_len=20,
                    type_size=12,
                    filter_size=[3, 5],
                    maxSentLen=100,
                    hidden_size=[300, 300]):

    model_options = locals().copy()
    print "model options", model_options
    # emb_root = '/save/wenpeng/datasets/LORELEI/multi-lingual-emb/2018-il9-il10/multi-emb/'
    emb_root = '/save/wenpeng/datasets/LORELEI/multi-lingual-emb/2018-il9-il10/'
    test_file_path = '/save/wenpeng/datasets/LORELEI/il9/il9-setE-as-test-input_ner_filtered_w2.txt'
    output_file_path = '/save/wenpeng/datasets/LORELEI/il9/il9_system_output_noMT_epoch4.json'
    seed = 1234
    np.random.seed(seed)
    rng = np.random.RandomState(
        seed)  # random seed so the model reproduces the same results
    srng = T.shared_randomstreams.RandomStreams(rng.randint(seed))
    word2id = {}
    # all_sentences, all_masks, all_labels, all_other_labels, word2id=load_BBN_il5Trans_il5_dataset(maxlen=maxSentLen)  #minlen, include one label, at least one word in the sentence
    train_p1_sents, train_p1_masks, train_p1_labels, word2id = load_trainingData_types(
        word2id, maxSentLen)
    train_p2_sents, train_p2_masks, train_p2_labels, train_p2_other_labels, word2id = load_trainingData_types_plus_others(
        word2id, maxSentLen)
    test_sents, test_masks, test_labels, word2id = load_il10_NI_test(
        word2id, maxSentLen)

    label_sent, label_mask = load_SF_type_descriptions(word2id, type_size,
                                                       describ_max_len)
    label_sent = np.asarray(label_sent, dtype='int32')
    label_mask = np.asarray(label_mask, dtype=theano.config.floatX)

    train_p1_sents = np.asarray(train_p1_sents, dtype='int32')
    train_p1_masks = np.asarray(train_p1_masks, dtype=theano.config.floatX)
    train_p1_labels = np.asarray(train_p1_labels, dtype='int32')
    train_p1_size = len(train_p1_labels)

    train_p2_sents = np.asarray(train_p2_sents, dtype='int32')
    train_p2_masks = np.asarray(train_p2_masks, dtype=theano.config.floatX)
    train_p2_labels = np.asarray(train_p2_labels, dtype='int32')
    train_p2_other_labels = np.asarray(train_p2_other_labels, dtype='int32')
    train_p2_size = len(train_p2_labels)
    '''
    combine train_p1 and train_p2
    '''
    train_sents = np.concatenate([train_p1_sents, train_p2_sents], axis=0)
    train_masks = np.concatenate([train_p1_masks, train_p2_masks], axis=0)
    train_labels = np.concatenate([train_p1_labels, train_p2_labels], axis=0)
    train_size = train_p1_size + train_p2_size

    test_sents = np.asarray(test_sents, dtype='int32')
    test_masks = np.asarray(test_masks, dtype=theano.config.floatX)
    test_labels = np.asarray(test_labels, dtype='int32')
    test_size = len(test_sents)

    vocab_size = len(word2id) + 1  # add one zero pad index

    rand_values = rng.normal(
        0.0, 0.01,
        (vocab_size, emb_size))  # initialize the embedding matrix from a Gaussian distribution
    rand_values[0] = np.array(np.zeros(emb_size), dtype=theano.config.floatX)
    id2word = {y: x for x, y in word2id.iteritems()}
    # word2vec=load_fasttext_multiple_word2vec_given_file([emb_root+'100k-ENG-multicca.300.ENG.txt',emb_root+'100k-HIN-multicca.d300.HIN.txt',emb_root+'100k-IL10-multicca.d300.IL10.txt'], 300)
    word2vec = load_fasttext_multiple_word2vec_given_file([
        emb_root + '100k-IL10-cca.d100.eng.txt',
        emb_root + '100k-IL10-cca.d100.IL10.txt'
    ], 100)
    rand_values = load_word2vec_to_init(rand_values, id2word, word2vec)
    embeddings = theano.shared(
        value=np.array(rand_values, dtype=theano.config.floatX), borrow=True
    )  # wrap the Python array "rand_values" in a Theano shared variable

    #now, start to build the input form of the model
    sents_id_matrix = T.imatrix('sents_id_matrix')
    sents_mask = T.fmatrix('sents_mask')
    labels = T.imatrix('labels')  #batch*12
    other_labels = T.imatrix()  #batch*4

    des_id_matrix = T.imatrix()
    des_mask = T.fmatrix()
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    common_input = embeddings[sents_id_matrix.flatten()].reshape(
        (batch_size, maxSentLen, emb_size)).dimshuffle(
            0, 2, 1)  # (batch_size, emb_size, maxSentLen); this layout can feed a CNN, GRU, or LSTM
    bow_emb = T.sum(common_input * sents_mask.dimshuffle(0, 'x', 1), axis=2)
    repeat_common_input = T.repeat(
        normalize_tensor3_colwise(common_input), type_size,
        axis=0)  #(batch_size*type_size, emb_size, maxsentlen)

    des_input = embeddings[des_id_matrix.flatten()].reshape(
        (type_size, describ_max_len, emb_size)).dimshuffle(0, 2, 1)
    bow_des = T.sum(des_input * des_mask.dimshuffle(0, 'x', 1),
                    axis=2)  # (type_size, emb_size)
    repeat_des_input = T.tile(
        normalize_tensor3_colwise(des_input),
        (batch_size, 1, 1))  #(batch_size*type_size, emb_size, maxsentlen)

    conv_W, conv_b = create_conv_para(rng,
                                      filter_shape=(hidden_size[0], 1,
                                                    emb_size, filter_size[0]))
    conv_W2, conv_b2 = create_conv_para(rng,
                                        filter_shape=(hidden_size[0], 1,
                                                      emb_size,
                                                      filter_size[1]))
    multiCNN_para = [conv_W, conv_b, conv_W2, conv_b2]

    conv_att_W, conv_att_b = create_conv_para(rng,
                                              filter_shape=(hidden_size[0], 1,
                                                            emb_size,
                                                            filter_size[0]))
    conv_W_context, conv_b_context = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    conv_att_W2, conv_att_b2 = create_conv_para(rng,
                                                filter_shape=(hidden_size[0],
                                                              1, emb_size,
                                                              filter_size[1]))
    conv_W_context2, conv_b_context2 = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    ACNN_para = [
        conv_att_W, conv_att_b, conv_W_context, conv_att_W2, conv_att_b2,
        conv_W_context2
    ]
    '''
    multi-CNN
    '''
    conv_model = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        W=conv_W,
        b=conv_b
    )  # multiply the mask with conv_out so features at padded (UNK) positions are zeroed
    sent_embeddings = conv_model.maxpool_vec  # (batch_size, hidden_size): each sentence gets an embedding of length hidden_size

    conv_model2 = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        W=conv_W2,
        b=conv_b2
    )  # multiply the mask with conv_out so features at padded (UNK) positions are zeroed
    sent_embeddings2 = conv_model2.maxpool_vec  # (batch_size, hidden_size): each sentence gets an embedding of length hidden_size
    '''
    GRU
    '''
    U1, W1, b1 = create_GRU_para(rng, emb_size, hidden_size[0])
    GRU_NN_para = [
        U1, W1, b1
    ]  # U1 and W1 each contain 3 matrices; b1 is the bias
    # gru_input = common_input.dimshuffle((0,2,1))   #gru requires input (batch_size, emb_size, maxSentLen)
    gru_layer = GRU_Batch_Tensor_Input_with_Mask(common_input, sents_mask,
                                                 hidden_size[0], U1, W1, b1)
    gru_sent_embeddings = gru_layer.output_sent_rep  # (batch_size, hidden_size)
    '''
    ACNN
    '''
    attentive_conv_layer = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W,
        b=conv_att_b,
        W_context=conv_W_context,
        b_context=conv_b_context)
    sent_att_embeddings = attentive_conv_layer.attentive_maxpool_vec_l

    attentive_conv_layer2 = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W2,
        b=conv_att_b2,
        W_context=conv_W_context2,
        b_context=conv_b_context2)
    sent_att_embeddings2 = attentive_conv_layer2.attentive_maxpool_vec_l
    '''
    cross-DNN-dataless
    '''
    #first map label emb into hidden space
    HL_layer_1_W, HL_layer_1_b = create_HiddenLayer_para(
        rng, emb_size, hidden_size[0])
    HL_layer_1_params = [HL_layer_1_W, HL_layer_1_b]
    HL_layer_1 = HiddenLayer(rng,
                             input=bow_des,
                             n_in=emb_size,
                             n_out=hidden_size[0],
                             W=HL_layer_1_W,
                             b=HL_layer_1_b,
                             activation=T.tanh)
    des_rep_hidden = HL_layer_1.output  #(type_size, hidden_size)
    dot_dnn_dataless_1 = T.tanh(sent_embeddings.dot(
        des_rep_hidden.T))  #(batch_size, type_size)
    dot_dnn_dataless_2 = T.tanh(sent_embeddings2.dot(des_rep_hidden.T))
    '''
    dataless cosine
    '''
    cosine_scores = normalize_matrix_rowwise(bow_emb).dot(
        normalize_matrix_rowwise(bow_des).T)
    cosine_score_matrix = T.nnet.sigmoid(
        cosine_scores)  #(batch_size, type_size)
    '''
    dataless top-30 fine grained cosine
    '''
    fine_grained_cosine = T.batched_dot(
        repeat_common_input.dimshuffle(0, 2, 1),
        repeat_des_input)  #(batch_size*type_size,maxsentlen,describ_max_len)
    fine_grained_cosine_to_matrix = fine_grained_cosine.reshape(
        (batch_size * type_size, maxSentLen * describ_max_len))
    sort_fine_grained_cosine_to_matrix = T.sort(fine_grained_cosine_to_matrix,
                                                axis=1)
    top_k_simi = sort_fine_grained_cosine_to_matrix[:,
                                                    -30:]  # (batch_size*type_size, 30)
    max_fine_grained_cosine = T.mean(top_k_simi, axis=1)
    top_k_cosine_scores = max_fine_grained_cosine.reshape(
        (batch_size, type_size))
    top_k_score_matrix = T.nnet.sigmoid(top_k_cosine_scores)

    acnn_LR_input = T.concatenate([
        dot_dnn_dataless_1, dot_dnn_dataless_2, cosine_score_matrix,
        top_k_score_matrix, sent_embeddings, sent_embeddings2,
        gru_sent_embeddings, sent_att_embeddings, sent_att_embeddings2, bow_emb
    ],
                                  axis=1)
    acnn_LR_input_size = hidden_size[0] * 5 + emb_size + 4 * type_size
    # classification layer: maps the concatenated feature vector to 12 per-type scores
    acnn_U_a, acnn_LR_b = create_LR_para(rng, acnn_LR_input_size, 12)
    acnn_LR_para = [acnn_U_a, acnn_LR_b]
    acnn_layer_LR = LogisticRegression(
        rng,
        input=acnn_LR_input,
        n_in=acnn_LR_input_size,
        n_out=12,
        W=acnn_U_a,
        b=acnn_LR_b
    )  #basically it is a multiplication between weight matrix and input feature vector
    acnn_score_matrix = T.nnet.sigmoid(
        acnn_layer_LR.before_softmax)  #batch * 12
    acnn_prob_pos = T.where(labels < 1, 1.0 - acnn_score_matrix,
                            acnn_score_matrix)
    acnn_loss = -T.mean(T.log(acnn_prob_pos))

    acnn_other_U_a, acnn_other_LR_b = create_LR_para(rng, acnn_LR_input_size,
                                                     16)
    acnn_other_LR_para = [acnn_other_U_a, acnn_other_LR_b]
    acnn_other_layer_LR = LogisticRegression(rng,
                                             input=acnn_LR_input,
                                             n_in=acnn_LR_input_size,
                                             n_out=16,
                                             W=acnn_other_U_a,
                                             b=acnn_other_LR_b)
    acnn_other_prob_matrix = T.nnet.softmax(
        acnn_other_layer_LR.before_softmax.reshape((batch_size * 4, 4)))
    acnn_other_prob_tensor3 = acnn_other_prob_matrix.reshape(
        (batch_size, 4, 4))
    acnn_other_prob = acnn_other_prob_tensor3[
        T.repeat(T.arange(batch_size), 4),
        T.tile(T.arange(4), (batch_size)),
        other_labels.flatten()]
    acnn_other_field_loss = -T.mean(T.log(acnn_other_prob))

    params = multiCNN_para + GRU_NN_para + ACNN_para + acnn_LR_para + HL_layer_1_params  # put all model parameters together
    cost = acnn_loss + 1e-4 * ((conv_W**2).sum() + (conv_W2**2).sum() +
                               (conv_att_W**2).sum() + (conv_att_W2**2).sum())
    updates = Gradient_Cost_Para(cost, params, learning_rate)

    other_paras = params + acnn_other_LR_para
    cost_other = cost + acnn_other_field_loss
    other_updates = Gradient_Cost_Para(cost_other, other_paras, learning_rate)
    '''
    testing
    '''

    ensemble_NN_scores = acnn_score_matrix  #T.max(T.concatenate([att_score_matrix.dimshuffle('x',0,1), score_matrix.dimshuffle('x',0,1), acnn_score_matrix.dimshuffle('x',0,1)],axis=0),axis=0)
    # '''
    # majority voting, does not work
    # '''
    # binarize_NN = T.where(ensemble_NN_scores > 0.5, 1, 0)
    # binarize_dataless = T.where(cosine_score_matrix > 0.5, 1, 0)
    # binarize_dataless_finegrained = T.where(top_k_score_matrix > 0.5, 1, 0)
    # binarize_conc =  T.concatenate([binarize_NN.dimshuffle('x',0,1), binarize_dataless.dimshuffle('x',0,1),binarize_dataless_finegrained.dimshuffle('x',0,1)],axis=0)
    # sum_binarize_conc = T.sum(binarize_conc,axis=0)
    # binarize_prob = T.where(sum_binarize_conc > 0.0, 1, 0)
    # '''
    # sum up prob, works
    # '''
    # ensemble_scores_1 = 0.6*ensemble_NN_scores+0.4*top_k_score_matrix
    # binarize_prob = T.where(ensemble_scores_1 > 0.3, 1, 0)
    '''
    sum up prob, works
    '''
    ensemble_scores = ensemble_NN_scores  #0.6*ensemble_NN_scores+0.4*0.5*(cosine_score_matrix+top_k_score_matrix)
    binarize_prob = T.where(ensemble_scores > 0.3, 1, 0)
    '''
    test for other fields
    '''
    sum_tensor3 = acnn_other_prob_tensor3  #(batch, 4, 4)

    #train_model = theano.function([sents_id_matrix, sents_mask, labels], cost, updates=updates, on_unused_input='ignore')
    train_p1_model = theano.function(
        [sents_id_matrix, sents_mask, labels, des_id_matrix, des_mask],
        cost,
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='ignore')
    train_p2_model = theano.function([
        sents_id_matrix, sents_mask, labels, des_id_matrix, des_mask,
        other_labels
    ],
                                     cost_other,
                                     updates=other_updates,
                                     allow_input_downcast=True,
                                     on_unused_input='ignore')
    test_model = theano.function(
        [sents_id_matrix, sents_mask, des_id_matrix, des_mask],
        binarize_prob,
        allow_input_downcast=True,
        on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000000000  # look at this many examples regardless
    start_time = time.time()
    mid_time = start_time
    past_time = mid_time
    epoch = 0
    done_looping = False

    n_train_batches = train_size / batch_size
    train_batch_start = list(
        np.arange(n_train_batches) * batch_size) + [train_size - batch_size]
    n_train_p2_batches = train_p2_size / batch_size
    train_p2_batch_start = list(np.arange(n_train_p2_batches) *
                                batch_size) + [train_p2_size - batch_size]

    n_test_batches = test_size / batch_size
    n_test_remain = test_size % batch_size
    test_batch_start = list(
        np.arange(n_test_batches) * batch_size) + [test_size - batch_size]

    train_p2_batch_start_set = set(train_p2_batch_start)
    # max_acc_dev=0.0
    max_meanf1_test = 0.0
    max_weightf1_test = 0.0
    train_indices = range(train_size)
    train_p2_indices = range(train_p2_size)
    cost_i = 0.0
    other_cost_i = 0.0
    min_mean_frame = 100.0
    while epoch < n_epochs:
        epoch = epoch + 1
        random.Random(100).shuffle(train_indices)
        random.Random(100).shuffle(train_p2_indices)
        iter_accu = 0

        for batch_id in train_batch_start:  #for each batch
            # iter counts how many batches have been processed across all epochs
            iter = (epoch - 1) * n_train_batches + iter_accu + 1
            iter_accu += 1
            train_id_batch = train_indices[batch_id:batch_id + batch_size]

            cost_i += train_p1_model(train_sents[train_id_batch],
                                     train_masks[train_id_batch],
                                     train_labels[train_id_batch], label_sent,
                                     label_mask)

            if batch_id in train_p2_batch_start_set:
                train_p2_id_batch = train_p2_indices[batch_id:batch_id +
                                                     batch_size]
                other_cost_i += train_p2_model(
                    train_p2_sents[train_p2_id_batch],
                    train_p2_masks[train_p2_id_batch],
                    train_p2_labels[train_p2_id_batch], label_sent, label_mask,
                    train_p2_other_labels[train_p2_id_batch])
            # else:
            #     random_batch_id = random.choice(train_p2_batch_start)
            #     train_p2_id_batch = train_p2_indices[random_batch_id:random_batch_id+batch_size]
            #     other_cost_i+=train_p2_model(
            #                         train_p2_sents[train_p2_id_batch],
            #                         train_p2_masks[train_p2_id_batch],
            #                         train_p2_labels[train_p2_id_batch],
            #                         label_sent,
            #                         label_mask,
            #                         train_p2_other_labels[train_p2_id_batch]
            #                         )
            #after every 20 batches, we test the performance of the model on all test data
            if iter % 20 == 0:
                print 'Epoch ', epoch, 'iter ' + str(
                    iter) + ' average cost: ' + str(cost_i / iter), str(
                        other_cost_i /
                        iter), 'uses ', (time.time() - past_time) / 60.0, 'min'
                past_time = time.time()

                error_sum = 0.0
                all_pred_labels = []
                all_gold_labels = []
                for test_batch_id in test_batch_start:  # for each test batch
                    pred_labels = test_model(
                        test_sents[test_batch_id:test_batch_id + batch_size],
                        test_masks[test_batch_id:test_batch_id + batch_size],
                        label_sent, label_mask)
                    gold_labels = test_labels[test_batch_id:test_batch_id +
                                              batch_size]
                    # print 'pred_labels:', pred_labels
                    # print 'gold_labels;', gold_labels
                    all_pred_labels.append(pred_labels)
                    all_gold_labels.append(gold_labels)
                all_pred_labels = np.concatenate(all_pred_labels)
                all_gold_labels = np.concatenate(all_gold_labels)

                test_mean_f1, test_weight_f1 = average_f1_two_array_by_col(
                    all_pred_labels, all_gold_labels)
                if test_weight_f1 > max_weightf1_test:
                    max_weightf1_test = test_weight_f1
                if test_mean_f1 > max_meanf1_test:
                    max_meanf1_test = test_mean_f1
                print '\t\t\t\t\t\t\t\tcurrent f1s:', test_mean_f1, test_weight_f1, '\t\tmax_f1:', max_meanf1_test, max_weightf1_test

        print 'Epoch ', epoch, 'uses ', (time.time() - mid_time) / 60.0, 'min'
        mid_time = time.time()

        #print 'Batch_size: ', update_freq
    end_time = time.time()

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
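
The "dataless top-30 fine grained cosine" block above averages the 30 largest word-to-word cosine similarities between a sentence and a type description and squashes the mean through a sigmoid. A NumPy sketch for a single sentence/description pair, assuming both matrices are already row-normalized (an illustration, not the original code):

import numpy as np

def top_k_cosine_score(sent, des, k=30):
    # sent: (maxSentLen, emb_size), des: (describ_max_len, emb_size), rows unit-normalized
    sims = sent.dot(des.T).ravel()               # all word-to-word cosine similarities
    top_k = np.sort(sims)[-k:]                   # keep the k largest
    return 1.0 / (1.0 + np.exp(-top_k.mean()))   # sigmoid of their mean, as in the model
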
Example #5
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, PV, kind=2, theano_rng=None, n_ins=784,
                 h_activation=[], hidden_layers_sizes=[500, 500], n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type PV: numpy.ndarray
        :param PV: per-example matrix (one row per training example); wrapped in
                   a shared variable and fed, together with `true_out`, to the
                   output layer's squared loss during finetuning

        :type h_activation: list of ints
        :param h_activation: per-layer activation selector, one entry per hidden
                             layer (1 = sigmoid, 2 = tanh)
        """
        self.PV = theano.shared(value=PV,borrow=True)
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        self.z1 = T.matrix('z1')
        self.z2 = T.matrix('z2')
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            activation = None
            if h_activation[i] == 1:
                activation = T.nnet.sigmoid
            if h_activation[i] == 2:
                activation = T.tanh
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add an output layer on top of the MLP
        self.OutLayer = HiddenLayer(rng=numpy_rng,
                                    input=self.sigmoid_layers[-1].output,
                                    n_in=hidden_layers_sizes[-1],
                                    n_out=n_outs,activation=T.nnet.sigmoid,
                                    kind=2)
        self.params.extend(self.OutLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.OutLayer.sq_loss(self.z1,self.z2)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.OutLayer.errors(self.y)
        self.p_y_given_x = self.OutLayer.output

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, true_out, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        true_out = theano.shared(value=true_out,borrow=True)
        assert self.PV.get_value().shape[0] == train_set_x.get_value().shape[0]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index],
            outputs=[self.finetune_cost,self.p_y_given_x],
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.z1: self.PV[index * batch_size: (index + 1) * batch_size],
                self.z2: true_out[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_score_i = theano.function(
            [index],
            [self.errors,self.p_y_given_x],
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set

        def test_score():
            score_i     = []
            p_y_given_x = []
            for i in xrange(n_test_batches):
                tem = test_score_i(i)
                score_i.append(tem[0])
                p_y_given_x.append(tem[1])
            return [score_i,p_y_given_x]

        return train_fn, valid_score, test_score
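
A usage sketch for the two training phases described in the docstrings above (greedy layer-wise RBM pretraining, then supervised finetuning). `numpy_rng`, `PV`, `true_out`, `train_set_x`, `datasets`, and `n_train_batches` are assumed to be prepared elsewhere; the hyper-parameter values are illustrative:

dbn = DBN(numpy_rng, PV, n_ins=784, h_activation=[1, 1],
          hidden_layers_sizes=[500, 500], n_outs=10)

# Phase 1: greedy layer-wise pretraining, one compiled function per RBM.
pretrain_fns = dbn.pretraining_functions(train_set_x, batch_size=20, k=1)
for layer_fn in pretrain_fns:
    for epoch in xrange(10):
        costs = [layer_fn(index=i, lr=0.1) for i in xrange(n_train_batches)]

# Phase 2: supervised finetuning of the whole stack.
train_fn, valid_score, test_score = dbn.build_finetune_functions(
    true_out, datasets, batch_size=20, learning_rate=0.1)
for i in xrange(n_train_batches):
    finetune_cost, p_y_given_x = train_fn(i)
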
Example #6
class BRNNMatchScorer(object):
	'''
	Bidirectional RNN for text matching as a classification problem.
	'''
	def __init__(self, config, verbose=True):
		# Construct two BRNNEncoders for matching two sentences
		self.encoderL = BRNNEncoder(config, verbose)
		self.encoderR = BRNNEncoder(config, verbose)
		# Link two parts
		self.params = []
		self.params += self.encoderL.params
		self.params += self.encoderR.params
		# Set up input
		# Note that there are three kinds of inputs altogether, including:
		# 1, inputL, inputR. This pair is used for computing the score after training
		# 2, inputPL, inputPR. This pair is used for training positive pairs
		# 3, inputNL, inputNR. This pair is used for training negative pairs
		self.inputL = self.encoderL.input
		self.inputR = self.encoderR.input
		# Positive 
		self.inputPL = T.matrix(name='inputPL', dtype=floatX)
		self.inputPR = T.matrix(name='inputPR', dtype=floatX)
		# Negative
		self.inputNL = T.matrix(name='inputNL', dtype=floatX)
		self.inputNR = T.matrix(name='inputNR', dtype=floatX)
		# Get output of two BRNNEncoders
		self.hiddenL = self.encoderL.output
		self.hiddenR = self.encoderR.output
		# Positive Hidden
		self.hiddenPL = self.encoderL.encode(self.inputPL)
		self.hiddenPR = self.encoderR.encode(self.inputPR)
		# Negative Hidden
		self.hiddenNL = self.encoderL.encode(self.inputNL)
		self.hiddenNR = self.encoderR.encode(self.inputNR)
		# Activation function
		self.act = Activation(config.activation)
		self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
		self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=0)
		self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=0)
		# Build hidden layer
		self.hidden_layer = HiddenLayer(self.hidden, 
										(4*config.num_hidden, config.num_mlp), 
										act=Activation(config.hiddenact))
		self.compressed_hidden = self.hidden_layer.output
		self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
		self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
		# Accumulate parameters
		self.params += self.hidden_layer.params
		# Dropout parameter
		srng = T.shared_randomstreams.RandomStreams(config.random_seed)
		mask = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hidden.shape)
		maskP = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenP.shape)
		maskN = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenN.shape)
		self.compressed_hidden *= T.cast(mask, floatX)
		self.compressed_hiddenP *= T.cast(maskP, floatX)
		self.compressed_hiddenN *= T.cast(maskN, floatX)
		# Score layer
		self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
		self.output = self.score_layer.output
		self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
		self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
		# Accumulate parameters
		self.params += self.score_layer.params
		# Build cost function
		self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0 - self.scoreP + self.scoreN))
		# Compute the gradients of the cost with respect to the model parameters
		self.gradparams = T.grad(self.cost, self.params)
		# Compute the total number of parameters in the model
		self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
		self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + config.num_mlp + config.num_mlp + 1
		self.num_params = self.num_params_encoder + self.num_params_classifier
		# Build class functions
		self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
		# Compute the gradient of the objective function and cost and prediction
		self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, 
																 self.inputNL, self.inputNR],
														 outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
		# Output function for debugging purpose
		self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
										   outputs=[self.scoreP, self.scoreN])
		self.show_hiddens = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
											outputs=[self.hiddenP, self.hiddenN])
		if verbose:
			logger.debug('Architecture of the BRNNMatchScorer built; summary below:')
			logger.debug('Input dimension: %d' % config.num_input)
			logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
			logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
			logger.debug('There are 2 BRNNEncoders used in the model.')
			logger.debug('Total number of parameters in this model: %d' % self.num_params)

	def update_params(self, grads, learn_rate):
		'''
		@grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
				They are the corresponding gradients of model parameters.
		@learn_rate: scalar. Learning rate.
		'''
		for param, grad in zip(self.params, grads):
			p = param.get_value(borrow=True)
			param.set_value(p - learn_rate * grad, borrow=True)

	def set_params(self, params):
		'''
		@params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
		'''
		for p, param in zip(self.params, params):
			p.set_value(param, borrow=True)

	def deepcopy(self, brnn):
		'''
		@brnn: BRNNMatchScorer. Copy the model parameters of another BRNNMatchScorer.
		'''
		assert len(self.params) == len(brnn.params)
		for p, param in zip(self.params, brnn.params):
			val = param.get_value()
			p.set_value(val)

	@staticmethod
	def save(fname, model):
		'''
		@fname: String. Filename to store the model.
		@model: BRNNMatchScorer. An instance of BRNNMatchScorer to be saved.
		'''
		with file(fname, 'wb') as fout:
			cPickle.dump(model, fout)

	@staticmethod
	def load(fname):
		'''
		@fname: String. Filename to load the model.
		'''
		with file(fname, 'rb') as fin:
			model = cPickle.load(fin)
		return model
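
A sketch of one SGD step with the class above, combining `compute_cost_and_gradient` and `update_params`. The minibatch matrices `inputPL`, `inputPR`, `inputNL`, `inputNR`, a scoring pair `inputL`, `inputR`, and the `config` object are assumed to be built elsewhere:

scorer = BRNNMatchScorer(config)
outputs = scorer.compute_cost_and_gradient(inputPL, inputPR, inputNL, inputNR)
grads, (cost, scoreP, scoreN) = outputs[:-3], outputs[-3:]
scorer.update_params(grads, learn_rate=0.01)
print scorer.score(inputL, inputR)  # score an arbitrary pair after the update
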
def buildLayers(layer0_input, batch_size, dim, nkerns, rng, TT=None):
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (128-5+1, 128-5+1) = (124, 124)
    # maxpooling reduces this further to (124/2, 124/2) = (62, 62)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 62, 62)

    W0 = None
    b0 = None
    W1 = None
    b1 = None
    W2 = None
    b2 = None
    W3 = None
    b3 = None
    W4 = None
    b4 = None
    W5 = None
    b5 = None

    if TT is not None:
        W0 = TT.Layer0_param.W.get_value(borrow=True)
        b0 = TT.Layer0_param.b.get_value(borrow=True)
        W1 = TT.Layer1_param.W.get_value(borrow=True)
        b1 = TT.Layer1_param.b.get_value(borrow=True)
        W2 = TT.Layer2_param.W.get_value(borrow=True)
        b2 = TT.Layer2_param.b.get_value(borrow=True)
        W3 = TT.Layer3_param.W.get_value(borrow=True)
        b3 = TT.Layer3_param.b.get_value(borrow=True)
        W4 = TT.Layer4_param.W.get_value(borrow=True)
        b4 = TT.Layer4_param.b.get_value(borrow=True)
        W5 = TT.Layer5_param.W.get_value(borrow=True)
        b5 = TT.Layer5_param.b.get_value(borrow=True)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, dim, 128, 128),
                                filter_shape=(nkerns[0], dim, 5, 5),
                                poolsize=(2, 2),
                                Wi=W0,
                                bi=b0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (62-5+1, 62-5+1) = (58, 58)
    # maxpooling reduces this further to (58/2, 58/2) = (29, 29)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 29, 29)

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 62, 62),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2),
                                Wi=W1,
                                bi=b1)

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, nkerns[1], 29, 29),
                                filter_shape=(nkerns[2], nkerns[1], 6, 6),
                                poolsize=(2, 2),
                                Wi=W2,
                                bi=b2)

    #output 12*12

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 12 * 12)
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 12 * 12,
                         n_out=1024,
                         Wi=W3,
                         bi=b3)

    layer4 = HiddenLayer(rng,
                         input=layer3.output,
                         n_in=1024,
                         n_out=2048,
                         Wi=W4,
                         bi=b4)

    # classify the values of the fully-connected sigmoidal layer
    layer5 = HiddenLayer(rng,
                         input=layer4.output,
                         n_in=2048,
                         n_out=51,
                         Wi=W5,
                         bi=b5)

    return [layer0, layer1, layer2, layer3, layer4, layer5]
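
A small helper (an illustration, not part of the original code) that reproduces the shape arithmetic behind the image_shape arguments above: each conv/pool layer outputs (in - filter + 1) / pool per spatial dimension.

def conv_pool_out(in_size, filter_size, pool_size):
    # valid convolution followed by non-overlapping max-pooling
    return (in_size - filter_size + 1) // pool_size

conv_pool_out(128, 5, 2)  # 62 -> layer1 image_shape
conv_pool_out(62, 5, 2)   # 29 -> layer2 image_shape
conv_pool_out(29, 6, 2)   # 12 -> n_in = nkerns[2] * 12 * 12 for layer3
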
Example #8
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1],
        name_appendage = ''
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well)
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid,
                                        name_appendage=name_appendage+'_sigmoid_'+str(i))
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          name_appendage=name_appendage+'_dA_'+str(i))
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetunining

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
Example #9
def test_SdA_regress(finetune_lr=0.05, pretraining_epochs=10,
             pretrain_lr=0.1, training_epochs=10000,
             dataset='mnist.pkl.gz', batch_size=20):
    datasets = load_data_half(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    train_set_x=train_set_x.eval()
    train_set_y=train_set_y.eval()
    import theano
    train_set_x_lab=train_set_x[:,:]
    train_set_x_unlab=train_set_x[:,:]
    train_set_y_lab=train_set_y[:,:]
    train_set_y_unlab=train_set_y[:,:]
    train_set_x_lab=theano.shared(numpy.asarray(train_set_x_lab,
                                                dtype=theano.config.floatX),
                                  borrow=True)
    train_set_y_lab=theano.shared(numpy.asarray(train_set_y_lab,
                                                dtype=theano.config.floatX),
                                  borrow=True)
    train_set_x_unlab=theano.shared(numpy.asarray(train_set_x_unlab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    train_set_y_unlab=theano.shared(numpy.asarray(train_set_y_unlab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches_l = train_set_y_lab.eval().shape[0]
    n_train_batches_l /= batch_size
    n_train_batches_u = train_set_y_unlab.eval().shape[0]
    n_train_batches_u /= batch_size
    # compute number of minibatches for training, validation and testing
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    #n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    #from SdA_orig import SdA as SdA_old
    hidden_layer_size = 100
    SdA_inp = SdA(numpy_rng,
                  n_ins=392,
                  hidden_layers_sizes=[hidden_layer_size]
    )
    SdA_out = SdA(numpy_rng,
                  n_ins=392,
                  hidden_layers_sizes=[hidden_layer_size]
    )
        
    # PRETRAINING THE MODEL #
    if 0 : # pretrain inp ae
        print '... getting the pretraining functions for INPUT AE'
        pretraining_fns = SdA_inp.pretraining_functions(train_set_x=train_set_x_unlab,
                                                    batch_size=batch_size)
    
        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        corruption_levels = [.1, .2, .3]
        for i in xrange(SdA_inp.n_layers):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches_u):
                    c.append(pretraining_fns[i](index=batch_index,
                             corruption=corruption_levels[i],
                             lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
    
        end_time = time.clock()
    
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
    if 0 : # pretrain out ae
        print '... getting the pretraining functions for OUTPUT AE'
        pretraining_fns = SdA_out.pretraining_functions(train_set_x=train_set_y_unlab,
                                                    batch_size=batch_size)
    
        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        corruption_levels = [.5, .2, .3]
        for i in xrange(SdA_out.n_layers):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches_u):
                    c.append(pretraining_fns[i](index=batch_index,
                             corruption=corruption_levels[i],
                             lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
    
        end_time = time.clock()
    
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
    
        
    if 0: # save aes
        f=open('aes_shallow_sig_nobias.pkl', 'w+')
        import pickle
        pickle.dump(SdA_inp, f)
        pickle.dump(SdA_out, f)
        f.flush()
        f.close() 
    if 0: # load aes
        f=open('aes_shallow_sig_nobias.pkl', 'r')
        import pickle
        SdA_inp=pickle.load(f)
        SdA_out=pickle.load(f)
        f.close()    
   
    if 1: # cca
        from dcca_numpy import netCCA_nobias, netCCA, dCCA
        from mlp_numpy import expit, logistic_prime, linear, linear_prime, relu, relu_prime, tanh, tanh_prime
        train_y1 = train_set_x_lab.eval()
        train_y2 = train_set_y_lab.eval()
        test_y1 = test_set_x.eval()
        test_y2 = test_set_y.eval()

        ##param1=((train_y1.shape[1],0,0),(2038, relu, relu_prime),(50, relu, relu_prime))
        ##param2=((train_y2.shape[1],0,0),(1608, relu, relu_prime),(50, relu, relu_prime))
        param1=((train_y1.shape[1],0,0),(hidden_layer_size, expit, logistic_prime))
        param2=((train_y2.shape[1],0,0),(hidden_layer_size, expit, logistic_prime))
        W1s = []
        b1s = []
        for i in range(len(SdA_inp.dA_layers)):
            W1s.append( SdA_inp.dA_layers[i].W.T.eval() )
            ##b1s.append( SdA_inp.dA_layers[i].b.eval() )
            ##b1s[-1] = b1s[-1].reshape((b1s[-1].shape[0], 1))
        W2s = []
        b2s = []
        for i in range(len(SdA_out.dA_layers)):
            W2s.append( SdA_out.dA_layers[i].W.T.eval() )
            ##b2s.append( SdA_out.dA_layers[i].b.eval() )
            ##b2s[-1] = b2s[-1].reshape((b2s[-1].shape[0], 1))

        numpy.random.seed(0)
        N1=netCCA_nobias(train_y1,param1, W1s)
        N2=netCCA_nobias(train_y2,param2, W2s)
        N = dCCA(train_y1, train_y2, N1, N2)
        N1.reconstruct(test_set_x.eval()[0,:])
        cnt = 0
        from dcca_numpy import cca_cost, cca, order_cost, cor_cost
        while True:
            X=N1.predict(test_set_x.eval())
            Y=N2.predict(test_set_y.eval())
            _H1 = numpy.dot(X, N.A1)
            _H2 = numpy.dot(Y, N.A2)
            print '****', cnt, cor_cost(_H1, _H2)
            X1_rec = numpy.tanh(X.dot(N1.weights[0]))
            X2_rec = numpy.tanh(Y.dot(N2.weights[0]))
            param=((hidden_layer_size,0,0),(hidden_layer_size, relu, relu_prime))
            from mlp_numpy import NeuralNetwork as NN

            lr=NN(X,Y,param)
            lr.train(X[:,:],Y[:,:],10, 0.005)
            Yh=lr.predict(X[:,:])
            X2_reg = N2.fs[-1](numpy.dot(Yh,N2.weights[0]))

            #X2_reg = N2.fs[-1](numpy.dot(_H1.dot(numpy.linalg.inv(N.A1)),N2.weights[0]))

            print '****', 'mse1:', numpy.mean((X1_rec-test_set_x.eval())**2.0)
            print '****', 'mse2:', numpy.mean((X2_rec-test_set_y.eval())**2.0)
            print '****', 'mse_map:', numpy.mean((X2_reg-test_set_y.eval())**2.0)

            if cnt % 2:
                N.train(5, True, 10000.0)
            else:
                N.train(5, False, 10000.0)

            cnt += 1
            f=open('netcca.pkl', 'w+')
            import pickle
            pickle.dump(N, f)
            pickle.dump(N, f)
            f.flush()
            f.close() 
            if cnt == 200:
                break
        for i in range(len(SdA_inp.dA_layers)):
            SdA_inp.dA_layers[i].W = theano.shared( N1.weights[i].T )
            SdA_inp.dA_layers[i].b = theano.shared( N1.biases[i][:,0] )
        
        for i in range(len(SdA_out.dA_layers)):
            SdA_out.dA_layers[i].W = theano.shared( N2.weights[i].T )
            SdA_out.dA_layers[i].b = theano.shared( N2.biases[i][:,0] )

        
    if 1 : # pretrain middle layer
        print '... pre-training MIDDLE layer'

        h1 = T.matrix('x')  # hidden representation produced by the input-side autoencoder
        h2 = T.matrix('y')  # hidden representation produced by the output-side autoencoder
        log_reg = HiddenLayer(numpy_rng, h1, hidden_layer_size, hidden_layer_size)

        if 1: # for middle layer
            learning_rate = 0.01
            fprop_inp = theano.function(
                [],
                SdA_inp.sigmoid_layers[-1].output,
                givens={
                    SdA_inp.sigmoid_layers[0].input: train_set_x_lab
                },
                name='fprop_inp'
            )
            fprop_out = theano.function(
                [],
                SdA_out.sigmoid_layers[-1].output,
                givens={
                    SdA_out.sigmoid_layers[0].input: train_set_y_lab
                },
                name='fprop_out'
            )
            #H11=fprop_inp() 
            #H21=fprop_out()
            ##H1=N1.predict(train_set_x.eval())
            ##H2=N2.predict(train_set_y.eval())
            H1=fprop_inp()
            H2=fprop_out()
            H1=theano.shared(H1)
            H2=theano.shared(H2)
            # compute the gradients with respect to the model parameters
            logreg_cost = log_reg.mse(h2)

            gparams = T.grad(logreg_cost, log_reg.params)
    
            # compute list of fine-tuning updates
            updates = [
                (param, param - gparam * learning_rate)
                for param, gparam in zip(log_reg.params, gparams)
            ]

            train_fn_middle = theano.function(
                inputs=[],
                outputs=logreg_cost,
                updates=updates,
                givens={
                    h1: H1,
                    h2: H2
                },
                name='train_middle'
            )
        epoch = 0
        while epoch < 10:
            print epoch, train_fn_middle()
            epoch += 1
            
    sda = SdA_regress(
        SdA_inp,
        SdA_out,
        log_reg,
        numpy_rng=numpy_rng,
        n_inp=28*28//2,
        hidden_layers_sizes_inp=[hidden_layer_size],
        hidden_layers_sizes_out=[hidden_layer_size],
        n_out=28*28//2
    )
    # end-snippet-3 start-snippet-4
    # end-snippet-4
    
    # FINETUNING THE MODEL #

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )
    
        
    print '... finetuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches_l  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches_l, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    fprop = theano.function(
        [],
        sda.sigmoid_layers[-1].output,
        givens={
            sda.x: test_set_x
        },
        name='fprop'
    )
    while True:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches_l):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches_l + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches_l,
                       this_validation_loss ))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches_l,
                           test_score ))

            if patience <= iter:
                done_looping = True
                #break
            if 0: # vis weights
                fprop = theano.function(
                    [],
                    sda.sigmoid_layers[-1].output,
                    givens={
                        sda.x: test_set_x
                    },
                    name='fprop'
                )
                yh=fprop()
                yh=yh
    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss , best_iter + 1, test_score)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
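
The finetuning loop above follows the usual patience-based early-stopping recipe: validate every validation_frequency minibatches, extend the patience whenever the validation loss improves by more than improvement_threshold, and stop once the iteration count exceeds the patience. A stripped-down sketch of that schedule, with made-up function names rather than the ones used in this example:

import numpy

def early_stopping_sketch(train_fn, validate_fn, n_train_batches, n_epochs=1000,
                          patience=5000, patience_increase=2.0,
                          improvement_threshold=0.995):
    best_loss = numpy.inf
    validation_frequency = min(n_train_batches, patience // 2)
    it = 0
    for epoch in xrange(n_epochs):
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            it += 1
            if it % validation_frequency == 0:
                loss = numpy.mean(validate_fn())
                if loss < best_loss * improvement_threshold:
                    # clear improvement: allow at least patience_increase times as many iterations
                    patience = max(patience, it * patience_increase)
                if loss < best_loss:
                    best_loss = loss
            if patience <= it:
                return best_loss
    return best_loss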
Example #10
def classify_lenet5(batch_size=500, output_size=20):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)


    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 37 * 23)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (37, 23) is the size of the input feature maps here.
    layer0_input = x.reshape((batch_size, 1, 37, 23))

    # Construct the first convolutional pooling layer:
    # filtering with 4x2 kernels reduces the image size to (37-4+1, 23-2+1) = (34, 22)
    # maxpooling reduces this further to (34/2, 22/2) = (17, 11)
    # 4D output tensor is thus of shape (batch_size, 20, 17, 11)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 37, 23),
        filter_shape=(20, 1, 4, 2),
        poolsize=(2, 2),
    )

    # layer1 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer0.output,
    #     image_shape=(batch_size, 20, 17, 11),
    #     filter_shape=(50, 20, 4, 2),
    #     poolsize=(2, 2),
    # )
    #
    # layer4 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer1.output,
    #     image_shape=(batch_size, 50, 7, 5),
    #     filter_shape=(100, 50, 4, 2),
    #     poolsize=(2, 2),
    # )

    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=3740,
        n_out=output_size,
        activation=T.tanh,
        use_bias=True
    )

    # layer5 = HiddenLayer(
    #     rng,
    #     input=layer2.output,
    #     n_in=200,
    #     n_out=output_size,
    #     activation=T.tanh,
    #     use_bias=True
    # )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=output_size, n_out=2)

    model_params = pickle.load(open('../model/cnn_dist_'+str(output_size)+'.pkl'))
    #
    layer0.W = theano.shared(
        value=numpy.array(
            model_params[2].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer0.b = theano.shared(
        value=numpy.array(
            model_params[3].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # layer1.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-4].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer1.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-3].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )
    #
    # layer4.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-6].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer4.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-5].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer2.W = theano.shared(
        value=numpy.array(
            model_params[0].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer2.b = theano.shared(
        value=numpy.array(
            model_params[1].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # layer5.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-10].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer5.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-9].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer3.W = theano.shared(
        value=numpy.array(
            model_params[4].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer3.b = theano.shared(
        value=numpy.array(
            model_params[5].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # params = layer3.params + layer5.params + layer2.params +  layer4.params + layer1.params + layer0.params

    datasets = load_data(None)

    sets = ['train', 'dev', 'test']
    dimension = [20000, 20000, 20000]
    for k in range(3):
        if k == 0:
            classify_set_x, classify_set_y, classify_set_z, classify_set_m, classify_set_c, classify_set_b= datasets[k]
        else:
            classify_set_x, classify_set_y, classify_set_z= datasets[k]

        # compute number of minibatches for training, validation and testing
        n_classify_batches = classify_set_x.get_value(borrow=True).shape[0]
        n_classify_batches /= batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        classify = theano.function(
                [index],
                layer2.output,
                givens={
                    x: classify_set_x[index * batch_size: (index + 1) * batch_size],
                }
            )

        r = []

        for i in xrange(n_classify_batches):
            m = classify(i)
            r.extend(m)
        r = np.array(r)
        print r.shape
        r = np.append(r, np.reshape(classify_set_y.eval(),(dimension[k], 1)), 1)
        numpy.savetxt('../extractedInformation/cnn_dist_'+str(output_size)+'/'+sets[k]+'.csv', r, delimiter=",")
def evaluate_lenet5(learning_rate=0.05, n_epochs=10,
                    nkerns=[20, 50], batch_size=50):
    global train_dataset_route
    global valid_dataset_route
    global train_limit
    global valid_limit
    print train_dataset_route, type(train_dataset_route)
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data.load_spc_data(train_dataset_route, valid_dataset_route, train_limit, valid_limit)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (100, 100)  # size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 100*100)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 100, 100))

    # Construct the first convolutional pooling layer:
    # filtering with 40x40 kernels reduces the image size to (100-40+1, 100-40+1) = (61, 61)
    # maxpooling (2, 2) reduces this further to (30, 30)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 30, 30)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 100, 100),
            filter_shape=(nkerns[0], 1, 40, 40), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering with 15x15 kernels reduces the image size to (30-15+1, 30-15+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 30, 30),
            filter_shape=(nkerns[1], nkerns[0], 15, 15), poolsize=(2, 2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 8 * 8)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    '''
    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    test_results = theano.function(inputs=[index],
            outputs= layer3.y_pred,
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    '''
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter , ' patience = ' , patience
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    layer0_state = layer0.__getstate__()
                    layer1_state = layer1.__getstate__()
                    layer2_state = layer2.__getstate__()
                    layer3_state = layer3.__getstate__()
                    trained_model_list = [layer0_state, layer1_state, layer2_state, layer3_state]
                    trained_model_array = numpy.asarray(trained_model_list)
                    classifier_file = open(train_model_route, 'wb')
                    cPickle.dump([1,2,3], classifier_file, protocol=2)
                    numpy.save(classifier_file, trained_model_array)
                    classifier_file.close()

            if patience <= iter:
                done_looping = True
                print patience , iter
                break

    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
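
Both training functions in this example build their SGD updates the same way: differentiate the cost with respect to every parameter with T.grad and step each parameter against its gradient. A minimal, self-contained illustration of that pattern on a toy linear model (everything here is made up for illustration):

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(numpy.zeros(3, dtype=theano.config.floatX), name='w')
cost = T.mean((T.dot(x, w) - y) ** 2)          # mean squared error of a linear model
grads = T.grad(cost, [w])                      # one gradient per parameter
learning_rate = 0.1
updates = [(param_i, param_i - learning_rate * grad_i)
           for param_i, grad_i in zip([w], grads)]
sgd_step = theano.function([x, y], cost, updates=updates)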
Example #12
def evaluate_lenet5(learning_rate=0.005, n_epochs=5,data = None,nkerns= 64, batch_size=30):


    #for i in range(len(x_val)):
        #if len(x_val[i]) == 490 and len(x_val[i][0]) == 640:
            #x1.append(x_val[i])
            #y1.append(y_val[i]-1)
            #if len(x1) == 80:
                #break

    from data_loader import load_data
    train, validate, test = load_data()
    x_train = np.array(train[0],'float32')
    y_train = train[1]
    x_valid = np.array(validate[0],'float32')
    y_valid = validate[1]
    x_test = np.array(test[0],'float32')
    y_test = test[1]
    x_train2 = theano.shared(numpy.asarray(x_train,dtype=theano.config.floatX))
    y_train_2 = theano.shared(numpy.asarray(y_train,dtype=theano.config.floatX))
    x_valid2 = theano.shared(numpy.asarray(x_valid,dtype=theano.config.floatX))
    y_valid_2 = theano.shared(numpy.asarray(y_valid,dtype=theano.config.floatX))
    x_test2 = theano.shared(numpy.asarray(x_test,dtype=theano.config.floatX))
    y_test_2 = theano.shared(numpy.asarray(y_test,dtype=theano.config.floatX))

    y_train2 = T.cast(y_train_2, 'int32')
    y_test2 = T.cast(y_test_2, 'int32')
    y_valid2 = T.cast(y_valid_2, 'int32')

    print len(x_train)
    print len(y_train)

    rng = numpy.random.RandomState(23455)

    n_train_batches = len(y_train)/batch_size
    n_valid_batches = len(y_valid)/batch_size
    n_test_batches = len(y_test)/batch_size
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are p

    layer0_input = x.reshape((batch_size, 1, 64, 64))

    '''Build the first layer of the network:
    image_shape: the input is a 64x64 feature map; there are batch_size training samples, each with 1 feature map
    filter_shape: nkerns=64 convolution kernels, so each training sample produces 64 feature maps in this layer
    after the convolution the maps shrink to (64-7+1, 64-7+1) = (58, 58)
    after 2x2 pooling the maps shrink to (58/2, 58/2) = (29, 29)
    the resulting image_shape of this layer is therefore (batch_size, nkerns, 29, 29)'''

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 64, 64),
        filter_shape=(nkerns, 1, 7, 7),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns * 29 * 29),
    # i.e. (30, 64*29*29) with the default values.
    layer2_input = layer0.output.flatten(2)

    '''Fully connected layer: the input layer2_input is a 2D matrix whose first dimension indexes the
    samples and whose second dimension holds the features each sample obtained from the convolution
    and pooling above; HiddenLayer is a single-layer network that maps the nkerns*29*29 features
    per sample down to 500 units'''
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns * 29 * 29,
        n_out=500,
        activation=T.tanh
    )

    layer2.output = dropout_layer(layer2.output,0.5)

    # Final layer: a logistic regression classifier that maps the 500 hidden units to the 8 output classes
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=8)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            y: y_test2[index * batch_size: (index + 1) * batch_size],
            x: x_test2[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: x_valid2[index * batch_size: (index + 1) * batch_size],
            y: y_valid2[index * batch_size: (index + 1) * batch_size]
        }
    )

    # collect all parameters in a single list; parameter lists can simply be concatenated with +
    params = layer3.params + layer2.params  + layer0.params

    # take the gradient of the cost with respect to the parameters
    grads = T.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: x_train2[index * batch_size: (index + 1) * batch_size],
            y: y_train2[index * batch_size: (index + 1) * batch_size]
        }
    )

    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.2  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
    #while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):  # iterate over the minibatches of training data

            cost_ij = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    with open('param0.pkl', 'wb') as f0:
        pickle.dump(layer0.params, f0)
    with open('param2.pkl', 'wb') as f2:
        pickle.dump(layer2.params, f2)
    with open('param3.pkl', 'wb') as f3:
        pickle.dump(layer3.params, f3)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
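
The feature-map sizes used above (n_in = nkerns * 29 * 29) come from the standard 'valid' convolution and non-overlapping pooling arithmetic: out = (in - filter + 1) / pool. A tiny helper that reproduces the 64 -> 29 calculation, purely for illustration:

def conv_pool_out(in_size, filter_size, pool_size):
    # 'valid' convolution followed by non-overlapping max pooling
    return (in_size - filter_size + 1) // pool_size

assert conv_pool_out(64, 7, 2) == 29   # matches n_in = nkerns * 29 * 29 above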
Example #13
    def __init__(self, numpy_rng,PV, kind =2,theano_rng=None, n_ins=784,h_activation  = [],
                 hidden_layers_sizes=[500, 500], n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """
        self.PV = theano.shared(value=PV,borrow=True)
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        self.z1 = T.matrix('z1')
        self.z2 = T.matrix('z2')
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            activation = None
            if h_activation[i] == 1:
                activation = T.nnet.sigmoid
            if h_activation[i] == 2:
                activation = T.tanh
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add an output layer on top of the MLP
        self.OutLayer = HiddenLayer(rng=numpy_rng,
                                    input=self.sigmoid_layers[-1].output,
                                    n_in=hidden_layers_sizes[-1],
                                    n_out=n_outs,activation=T.nnet.sigmoid,
                                    kind=2)
        self.params.extend(self.OutLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.OutLayer.sq_loss(self.z1,self.z2)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.OutLayer.errors(self.y)
        self.p_y_given_x = self.OutLayer.output
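
The weight sharing described in the comments above relies on Theano shared variables being passed by reference: the HiddenLayer and the RBM built from it hold the very same W object, so pretraining the RBM also moves the MLP's weights. A toy demonstration of that behaviour (TinyLayer and the +1 "update" are illustrative stand-ins, not the classes used here):

import numpy
import theano

floatX = theano.config.floatX
W = theano.shared(numpy.zeros((4, 3), dtype=floatX), name='W')

class TinyLayer(object):            # stand-in for HiddenLayer / RBM
    def __init__(self, W):
        self.W = W                  # no copy: both layers point at the same storage

mlp_layer = TinyLayer(W)
rbm_layer = TinyLayer(W)
step = numpy.ones((4, 3), dtype=floatX)
pretrain = theano.function([], rbm_layer.W,
                           updates=[(rbm_layer.W, rbm_layer.W + step)])
pretrain()
print mlp_layer.W.get_value()[0, 0]   # 1.0 -- the MLP now sees the "pretrained" weights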
def run():
    
    preProcess = PreProcess()
    data = preProcess.run()
    
    train_set_x,train_set_y = data[0],data[3]
    valid_set_x,valid_set_y = data[1],data[4]#data[1],data[4]
    test_set_x,test_set_y = data[2],data[5]
    
    # network parameters
    num_kernels = [10,10]
    kernel_sizes = [(9, 9), (5, 5)]
    #exit()
    
    # training parameters
    learning_rate = 0.005
    batch_size = 50
    n_sports = np.max(train_set_y.eval())+1
    sigmoidal_output_size = 20
    
    if valid_set_y.eval().size<batch_size:
        print 'Error: Batch size is larger than size of validation set.'

    # Setup 2: compute batch sizes for train/test/validation
    # borrow=True gets us the value of the variable without making a copy.
    n_train_batches = train_set_x.get_value(borrow=True).shape[1]
    n_test_batches = test_set_x.get_value(borrow=True).shape[1]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[1]
    n_train_batches /= batch_size
    n_test_batches /= batch_size
    n_valid_batches /= batch_size


    # Setup 3.
    # Declare inputs to network - x and y are placeholders
    # that will be used in the training/testing/validation functions below.
    x = T.tensor3('x')  # input image data
    y = T.ivector('y') # input label data

    # ## Layer 0 - First convolutional Layer
    # The first layer takes **`(batch_size, 1, 100, 100)`** as input, convolves it with **10** different **9x9** filters, and then downsamples (via maxpooling) in a **2x2** region.  Each filter/maxpool combination produces an output of size **`(100-9+1)/2 = 46`** on a side.
    # The size of the first layer's output is therefore **`(batch_size, 10, 46, 46)`**. 
    
    class Convolution(object):
        
        def __init__(self,batch_size,num_kernels,kernel_sizes,channel):
            
            self.layer0_input_size = (batch_size, 1, 100, 100)  # fixed size from the data
            self.edge0 = (100 - kernel_sizes[0][0] + 1) / 2
            self.layer0_output_size = (batch_size, num_kernels[0], self.edge0, self.edge0)
            # check that we have an even multiple of 2 before pooling
            assert ((100 - kernel_sizes[0][0] + 1) % 2) == 0

            # The actual input is the placeholder x reshaped to the input size of the network
            self.layer0_input = x[channel].reshape(self.layer0_input_size)
            self.layer0 = LeNetConvPoolLayer(rng,
                                        input=self.layer0_input,
                                        image_shape=self.layer0_input_size,
                                        subsample= (1,1),
                                        filter_shape=(num_kernels[0], 1) + kernel_sizes[0],
                                        poolsize=(2, 2))


            # ## Layer 1 - Second convolutional Layer
            # The second layer takes **`(batch_size, 10, 46, 46)`** as input, convolves it with 10 different **10x5x5** filters, and then downsamples (via maxpooling) in a **2x2** region.  Each filter/maxpool combination produces an output of size **`(46-5+1)/2 = 21`** on a side.
            # The size of the second layer's output is therefore **`(batch_size, 10, 21, 21)`**. 
            self.layer1_input_size = self.layer0_output_size
            self.edge1 = (self.edge0 - kernel_sizes[1][0] + 1) / 2
            self.layer1_output_size = (batch_size, num_kernels[1], self.edge1, self.edge1)

            # check that we have an even multiple of 2 before pooling
            assert ((self.edge0 - kernel_sizes[1][0] + 1) % 2) == 0

            self.layer1 = LeNetConvPoolLayer(rng,
                                        input=self.layer0.output,
                                        image_shape=self.layer1_input_size,
                                        subsample= (1,1),
                                        filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
                                        poolsize=(2, 2))
                                        
    conv = Convolution(batch_size,num_kernels,kernel_sizes,0)
    conv2 = Convolution(batch_size,num_kernels,kernel_sizes,1)
    conv3 = Convolution(batch_size,num_kernels,kernel_sizes,2)
                                
    # ## Layer 2 - Fully connected sigmoidal layer
    #exit()
    # The fully connected layer takes a matrix as input.
    # We flatten all but the first dimension of each convolution stack and concatenate the three
    # stacks, giving an input of size **`(batch_size, num_kernels[1] * edge1 * edge1 * 3)`**.
    
    #raw_random= raw_random.RandomStreamsBase()
    srng = theano.tensor.shared_randomstreams.RandomStreams(
                        rng.randint(999999))
                        
    #def rectify(X):                                                         
    #    return T.maximum(X,0.) 
        
    def dropout(X,p=0.5):
        if p>0:
            retain_prob = 1-p
            X *= srng.binomial(X.shape,p=retain_prob,dtype = theano.config.floatX)
            X /= retain_prob
        return X
    
    def rectify(X): 
        return T.maximum(X,0.)

    layer2_input = conv.layer1.output.flatten(2)
    layer2_input = T.concatenate((T.concatenate((conv.layer1.output.flatten(2),conv2.layer1.output.flatten(2)),axis=1),conv3.layer1.output.flatten(2)),axis=1)

    layer2 = HiddenLayer(rng,
                         input=dropout(layer2_input),
                         n_in= num_kernels[1] * conv.edge1 * conv.edge1*3,
                         n_out= num_kernels[1] * conv.edge1 * conv.edge1,
                         activation=rectify) #T.tanh
                         
    # EXTRA LAYER
    # A second fully connected rectified layer of the same width, stacked on top of layer2.
    extra =  HiddenLayer(rng,
                         input=dropout(layer2.output),
                         n_in= num_kernels[1] * conv.edge1 * conv.edge1,
                         n_out=num_kernels[1] * conv.edge1 * conv.edge1,
                         activation=rectify) #T.tanh


    # ## Layer 3 - Logistic regression output layer
    # A fully connected logistic regression layer converts the sigmoid's layer output to a class label.
    layer3 = LogisticRegression(input=extra.output,
                                n_in=num_kernels[1] * conv.edge1 * conv.edge1,
                                n_out=n_sports)
                                

    # # Training the network
    # To train the network, we have to define a cost function.  We'll use the Negative Log Likelihood of the model, relative to the true labels **`y`**.

    # The cost we minimize during training is the NLL of the model.
    # Recall: y is a placeholder we defined above
    cost = layer3.negative_log_likelihood(y)


    # ### Gradient descent
    # We will train with Stochastic Gradient Descent.  To do so, we need the gradient of the cost relative to the parameters of the model.  We can get the parameters for each label via the **`.params`** attribute.

    # create a list of all model parameters to be fit by gradient descent
    # (including the extra hidden layer and all three convolution stacks)
    params = (layer3.params + extra.params + layer2.params
              + conv.layer1.params + conv.layer0.params
              + conv2.layer1.params + conv2.layer0.params
              + conv3.layer1.params + conv3.layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # ## Update
    updates = [
        (param_i, param_i - learning_rate * grad_i)  # <=== SGD update step
        for param_i, grad_i in zip(params, grads)
    ]

    index = T.lscalar()  # index to a batch of training/validation/testing examples

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[:,index * batch_size: (index + 1) * batch_size],  # <=== batching
            y: train_set_y[index * batch_size: (index + 1) * batch_size]   # <=== batching
        }
    )

    # ## Validation function
    # To track progress on a held-out set, we count the number of misclassified examples in the validation set.
    validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[:,index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

    # ## Test function
    # After training, we check the number of misclassified examples in the test set.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[:,index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )   
	
    # guesses = theano.function(
    #         [],
    #         layer3.y_pred,
    #         givens={
    #             x: test_set_x
    #         }
    #     )
    # # Training loop 
    # We use SGD for a fixed number of iterations over the full training set (an "epoch").  Usually, we'd use a more complicated rule, such as iterating until a certain number of epochs fail to produce improvement in the validation set.  
    for epoch in range(90):
        costs = [train_model(i) for i in xrange(n_train_batches)]
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        #print layer3.return_y_pred()
        print "Epoch {}    NLL {:.2}    %err in validation set {:.1%}".format(epoch + 1, np.mean(costs), np.mean(validation_losses))

    # ## Learned features
    #filters = tile_raster_images(layer0.W.get_value(borrow=True), img_shape=(9, 9), tile_shape=(1,10), tile_spacing=(3, 3),
    #                       scale_rows_to_unit_interval=True,
    #                       output_pixel_vals=True)

    #plt.imshow(filters)
    #plt.show()

    # ## Check performance on the test set
    test_errors = [test_model(i) for i in range(n_test_batches)]
    print "test errors: {:.1%}".format(np.mean(test_errors))
Example #15
    def evaluate_lenet5(self):
    #def evaluate_lenet5(learning_rate=0.1, n_epochs=2000, nkerns=[6, 12], batch_size=70, useAllSamples=0, kmax=30, ktop=5, filter_size=[10,7],
    #                    L2_weight=0.000005, dropout_p=0.5, useEmb=0, task=5, corpus=1):
        rng = numpy.random.RandomState(23455)
        
        #datasets, embedding_size, embeddings=read_data(root+'2classes/train.txt', root+'2classes/dev.txt', root+'2classes/test.txt', embeddingPath,60)

        #datasets = load_data(dataset)
        indices_train, trainY, trainLengths, trainLeftPad, trainRightPad= self.datasets[0]
        indices_dev, devY, devLengths, devLeftPad, devRightPad= self.datasets[1]
        indices_test, testY, testLengths, testLeftPad, testRightPad= self.datasets[2]
        n_train_batches=indices_train.shape[0]/self.batch_size
        n_valid_batches=indices_dev.shape[0]/self.batch_size
        n_test_batches=indices_test.shape[0]/self.batch_size
        remain_train=indices_train.shape[0]%self.batch_size
        
        train_batch_start=[]
        dev_batch_start=[]
        test_batch_start=[]
        if self.useAllSamples:
            train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size)+[indices_train.shape[0]-self.batch_size]
            dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size)+[indices_dev.shape[0]-self.batch_size]
            test_batch_start=list(numpy.arange(n_test_batches)*self.batch_size)+[indices_test.shape[0]-self.batch_size]
            n_train_batches=n_train_batches+1
            n_valid_batches=n_valid_batches+1
            n_test_batches=n_test_batches+1
        else:
            train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size)
            dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size)
            test_batch_start=list(numpy.arange(n_test_batches)*self.batch_size)
    
        indices_train_theano=theano.shared(numpy.asarray(indices_train, dtype=theano.config.floatX), borrow=True)
        indices_dev_theano=theano.shared(numpy.asarray(indices_dev, dtype=theano.config.floatX), borrow=True)
        indices_test_theano=theano.shared(numpy.asarray(indices_test, dtype=theano.config.floatX), borrow=True)
        indices_train_theano=T.cast(indices_train_theano, 'int32')
        indices_dev_theano=T.cast(indices_dev_theano, 'int32')
        indices_test_theano=T.cast(indices_test_theano, 'int32')
        
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x_index = T.imatrix('x_index')   # now, x is the index matrix, must be integer
        #y = T.ivector('y')  
        z = T.ivector('z')   # sentence length
        left=T.ivector('left')
        right=T.ivector('right')
        iteration= T.lscalar()
        
        x=self.embeddings_R[x_index.flatten()].reshape((self.batch_size,self.maxSentLength, self.embedding_size)).transpose(0, 2, 1).flatten()
        ishape = (self.embedding_size, self.maxSentLength)  # size of each sentence "image" (embedding_size x maxSentLength)
        filter_size1=(self.embedding_size,self.filter_size[0])
        filter_size2=(self.embedding_size/2,self.filter_size[1])
        #poolsize1=(1, ishape[1]-filter_size1[1]+1) #?????????????????????????????
        poolsize1=(1, ishape[1]+filter_size1[1]-1)
    
        '''
        left_after_conv=T.maximum(0,left-filter_size1[1]+1)
        right_after_conv=T.maximum(0, right-filter_size1[1]+1)
        '''
        left_after_conv=left
        right_after_conv=right
        
        #kmax=30 # this can not be too small, like 20
        #ktop=6
        #poolsize2=(1, kmax-filter_size2[1]+1) #(1,6)
        poolsize2=(1, self.kmax+filter_size2[1]-1) #(1,6)
        dynamic_lengths=T.maximum(self.ktop,z/2+1)  # dynamic k-max pooling
        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'
    
        # Reshape the matrix of sentence representations of shape (batch_size, embedding_size*maxSentLength)
        # to a 4D tensor, compatible with the convolution layers below
        layer0_input = x.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    
        # Construct the first convolution + folding + dynamic k-max pooling layer:
        # the wide ('full') convolution produces maps of width maxSentLength+filter_width-1,
        # folding halves the number of embedding rows, and dynamic k-max pooling keeps
        # max(ktop, len/2+1) columns per sentence, padded to a unified width of kmax
        '''
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, 1, ishape[0], ishape[1]),
                filter_shape=(nkerns[0], 1, filter_size1[0], filter_size1[1]), poolsize=poolsize1, k=kmax)
        '''
        layer0 = Conv_Fold_DynamicK_PoolLayer(rng, input=layer0_input,
                image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                filter_shape=(self.nkerns[0], 1, filter_size1[0], filter_size1[1]), poolsize=poolsize1, k=dynamic_lengths, unifiedWidth=self.kmax, left=left_after_conv, right=right_after_conv, firstLayer=True)
        
        # Construct the second convolution + folding + dynamic k-max pooling layer:
        # it operates on the (embedding_size/2)-row maps produced by layer0 and
        # pools each sentence down to a fixed ktop columns
        '''
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], ishape[0], kmax),
                filter_shape=(nkerns[1], nkerns[0], filter_size2[0], filter_size2[1]), poolsize=poolsize2, k=ktop)
        '''
        '''
        left_after_conv=T.maximum(0, layer0.leftPad-filter_size2[1]+1)
        right_after_conv=T.maximum(0, layer0.rightPad-filter_size2[1]+1)
        '''
        left_after_conv=layer0.leftPad
        right_after_conv=layer0.rightPad
        dynamic_lengths=T.repeat([self.ktop],self.batch_size)  # dynamic k-max pooling
        '''
        layer1 = ConvFoldPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], ishape[0]/2, kmax),
                filter_shape=(nkerns[1], nkerns[0], filter_size2[0], filter_size2[1]), poolsize=poolsize2, k=ktop, left=left_after_conv, right=right_after_conv)
        '''
        layer1 = Conv_Fold_DynamicK_PoolLayer(rng, input=layer0.output,
                image_shape=(self.batch_size, self.nkerns[0], ishape[0]/2, self.kmax),
                filter_shape=(self.nkerns[1], self.nkerns[0], filter_size2[0], filter_size2[1]), poolsize=poolsize2, k=dynamic_lengths, unifiedWidth=self.ktop, left=left_after_conv, right=right_after_conv, firstLayer=False)    
        
        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_features).
        # This will generate a matrix of shape (batch_size, nkerns[1] * (embedding_size/4) * ktop)
        
        
        layer2_input = layer1.output.flatten(2)
        #produce sentence embeddings
        layer2 = HiddenLayer(rng, input=layer2_input, n_in=self.nkerns[1] * (self.embedding_size/4) * self.ktop, n_out=self.sentEm_length, activation=T.tanh)
        
        context_matrix,  target_matrix=self.extract_contexts_targets(indices_matrix=x_index, sentLengths=z, leftPad=left)
        #note that context indices may point to the zero (padding) embedding
        h_indices=context_matrix[:, self.context_size*iteration:self.context_size*(iteration+1)]
        w_indices=target_matrix[:, iteration:(iteration+1)]
        #r_h is the concatenation of context embeddings
        r_h=self.embed_context(h_indices)  #(batch_size, context_size*embedding_size)
        q_w=self.embed_target(w_indices)
        #q_hat: concatenate sentence embeddings and context embeddings
        q_hat=self.concatenate_sent_context(layer2.output, r_h)
        layer3 = HiddenLayer(rng, input=q_hat, n_in=self.sentEm_length+self.context_size*self.embedding_size, n_out=self.embedding_size, activation=T.tanh)
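        # layer3 maps the concatenation [sentence embedding ; context embeddings]
        # back down to embedding_size, so its output can be compared (via the dot
        # products below) with the target-word embedding q_w and with the noise
        # word embeddings.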
        self.params = layer3.params  + layer2.params+layer1.params + layer0.params+[self.embeddings_R, self.embeddings_Q]
        self.load_model_from_file()
        '''
        # load parameters
        netfile = open('/mounts/data/proj/wenpeng/CNN_LM/model_params')
        for para in self.params:
            para.set_value(cPickle.load(netfile), borrow=True)
        
        layer0.params[0].set_value(cPickle.load(netfile), borrow=True)
        layer0.params[1].set_value(cPickle.load(netfile), borrow=True)
        layer2.params[0].set_value(cPickle.load(netfile), borrow=True)
        layer2.params[1].set_value(cPickle.load(netfile), borrow=True)
        layer3.params[0].set_value(cPickle.load(netfile), borrow=True)
        layer3.params[1].set_value(cPickle.load(netfile), borrow=True)
        '''
        
        
        noise_indices, p_n_noise=self.get_noise()
        #noise_indices=theano.printing.Print('noise_indices')(noise_indices)
        s_theta_data=T.sum(layer3.output * q_w, axis=1).reshape((self.batch_size,1)) + self.bias[w_indices-1]  #bias[0] should be the bias of word index 1
        #s_theta_data=theano.printing.Print('s_theta_data')(s_theta_data)
        p_n_data = self.p_n[w_indices-1] #p_n[0] indicates the probability of word indexed 1
        delta_s_theta_data = s_theta_data - T.log(self.k * p_n_data)
        log_sigm_data = T.log(T.nnet.sigmoid(delta_s_theta_data))
        
        #create the noise, q_noise has shape(self.batch_size, self.k, self.embedding_size )
        q_noise = self.embed_noise(noise_indices)
        q_hat_res = layer3.output.reshape((self.batch_size, 1, self.embedding_size))
        s_theta_noise = T.sum(q_hat_res * q_noise, axis=2) + self.bias[noise_indices-1] #(batch_size, k)
        delta_s_theta_noise = s_theta_noise - T.log(self.k * p_n_noise)  # it should be matrix (batch_size, k)
        log_sigm_noise = T.log(1 - T.nnet.sigmoid(delta_s_theta_noise))
        sum_noise_per_example =T.sum(log_sigm_noise, axis=1)   #(batch_size, 1)
        # Calc objective function
        J = -T.mean(log_sigm_data) - T.mean(sum_noise_per_example)
        L2_reg = (layer3.W** 2).sum()+ (layer2.W** 2).sum()+ (layer1.W** 2).sum()+(layer0.W** 2).sum()+(self.embeddings_R**2).sum()+( self.embeddings_Q**2).sum()
        self.cost = J + self.L2_weight*L2_reg
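        # Rough per-example sketch of the noise-contrastive estimation objective
        # assembled above (plain pseudo-numpy, names hypothetical):
        #   delta_data  = s_data  - log(k * p_data)    # score of the observed word
        #   delta_noise = s_noise - log(k * p_noise)   # scores of the k noise words
        #   J_example   = -log(sigmoid(delta_data)) - sum(log(1 - sigmoid(delta_noise)))
        # self.cost averages this over the batch and adds the weighted L2 penalty.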
        

        
        #cost = layer3.negative_log_likelihood(y)
        # create a function to compute the mistakes that are made by the model
        test_model = theano.function([index,iteration], [self.cost,layer2.output],
                 givens={
                    x_index: indices_test_theano[index: index + self.batch_size],
                    z: testLengths[index: index + self.batch_size],
                    left: testLeftPad[index: index + self.batch_size],
                    right: testRightPad[index: index + self.batch_size]})
        '''
        validate_model = theano.function([index,iteration], self.cost,
                givens={
                    x_index: indices_dev_theano[index: index + self.batch_size],
                    z: devLengths[index: index + self.batch_size],
                    left: devLeftPad[index: index + self.batch_size],
                    right: devRightPad[index: index + self.batch_size]})
    
        # create a list of all model parameters to be fit by gradient descent
        #self.params = layer3.params  + layer2.params+layer1.params + layer0.params+[self.embeddings_R, self.embeddings_Q]
        #params = layer3.params + layer2.params + layer0.params+[embeddings]
        
        accumulator=[]
        for para_i in self.params:
            eps_p=numpy.zeros_like(para_i.get_value(borrow=True),dtype=theano.config.floatX)
            accumulator.append(theano.shared(eps_p, borrow=True))
          
        # create a list of gradients for all model parameters
        grads = T.grad(self.cost, self.params)
        updates = []
        for param_i, grad_i, acc_i in zip(self.params, grads, accumulator):
            acc = acc_i + T.sqr(grad_i)
            if param_i == self.embeddings_R or param_i == self.embeddings_Q:
                updates.append((param_i, T.set_subtensor((param_i - self.ini_learning_rate * grad_i / T.sqrt(acc))[0], theano.shared(numpy.zeros(self.embedding_size)))))   #AdaGrad
            else:
                updates.append((param_i, param_i - self.ini_learning_rate * grad_i / T.sqrt(acc)))   #AdaGrad
            updates.append((acc_i, acc))    
           
        train_model = theano.function([index,iteration], [self.cost, self.params], updates=updates,
              givens={
                x_index: indices_train_theano[index: index + self.batch_size],
                z: trainLengths[index: index + self.batch_size],
                left: trainLeftPad[index: index + self.batch_size],
                right: trainRightPad[index: index + self.batch_size]})
        '''
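        # The commented-out train_model above uses AdaGrad: squared gradients are
        # accumulated per parameter, the step is grad / sqrt(acc), and for the two
        # embedding matrices row 0 (the padding embedding) is reset to zeros with
        # T.set_subtensor after every update.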
        ###############
        # TRAIN MODEL #
        ###############
        print '... testing'
        start_time = time.clock()
        test_losses=[]
        i=0
        for batch_start in test_batch_start: 
            i=i+1
            sys.stdout.write( "Progress :[%3f] %% complete!\r" % (i*100.0/len(test_batch_start)) )
            sys.stdout.flush()
            #print str(i*100.0/len(test_batch_start))+'%...'
            total_iteration=max(self.test_lengths[batch_start: batch_start + self.batch_size])
            #for test, we need the cost among all the iterations in that batch

            for iteration in range(total_iteration):
                cost_i, sentEm=test_model(batch_start, iteration)
                test_losses.append(cost_i)
                #test_losses = [test_model(i) for i in test_batch_start]
        test_score = numpy.mean(test_losses)        
        print 'Test over, average test loss:'+str(test_score)
        '''
        # early-stopping parameters
        patience = 50000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(20, patience / 2)
                                      # go through this many minibatches before
                                      # checking the network on the validation set;
                                      # in this case we check every epoch
    
        best_params = None
        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
    
        epoch = 0
        done_looping = False
        vali_loss_list=[]
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            #for minibatch_index in xrange(n_train_batches): # each batch
            minibatch_index=0
            for batch_start in train_batch_start: 
                # iter counts how many minibatches have been run so far, across epochs
                iter = (epoch - 1) * n_train_batches + minibatch_index +1
    
                minibatch_index=minibatch_index+1
                total_iteration=max(self.train_lengths[batch_start: batch_start + self.batch_size])
                # we only care about the cost of the last iteration within this batch
                cost_of_end_batch=0.0
                for iteration in range(total_iteration):
                    cost_of_end_batch, params_of_end_batch = train_model(batch_start, iteration)
                    #total_cost=total_cost+cost_ij
                #if iter ==1:
                #    exit(0)
                if iter % n_train_batches == 0:
                    print 'training @ iter = '+str(iter)+' cost: '+str(cost_of_end_batch)# +' error: '+str(error_ij)
                if iter % validation_frequency == 0:
    
                    # compute zero-one loss on validation set
                    #validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    validation_losses=[]
                    for batch_start in dev_batch_start: 
                        total_iteration=max(self.dev_lengths[batch_start: batch_start + self.batch_size])
                        #for validate, we need the cost among all the iterations in that batch

                        for iteration in range(total_iteration):
                            validation_losses.append(validate_model(batch_start, iteration))
                    this_validation_loss = numpy.mean(validation_losses)
                    print('\t\tepoch %i, minibatch %i/%i, validation cost %f %%' % \
                      (epoch, minibatch_index , n_train_batches, \
                       this_validation_loss * 100.))
                    if this_validation_loss < minimal_of_list(vali_loss_list):
                        del vali_loss_list[:]
                        vali_loss_list.append(this_validation_loss)
                        #store params
                        self.best_params=params_of_end_batch
                    elif len(vali_loss_list)<self.vali_cost_list_length:
                        vali_loss_list.append(this_validation_loss)
                        if len(vali_loss_list)==self.vali_cost_list_length:
                            self.store_model_to_file()
                            print 'Training over, best model got at vali_cost:'+str(vali_loss_list[0])
                            exit(0)
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
    
                        #improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss *  \
                           improvement_threshold:
                            patience = max(patience, iter * patience_increase)
    
                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
    
                        # test it on the test set
                        test_losses=[]
                        for batch_start in test_batch_start: 
                            total_iteration=max(self.test_lengths[batch_start: batch_start + self.batch_size])
                            #for test, we need the cost among all the iterations in that batch

                            for iteration in range(total_iteration):
                                cost_i, sentEm=test_model(batch_start, iteration)
                                test_losses.append(cost_i)
                        #test_losses = [test_model(i) for i in test_batch_start]
                        test_score = numpy.mean(test_losses)
                        print(('\t\t\t\tepoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index, n_train_batches,
                           test_score * 100.))
    
    
                if patience <= iter:
                    done_looping = True
                    break
        '''
        end_time = time.clock()
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
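        # Hedged usage sketch (assuming this constructor belongs to a class named
        # DBN, as in the Theano tutorial it follows):
        #   numpy_rng = numpy.random.RandomState(123)
        #   dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
        #             hidden_layers_sizes=[1000, 1000, 1000], n_outs=10)
        #   # dbn.rbm_layers are pretrained layer-wise, then dbn.finetune_cost is
        #   # minimised by SGD for the supervised phase.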
Example #17
def evaluate(init_learning_rate=0.1, n_epochs=200,
                    datasets='Trace' ,nkerns=[256, 256], n_train_batch=10,
                    trans='euc', active_func=T.tanh, window_size = 0.2, 
                    ada_flag = False, pool_factor = 2, slice_ratio = 1
                    ):

    rng = numpy.random.RandomState(23455) #set random seed
    learning_rate = theano.shared(numpy.asarray(init_learning_rate,dtype=theano.config.floatX))
    #used for learning_rate decay
 
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    ori_len = datasets[3]
    slice_ratio = datasets[4]
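    # Note: despite the string default in the signature, `datasets` is expected to
    # arrive here as the five-element structure unpacked above: (train, valid,
    # test) pairs plus the original series length and the slice ratio.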

    valid_num = valid_set_x.shape[0]
    increase_num = ori_len - int(ori_len * slice_ratio) + 1 #this can be used as the batch size
    print "increase factor is ", increase_num, ', ori len', ori_len
    valid_num_batch = valid_num / increase_num

    test_num = test_set_x.shape[0]
    test_num_batch = test_num / increase_num

    length_train = train_set_x.shape[1] #length after slicing.
    num_of_categories = int(train_set_y.max()) + 1
 
    window_size = int(length_train * window_size) if window_size < 1 else int(window_size)

    #*******set up the ma and ds********#
    ma_base,ma_step,ma_num   = 5, 6, 0
    ds_base,ds_step, ds_num  = 2, 1, 4

    ds_num_max = length_train / (pool_factor * window_size)
    ds_num = min(ds_num, ds_num_max)
    
    #*******set up the ma and ds********#

    (ma_train, ma_valid, ma_test , ma_lengths) = batch_movingavrg(train_set_x,
                                                    valid_set_x, test_set_x,
                                                    ma_base, ma_step, ma_num)
    (ds_train, ds_valid, ds_test , ds_lengths) = batch_downsample(train_set_x,
                                                    valid_set_x, test_set_x,
                                                    ds_base, ds_step, ds_num)
 
    #concatenate directly
    data_lengths = [length_train] 
    #downsample part:
    if ds_lengths != []:
        data_lengths +=  ds_lengths
        train_set_x = numpy.concatenate([train_set_x, ds_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ds_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ds_test], axis = 1)

    #moving average part
    if ma_lengths != []:
        data_lengths += ma_lengths
        train_set_x = numpy.concatenate([train_set_x, ma_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ma_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ma_test], axis = 1)
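    # At this point each row of *_set_x holds the original (sliced) series with
    # its downsampled and moving-average versions appended along the feature axis;
    # data_lengths records the length of every branch so the input can be split
    # back into per-branch slices when the network is built below.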

    train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
    
    valid_set_x = shared_data_x(valid_set_x)
    test_set_x = shared_data_x(test_set_x)

    #compute number of minibatches for training, validation and testing
    n_train_size = train_set_x.get_value(borrow=True).shape[0]
    n_valid_size = valid_set_x.get_value(borrow=True).shape[0]
    n_test_size = test_set_x.get_value(borrow=True).shape[0]
    batch_size = n_train_size / n_train_batch
    n_train_batches = n_train_size / batch_size
    data_dim = train_set_x.get_value(borrow=True).shape[1]
    print 'train size', n_train_size, ',valid size', n_valid_size, ' test size', n_test_size
    print 'batch size ', batch_size
    print 'n_train_batches is ', n_train_batches
    print 'data dim is ', data_dim
    print '---------------------------'

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch

    x = T.matrix('x')   
    y = T.ivector('y')  
                       
    x_vote = T.matrix('xvote')   # sliced sub-series of one original example, used for voting
    #y_vote = T.ivector('y_vote')  # the labels are presented as 1D vector of

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print 'building the model...'

    # Reshape the matrix of concatenated series of shape (batch_size, data_dim)
    # to a 4D tensor of shape (batch_size, 1, data_dim, 1), compatible with the
    # ShapeletPoolLayer convolutions below
    layer0_input = []
    inputs = x.reshape((batch_size, 1, data_dim, 1))
    
    layer0_input_vote = []
    inputs_vote = x_vote.reshape((increase_num, 1, data_dim, 1))
    ind = 0
    for i in xrange(len(data_lengths)):
        layer0_input.append(inputs[:,:,ind : ind + data_lengths[i],:])
        layer0_input_vote.append(inputs_vote[:,:,ind : ind + data_lengths[i],:])
        ind += data_lengths[i]

    layer0 = []
    layer0_vote = []
    feature_map_size = 0

    for i in xrange(len(layer0_input)):
        pool_size = (data_lengths[i] - window_size + 1) / pool_factor 
        feature_map_size += (data_lengths[i] - window_size + 1) / pool_size

        layer0.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input[i],
            image_shape=(batch_size, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            trans = trans,
            active_func=active_func
        ))
        layer0_vote.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input_vote[i],
            image_shape=(increase_num, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            W = layer0[i].W,
            trans = trans,
            active_func=active_func
        ))
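    # Each branch gets its own ShapeletPoolLayer; the *_vote copies share the
    # filters (W=layer0[i].W) but operate on the increase_num sub-series of a
    # single original example, so the trained weights also drive the voting graph.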

    layer1_input = layer0[0].output.flatten(2)
    layer1_vote_input = layer0_vote[0].output.flatten(2)
    for i in xrange(1, len(data_lengths)):
        layer1_input = T.concatenate([layer1_input, layer0[i].output.flatten(2)], axis = 1)
        layer1_vote_input = T.concatenate([layer1_vote_input, layer0_vote[i].output.flatten(2)], axis = 1)

    # construct a fully-connected sigmoidal layer
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None
    )
    # construct a fully-connected sigmoidal layer for prediction
    layer1_vote = HiddenLayer(
        rng,
        input=layer1_vote_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None,
        W = layer1.W,
        b = layer1.b
    )

    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None)
    layer2_vote = LogisticRegressionVote(input=layer1_vote.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None, W = layer2.W, b = layer2.b)

    # the cost we minimize during training is the NLL of the model
    cost = layer2.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : test_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )
    # function for validation set. Return the prediction value
    validate_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : valid_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer2.params + layer1.params
    for i in xrange(len(layer0_input)):
        params += layer0[i].params

    # Adagradient part
    grads = T.grad(cost, params)
    import copy
    G = [] 
    for i in xrange(2 + len(layer0_input)):
        G.append( theano.shared(
            numpy.zeros(params[i].shape.eval(), dtype=theano.config.floatX
            ),
            borrow=True
        ))

    # parameter update methods
    if ada_flag == True:
        updates = [
            (param_i, param_i -  learning_rate * (grad_i / (T.sqrt(G_i) + 1e-5) ))
            for param_i, grad_i, G_i in zip(params, grads, G)
        ]
    else:
        updates = [
            (param_i, param_i -  learning_rate * grad_i )
            for param_i, grad_i in zip(params, grads)
        ]
 
    update_G = theano.function(inputs=[index], outputs = G,
            updates=[(G_i, G_i  + T.sqr(grad_i) )
            for G_i, grad_i in zip(G,grads)],
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
            )
    reset_G = theano.function(inputs=[index],outputs = G,
            updates=[(G_i, grad_i - grad_i) 
            for G_i, grad_i in zip(G,grads)],
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
            )       
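    # AdaGrad bookkeeping used when ada_flag is True: reset_G zeroes the
    # accumulators (grad_i - grad_i) at the start of every epoch, update_G adds
    # the squared minibatch gradient, and the update rule above divides each step
    # by sqrt(G) + 1e-5.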

    #Our training function, return value: NLL cost and training error
    train_model = theano.function(
        [index],
        [cost, layer2.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    decrease_learning_rate = theano.function(inputs=[], outputs = learning_rate,
            updates={learning_rate: learning_rate * 1e-4})
    
    ###############
    # TRAIN MODEL #
    ###############
    print 'training...'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    best_test_iter = 0
    best_test_loss = numpy.inf
    test_patience = 200
    valid_loss = 0.
    test_loss = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    last_train_err = 1
    last_avg_err = float('inf')
    first_layer_prev = 0
    num_no_update_epoch = 0
    epoch_avg_cost = float('inf')
    epoch_avg_err = float('inf')

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_train_err = 0.
        epoch_cost = 0.
        if ada_flag:
            reset_G(0)
        num_no_update_epoch += 1
        if num_no_update_epoch == 500:
            break
        for minibatch_index in xrange(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if ada_flag:
                update_G(minibatch_index)
            
            [cost_ij,train_err] = train_model(minibatch_index)
            
            epoch_train_err = epoch_train_err + train_err
            epoch_cost = epoch_cost + cost_ij
            
            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                # validation set loss
                valid_results = [validate_model(i) for i in xrange(valid_num_batch)]
                valid_losses = []
                for i in xrange(valid_num_batch):
                    y_pred = valid_results[i]
                    label = valid_set_y[i * increase_num]
                    unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                    unique_value = unique_value.tolist()
                    curr_err = 1.
                    if label in unique_value:
                        target_ind = unique_value.index(label)
                        count = count.tolist()
                        sorted_count = sorted(count)
                        if count[target_ind] == sorted_count[-1]:
                            if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                curr_err = 0.5 #tie
                            else:
                                curr_err = 0.
                    valid_losses.append(curr_err)
                valid_loss = sum(valid_losses) / float(len(valid_losses)) 
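                # Voting scheme: every original series is represented by
                # increase_num slices, so the slice predictions are tallied with
                # numpy.unique; the example counts as correct (0.) only when the
                # true label is the strict mode, as half an error (0.5) on a tie,
                # and as a full error (1.) otherwise.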

                print('...epoch %i, valid err: %.5f |' %
                      (epoch, valid_loss)),

                # if we got the best validation score until now
                if valid_loss <= best_validation_loss:
                    num_no_update_epoch = 0

                    #improve patience if loss improvement is good enough
                    if valid_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = valid_loss
                    best_iter = iteration

                    # test it on the test set
                    test_results = [test_model(i) for i in xrange(test_num_batch)]
                    test_losses = []
                    for i in xrange(test_num_batch):
                        y_pred = test_results[i]
                        label = test_set_y[i * increase_num]
                        unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                        unique_value = unique_value.tolist()
                        curr_err = 1
                        if label in unique_value:
                            target_ind = unique_value.index(label)
                            count = count.tolist()
                            sorted_count = sorted(count)
                            if count[target_ind] == sorted_count[-1]:
                                if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                    curr_err = 0.5 # tie
                                else:
                                    curr_err = 0.
                        test_losses.append(curr_err)
                    test_loss = sum(test_losses) / float(len(test_losses)) 
                    print(('test err: %.5f |') %
                          (test_loss)),

                    best_test_loss = test_loss
                    test_patience = 200

            #test_patience -= 1 
            #if test_patience <= 0:
            #    break
            
            if patience <= iteration:
                done_looping = True
                break

        epoch_avg_cost = epoch_cost/n_train_batches
        epoch_avg_err = epoch_train_err/n_train_batches
        #curr_lr = decrease_learning_rate()
        last_avg_err = epoch_avg_cost
 
        print ('train err %.5f, cost %.4f' %(epoch_avg_err,epoch_avg_cost))
        if epoch_avg_cost == 0:
             break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test error: %f %%' %
          (best_validation_loss * 100., best_iter + 1, best_test_loss * 100.))
    print('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    return best_validation_loss
Example #18
def runDeepLearning():
    ### Loading training set and separting it into training set and testing set

    myDataset = Dataset()
    preprocess = 0
    datasets = myDataset.loadTrain(preprocess)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    dataset_test = myDataset.loadTest(preprocess)
    test_set_x, test_set_y, test_set_y_array = dataset_test[0]
    # temporary solution to expose the ground-truth labels as test_set_y_array:
    # after T.cast, test_set_y becomes a TensorVariable whose value cannot be
    # read back directly from Python.

    ### Model parameters
    learning_rate = 0.02
    n_epochs = 3000
    nkerns = [
        30, 40, 40
    ]  # number of kernels at each layer; current best performance is 50.0% on the test set with kernel numbers [30, 40, 40]
    batch_size = 500

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (48, 48)  # size of input images
    nClass = 7

    rng = np.random.RandomState(23455)
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 48*48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, ishape[0], ishape[0]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (48-5+1, 48-5+1) = (44, 44)
    # maxpooling reduces this further to (44/2, 44/2) = (22, 22)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 22, 22)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, ishape[0],
                                             ishape[0]),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (22-5+1, 22-5+1) = (18, 18)
    # maxpooling reduces this further to (18/2, 18/2) = (9, 9)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 9, 9)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 22, 22),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # Construct the third convolutional pooling layer
    # filtering reduces the image size to (9-2+1, 9-2+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, nkerns[1], 9, 9),
                                filter_shape=(nkerns[2], nkerns[1], 2, 2),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2]*4*4) = (500, 640)
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=nClass)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    test_model = theano.function(
        [index],
        layer4.errorsLabel(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set

                    #test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_output = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_losses = [item[0] for item in test_output]
                    #test_y_gt = [label[0] for label in item[1] for item in test_output] #
                    test_y_pred = np.array(
                        [label for item in test_output for label in item[1]])
                    test_y_gt = np.array(
                        [label for item in test_output for label in item[2]])
                    #test_y_pred = np.array([item[1] for item in test_output] )
                    ## the predicted_labels for the input
                    ### it seems that the batch size cannot be changed in a theano.function while training the model ###
                    #test_label = reduce(lambda x,y: x+y,test_label)

                    #print test_y_pred
                    #print test_y_gt
                    #print test_set_y_array

                    errorNum = np.count_nonzero(test_y_gt - test_y_pred)
                    errorSampleIndex = [
                        i for i in range(len(test_y_pred))
                        if test_y_pred[i] != test_set_y_array[i]
                    ]
                    #print errorNum, len(errorSampleIndex)

                    test_score = np.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    print((' on all test sample %f %%') %
                          ((float(errorNum) / float(len(test_y_pred)) * 100.)))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')

    #TODO: write the code to save the trained model and test the trained model on test data

    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # save the misclassified samples
    myDataset.plotSample(test_set_x.get_value(), test_set_y,
                         [i for i in range(0, 100)])
    def classifier(rng, common_input_l, common_input_r, sents_mask_l,
                   sents_mask_r, drop_conv_W_2_pre, conv_b_2_pre,
                   drop_conv_W_2_gate, conv_b_2_gate, drop_conv_W_2, conv_b_2,
                   drop_conv_W_2_context, conv_b_2_context, labels):
        conv_layer_2_gate_l = Conv_with_Mask_with_Gate(
            rng,
            input_tensor3=common_input_l,
            mask_matrix=sents_mask_l,
            image_shape=(batch_size, 1, emb_size, maxSentLen),
            filter_shape=gate_filter_shape,
            W=drop_conv_W_2_pre,
            b=conv_b_2_pre,
            W_gate=drop_conv_W_2_gate,
            b_gate=conv_b_2_gate)
        conv_layer_2_gate_r = Conv_with_Mask_with_Gate(
            rng,
            input_tensor3=common_input_r,
            mask_matrix=sents_mask_r,
            image_shape=(batch_size, 1, emb_size, maxSentLen),
            filter_shape=gate_filter_shape,
            W=drop_conv_W_2_pre,
            b=conv_b_2_pre,
            W_gate=drop_conv_W_2_gate,
            b_gate=conv_b_2_gate)

        l_input_4_att = conv_layer_2_gate_l.output_tensor3  #conv_layer_2_gate_l.masked_conv_out_sigmoid*conv_layer_2_pre_l.masked_conv_out+(1.0-conv_layer_2_gate_l.masked_conv_out_sigmoid)*common_input_l
        r_input_4_att = conv_layer_2_gate_r.output_tensor3  #conv_layer_2_gate_r.masked_conv_out_sigmoid*conv_layer_2_pre_r.masked_conv_out+(1.0-conv_layer_2_gate_r.masked_conv_out_sigmoid)*common_input_r

        conv_layer_2 = Conv_for_Pair(
            rng,
            origin_input_tensor3=common_input_l,
            origin_input_tensor3_r=common_input_r,
            input_tensor3=l_input_4_att,
            input_tensor3_r=r_input_4_att,
            mask_matrix=sents_mask_l,
            mask_matrix_r=sents_mask_r,
            image_shape=(batch_size, 1, hidden_size[0], maxSentLen),
            image_shape_r=(batch_size, 1, hidden_size[0], maxSentLen),
            filter_shape=(hidden_size[1], 1, hidden_size[0], filter_size[0]),
            filter_shape_context=(hidden_size[1], 1, hidden_size[0], 1),
            W=drop_conv_W_2,
            b=conv_b_2,
            W_context=drop_conv_W_2_context,
            b_context=conv_b_2_context)
        attentive_sent_embeddings_l_2 = conv_layer_2.attentive_maxpool_vec_l
        attentive_sent_embeddings_r_2 = conv_layer_2.attentive_maxpool_vec_r
        # attentive_sent_sumpool_l_2 = conv_layer_2.attentive_sumpool_vec_l
        # attentive_sent_sumpool_r_2 = conv_layer_2.attentive_sumpool_vec_r

        HL_layer_1_input = T.concatenate([
            attentive_sent_embeddings_l_2, attentive_sent_embeddings_r_2,
            attentive_sent_embeddings_l_2 * attentive_sent_embeddings_r_2
        ],
                                         axis=1)

        HL_layer_1_input_size = hidden_size[
            1] * 3  #+extra_size#+(maxSentLen*2+10*2)#+hidden_size[1]*3+1

        HL_layer_1 = HiddenLayer(rng,
                                 input=HL_layer_1_input,
                                 n_in=HL_layer_1_input_size,
                                 n_out=hidden_size[0],
                                 activation=T.nnet.relu)
        HL_layer_2 = HiddenLayer(rng,
                                 input=HL_layer_1.output,
                                 n_in=hidden_size[0],
                                 n_out=hidden_size[0],
                                 activation=T.nnet.relu)

        LR_input_size = HL_layer_1_input_size + 2 * hidden_size[0]
        U_a = create_ensemble_para(
            rng, 3, LR_input_size)  # the weight matrix hidden_size*2
        LR_b = theano.shared(value=np.zeros((3, ), dtype=theano.config.floatX),
                             name='LR_b',
                             borrow=True)  #bias for each target class
        LR_para = [U_a, LR_b]

        LR_input = T.tanh(
            T.concatenate(
                [HL_layer_1_input, HL_layer_1.output, HL_layer_2.output],
                axis=1))
        layer_LR = LogisticRegression(
            rng, input=LR_input, n_in=LR_input_size, n_out=3, W=U_a, b=LR_b
        )  #basically it is a multiplication between weight matrix and input feature vector
        loss = layer_LR.negative_log_likelihood(
            labels
        )  #for classification tasks we usually use negative log likelihood as the loss; the lower the better.

        return loss, LR_para + HL_layer_1.params + HL_layer_2.params, layer_LR.p_y_given_x, layer_LR.errors(
            labels)
Example #20
def test_dA(learning_rate=0.01, training_epochs=15000,
            dataset='mnist.pkl.gz',
            batch_size=5, output_folder='dA_plots'):

    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the Denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    """
    ##datasets = load_data(dataset)
    #from SdA_mapping import load_data_half
    #datasets = load_data_half(dataset)
    print 'loading data'
    datasets, x_mean, y_mean, x_std, y_std = load_vc()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]  
    test_set_x, test_set_y = datasets[2]
    print 'loaded data'

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x1 = T.matrix('x1')  # the data is presented as rasterized images
    x2 = T.matrix('x2')  # the data is presented as rasterized images
    cor_reg = T.scalar('cor_reg')
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    #da = dA_joint(
        #numpy_rng=rng,
        #theano_rng=theano_rng,
        #input1=x1,
        #input2=x2,

        #n_visible1=28 * 28/2,
        #n_visible2=28 * 28/2,

        #n_hidden=500
    #)
    print 'initialize functions'

    da = dA_joint(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input1=x1,
        input2=x2,
        cor_reg=cor_reg,

        #n_visible1=28 * 28/2,
        #n_visible2=28 * 28/2,
        n_visible1=24,
        n_visible2=24,
        n_hidden=50
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )
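    # get_cost_updates is assumed (following the dA tutorial this is based on) to
    # return the joint reconstruction cost with 30% of the inputs masked out,
    # together with one SGD update per parameter; cor_reg presumably weights an
    # additional correlation term between the two hidden views, which is why it is
    # increased by update_reg below.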
    cor_reg_val = numpy.float32(5.0)
    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x1: train_set_x[index * batch_size: (index + 1) * batch_size],
            x2: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    fprop_x1 = theano.function(
               [],
               outputs=da.output1,
               givens={
                   x1: test_set_x
               },
               name='fprop_x1'
    )
    fprop_x2 = theano.function(
               [],
               outputs=da.output2,
               givens={
                   x2: test_set_y
               },
               name='fprop_x2'
    )
    fprop_x1t = theano.function(
               [],
               outputs=da.output1,
               givens={
                   x1: train_set_x
               },
               name='fprop_x1'
    )
    fprop_x2t = theano.function(
               [],
               outputs=da.output2,
               givens={
                   x2: train_set_y
               },
               name='fprop_x2'
    )
    rec_x1 = theano.function(
               [],
               outputs=da.rec1,
               givens={
                   x1: test_set_x
               },
               name='rec_x1'
    )
    rec_x2 = theano.function(
               [],
               outputs=da.rec2,
               givens={
                   x2: test_set_y
               },
               name='rec_x2'
    )
    fprop_x1_to_x2 = theano.function(
               [],
               outputs=da.reg,
               givens={
                   x1: test_set_x
               },
               name='fprop_x12x2'
    )
    updates_reg = [
            (da.cor_reg, da.cor_reg+theano.shared(numpy.float32(0.1)))
    ]
    update_reg = theano.function(
        [],
        updates=updates_reg
    )
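    # Each call to update_reg() raises the shared cor_reg weight by 0.1; it is
    # invoked once per training epoch below, so the correlation term is weighted
    # more heavily as training progresses.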
    print 'initialize functions ended'

    
    start_time = time.clock()

    ############
    # TRAINING #
    ############
    print 'training started'
    X1=test_set_x.eval()
    X1 *= x_std
    X1 += x_mean
    X2=test_set_y.eval()
    X2 *= y_std
    X2 += y_mean
    from dcca_numpy import cor_cost
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        
        #cor_reg_val += 1
        #da.cor_reg = theano.shared(cor_reg_val) 
        update_reg()
        
        X1H=rec_x1()
        X2H=rec_x2()
        X1H *= x_std
        X1H += x_mean
        X2H *= y_std
        X2H += y_mean
        H1=fprop_x1()
        H2=fprop_x2()
        print 'Training epoch'
        print 'Reconstruction ', numpy.mean(numpy.mean((X1H-X1)**2,1)),\
              numpy.mean(numpy.mean((X2H-X2)**2,1))
        
        if epoch%5 == 2 : # pretrain middle layer
            print '... pre-training MIDDLE layer'
            H1t=fprop_x1t()
            H2t=fprop_x2t()
            h1 = T.matrix('x')  # hidden representation of the first view
            h2 = T.matrix('y')  # hidden representation of the second view
            from mlp import HiddenLayer
            numpy_rng = numpy.random.RandomState(89677)
            log_reg = HiddenLayer(numpy_rng, h1, 50, 50, activation=T.tanh)

            if 1: # for middle layer
                learning_rate = 0.1
            
                #H1=theano.shared(H1)
                #H2=theano.shared(H2)
                # compute the gradients with respect to the model parameters
                logreg_cost = log_reg.mse(h2)
    
                gparams = T.grad(logreg_cost, log_reg.params)
        
                # compute list of fine-tuning updates
                updates = [
                    (param, param - gparam * learning_rate)
                    for param, gparam in zip(log_reg.params, gparams)
                ]
    
                train_fn_middle = theano.function(
                    inputs=[],
                    outputs=logreg_cost,
                    updates=updates,
                    givens={
                        h1: theano.shared(H1t),
                        h2: theano.shared(H2t)
                    },
                    name='train_middle'
                )
            # use a separate counter for the middle-layer pretraining loop
            mid_epoch = 0
            while mid_epoch < 100:
                print mid_epoch, train_fn_middle()
                mid_epoch += 1
            
            ##X2H=fprop_x1_to_x2()
            X2H=numpy.tanh(H1.dot(log_reg.W.eval())+log_reg.b.eval())
            X2H=numpy.tanh(X2H.dot(da.W2_prime.eval())+da.b2_prime.eval())

            X2H *= y_std
            X2H += y_mean
            print 'Regression ', numpy.mean(numpy.mean((X2H-X2)**2,1))
        
        print 'Correlation ', cor_cost(H1, H2)
    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W1.get_value(borrow=True).T,
                           img_shape=(28, 14), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')
    
    from matplotlib import pyplot as pp
    pp.plot(H1[:10,:2],'b');pp.plot(H2[:10,:2],'r');pp.show()
    
    print 'Final correlation ', cor_cost(H1, H2)
Example #21
                           filter_shape=(128, 256, 3, 3),
                           image_shape=(batch_size, 256, 10, 10),
                           conv_stride=(1, 1))

conv_out4 = MyConvnetLayer(rng,
                           input=conv_out3.output,
                           filter_shape=(128, 128, 3, 3),
                           image_shape=(batch_size, 128, 8, 8),
                           conv_stride=(1, 1))

layer5_input = conv_out4.output.flatten(2)

# construct a fully-connected sigmoidal layer
full_5 = HiddenLayer(rng,
                     input=layer5_input,
                     n_in=128 * 6 * 6,
                     n_out=256,
                     activation=T.tanh)
# classify the values of the fully-connected sigmoidal layer
full_5_softmax = LogisticRegression(input=full_5.output, n_in=256, n_out=5)
weight_decay = 1e-5
momentum = 0.9

# Cost function for minibatch
cost = T.mean(T.nnet.categorical_crossentropy(full_5_softmax.p_y_given_x, y))
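# Assuming y holds integer class labels, this categorical cross-entropy averages
# the same quantity as the negative log likelihood minimised in the other
# examples above.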
# Concatenation of the params
params = full_5_softmax.params + full_5.params + conv_out4.params + conv_out3.params + conv_out2.params + conv_out1.params

# create theano function to compute filtered images
train_model = theano.function(
    [x, y, lr],
Example #22
class ExtGrCNNMatchScorer(object):
    '''
    Extended Gated Recursive Convolutional Neural Network for matching task. The last 
    layer of the model includes a linear layer for regression.
    '''
    def __init__(self, config=None, verbose=True):
        # Construct two GrCNNEncoders for matching two sentences
        self.encoderL = ExtGrCNNEncoder(config, verbose)
        self.encoderR = ExtGrCNNEncoder(config, verbose)
        # Link the parameters of two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Build three kinds of inputs:
        # 1. inputL, inputR: used for computing the matching score after training
        # 2. inputPL, inputPR: used for training on positive pairs
        # 3. inputNL, inputNR: used for training on negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Linking input-output mapping
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive 
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
        # Build hidden layer
        self.hidden_layer = HiddenLayer(self.hidden, (2*config.num_hidden, config.num_mlp), act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layers
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Build cost function
        self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class methods
        self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
        self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, 
                                                                 self.inputNL, self.inputNR],
                                                         outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                           outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                            outputs=[self.hiddenP, self.hiddenN])
        self.show_inputs = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                           outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])

        if verbose:
            logger.debug('Architecture of ExtGrCNNMatchScorer built, summarized below: ')
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension inside GrCNNMatchScorer pyramid: %d' % config.num_hidden)
            logger.debug('Hidden dimension MLP: %d' % config.num_mlp)
            logger.debug('Number of Gating functions: %d' % config.num_gates)
            logger.debug('There are 2 ExtGrCNNEncoders used in the model.')
            logger.debug('Total number of parameters used in the model: %d' % self.num_params)

    def update_params(self, grads, learn_rate): 
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, grcnn):
        '''
        @grcnn: ExtGrCNNMatchScorer. Copy the model parameters of another ExtGrCNNMatchScorer and use them.
        '''
        assert len(self.params) == len(grcnn.params)
        for p, param in zip(self.params, grcnn.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: ExtGrCNNMatchScorer. An instance of ExtGrCNNMatchScorer to be saved.
        '''
        with file(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with file(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
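# Illustrative numpy sketch of the pairwise hinge ranking cost minimized by
# ExtGrCNNMatchScorer above: mean(max(0, 1 - scoreP + scoreN)). The scores
# below are invented, not outputs of the model.
import numpy as np

def ranking_hinge_cost(scoreP, scoreN, margin=1.0):
    # average margin violation over a batch of positive/negative pairs
    return np.mean(np.maximum(0.0, margin - scoreP + scoreN))

scoreP = np.array([2.0, 0.3, 1.5])
scoreN = np.array([0.5, 0.9, 1.4])
print ranking_hinge_cost(scoreP, scoreN)  # mean([0.0, 1.6, 0.9]) = 0.8333...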
Example #23
    def __init__(self, input_data, n_in, hidden_layer=100, n_out=4, weights=None, act_func=T.nnet.sigmoid, filename=None):
        """ Initialize the parameters of the regression model

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        self.input = input_data
        
        
        W1 = b1 = W2 = b2 = None
        print weights
        if weights is not None:
            try:
                W1, b1 = weights
            except Exception as e:
                W1 = weights[0]
        
        #self.linearRegression = LinearRegression(self.sigmoid_layer.output, n_in = hidden_layer, n_out = n_out)
        numpy_rng = numpy.random.RandomState()
        self.linearRegression = HiddenLayer(rng=numpy_rng,
                                        input= self.input,
                                        n_in= n_in,
                                        n_out = n_out,
                                        W_values = W1,
                                        b_values = b1,
                                        activation=act_func)
        '''
        self.linearRegression2 = HiddenLayer(rng=numpy_rng,
                                        input= self.linearRegression.output,
                                        n_in= hidden_layer,
                                        n_out = n_out,
                                        W_values = W2,
                                        b_values = b2, 
                                        activation=None)
        '''
        self.L1 = abs(self.linearRegression.W).sum()

        # square of L2 norm; one regularization option is to enforce
        # the square of the L2 norm to be small
        self.L2_sqr = T.mean(self.linearRegression.W ** 2)

        self.params = self.linearRegression.params
        
        self.output = self.linearRegression.output
        self.cost = self.linearRegression.mse
        
        
        
        if filename is not None:
            self.load(filename)
            print "Network Loaded from %s" % (filename)
Example #24
class ConvolutionalNeuralNetwork(Classifier):
    def __init__(self, rng, batch_size, nkerns=(20, 50)):
        self.batch_size = batch_size
        # 28x28 -> (24x24) // 2 = 12x12
        self.layer0 = LeNetConvPoolLayer(
            rng=rng,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
        )
        # 12x12 -> (8x8) // 2 = 4x4
        self.layer1 = LeNetConvPoolLayer(rng=rng,
                                         image_shape=(batch_size, nkerns[0],
                                                      12, 12),
                                         filter_shape=(nkerns[1], nkerns[0], 5,
                                                       5))
        # TODO: make this an MLP rather than a hidden layer -> LogReg
        # self.layer2 = MLP()
        self.layer2 = HiddenLayer(
            rng=rng,
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activation=T.tanh,
        )
        self.layer3 = LogisticRegression(
            n_in=500,
            n_out=10,
        )

    def pre_logreg_output(self, x):
        layer0_input = x.reshape((self.batch_size, 1, 28, 28))
        l0_output = self.layer0.output(layer0_input)
        l1_output = self.layer1.output(l0_output)

        l2_input = l1_output.flatten(2)
        l2_output = self.layer2.output(l2_input)
        return l2_output

    def negative_log_likelihood(self, x, y):
        output = self.pre_logreg_output(x)
        return self.layer3.negative_log_likelihood(output, y)

    def pred_label(self, x):
        output = self.pre_logreg_output(x)
        output = output.flatten(1)
        return self.layer3.pred_label(output)

    def errors(self, x, y):
        output = self.pre_logreg_output(x)
        return self.layer3.errors(output, y)

    def train(self,
              train_x,
              train_y,
              test_x,
              test_y,
              valid_x,
              valid_y,
              alpha=0.13,
              batch_size=500,
              l1_reg=0.,
              l2_reg=0.0,
              n_epochs=1000):
        x = T.matrix('x')
        y = T.ivector('y')
        batch_size = self.batch_size

        layer0_input = x.reshape((batch_size, 1, 28, 28))
        cost = self.negative_log_likelihood(layer0_input, y)

        params = self.layer0.params + self.layer1.params + self.layer2.params + self.layer3.params
        grads = T.grad(cost, params)
        updates = [(param, param - alpha * grad)
                   for param, grad in zip(params, grads)]

        index = T.lscalar()
        train_func = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_x[index * batch_size:(index + 1) * batch_size],
                y: train_y[index * batch_size:(index + 1) * batch_size],
            })
        best_loss = self.run_batches(train_x,
                                     train_y,
                                     test_x,
                                     test_y,
                                     valid_x,
                                     valid_y,
                                     x,
                                     y,
                                     train_model_func=train_func,
                                     batch_size=batch_size,
                                     n_epochs=n_epochs)
        return best_loss
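# Illustrative helper for the shape arithmetic in the comments above
# ("28x28 -> (24x24) // 2 = 12x12"): a valid convolution shrinks each side by
# filter_size - 1, and 2x2 non-overlapping max pooling halves it.
def conv_pool_output_size(input_size, filter_size, pool_size=2):
    return (input_size - filter_size + 1) // pool_size

print conv_pool_output_size(28, 5)  # 12, matching layer0
print conv_pool_output_size(12, 5)  # 4, so layer2 sees nkerns[1] * 4 * 4 inputs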
Example #25
def evaluate_lenet5(train, test, valid,
                    learning_rate=0.1, n_epochs=200,
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :param dataset train: Fuel dataset to use for training.
    :param dataset test: Fuel dataset to use for testing.
    :param dataset valid: Fuel dataset to use for validation.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    train_stream = DataStream.default_stream(
        train, iteration_scheme=SequentialScheme(train.num_examples,
                                                 batch_size))
    valid_stream = DataStream.default_stream(
        valid, iteration_scheme=SequentialScheme(valid.num_examples,
                                                 batch_size))
    test_stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(test.num_examples,
                                                batch_size))

    x = T.tensor4('x')
    yi = T.imatrix('y')
    y = yi.reshape((yi.shape[0],))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=x,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [x, yi],
        layer3.errors(y)
    )

    validate_model = theano.function(
        [x, yi],
        layer3.errors(y)
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [x, yi],
        cost,
        updates=updates
    )
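# Illustrative numpy sketch of the negative log-likelihood minimized above:
# the mean of -log p(y_i | x_i) over the minibatch. The probabilities below
# are made up and do not come from layer3.
import numpy as np

def negative_log_likelihood(p_y_given_x, y):
    return -np.mean(np.log(p_y_given_x[np.arange(len(y)), y]))

p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])
y = np.array([0, 1])
print negative_log_likelihood(p, y)  # -(log 0.7 + log 0.8) / 2 = 0.2899...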
def work(mode, data_name, test_dataname, pooling_mode="average_exc_pad"):
    print "mode: ", mode
    print "data_name: ", data_name
    print "pooling_mode: ", pooling_mode
    print "Started!"

    data_names = data_name.split(":")
    data_count = len(data_names)
    print "Train dataset:"
    for i in xrange(data_count):
        print "%d: %s" % (i, data_names[i])

    print "Test dataset:"
    test_data_names = test_dataname.split(":")
    test_data_count = len(test_data_names)
    for i in xrange(test_data_count):
        print "%d: %s" % (i, test_data_names[i])

    if test_data_count != data_count:
        raise Exception(
            "The amount of test and train dataset must be the same.")

    rng = numpy.random.RandomState(23455)
    docSentenceCount = T.ivector("docSentenceCount")
    sentenceWordCount = T.ivector("sentenceWordCount")
    corpus = T.matrix("corpus")
    docLabel = T.ivector('docLabel')

    hidden_layer_w = None
    hidden_layer_b = None
    logistic_layer_w = None
    logistic_layer_b = None
    layer0 = list()
    layer1 = list()
    layer2 = list()
    local_params = list()
    # for list-type data
    for i in xrange(data_count):
        layer0.append(DocEmbeddingNN(corpus, docSentenceCount, sentenceWordCount, rng, wordEmbeddingDim=200, \
                     sentenceLayerNodesNum=50, \
                     sentenceLayerNodesSize=[5, 200], \
                     docLayerNodesNum=10, \
                     docLayerNodesSize=[3, 50],
                     pooling_mode=pooling_mode))

        layer1.append(
            HiddenLayer(rng,
                        input=layer0[i].output,
                        n_in=layer0[i].outputDimension,
                        n_out=10,
                        activation=T.tanh,
                        W=hidden_layer_w,
                        b=hidden_layer_b))

        # 		hidden_layer_w = layer1[i].W
        # 		hidden_layer_b = layer1[i].b

        layer2.append(
            LogisticRegression(input=layer1[i].output,
                               n_in=10,
                               n_out=2,
                               W=logistic_layer_w,
                               b=logistic_layer_b))
        logistic_layer_w = layer2[i].W
        logistic_layer_b = layer2[i].b

        local_params.append(layer0[i].params + layer1[i].params)

    share_params = list(layer2[0].params)
    # construct the parameter array.
    params = list(layer2[0].params)

    for i in xrange(data_count):
        params += layer1[i].params + layer0[i].params

# 	data_name = "car"

    para_path = "data/" + data_name + "/log_model/" + pooling_mode + ".model"
    traintext = [
        "data/" + data_names[i] + "/train/text" for i in xrange(data_count)
    ]
    trainlabel = [
        "data/" + data_names[i] + "/train/label" for i in xrange(data_count)
    ]
    testtext = [
        "data/" + test_data_names[i] + "/test/text" for i in xrange(data_count)
    ]
    testlabel = [
        "data/" + test_data_names[i] + "/test/label"
        for i in xrange(data_count)
    ]

    # Load the parameters last time, optionally.
    loadParamsVal(para_path, params)

    if (mode == "train" or mode == "test"):
        train_model = list()
        valid_model = list()
        print "Loading train data."
        batchSize = 10
        share_learning_rate = 0.01
        local_learning_rate = 0.1
        n_batches = list()

        print "Loading test data."

        for i in xrange(data_count):
            cr_train = CorpusReader(minDocSentenceNum=5,
                                    minSentenceWordNum=5,
                                    dataset=traintext[i],
                                    labelset=trainlabel[i])
            docMatrixes, docSentenceNums, sentenceWordNums, ids, labels, _, _ = cr_train.getCorpus(
                [0, 100000])

            docMatrixes = transToTensor(docMatrixes, theano.config.floatX)
            docSentenceNums = transToTensor(docSentenceNums, numpy.int32)
            sentenceWordNums = transToTensor(sentenceWordNums, numpy.int32)
            labels = transToTensor(labels, numpy.int32)

            index = T.lscalar("index")

            n_batches.append((len(docSentenceNums.get_value()) - 1 - 1) /
                             batchSize + 1)
            print "Dataname: %s" % data_names[i]
            print "Train set size is ", len(docMatrixes.get_value())
            print "Batch size is ", batchSize
            print "Number of training batches  is ", n_batches[i]
            error = layer2[i].errors(docLabel)
            cost = layer2[i].negative_log_likelihood(docLabel)

            share_grads = T.grad(cost, share_params)
            share_updates = [
                (param_i, param_i - share_learning_rate * grad_i)
                for param_i, grad_i in zip(share_params, share_grads)
            ]

            grads = T.grad(cost, local_params[i])
            local_updates = [
                (param_i, param_i - local_learning_rate * grad_i)
                for param_i, grad_i in zip(local_params[i], grads)
            ]
            updates = share_updates + local_updates
            print "Compiling train computing graph."
            if mode == "train":
                train_model.append(
                    theano.function(
                        [index], [cost, error, layer2[i].y_pred, docLabel],
                        updates=updates,
                        givens={
                            corpus:
                            docMatrixes,
                            docSentenceCount:
                            docSentenceNums[index *
                                            batchSize:(index + 1) * batchSize +
                                            1],
                            sentenceWordCount:
                            sentenceWordNums,
                            docLabel:
                            labels[index * batchSize:(index + 1) * batchSize]
                        }))
            print "Compiled."

            print "Load test dataname: %s" % test_data_names[i]
            cr_test = CorpusReader(minDocSentenceNum=5,
                                   minSentenceWordNum=5,
                                   dataset=testtext[i],
                                   labelset=testlabel[i])
            validDocMatrixes, validDocSentenceNums, validSentenceWordNums, validIds, validLabels, _, _ = cr_test.getCorpus(
                [0, 1000])
            validDocMatrixes = transToTensor(validDocMatrixes,
                                             theano.config.floatX)
            validDocSentenceNums = transToTensor(validDocSentenceNums,
                                                 numpy.int32)
            validSentenceWordNums = transToTensor(validSentenceWordNums,
                                                  numpy.int32)
            validLabels = transToTensor(validLabels, numpy.int32)
            print "Validating set size is ", len(validDocMatrixes.get_value())
            print "Data loaded."

            print "Compiling test computing graph."
            valid_model.append(
                theano.function(
                    [], [
                        cost, error, layer2[i].y_pred, docLabel,
                        T.transpose(layer2[i].p_y_given_x)[1]
                    ],
                    givens={
                        corpus: validDocMatrixes,
                        docSentenceCount: validDocSentenceNums,
                        sentenceWordCount: validSentenceWordNums,
                        docLabel: validLabels
                    }))
            print "Compiled."
            costNum, errorNum, pred_label, real_label, pred_prob = valid_model[
                i]()
            print "Valid current model :", data_names[i]
            print "Cost: ", costNum
            print "Error: ", errorNum

            fpr, tpr, _ = roc_curve(real_label, pred_prob)
            roc_auc = auc(fpr, tpr)
            print "data_name: ", data_name
            print "ROC: ", roc_auc
            fpr, tpr, threshold = roc_curve(real_label, pred_label)
            if 1 in threshold:
                index_of_one = list(threshold).index(1)
                print "TPR: ", tpr[index_of_one]
                print "FPR: ", fpr[index_of_one]
                print "threshold: ", threshold[index_of_one]

        if mode == "test":
            return

        print "Start to train."
        epoch = 0
        n_epochs = 10
        ite = 0

        # ####Validate the model####
        # 		for dataset_index in xrange(data_count):
        # 			costNum, errorNum, pred_label, real_label, pred_prob = valid_model[dataset_index]()
        # 			print "Valid current model :", data_names[dataset_index]
        # 			print "Cost: ", costNum
        # 			print "Error: ", errorNum
        #
        # 			fpr, tpr, _ = roc_curve(real_label, pred_prob)
        # 			roc_auc = auc(fpr, tpr)
        # 			print "data_name: ", data_name
        # 			print "ROC: ", roc_auc
        # 			fpr, tpr, threshold = roc_curve(real_label, pred_label)
        # 			index_of_one = list(threshold).index(1)
        # 			print "TPR: ", tpr[index_of_one]
        # 			print "FPR: ", fpr[index_of_one]
        # 			print "threshold: ", threshold[index_of_one]

        while (epoch < n_epochs):
            epoch = epoch + 1
            #######################
            for i in range(max(n_batches)):
                for dataset_index in xrange(data_count):
                    if i >= n_batches[dataset_index]:
                        continue
                    # for list-type data
                    print "dataset_index: %d, i: %d" % (dataset_index, i)
                    costNum, errorNum, pred_label, real_label = train_model[
                        dataset_index](i)
                    ite = ite + 1
                    # for padding data
                    if (ite % 10 == 0):
                        print
                        print "Dataset name: ", data_names[dataset_index]
                        print "@iter: ", ite
                        print "Cost: ", costNum
                        print "Error: ", errorNum

            # Validate the model
            for dataset_index in xrange(data_count):
                costNum, errorNum, pred_label, real_label, pred_prob = valid_model[
                    dataset_index]()
                print "Valid current model :", data_names[dataset_index]
                print "Cost: ", costNum
                print "Error: ", errorNum

                fpr, tpr, _ = roc_curve(real_label, pred_prob)
                roc_auc = auc(fpr, tpr)
                print "data_name: ", data_name
                print "ROC: ", roc_auc

                fpr, tpr, threshold = roc_curve(real_label, pred_label)
                index_of_one = list(threshold).index(1)
                print "TPR: ", tpr[index_of_one]
                print "FPR: ", fpr[index_of_one]
                print "threshold: ", threshold[index_of_one]
            # Save model
            print "Saving parameters."
            saveParamsVal(para_path, params)
            print "Saved."
def evaluate_lenet5(learning_rate=0.01, n_epochs=10000,
                    dataset='cifar-10-batches-py',
                    nkerns=[32, 64, 128], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (CIFAR-10 here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Example of how to reshape and display input
    # a=train_set_x[0].reshape((3,1024,1)).eval()
    # make_filter_fig(fname='results/input.png',
    #                 filters=a,
    #                 combine_chans=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (32, 32)  # this is the size of CIFAR-10 images
    nChannels = 3      # the number of channels

    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    reshaped_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-5+1+4,32-5+1+4)=(32,32)
    # maxpooling reduces this further to (32/2,32/2) = (16,16)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],16,16)
    conv0 = LeNetConvPoolLayer(
        rng, input=reshaped_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        filter_pad=2,
        poolsize=(2, 2))

    # conv0_vis = HiddenLayer(rng, input=conv0.output.flatten(2),
    #                         n_in=nkerns[0] * 16 * 16,
    #                         n_out=3 * 32 * 32, activation=T.tanh)
    # print conv0_vis.W.eval().shape # (8192, 3072)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (16-5+1+2,16-5+1+2)=(14,14)
    # maxpooling reduces this further to (14/2,14/2) = (7,7)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 7, 7)
    conv1 = LeNetConvPoolLayer(
        rng, input=conv0.output,
        image_shape=(batch_size, nkerns[0], 16, 16),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        filter_pad=1,
        poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 7 * 7) = (batch_size, 3136)
    hidden_input = conv1.output.flatten(2)

    # construct a fully-connected sigmoidal layer 
    hidden = HiddenLayer(rng, input=hidden_input, n_in=nkerns[1] * 7 * 7,
                         n_out=1024, activation=T.tanh)
    hidden_vis = HiddenLayer(rng, input=hidden.output, n_in=1024,
                             n_out=3072, activation=T.nnet.sigmoid)

    # classify the values of the fully-connected sigmoidal layer
    softmax = LogisticRegression(input=hidden.output, n_in=1024, n_out=10)
    softmax_vis = HiddenLayer(rng, input=softmax.p_y_given_x,
                              n_in=10, n_out=3072,
                              activation=T.nnet.sigmoid)

    # the cost we minimize during training is the NLL of the model
    cost = softmax.negative_log_likelihood(y)
    hidden_vis_cost = hidden_vis.reconstruction_cost(x)
    softmax_vis_cost = softmax_vis.reconstruction_cost(x)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], softmax.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], softmax.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = softmax.params + hidden.params + conv1.params + conv0.params
    hidden_vis_params = hidden_vis.params
    softmax_vis_params = softmax_vis.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    hidden_vis_grads = T.grad(hidden_vis_cost, hidden_vis_params)
    softmax_vis_grads = T.grad(softmax_vis_cost, softmax_vis_params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))
    for param_i, grad_i in zip(hidden_vis_params, hidden_vis_grads):
        updates.append((param_i, param_i - learning_rate * grad_i))
    for param_i, grad_i in zip(softmax_vis_params, softmax_vis_grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    print '... training'

    patience = 1000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    costs = []
    valid = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(minibatch_index)
            costs.append(cost_ij)

            if iter % 100 == 0:
                print('Step %d Cost %f' % (iter, cost_ij))
                make_filter_fig(fname='results/hidden.png',
                                filters=hidden_vis.W.T.eval().reshape((3,1024,1024)),
                                filter_start=0,
                                num_filters=16*16,
                                combine_chans=True)
                make_filter_fig(fname='results/softmax.png',
                                filters=softmax_vis.W.T.eval().reshape((3,1024,10)),
                                filter_start=0,
                                num_filters=10,
                                combine_chans=True)

                # rs = conv0_vis.W.reshape((3, nkerns[0] * 16 * 16, 32*32)) # (3,8192,1024)
                # rs2 = rs.dimshuffle(0,2,1)
                # make_filter_fig(fname='results/conv0.png',
                #                 filters=rs2.eval(),
                #                 filter_start=0,
                #                 num_filters=16*16,
                #                 combine_chans=True)

                # rs = conv0_vis.W.T # (3072,8192)
                # rs2 = rs.reshape((3, 1024, 8192))
                # make_filter_fig(fname='results/conv0-alt.png',
                #                 filters=rs2.eval(),
                #                 filter_start=0,
                #                 num_filters=16*16,
                #                 combine_chans=True)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                valid.append(this_validation_loss * 100.)
                print('epoch %i, minibatch %i/%i, validation error %.2f%%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    best_params = params

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('New Best! epoch %i, minibatch %i/%i, test error of best '
                           'model %.2f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    return best_params
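# Stripped-down illustration of the patience-based early stopping used above:
# patience is stretched whenever validation improves "significantly", and the
# loop stops once the iteration count catches up with patience. The validation
# losses below are invented.
patience = 4
patience_increase = 2
improvement_threshold = 0.995
best_validation_loss = float('inf')

for it, loss in enumerate([0.9, 0.7, 0.65, 0.649, 0.649, 0.649]):
    if loss < best_validation_loss:
        if loss < best_validation_loss * improvement_threshold:
            patience = max(patience, it * patience_increase)
        best_validation_loss = loss
    if patience <= it:
        print "stopping at iteration", it, "best loss", best_validation_loss
        break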
def extract_256array(img_dir):
    #print "Loading params..."
    loaded_params=load_params('params-momentum_weightdecay-NEW-BIg4class-01.pkl')
    num_images=len(glob.glob(img_dir + '*.jpg'))

    if batch_size==0:
        sys.exit()

    ## MODEL CNN
    # Declare the symbolic variable for the input image
    x = T.tensor4('x')   # the data is presented as rasterized images
    # Symbolic variables for the parameters loaded from file
    W1 = theano.shared(loaded_params[0][0].get_value())
    B1 = theano.shared(loaded_params[0][1].get_value())
    W2 = theano.shared(loaded_params[1][0].get_value())
    B2 = theano.shared(loaded_params[1][1].get_value())
    W3 = theano.shared(loaded_params[2][0].get_value())
    B3 = theano.shared(loaded_params[2][1].get_value())
    W4 = theano.shared(loaded_params[3][0].get_value())
    B4 = theano.shared(loaded_params[3][1].get_value())
    W5 = theano.shared(loaded_params[4][0].get_value())
    B5 = theano.shared(loaded_params[4][1].get_value())

    #batch_size=1
    #print "Building model..."
    layer0_input = x.reshape((batch_size, 3, 61, 61))
    # build symbolic expression that computes the convolution of input with filters in w
    conv_out1=MyConvnetLayer(W1,B1,input=layer0_input,filter_shape=(64, 3, 5, 5),image_shape=(batch_size, 3, 61, 61),conv_stride=(2,2),pool_stride=(2,2),poolsize=(3,3))
    #
    conv_out2=MyConvnetLayer(W2,B2,input=conv_out1.output,filter_shape=(256, 64, 5, 5),image_shape=(batch_size, 64, 14, 14),conv_stride=(1,1))

    conv_out3=MyConvnetLayer(W3,B3,input=conv_out2.output,filter_shape=(128, 256, 3, 3),image_shape=(batch_size, 256, 10, 10),conv_stride=(1,1))

    conv_out4=MyConvnetLayer(W4,B4,input=conv_out3.output,filter_shape=(128, 128, 3, 3),image_shape=(batch_size, 128, 8, 8),conv_stride=(1,1))

    layer5_input = conv_out4.output.flatten(2)

    #construct a fully-connected sigmoidal layer
    full_5 = HiddenLayer(
             W5,B5,
             input=layer5_input,
             n_in=128 * 6 * 6,
             n_out=256,
             activation=T.tanh
    )

    # create theano function to compute filtered images
    f_layer5 = theano.function([x],
               full_5.output,
               allow_input_downcast=True,on_unused_input='ignore'
            )
    ## END MODEL CNN

    if num_images<batch_size:
        batchsize=num_images
    else:
        batchsize=batch_size


    num_batch=int(math.ceil(num_images/float(batchsize)))
    features=np.zeros((num_images,256),theano.config.floatX,'C')
    for j in range(1,num_batch+1):
        images=np.zeros((batch_size,3,61,61),theano.config.floatX,'C')

        i=0
        num_img_batch=min(batchsize,num_images-batchsize*(j-1))
        for i_img in range(1,num_img_batch+1):
            img_name=img_dir+str(i_img+batchsize*(j-1))+'.jpg'
            img = Image.open(img_name)
            img_res=img.resize((61,61), PIL.Image.ANTIALIAS)
            # dimensions are (height, width, channel)
            img_res=sub_mean(img_res)
            img_res = numpy.asarray(img_res, dtype=theano.config.floatX) / 256.
            # put image in 4D tensor of shape (1, 3, height, width)
            img_ = img_res.transpose(2, 0, 1).reshape(1, 3, 61, 61)
            images[i,:,:,:]=img_
            i=i+1

        feature_256=f_layer5(images)
        range_feat=range(batchsize*(j-1),batchsize*(j-1)+ num_img_batch)
        for k in range(0,num_img_batch):
            features[range_feat[k]]=feature_256[k]
    #print feature_256
    return features
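# Standalone illustrative version of the per-image preprocessing performed
# above: resize to 61x61, scale to [0, 1), and move channels first so the
# result fits a (1, 3, 61, 61) tensor. The sub_mean step is omitted here
# because its definition is not shown in this snippet.
import numpy
from PIL import Image

def image_to_tensor(img_name):
    img = Image.open(img_name).resize((61, 61), Image.ANTIALIAS)
    arr = numpy.asarray(img, dtype='float32') / 256.      # (height, width, channel)
    return arr.transpose(2, 0, 1).reshape(1, 3, 61, 61)   # (1, channel, height, width)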
Example #29
    layer0_pool = MaxPoolLayer(layer0_conv.output,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1])  # [?, 14, 14, 32]
    # conv and pool layer1
    layer1_conv = ConvLayer(layer0_pool.output,
                            filter_shape=[5, 5, 32, 64],
                            strides=[1, 1, 1, 1],
                            activation=tf.nn.relu,
                            padding="SAME")  # [?, 14, 14, 64]
    layer1_pool = MaxPoolLayer(layer1_conv.output,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1])  # [?, 7, 7, 64]
    # flatten layer
    layer2_flatten = FlattenLayer(layer1_pool.output, shape=[-1, 7 * 7 * 64])
    # fully-connected layer
    layer3_fullyconn = HiddenLayer(layer2_flatten.output,
                                   n_in=7 * 7 * 64,
                                   n_out=256,
                                   activation=tf.nn.relu)
    # dropout layer
    layer3_dropout = DropoutLayer(layer3_fullyconn.output, keep_prob=0.5)
    # the output layer
    layer4_output = LogisticRegression(layer3_dropout.output,
                                       n_in=256,
                                       n_out=10)

    # params for training
    params = layer0_conv.params + layer1_conv.params + layer3_fullyconn.params + layer4_output.params
    # train dicts for dropout
    train_dicts = layer3_dropout.train_dicts
    # prediction dicts for dropout
    pred_dicts = layer3_dropout.pred_dicts
Example #30
def sgd_optimize(learning_rate=0.1,
                 n_epochs=200,
                 batch_size=500,
                 nkerns=[20, 50]):
    # Load input
    train, valid, test = util.load()
    print "loading 0 - ", train[0].shape[0], " train inputs in gpu memory"
    train_x, train_y = util.create_theano_shared(train)

    print "loading 0 - ", valid[0].shape[0], " validation inputs in gpu memory"
    valid_x, valid_y = util.create_theano_shared(valid)

    print "loading 0 - ", test[0].shape[0], " test inputs in gpu memory"
    test_x, test_y = util.create_theano_shared(test)

    # Define symbolic input matrices
    print "Building Model..."
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    random_generator = numpy.random.RandomState(1)

    # Create Layer0 of Lenet Model
    layer0_input = x.reshape( (batch_size, 1, 28, 28) )
    filter_shape0 = (nkerns[0], 1, 5, 5)
    image_shape0 = (batch_size, 1, 28, 28) 
    layer0 = LeNetConvPoolLayer(layer0_input, filter_shape0, image_shape0, random_generator)
    
    # Create Layer1 of Lenet model
    filter_shape1 = (nkerns[1], nkerns[0], 5, 5)
    image_shape1 = (batch_size, nkerns[0], 12, 12)
    layer1 = LeNetConvPoolLayer(layer0.output, filter_shape1, image_shape1, random_generator)

    # Create Layer2 which is a simple MLP hidden layer
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(layer2_input, nkerns[1] * 4 * 4, 500, random_generator)

    # Finally, Layer3 is LogisticRegression layer
    layer3 = LogisticRegression(layer2.output, 500, 10)

    # Define error
    error = layer3.error(y)

    # Create cost function
    cost = layer3.negative_log_likelihood(y)

    # Gradient and update functions
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, wrt=params)
    updates = list()
    for i in range(len(params)):
        updates.append( (params[i], params[i] - learning_rate * grads[i]) )

    # Train model
    train_model = theano.function(
                    inputs=[index],
                    outputs=cost,
                    updates=updates,
                    givens = {
                       x: train_x[index*batch_size : (index+1)*batch_size],
                       y: train_y[index*batch_size : (index+1)*batch_size]
                    })

    # Valid model
    valid_model = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens = {
                       x: valid_x[index*batch_size : (index+1)*batch_size],
                       y: valid_y[index*batch_size : (index+1)*batch_size]
                    })
    
    # Test Model 
    test_model  = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens={
                       x: test_x[index*batch_size : (index+1)*batch_size],
                       y: test_y[index*batch_size : (index+1)*batch_size]
                    })

    # Create number of minibatches
    n_train_batches = train[0].shape[0] / batch_size
    n_valid_batches = valid[0].shape[0] / batch_size
    n_test_batches = test[0].shape[0] / batch_size

    # Finally, main loop for training
    util.train_test_model(n_epochs, train_model, valid_model, test_model,
                          n_train_batches, n_valid_batches, n_test_batches)
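# Illustration of the minibatch arithmetic used above: with Python 2 integer
# division any trailing partial batch is dropped, and batch `index` covers the
# slice [index * batch_size : (index + 1) * batch_size]. Numbers are arbitrary.
n_examples = 50000
batch_size = 500
n_train_batches = n_examples / batch_size   # 100 (integer division in Python 2)
index = 3
print n_train_batches, index * batch_size, (index + 1) * batch_size  # 100 1500 2000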
Example #31
    def __init__(self, D, M, Q, Domain_number, D_Y, M_Y):

        self.Xlabel = T.matrix('Xlabel')

        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        N = self.X.shape[0]

        self.Weight = T.matrix('Weight')

        ker = kernel(Q)
        mmd = MMD(M, Domain_number)

        mu_value = np.random.randn(M, D)
        Sigma_b_value = np.zeros((M, M)) + np.log(0.01)

        Z_value = np.random.randn(M, Q)

        ls_value = np.zeros(Domain_number) + np.log(0.1)

        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b',
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)

        self.hiddenLayer_x = HiddenLayer(rng=rng,
                                         input=self.X,
                                         n_in=D,
                                         n_out=20,
                                         activation=T.nnet.relu,
                                         number='_x')
        self.hiddenLayer_m = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=20,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=20,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_S')

        #################################################################################
        ### Model computation for the X side
        m = self.hiddenLayer_m.output
        S_0 = self.hiddenLayer_S.output
        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  # the mean and the variance need different random draws, so they are generated separately

        beta = T.exp(self.ls)

        # u is not diagonal, so a triangular matrix has to be constructed, e.g. via a Cholesky-style decomposition

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        Kmm = mmd.MMD_kenel_Xonly(mmd.Zlabel_T, Kmm, self.Weight)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)
        Kmn = mmd.MMD_kenel_ZX(self.Xlabel, Kmn, self.Weight)

        Knn = ker.RBF(Xtilda)
        Knn = mmd.MMD_kenel_Xonly(self.Xlabel, Knn, self.Weight)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, self.U)
        betaI = T.diag(T.dot(self.Xlabel, beta))
        Covariance = betaI
        ##############################################################################################
        ### Computation for the Y side
        ker_Y = kernel(Q, number='_Y')
        muY_value = np.random.randn(M_Y, D_Y)
        SigmaY_b_value = np.zeros((M_Y, M_Y)) + np.log(0.01)

        ZY_value = np.random.randn(M_Y, Q)

        lsY_value = np.zeros(1) + np.log(0.1)

        self.muY = theano.shared(value=muY_value, name='muY', borrow=True)
        self.SigmaY_b = theano.shared(value=SigmaY_b_value,
                                      name='SigmaY_b',
                                      borrow=True)
        self.ZY = theano.shared(value=ZY_value, name='ZY', borrow=True)
        self.lsY = theano.shared(value=lsY_value, name='lsY', borrow=True)

        epsY_NQ = srng.normal((N, Q))
        epsY_M = srng.normal((M_Y, D_Y))

        betaY0 = T.exp(self.lsY)
        betaY = T.tile(betaY0, N)
        # u is not diagonal, so a triangular matrix has to be constructed, e.g. via a Cholesky-style decomposition

        SigmaY = T.tril(self.SigmaY_b - T.diag(T.diag(self.SigmaY_b)) +
                        T.diag(T.exp(T.diag(self.SigmaY_b))))

        # scale transformation
        muY_scaled, SigmaY_scaled = ker_Y.sf2**0.5 * self.muY, ker_Y.sf2**0.5 * SigmaY

        XtildaY = m + S * epsY_NQ
        self.UY = muY_scaled + SigmaY_scaled.dot(epsY_M)

        KmmY = ker_Y.RBF(self.ZY)
        KmmInvY = sT.matrix_inverse(KmmY)

        KmnY = ker_Y.RBF(self.ZY, XtildaY)

        KnnY = ker_Y.RBF(XtildaY)

        KtildaY = KnnY - T.dot(KmnY.T, T.dot(KmmInvY, KmnY))

        KintervalY = T.dot(KmmInvY, KmnY)

        mean_UY = T.dot(KintervalY.T, self.UY)
        betaIY = T.diag(betaY)
        CovarianceY = betaIY

        ##############################################################################################
        ### Store the parameters
        self.params = []

        self.params_X = [self.mu, self.Sigma_b, self.Z, self.ls]
        self.params_Y = [self.muY, self.SigmaY_b, self.ZY, self.lsY]

        self.loc_params = []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params = {}
        for i in self.loc_params:
            self.local_params[str(i)] = i

        self.params_X.extend(ker.params)
        self.params_X.extend(mmd.params)
        self.params_Y.extend(ker_Y.params)

        self.global_params_X = {}
        for i in self.params_X:
            self.global_params_X[str(i)] = i

        self.global_params_Y = {}
        for i in self.params_Y:
            self.global_params_Y[str(i)] = i

        self.params.extend(self.params_X)
        self.params.extend(self.params_Y)
        self.params.extend(self.loc_params)

        self.wrt = {}
        for i in self.params:
            self.wrt[str(i)] = i

###############################################################################################
### Final likelihood
        self.LL = (self.log_mvn(self.X, mean_U, Covariance) -
                   0.5 * T.sum(T.dot(betaI, Ktilda)))
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

        self.LLY = (self.log_mvn(self.Y, mean_UY, CovarianceY) -
                    0.5 * T.sum(T.dot(betaIY, KtildaY)))
        self.KL_UY = -self.KLD_U(muY_scaled, SigmaY_scaled, KmmY, KmmInvY)

        self.KL_X = -self.KLD_X(m, S)
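# Illustrative numpy version of the Sigma construction above: keep the strictly
# lower triangle of an unconstrained square matrix and exponentiate its
# diagonal, giving a lower-triangular factor with a strictly positive diagonal
# (a Cholesky-style parameterization). The values are arbitrary.
import numpy as np

Sigma_b = np.array([[0.0, 9.0],
                    [0.5, np.log(2.0)]])
Sigma = np.tril(Sigma_b - np.diag(np.diag(Sigma_b)) +
                np.diag(np.exp(np.diag(Sigma_b))))
print Sigma  # [[1.0, 0.0], [0.5, 2.0]]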
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_hiddens,
                 n_out,
                 dropout_rates,
                 activation=None,
                 n_slack=0):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hiddens: list of int
        :param n_hiddens: sizes of the hidden layers

        """
        self.params = []
        self.W = []
        self.b = []

        self.W_actual = []
        self.b_actual = []

        # keep track of model input
        self.input = input

        # Multiple hidden layers
        print >> sys.stderr, dropout_rates
        last_layer_out = self.input
        last_layer_dropout = _dropout_from_layer(rng,
                                                 self.input,
                                                 p=dropout_rates[0])
        last_layer_size = n_in

        slacks = numpy.append(
            numpy.asarray([n_slack], dtype='int32'),
            numpy.zeros((len(n_hiddens) - 1, ), dtype='int32'))
        for i in range(0, len(n_hiddens)):
            # dropped-out path: for training
            dropoutLayer = DropoutHiddenLayer(rng=rng,
                                              input=last_layer_dropout,
                                              activation=activation,
                                              n_in=last_layer_size,
                                              n_out=n_hiddens[i],
                                              dropout_rate=dropout_rates[i +
                                                                         1],
                                              n_slack=slacks[i])
            last_layer_dropout = dropoutLayer.output

            self.params += dropoutLayer.params
            self.W += [dropoutLayer.W]
            self.b += [dropoutLayer.b]

            # original (untouched) path: for testing
            hiddenLayer = HiddenLayer(rng=rng,
                                      input=last_layer_out,
                                      activation=activation,
                                      n_in=last_layer_size,
                                      n_out=n_hiddens[i],
                                      W=dropoutLayer.W *
                                      (1. - dropout_rates[i]),
                                      b=dropoutLayer.b,
                                      n_slack=slacks[i])
            last_layer_out = hiddenLayer.output
            last_layer_size = n_hiddens[i]

            self.W_actual += [hiddenLayer.W]
            self.b_actual += [hiddenLayer.b]

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        # Dropped-out path: for training
        self.dropoutLogRegressionLayer = LogisticRegression(
            rng=rng,
            input=last_layer_dropout,
            n_in=(n_hiddens[-1] if len(n_hiddens) > 0 else n_in),
            n_out=n_out)
        self.params += self.dropoutLogRegressionLayer.params

        # original (untouched) path: for testing
        self.logRegressionLayer = LogisticRegression(
            rng=rng,
            input=last_layer_out,
            n_in=(n_hiddens[-1] if len(n_hiddens) > 0 else n_in),
            n_out=n_out,
            W=self.dropoutLogRegressionLayer.W * (1. - dropout_rates[-1]),
            b=self.dropoutLogRegressionLayer.b)

        # prediction of the MLP is given by the prediction of the output of the
        # model, computed in the logistic regression layer
        self.dropout_errors = self.dropoutLogRegressionLayer.errors
        self.dropout_negative_log_likelihood = self.dropoutLogRegressionLayer.negative_log_likelihood

        self.y_pred = self.logRegressionLayer.y_pred
        self.errors = self.logRegressionLayer.errors
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
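A hedged usage sketch for the dropout MLP whose __init__ is shown above. The class name DropoutMLP is hypothetical (only the constructor appears here); HiddenLayer, DropoutHiddenLayer, LogisticRegression and _dropout_from_layer are assumed to come from the same code base, and dropout_rates needs one entry for the input plus one per hidden layer.

import numpy
import theano.tensor as T

x = T.matrix('x')
y = T.ivector('y')
rng = numpy.random.RandomState(1234)

# hypothetical class name wrapping the __init__ above
classifier = DropoutMLP(rng=rng,
                        input=x,
                        n_in=28 * 28,
                        n_hiddens=[500, 500],
                        n_out=10,
                        dropout_rates=[0.2, 0.5, 0.5],
                        activation=T.tanh)

# train on the dropped-out path, report errors on the rescaled clean path
train_cost = classifier.dropout_negative_log_likelihood(y)
test_errors = classifier.errors(y)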
    def __init__(self, D, M, Q, Domain_number, Hiddenlayerdim1,
                 Hiddenlayerdim2):

        self.Xlabel = T.matrix('Xlabel')

        self.X = T.matrix('X')
        N = self.X.shape[0]

        self.Weight = T.matrix('Weight')

        ker = kernel(Q)
        #mmd=MMD(M,Domain_number)
        mu_value = np.random.randn(M, D) * 1e-2
        Sigma_b_value = np.zeros((M, M))  # + np.log(0.01)

        Z_value = np.random.randn(M, Q)

        ls_value = np.zeros(Domain_number) + np.log(0.1)

        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b',
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)

        self.params = [self.mu, self.Sigma_b, self.Z, self.ls]

        self.hiddenLayer_x = HiddenLayer(rng=rng,
                                         input=self.X,
                                         n_in=D,
                                         n_out=Hiddenlayerdim1,
                                         activation=T.nnet.relu,
                                         number='_x')
        #self.hiddenLayer_hidden = HiddenLayer(rng=rng,input=self.hiddenLayer_x.output,n_in=Hiddenlayerdim1,n_out=Hiddenlayerdim2,activation=T.nnet.relu,number='_h')
        self.hiddenLayer_m = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=Hiddenlayerdim1,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=Hiddenlayerdim1,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_S')

        self.loc_params = []
        self.loc_params.extend(self.hiddenLayer_x.params)
        #self.loc_params.extend(self.hiddenLayer_hidden.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params = {}
        for i in self.loc_params:
            self.local_params[str(i)] = i

        self.params.extend(ker.params)
        #self.params.extend(mmd.params)

        self.hyp_params = {}
        for i in [self.mu, self.Sigma_b, self.ls]:
            self.hyp_params[str(i)] = i

        self.Z_params = {}
        for i in [self.Z]:
            self.Z_params[str(i)] = i

        self.global_params = {}
        for i in self.params:
            self.global_params[str(i)] = i

        self.params.extend(self.hiddenLayer_x.params)
        #self.params.extend(self.hiddenLayer_hidden.params)
        self.params.extend(self.hiddenLayer_m.params)
        self.params.extend(self.hiddenLayer_S.params)

        self.wrt = {}
        for i in self.params:
            self.wrt[str(i)] = i

        m = self.hiddenLayer_m.output
        S_0 = self.hiddenLayer_S.output
        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  # the mean and the variance need different random draws, so they are defined separately
        eps_ND = srng.normal((N, D))

        beta = T.exp(self.ls)
        # u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style factorization

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        #Kmm=mmd.MMD_kenel_Xonly(mmd.Zlabel_T,Kmm,self.Weight)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)
        #Kmn=mmd.MMD_kenel_ZX(self.Xlabel,Kmn,self.Weight)

        Knn = ker.RBF(Xtilda)
        #Knn=mmd.MMD_kenel_Xonly(self.Xlabel,Knn,self.Weight)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        F = T.dot(Kmn.T, T.dot(KmmInv, self.U)) + T.dot(
            T.maximum(Ktilda, 1e-16)**0.5, eps_ND)

        #Kinterval=T.dot(KmmInv,Kmn)

        mean_U = F  #T.dot(Kinterval.T,self.U)
        betaI = T.diag(T.dot(self.Xlabel, beta))
        Covariance = betaI

        self.LL = self.log_mvn(self.X, mean_U,
                               Covariance)  # - 0.5*T.sum(T.dot(betaI,Ktilda)))
        self.KL_X = -self.KLD_X(m, S)
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
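A minimal sketch, under the assumptions below, of how the terms assembled in this __init__ (self.LL, self.KL_X, self.KL_U) could be combined into a variational lower bound and compiled into a Theano update step. The attribute names come from the snippet; the function name compile_training_step, the plain-SGD updates and the learning rate are illustrative choices, and any further symbolic inputs the graph happens to need would have to be added to the inputs list.

import theano
import theano.tensor as T

def compile_training_step(model, learning_rate=1e-3):
    # the KL terms are stored with a leading minus sign, so a plain sum is the bound
    lower_bound = model.LL + model.KL_X + model.KL_U
    grads = T.grad(-lower_bound, model.params)
    updates = [(p, p - learning_rate * g) for p, g in zip(model.params, grads)]
    return theano.function(inputs=[model.X, model.Xlabel],
                           outputs=lower_bound,
                           updates=updates,
                           on_unused_input='ignore')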
Example #34
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500):
    rng = numpy.random.RandomState(23455)
    datasets = load_data(dataset)  # load the data
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    # define a few variables: index is the minibatch index, x is the input training data, y holds the corresponding labels
    index = T.lscalar()  
    x = T.matrix('x')   
    y = T.ivector('y')  
    ###############
    # BUILD MODEL #
    ###############
    print '... building the model'
    # each loaded batch has shape (batch_size, 28 * 28), but LeNetConvPoolLayer expects 4D input, so reshape it
    layer0_input = x.reshape((batch_size, 1, 28, 28))
    # layer0 is the first LeNetConvPoolLayer:
    # each input image is (28, 28); convolution gives (28-5+1, 28-5+1) = (24, 24),
    # maxpooling gives (24/2, 24/2) = (12, 12).
    # With batch_size images per batch and nkerns[0] kernels in this layer,
    # layer0's output is (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng, input=layer0_input, 
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5), 
        poolsize=(2, 2)
    )
    # layer1 is the second LeNetConvPoolLayer:
    # its input is layer0's output; each feature map is (12, 12), convolution gives (12-5+1, 12-5+1) = (8, 8),
    # maxpooling gives (8/2, 8/2) = (4, 4).
    # With batch_size images (feature maps) per batch and nkerns[1] kernels in this layer,
    # layer1's output is (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,  input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )
    # The two LeNetConvPoolLayers (layer0 and layer1) are defined above; layer1 feeds layer2,
    # a fully-connected layer equivalent to the hidden layer of an MLP, so the HiddenLayer class can be reused.
    # layer2 expects 2D input of shape (batch_size, num_pixels), so the feature maps each image gets from the
    # different kernels must be merged into one vector: layer1's output (batch_size, nkerns[1], 4, 4) is
    # flattened to (batch_size, nkerns[1]*4*4) = (500, 800), one sample per row; layer2's output is (batch_size, n_out) = (500, 500)
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )
    # the last layer, layer3, is the classifier, built with the LogisticRegression class;
    # its input is layer2's output (500, 500) and its output is (batch_size, n_out) = (500, 10)
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    # cost function: negative log-likelihood (NLL)
    cost = layer3.negative_log_likelihood(y)
    # test_model computes the test error: x and y are instantiated for the given index and layer3 is called,
    # which in turn pulls in layer2, layer1 and layer0, so test_model runs the whole CNN;
    # its inputs are x and y, and its output is layer3.errors(y), i.e. the error rate.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )    
    # train_model below performs SGD: compute the gradients, then update the parameters
    params = layer3.params + layer2.params + layer1.params + layer0.params  # parameter list
    grads = T.grad(cost, params)  # gradient w.r.t. each parameter
    # There are too many parameters to write every update rule out by hand,
    # so the comprehension below generates the pairs (param_i, param_i - learning_rate * grad_i) automatically
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]
    # train_model mirrors test_model / validate_model, but additionally applies the updates rules
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    patience = 10000  # early-stopping parameter
    patience_increase = 2
    improvement_threshold = 0.995
    # setting validation_frequency this way guarantees the validation set is evaluated every epoch
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))
                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [ test_model(i) for i in xrange(n_test_batches) ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))

            if patience <= iter:
                done_looping = True
                break
    layer0.save_net("layer0")
    layer1.save_net("layer1")
    layer2.save_net("layer2")
    layer3.save_net("layer3")
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
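A quick way to exercise the function above, assuming 'mnist.pkl.gz' is available to load_data() and that each layer class provides the save_net method used at the end; the reduced epoch count is only for a smoke test.

if __name__ == '__main__':
    evaluate_lenet5(learning_rate=0.1, n_epochs=1,
                    dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500)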
def evaluate_lenet5(learning_rate=0.05,
                    n_epochs=100,
                    dataset='F:/MOUD/0MOUD/jul14/x50_1/cktest/moud6.pkl.gz',
                    nkerns=[5, 5, 5, 5, 5, 5, 5, 5, 5],
                    batch_size=50,
                    dirn='iti',
                    indexd=0):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset used for training / testing

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    global layer0gW
    global layer1gW
    global layer1bgW
    global layer1cgW
    global layer1dgW
    global layer1egW
    global layer1fgW
    global layer1ggW
    global layer1hgW
    global layer2gW
    global layer3gW
    global layer0gb
    global layer1gb
    global layer1bgb
    global layer1cgb
    global layer1dgb
    global layer1egb
    global layer1fgb
    global layer1ggb
    global layer1hgb
    global layer2gb
    global layer3gb
    global all_test
    global batchm
    global eval_print1
    global eval_print2
    global eval_print3
    global neuron
    global epoch_cd
    global indk

    epoch_cd = 2
    neuron = 5
    batchm = 20
    batch_size = batchm

    for nk in range(9):
        nkerns[nk] = neuron

    dirgtest = dirn

    l_r = T.scalar('l_r', dtype=theano.config.floatX)

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape the matrix of rasterized images of shape (batch_size, im1x * im1y)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer;
    # (im1x, im1y) = (108, 100) is the input image size here.
    im1x = 100 + 8
    im1y = 100
    poolx = 1
    pooly = 1

    layer0_input = x.reshape((batch_size, 1, im1x, im1y))

    # Construct the first convolutional pooling layer:
    # filtering with an (nk1x, nk1y) kernel and (poolx, pooly) pooling
    # gives a 4D output tensor of shape (batch_size, nkerns[0], im2x, im2y)
    nk1x = 7
    nk1y = im1y

    #
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, im1x, im1y),
                                filter_shape=(nkerns[0], 1, nk1x, nk1y),
                                poolsize=(poolx, pooly))

    # Construct the second convolutional pooling layer;
    # its 4D output tensor has shape (batch_size, nkerns[1], im2bx, im2by)
    im2x = (im1x - nk1x + 1) / poolx
    im2y = (im1y - nk1y + 1) / pooly
    #im2x = (im1x+nk1x-1)/poolx
    #im2y = (im1y+nk1y-1)/pooly
    nk2x = 6
    nk2y = im2y

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], im2x,
                                             im2y),
                                filter_shape=(nkerns[1], nkerns[0], nk2x,
                                              nk2y),
                                poolsize=(poolx, pooly))

    # Construct the third convolutional pooling layer;
    # its 4D output tensor has shape (batch_size, nkerns[2], im2cx, im2cy)
    im2bx = (im2x - nk2x + 1) / poolx
    im2by = (im2y - nk2y + 1) / pooly
    #im2bx = (im2x+nk2x-1)/poolx
    #im2by = (im2y+nk2y-1)/pooly
    nk2bx = 5
    nk2by = im2by

    layer1b = LeNetConvPoolLayer(rng,
                                 input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], im2bx,
                                              im2by),
                                 filter_shape=(nkerns[2], nkerns[1], nk2bx,
                                               nk2by),
                                 poolsize=(poolx, pooly))

    # Construct the fourth convolutional pooling layer;
    # its 4D output tensor has shape (batch_size, nkerns[3], im2dx, im2dy)
    im2cx = (im2bx - nk2bx + 1) / poolx
    im2cy = (im2by - nk2by + 1) / pooly
    nk2cx = 4
    nk2cy = im2cy

    layer1c = LeNetConvPoolLayer(rng,
                                 input=layer1b.output,
                                 image_shape=(batch_size, nkerns[2], im2cx,
                                              im2cy),
                                 filter_shape=(nkerns[3], nkerns[2], nk2cx,
                                               nk2cy),
                                 poolsize=(poolx, pooly))

    # Construct the fifth convolutional pooling layer;
    # its 4D output tensor has shape (batch_size, nkerns[4], im2ex, im2ey)
    im2dx = (im2cx - nk2cx + 1) / poolx
    im2dy = (im2cy - nk2cy + 1) / pooly
    nk2dx = 3
    nk2dy = im2dy

    layer1d = LeNetConvPoolLayer(rng,
                                 input=layer1c.output,
                                 image_shape=(batch_size, nkerns[3], im2dx,
                                              im2dy),
                                 filter_shape=(nkerns[4], nkerns[3], nk2dx,
                                               nk2dy),
                                 poolsize=(poolx, pooly))

    # Construct the sixth convolutional pooling layer;
    # its 4D output tensor has shape (batch_size, nkerns[5], im2fx, im2fy)
    im2ex = (im2dx - nk2dx + 1) / poolx
    im2ey = (im2dy - nk2dy + 1) / pooly
    nk2ex = 3
    nk2ey = im2ey

    layer1e = LeNetConvPoolLayer(rng,
                                 input=layer1d.output,
                                 image_shape=(batch_size, nkerns[4], im2ex,
                                              im2ey),
                                 filter_shape=(nkerns[5], nkerns[4], nk2ex,
                                               nk2ey),
                                 poolsize=(poolx, pooly))

    # Construct the seventh convolutional pooling layer;
    # its output has nkerns[6] feature maps per example
    im2fx = (im2ex - nk2ex + 1) / poolx
    im2fy = (im2ey - nk2ey + 1) / pooly
    nk2fx = 3
    nk2fy = im2fy

    layer1f = LeNetConvPoolLayer(rng,
                                 input=layer1e.output,
                                 image_shape=(batch_size, nkerns[5], im2fx,
                                              im2fy),
                                 filter_shape=(nkerns[6], nkerns[5], nk2fx,
                                               nk2fy),
                                 poolsize=(poolx, pooly))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # Flattening layer1b's output gives a matrix of shape
    # (batch_size, nkerns[2] * im3x * im3y).
    layer2_input = layer1b.output.flatten(2)
    #im3x = (im2hx-nk2hx+1)/poolx
    #im3y = (im2hy-nk2hy+1)/pooly
    im3x = (im2bx - nk2bx + 1) / poolx
    im3y = (im2by - nk2by + 1) / pooly

    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[4] * im3x * im3y,
                         n_out=100,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)

    # the cost we minimize during training is the NLL of the model

    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    test_model2 = theano.function(
        [index],
        layer3.errors2(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    # params = layer3.params + layer2.params + layer1f.params + layer1e.params + layer1d.params + layer1c.params + layer1b.params + layer1.params + layer0.params
    params = layer3.params + layer2.params + layer1b.params + layer1.params + layer0.params
    #layer1h.params + layer1g.params + layer1f.params + layer1e.params + layer1d.params + layer1c.params + layer1b.params + layer1.params + layer0.params

    if indexd > indk:

        epoch_cd = 1
        learning_rate = 0
        n_epochs = 1
        f = file(dirgtest + "/weights/layer0w_" + str(indexd) + ".save", 'rb')
        lay_params = cPickle.load(f)
        Wl1, bl1 = lay_params
        layer0.W.set_value(Wl1.get_value())
        layer0.b.set_value(bl1.get_value())
        f.close()
        f = file(dirgtest + "/weights/layer1w_" + str(indexd) + ".save", 'rb')
        lay_params = cPickle.load(f)
        Wl1, bl1 = lay_params
        layer1.W.set_value(Wl1.get_value())
        layer1.b.set_value(bl1.get_value())
        f.close()
        f = file(dirgtest + "/weights/layer1bw_" + str(indexd) + ".save", 'rb')
        lay_params = cPickle.load(f)
        Wl1, bl1 = lay_params
        layer1b.W.set_value(Wl1.get_value())
        layer1b.b.set_value(bl1.get_value())
        f.close()
        #f = file(dirgtest+"/weights/layer1cw_"+str(indexd)+".save",'rb')
        #lay_params = cPickle.load(f)
        #Wl1, bl1 = lay_params
        #layer1c.W.set_value(Wl1.get_value());
        #layer1c.b.set_value(bl1.get_value());
        #f.close()
        #f = file(dirgtest+"/weights/layer1dw_"+str(indexd)+".save",'rb')
        #lay_params = cPickle.load(f)
        #Wl1, bl1 = lay_params
        #layer1d.W.set_value(Wl1.get_value());
        #layer1d.b.set_value(bl1.get_value());
        #f.close()
        #f = file(dirgtest+"/weights/layer1ew_"+str(indexd)+".save",'rb')
        #lay_params = cPickle.load(f)
        #Wl1, bl1 = lay_params
        #layer1e.W.set_value(Wl1.get_value());
        #layer1e.b.set_value(bl1.get_value());
        #f.close()
        #f = file(dirgtest+"/weights/layer1fw_"+str(indexd)+".save",'rb')
        #lay_params = cPickle.load(f)
        #Wl1, bl1 = lay_params
        #layer1f.W.set_value(Wl1.get_value());
        #layer1f.b.set_value(bl1.get_value());
        #f.close()
        f = file(dirgtest + "/weights/layer2w_" + str(indexd) + ".save", 'rb')
        lay_params = cPickle.load(f)
        Wl1, bl1 = lay_params
        layer2.W.set_value(Wl1.get_value())
        layer2.b.set_value(bl1.get_value())
        f.close()
        f = file(dirgtest + "/weights/layer3w_" + str(indexd) + ".save", 'rb')
        lay_params = cPickle.load(f)
        Wl1, bl1 = lay_params
        layer3.W.set_value(Wl1.get_value())
        layer3.b.set_value(bl1.get_value())
        f.close()

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - l_r * grad_i)
               for param_i, grad_i in zip(params, grads)]

    #updates = [
    #    (param_i, param_i - learning_rate * grad_i)
    #    for param_i, grad_i in zip(params, grads)
    #]

    train_model = theano.function(
        [index, l_r],
        #[index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        learning_rate = 0.99 * learning_rate
        if epoch == 1:

            print layer0.W.get_value().shape
            print layer0.b.get_value().shape

            eval_set_x = test_set_x
            eval_shape = train_set_x.get_value(borrow=True).shape
            eval_layer2 = theano.function(
                [index],
                layer0_input,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })
            eval_print1 = [eval_layer2(i) for i in xrange(n_test_batches)]

            eval_set_x = train_set_x

            eval_layer2 = theano.function(
                [index],
                layer0_input,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print2 = [eval_layer2(i) for i in xrange(n_train_batches)]

            eval_set_x = valid_set_x

            eval_layer2 = theano.function(
                [index],
                layer0_input,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print3 = [eval_layer2(i) for i in xrange(n_valid_batches)]

            if indk == 10:
                Wl1, bl1 = morbrun1(nk1x, nk1y, im1x, im1y)
                learning_rate = 0

        if epoch == 2:

            print layer1.W.get_value().shape
            print layer1.b.get_value().shape
            layer0.W.set_value(Wl1.get_value())
            layer0.b.set_value(bl1.get_value())

            eval_set_x = test_set_x
            eval_shape = train_set_x.get_value(borrow=True).shape
            eval_layer2 = theano.function(
                [index],
                layer0.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })
            eval_print1 = [eval_layer2(i) for i in xrange(n_test_batches)]

            eval_set_x = train_set_x

            eval_layer2 = theano.function(
                [index],
                layer0.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print2 = [eval_layer2(i) for i in xrange(n_train_batches)]

            eval_set_x = valid_set_x

            eval_layer2 = theano.function(
                [index],
                layer0.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print3 = [eval_layer2(i) for i in xrange(n_valid_batches)]

            if indk == 10:
                Wl2, bl2 = morbrun1(nk2x, nk2y, im2x, im2y, neuron)

        if epoch == 3:
            print layer1b.W.get_value().shape
            layer1.W.set_value(Wl2.get_value())
            layer1.b.set_value(bl2.get_value())
            layer0.W.set_value(Wl1.get_value())
            layer0.b.set_value(bl1.get_value())

            eval_set_x = test_set_x
            eval_shape = train_set_x.get_value(borrow=True).shape
            eval_layer2 = theano.function(
                [index],
                layer1.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })
            eval_print1 = [eval_layer2(i) for i in xrange(n_test_batches)]

            eval_set_x = train_set_x

            eval_layer2 = theano.function(
                [index],
                layer1.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print2 = [eval_layer2(i) for i in xrange(n_train_batches)]

            eval_set_x = valid_set_x

            eval_layer2 = theano.function(
                [index],
                layer1.output,
                givens={
                    x: eval_set_x[index * batch_size:(index + 1) * batch_size]
                })

            eval_print3 = [eval_layer2(i) for i in xrange(n_valid_batches)]

            if indk == 10:
                Wl3, bl3 = morbrun1(nk2bx, nk2by, im2bx, im2by, neuron)

                layer1b.W.set_value(Wl3.get_value())
                layer1b.b.set_value(bl3.get_value())
                layer1.W.set_value(Wl2.get_value())
                layer1.b.set_value(bl2.get_value())
                layer0.W.set_value(Wl1.get_value())
                layer0.b.set_value(bl1.get_value())

                n_in = nkerns[4] * im3x * im3y
                n_out = 100
                W_values = numpy.asarray(rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
                layer2.W.set_value(W_values)
                layer2.b.set_value(numpy.zeros(n_out, dtype=theano.config.floatX))
                n_in = 100
                n_out = 2
                W_values = numpy.asarray(rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
                layer3.W.set_value(W_values)
                layer3.b.set_value(numpy.zeros(n_out, dtype=theano.config.floatX))
                learning_rate = 0.01


#
#
#        if epoch == 4:
#             print layer1c.W.get_value().shape
#
#             layer1b.W.set_value(Wl3.get_value());
#             layer1b.b.set_value(bl3.get_value());
#             layer1.W.set_value(Wl2.get_value());
#             layer1.b.set_value(bl2.get_value());
#             layer0.W.set_value(Wl1.get_value());
#             layer0.b.set_value(bl1.get_value());
#
#             eval_set_x = test_set_x;
#             eval_shape = train_set_x.get_value(borrow=True).shape;
#             eval_layer2 = theano.function([index], layer1b.output,
#                givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#             eval_print1 = [
#                   eval_layer2(i)
#                   for i in xrange(n_test_batches)
#                 ]
#
#             eval_set_x = train_set_x;
#
#             eval_layer2 = theano.function([index], layer1b.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print2 = [
#                   eval_layer2(i)
#                   for i in xrange(n_train_batches)
#                 ]
#
#             eval_set_x = valid_set_x;
#
#             eval_layer2 = theano.function([index], layer1b.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print3 = [
#                   eval_layer2(i)
#                   for i in xrange(n_valid_batches)
#                 ]
#             if indk == 10:
#              Wl4, bl4 = morbrun1(nk2cx,nk2cy,im2cx,im2cy,neuron)
#
#
#
#        if epoch == 5:
#             print layer1d.W.get_value().shape
#             layer1c.W.set_value(Wl4.get_value());
#             layer1c.b.set_value(bl4.get_value());
#             layer1b.W.set_value(Wl3.get_value());
#             layer1b.b.set_value(bl3.get_value());
#             layer1.W.set_value(Wl2.get_value());
#             layer1.b.set_value(bl2.get_value());
#             layer0.W.set_value(Wl1.get_value());
#             layer0.b.set_value(bl1.get_value());
#             eval_set_x = test_set_x;
#             eval_shape = train_set_x.get_value(borrow=True).shape;
#             eval_layer2 = theano.function([index], layer1c.output,
#                givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#             eval_print1 = [
#                   eval_layer2(i)
#                   for i in xrange(n_test_batches)
#                 ]
#
#             eval_set_x = train_set_x;
#
#             eval_layer2 = theano.function([index], layer1c.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print2 = [
#                   eval_layer2(i)
#                   for i in xrange(n_train_batches)
#                 ]
#
#             eval_set_x = valid_set_x;
#
#             eval_layer2 = theano.function([index], layer1c.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print3 = [
#                   eval_layer2(i)
#                   for i in xrange(n_valid_batches)
#                 ]
#
#             if indk == 10:
#              Wl5, bl5 = morbrun1(nk2dx,nk2dy,im2dx,im2dy,neuron)
#
#
#
#        if epoch == 6:
#             print layer1e.W.get_value().shape
#             layer1d.W.set_value(Wl5.get_value());
#             layer1d.b.set_value(bl5.get_value());
#             layer1c.W.set_value(Wl4.get_value());
#             layer1c.b.set_value(bl4.get_value());
#             layer1b.W.set_value(Wl3.get_value());
#             layer1b.b.set_value(bl3.get_value());
#             layer1.W.set_value(Wl2.get_value());
#             layer1.b.set_value(bl2.get_value());
#             layer0.W.set_value(Wl1.get_value());
#             layer0.b.set_value(bl1.get_value());
#
#             eval_set_x = test_set_x;
#             eval_shape = train_set_x.get_value(borrow=True).shape;
#             eval_layer2 = theano.function([index], layer1d.output,
#                givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#             eval_print1 = [
#                   eval_layer2(i)
#                   for i in xrange(n_test_batches)
#                 ]
#
#             eval_set_x = train_set_x;
#
#             eval_layer2 = theano.function([index], layer1d.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print2 = [
#                   eval_layer2(i)
#                   for i in xrange(n_train_batches)
#                 ]
#
#             eval_set_x = valid_set_x;
#
#             eval_layer2 = theano.function([index], layer1d.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print3 = [
#                   eval_layer2(i)
#                   for i in xrange(n_valid_batches)
#                 ]
#
#             if indk == 10:
#              Wl6,bl6 = morbrun1(nk2ex,nk2ey,im2ex,im2ey,neuron)
#
#
#        if epoch == 7:
#             print layer1f.W.get_value().shape
#             layer1e.W.set_value(Wl6.get_value());
#             layer1e.b.set_value(bl6.get_value());
#             layer1d.W.set_value(Wl5.get_value());
#             layer1d.b.set_value(bl5.get_value());
#             layer1c.W.set_value(Wl4.get_value());
#             layer1c.b.set_value(bl4.get_value());
#             layer1b.W.set_value(Wl3.get_value());
#             layer1b.b.set_value(bl3.get_value());
#             layer1.W.set_value(Wl2.get_value());
#             layer1.b.set_value(bl2.get_value());
#             layer0.W.set_value(Wl1.get_value());
#             layer0.b.set_value(bl1.get_value());
#
#             eval_set_x = test_set_x;
#             eval_shape = train_set_x.get_value(borrow=True).shape;
#             eval_layer2 = theano.function([index], layer1e.output,
#                givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#             eval_print1 = [
#                   eval_layer2(i)
#                   for i in xrange(n_test_batches)
#                 ]
#
#             eval_set_x = train_set_x;
#
#             eval_layer2 = theano.function([index], layer1e.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print2 = [
#                   eval_layer2(i)
#                   for i in xrange(n_train_batches)
#                 ]
#
#             eval_set_x = valid_set_x;
#
#             eval_layer2 = theano.function([index], layer1e.output,
#             givens={
#                x: eval_set_x[index * batch_size: (index + 1) * batch_size]})
#
#             eval_print3 = [
#                   eval_layer2(i)
#                   for i in xrange(n_valid_batches)
#                 ]
#
#             if indk == 10:
#              Wl7,bl7 = morbrun1(nk2fx,nk2fy,im2fx,im2fy,neuron)
#              layer1f.W.set_value(Wl7.get_value());
#              layer1f.b.set_value(bl7.get_value());
#              layer1e.W.set_value(Wl6.get_value());
#              layer1e.b.set_value(bl6.get_value());
#              layer1d.W.set_value(Wl5.get_value());
#              layer1d.b.set_value(bl5.get_value());
#              layer1c.W.set_value(Wl4.get_value());
#              layer1c.b.set_value(bl4.get_value());
#              layer1b.W.set_value(Wl3.get_value());
#              layer1b.b.set_value(bl3.get_value());
#              layer1.W.set_value(Wl2.get_value());
#              layer1.b.set_value(bl2.get_value());
#              layer0.W.set_value(Wl1.get_value());
#              layer0.b.set_value(bl1.get_value());
#
#              n_in=nkerns[4] * im3x * im3y
#              n_out=100
#              W_values = numpy.asarray(
#                rng.uniform(
#                    low=-numpy.sqrt(6. / (n_in + n_out)),
#                    high=numpy.sqrt(6. / (n_in + n_out)),
#                    size=(n_in, n_out)
#                ),
#                dtype=theano.config.floatX
#              )
#              layer2.W.set_value(W_values);
#              layer2.b.set_value(numpy.zeros(n_out))
#              n_in=100
#              n_out=2
#              W_values = numpy.asarray(
#                rng.uniform(
#                    low=-numpy.sqrt(6. / (n_in + n_out)),
#                    high=numpy.sqrt(6. / (n_in + n_out)),
#                    size=(n_in, n_out)
#                ),
#                dtype=theano.config.floatX
#              )
#              layer3.W.set_value(W_values);
#              layer3.b.set_value(numpy.zeros(n_out))
#              learning_rate=0.01

        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index, learning_rate)
            #cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    try:
        os.remove(dirgtest + "/run2/pred_y" + str(indexd) + ".csv")
    except OSError:
        pass

    predy = open(dirgtest + "/run2/pred_y" + str(indexd) + ".csv", 'a')

    test_losses = [test_model2(i) for i in xrange(n_test_batches)]

    np.savetxt(predy, test_losses, delimiter='\n')

    predy.close()

    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    all_test += test_score
    print str(all_test) + ' ' + str(indexd)

    with open(dirgtest + "/run2/cv_score.txt", "a") as myfile:
        myfile.write(str(test_score) + "\n")

    if indk == 10:

        print "saving \n"

        f = file(dirgtest + "/weights/layer0w_" + str(indexd) + ".save", 'wb')
        cPickle.dump(layer0.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

        f = file(dirgtest + "/weights/layer1w_" + str(indexd) + ".save", 'wb')
        cPickle.dump(layer1.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

        f = file(dirgtest + "/weights/layer1bw_" + str(indexd) + ".save", 'wb')
        cPickle.dump(layer1b.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

        #f = file(dirgtest+"/weights/layer1cw_"+str(indexd)+".save", 'wb')
        #cPickle.dump(layer1c.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        #f.close()
        #
        #f = file(dirgtest+"/weights/layer1dw_"+str(indexd)+".save", 'wb')
        #cPickle.dump(layer1d.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        #f.close()
        #
        #f = file(dirgtest+"/weights/layer1ew_"+str(indexd)+".save", 'wb')
        #cPickle.dump(layer1e.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        #f.close()
        #
        #f = file(dirgtest+"/weights/layer1fw_"+str(indexd)+".save", 'wb')
        #cPickle.dump(layer1f.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        #f.close()

        f = file(dirgtest + "/weights/layer2w_" + str(indexd) + ".save", 'wb')
        cPickle.dump(layer2.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

        f = file(dirgtest + "/weights/layer3w_" + str(indexd) + ".save", 'wb')
        cPickle.dump(layer3.params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
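A hypothetical driver for the variant above: it writes predictions to <dirn>/run2 and weights to <dirn>/weights, so those directories must exist, and it also relies on module-level globals (all_test, indk, etc.) being initialized elsewhere. Everything in this sketch besides those paths is an assumption.

import os

out_dir = 'iti'
for sub in ('run2', 'weights'):
    path = os.path.join(out_dir, sub)
    if not os.path.isdir(path):
        os.makedirs(path)

evaluate_lenet5(dataset='F:/MOUD/0MOUD/jul14/x50_1/cktest/moud6.pkl.gz',
                dirn=out_dir, indexd=0)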
Example #36
def build_lenet5(params, nkerns=[48, 128, 192, 192], batch_size=1):
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (50, 50)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-5+1, 50-5+1) = (46, 46)
    # maxpooling reduces this further to (46/2, 46/2) = (23, 23)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 23, 23)
    layer0 = LeNetConvPoolLayer(params[5],
                                input=layer0_input,
                                image_shape=(batch_size, 1, 50, 50),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (23-3+1, 23-3+1) = (21, 21)
    # maxpooling reduces this further to (21/2, 21/2) = (10, 10)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 10, 10)
    layer1 = LeNetConvPoolLayer(params[4],
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 23, 23),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(2, 2))
    '''layer1_3 = LeNetConvPoolLayer(rng, input=layer1.output,
            image_shape=(batch_size, nkerns[1], 10, 10),
            filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2))'''

    layer1_3 = LeNetConvPoolLayerNoPooling(params[3],
                                           input=layer1.output,
                                           image_shape=(batch_size, nkerns[1],
                                                        10, 10),
                                           filter_shape=(nkerns[2], nkerns[1],
                                                         3, 3))

    layer1_4 = LeNetConvPoolLayer(params[2],
                                  input=layer1_3.output,
                                  image_shape=(batch_size, nkerns[2], 8, 8),
                                  filter_shape=(nkerns[3], nkerns[2], 3, 3),
                                  poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[3] * 3 * 3)
    layer2_input = layer1_4.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(None,
                         input=layer2_input,
                         n_in=nkerns[3] * 3 * 3,
                         n_out=1920,
                         W=params[1][0],
                         b=params[1][1],
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(params[0],
                                input=layer2.output,
                                n_in=1920,
                                n_out=58)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    predict_model = theano.function([x], layer3.y_pred)
    #predict_model = theano.function([x], layer3.p_y_given_x)

    return predict_model
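A hedged usage sketch for build_lenet5: it indexes its params argument as params[0]..params[5] (per-layer weights, with params[1] a (W, b) pair for the hidden layer), so a pickled list in that layout is assumed here; the file name params.pkl is hypothetical.

import cPickle
import numpy
import theano

with open('params.pkl', 'rb') as f:
    params = cPickle.load(f)

predict = build_lenet5(params, batch_size=1)
# one rasterized 50x50 image, matching the reshape inside build_lenet5
image = numpy.random.rand(1, 50 * 50).astype(theano.config.floatX)
print predict(image)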
Example #37
def predict(testNumber, dataset='dataset3.pkl', MEAN=True):
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    rng = numpy.random.RandomState(23455)
    finalSize = 200
    index = T.lscalar()
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')
    # load the saved model
    #layer4 = pickle.load(open('best_model.pkl'))
    basePath = r'C:\Users\Matt\Desktop\DogProj\data'
    f = open(os.path.join(basePath, 'best_model.pkl'), 'rb')
    layer4.W, layer4.b, layer3.W, layer3.b, layer2.W, layer2.b, layer1.W, layer1.b, layer0.W, layer0.b, validHolder, trainHolder = pickle.load(
        f)  #
    print('blah')
    print(numpy.array(layer0.W.get_value())[3, 0, ...])
    f.close()
    # compile a predictor function

    # We can test it on some examples from test test
    ##dataset='dataset3.pkl'
    dataset = os.path.join(basePath, dataset)
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    valid_set_x, valid_set_y = datasets[1]
    print(numpy.array(valid_set_y))
    #test_set_x = test_set_x.get_value()
    if MEAN == True:
        test_set_x.set_value(
            test_set_x.get_value(borrow=True) -
            numpy.mean(test_set_x.get_value(borrow=True)))
    test_set_x = test_set_x.get_value()
    layer0_input = x.reshape((testNumber, 1, 200, 200))
    layer0new = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=layer0.image_shape,
        filter_shape=layer0.filter_shape,  #5,5 before
        poolsize=(2, 2))
    layer0new.W.set_value(layer0.W.get_value())
    layer0new.b.set_value(layer0.b.get_value())
    layer1new = LeNetConvPoolLayer(rng,
                                   input=layer0new.output,
                                   image_shape=layer1.image_shape,
                                   filter_shape=layer1.filter_shape,
                                   poolsize=(2, 2))
    layer1new.W.set_value(layer1.W.get_value())
    layer1new.b.set_value(layer1.b.get_value())
    layer2new = LeNetConvPoolLayer(rng,
                                   input=layer1new.output,
                                   image_shape=layer2.image_shape,
                                   filter_shape=layer2.filter_shape,
                                   poolsize=(2, 2))
    layer2new.W.set_value(layer2.W.get_value())
    layer2new.b.set_value(layer2.b.get_value())
    layer3_input = layer2new.output.flatten(2)
    layer3new = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=layer3.n_in,
        n_out=layer3.n_out,  # number of hidden units (not the batch size)
        activation=T.tanh)
    layer3new.W.set_value(layer3.W.get_value())
    layer3new.b.set_value(layer3.b.get_value())
    layer4new = LogisticRegression(rng,
                                   input=layer3new.output,
                                   n_in=layer3.n_out,
                                   n_out=2)
    layer4new.W.set_value(layer4.W.get_value())
    layer4new.b.set_value(layer4.b.get_value())
    test_model = theano.function([index], [
        layer4new.y_pred, y, layer4new.p_y_given_x, x, layer0.W, layer1.W,
        layer2.W, test_set_y
    ],
                                 givens={
                                     x: test_set_x[0:testNumber, ...],
                                     y: test_set_y[0:testNumber]
                                 },
                                 on_unused_input='warn')
    print('test_set_y')

    predicted_values = test_model(1)
    print(numpy.array(predicted_values[7]))
    ''',y,p_y_given_x,x'''
    filter0_ = numpy.array(predicted_values[4])[0, 0, ...]
    filter0 = filter0_ / (abs(filter0_).max() / 255.0)
    filter01_ = numpy.array(predicted_values[4])[1, 0, ...]
    filter01 = filter01_ / (abs(filter01_).max() / 255.0)
    filter02_ = numpy.array(predicted_values[4])[2, 0, ...]
    filter02 = filter02_ / (abs(filter02_).max() / 255.0)
    filter03_ = numpy.array(predicted_values[4])[3, 0, ...]
    filter03 = filter03_ / (abs(filter03_).max() / 255.0)
    filter04_ = numpy.array(predicted_values[4])[4, 0, ...]
    filter04 = filter04_ / (abs(filter04_).max() / 255.0)
    filter05_ = numpy.array(predicted_values[4])[5, 0, ...]
    filter05 = filter05_ / (abs(filter05_).max() / 255.0)
    filter06_ = numpy.array(predicted_values[4])[6, 0, ...]
    filter06 = filter06_ / (abs(filter06_).max() / 255.0)
    filter07_ = numpy.array(predicted_values[4])[7, 0, ...]
    filter07 = filter07_ / (abs(filter07_).max() / 255.0)
    filter08_ = numpy.array(predicted_values[4])[8, 0, ...]
    filter08 = filter08_ / (abs(filter08_).max() / 255.0)
    filter09_ = numpy.array(predicted_values[4])[9, 0, ...]
    filter09 = filter09_ / (abs(filter09_).max() / 255.0)

    filter1_ = numpy.array(predicted_values[5])[0, 0, ...]
    filter1 = filter1_ / (abs(filter1_).max() / 255.0)
    filter11_ = numpy.array(predicted_values[5])[1, 0, ...]
    filter11 = filter11_ / (abs(filter11_).max() / 255.0)
    filter12_ = numpy.array(predicted_values[5])[2, 0, ...]
    filter12 = filter12_ / (abs(filter12_).max() / 255.0)
    filter13_ = numpy.array(predicted_values[5])[3, 0, ...]
    filter13 = filter13_ / (abs(filter13_).max() / 255.0)
    filter14_ = numpy.array(predicted_values[5])[4, 0, ...]
    filter14 = filter14_ / (abs(filter14_).max() / 255.0)
    filter15_ = numpy.array(predicted_values[5])[5, 0, ...]
    filter15 = filter15_ / (abs(filter15_).max() / 255.0)
    filter16_ = numpy.array(predicted_values[5])[6, 0, ...]
    filter16 = filter16_ / (abs(filter16_).max() / 255.0)
    filter17_ = numpy.array(predicted_values[5])[7, 0, ...]
    filter17 = filter17_ / (abs(filter17_).max() / 255.0)
    filter18_ = numpy.array(predicted_values[5])[8, 0, ...]
    filter18 = filter18_ / (abs(filter18_).max() / 255.0)
    filter19_ = numpy.array(predicted_values[5])[9, 0, ...]
    filter19 = filter19_ / (abs(filter19_).max() / 255.0)

    filter2_ = numpy.array(predicted_values[6])[0, 0, ...]
    filter2 = filter2_ / (abs(filter2_).max() / 255.0)
    filter21_ = numpy.array(predicted_values[6])[1, 0, ...]
    filter21 = filter21_ / (abs(filter21_).max() / 255.0)
    filter22_ = numpy.array(predicted_values[6])[2, 0, ...]
    filter22 = filter22_ / (abs(filter22_).max() / 255.0)
    filter23_ = numpy.array(predicted_values[6])[3, 0, ...]
    filter23 = filter23_ / (abs(filter23_).max() / 255.0)
    filter24_ = numpy.array(predicted_values[6])[4, 0, ...]
    filter24 = filter24_ / (abs(filter24_).max() / 255.0)
    filter25_ = numpy.array(predicted_values[6])[5, 0, ...]
    filter25 = filter25_ / (abs(filter25_).max() / 255.0)
    filter26_ = numpy.array(predicted_values[6])[6, 0, ...]
    filter26 = filter26_ / (abs(filter26_).max() / 255.0)
    filter27_ = numpy.array(predicted_values[6])[7, 0, ...]
    filter27 = filter27_ / (abs(filter27_).max() / 255.0)
    filter28_ = numpy.array(predicted_values[6])[8, 0, ...]
    filter28 = filter28_ / (abs(filter28_).max() / 255.0)
    filter29_ = numpy.array(predicted_values[6])[9, 0, ...]
    filter29 = filter29_ / (abs(filter29_).max() / 255.0)

    totFilter0 = numpy.hstack([
        filter0, filter01, filter02, filter03, filter04, filter05, filter06,
        filter07, filter08, filter09
    ])
    totFilter1 = numpy.hstack([
        filter1, filter11, filter12, filter13, filter14, filter15, filter16,
        filter17, filter18, filter19
    ])
    totFilter2 = numpy.hstack([
        filter2, filter21, filter22, filter23, filter24, filter25, filter26,
        filter27, filter28, filter29
    ])
    totFilter = numpy.vstack([totFilter0, totFilter1, totFilter2])
    #plt.imshow(totFilter, cmap = cm.Greys_r, interpolation='nearest')
    #plt.show()
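    # A compact alternative sketch of the same mosaic (assumes the predicted_values
    # layout above, with the three weight tensors at indices 4-6 and equally sized
    # kernels in every layer, as is the case here):
    def filter_mosaic(weights, n_filters=10):
        # scale each (filter k, input channel 0) kernel so its peak magnitude is 255
        rows = []
        for k in range(n_filters):
            w = numpy.array(weights)[k, 0, ...]
            rows.append(w / (abs(w).max() / 255.0))
        return numpy.hstack(rows)
    totFilter_alt = numpy.vstack([filter_mosaic(predicted_values[i]) for i in (4, 5, 6)])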

    #print(layer3.output)

    print(test_set_x.get_value(borrow=True).shape)
    print("Predicted values for the first 2 examples in test set:")
    y = predicted_values[1]
    print(numpy.array(range(predicted_values[2].shape[0])))
    #print([0:(predicted_values[2].shape[0])])
    print(
        numpy.transpose(numpy.array(range(
            predicted_values[2].shape[0]))).shape)
    CountTrans = numpy.transpose(
        numpy.array(range(predicted_values[2].shape[0])))
    print(CountTrans.shape[0])
    CountTrans = CountTrans.reshape(CountTrans.shape[0], 1)
    #CountTrans.dimshuffle('x', 0)
    print(CountTrans.shape)
    predPrint = numpy.hstack([predicted_values[2], CountTrans])
    print(predPrint)
    print(predicted_values[0])
    print('test error = ' + str(sum(predicted_values[0] != y) / y.shape[0]))
    print('Actual values:')
    print(y)
    print('--- debug: predicted class probabilities ---')
    print(predicted_values[2][:, y[0]].shape)
    print(validHolder)
    print(predicted_values[2])
    #.plot(validHolder)
    #plt.plot(trainHolder)
    #plt.show()
    return (predicted_values[2][:, y[0]], validHolder,
            predicted_values[2][testNumber - 1, y[testNumber - 1]])
Example #38
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=100,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (unused here; a libsvm file is loaded directly)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    rng = numpy.random.RandomState(23455)

    #    datasets = load_data(dataset)
    #    train_set_x, train_set_y = datasets[0]
    #    valid_set_x, valid_set_y = datasets[1]
    #    test_set_x, test_set_y = datasets[2]

    data = load_svmlight_file("./MachineLearning/DS3.libsvm")

    XA, testSetX, YA, testSetY = train_test_split(data[0],
                                                  data[1],
                                                  test_size=0.3,
                                                  random_state=1)
    trainSetX, validSetX, trainSetY, validSetY = train_test_split(
        XA, YA, test_size=0.5, random_state=1)

    train_set_x, train_set_y = shared_dataset((trainSetX.toarray(), trainSetY))
    valid_set_x, valid_set_y = shared_dataset((validSetX.toarray(), validSetY))
    test_set_x, test_set_y = shared_dataset((testSetX.toarray(), testSetY))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected hidden layer (tanh activation)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    p = params
    for k in range(len(p)):
        lenOfP = 0
        evP = p[k].eval()
        lenOfP = checkLen(evP)

        print("W-B Count :", lenOfP, evP.shape)
Example #39
def evaluate_lenet5(
    learning_rate=0.01,
    n_epochs=1,
    dataset='dataset3.pkl',
    nkerns=[20, 50, 50],
    batch_size=10,
    L1Value=0.00005,
    L2Value=0.0003
):  # nkerns should be 20, 50, 50; was 2, 2, 2 and then 5, 5, 5 (slower because of more weights)
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    basePath = r'C:\Users\Matt\Desktop\DogProj\data'
    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print('train set x')
    print(numpy.max(train_set_x.get_value(borrow=True)))
    print(numpy.min(train_set_x.get_value(borrow=True)))
    print((train_set_x.get_value(borrow=True)).sum() /
          ((train_set_x.get_value(borrow=True).shape[1]) *
           (train_set_x.get_value(borrow=True).shape[0])))
    #for L in range(train_set_x.get_value(borrow=True).shape[1]):
    #    train_set_x.set_value(train_set_x.get_value(borrow=True)[L,...]-numpy.mean(train_set_x.get_value(borrow=True)[L,...]))
    #train_set_x.set_value(train_set_x.get_value(borrow=True)-numpy.mean(train_set_x.get_value(borrow=True)))
    a = (train_set_x.get_value(borrow=True) > 0)  #.astype(float)
    b = (train_set_x.get_value(borrow=True) < 0)  #.astype(float)
    #train_set_x.set_value(a)
    valid_set_x.set_value(
        valid_set_x.get_value(borrow=True) -
        numpy.mean(valid_set_x.get_value(borrow=True)))
    a = (valid_set_x.get_value(borrow=True) > 0)  #.astype(float)
    b = (valid_set_x.get_value(borrow=True) < 0)  #.astype(float)
    #valid_set_x.set_value(a)
    test_set_x.set_value(
        test_set_x.get_value(borrow=True) -
        numpy.mean(test_set_x.get_value(borrow=True)))
    a = (test_set_x.get_value(borrow=True) > 0)  #.astype(float)
    b = (test_set_x.get_value(borrow=True) < 0)  #.astype(float)
    #test_set_x.set_value(a)
    print(numpy.max(train_set_x.get_value(borrow=True)))
    print(numpy.min(train_set_x.get_value(borrow=True)))
    print((train_set_x.get_value(borrow=True)).sum() /
          ((train_set_x.get_value(borrow=True).shape[1]) *
           (train_set_x.get_value(borrow=True).shape[0])))
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')
    finalSize = 200

    # Reshape matrix of rasterized images of shape (batch_size, 200 * 200)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (200, 200) is the input image size (finalSize) used here.
    layer0_input = x.reshape((batch_size, 1, finalSize, finalSize))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (200-9+1, 200-9+1) = (192, 192)
    # maxpooling reduces this further to (192/2, 192/2) = (96, 96)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 96, 96)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, finalSize, finalSize),
        filter_shape=(nkerns[0], 1, 9, 9),  #5,5 before
        poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (96-9+1, 96-9+1) = (88, 88)
    # maxpooling reduces this further to (88/2, 88/2) = (44, 44)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 44, 44)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 96, 96),
                                filter_shape=(nkerns[1], nkerns[0], 9, 9),
                                poolsize=(2, 2))

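    # Construct the third convolutional pooling layer
    # filtering reduces the image size to (44-9+1, 44-9+1) = (36, 36)
    # maxpooling reduces this further to (36/2, 36/2) = (18, 18)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 18, 18)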
    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, nkerns[1], 44, 44),
                                filter_shape=(nkerns[2], nkerns[1], 9, 9),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 18 * 18),
    # or (10, 50 * 18 * 18) = (10, 16200) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected hidden layer (tanh activation)
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 18 * 18,
        n_out=81,  # number of hidden units (not the batch size); was 50
        activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(rng, input=layer3.output, n_in=81, n_out=2)

    # the cost we minimize during training is the NLL of the model plus an L2 penalty
    cost = (layer4.negative_log_likelihood(y) + L2Value *
            (layer0.L2 + layer1.L2 + layer2.L2 + layer3.L2 + layer4.L2))
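    # Note: L1Value is accepted as an argument but not used above. A hypothetical
    # sketch of adding the L1 penalty as well, assuming each layer exposes an .L1
    # attribute analogous to its .L2 attribute:
    #
    # cost = (layer4.negative_log_likelihood(y)
    #         + L1Value * (layer0.L1 + layer1.L1 + layer2.L1 + layer3.L1 + layer4.L1)
    #         + L2Value * (layer0.L2 + layer1.L2 + layer2.L2 + layer3.L2 + layer4.L2))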

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params0 = layer3.params + layer2.params + layer1.params + layer0.params
    params1 = layer4.params
    # create a list of gradients for all model parameters
    grads0 = T.grad(cost, params0)
    grads1 = T.grad(cost, params1)
    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    # Equivalent single-expression form of the updates list built below (kept as a
    # reference sketch): full learning rate for params0, one fifth of it for the
    # output layer's params1.
    #
    # updates = ([(param_i, param_i - learning_rate * grad_i)
    #             for param_i, grad_i in zip(params0, grads0)]
    #            + [(param_j, param_j - learning_rate / 5 * grad_j)
    #               for param_j, grad_j in zip(params1, grads1)])
    updates = []
    for param_i, grad_i in zip(params0, grads0):
        updates = updates + [(param_i, param_i - learning_rate * grad_i)]
    for param_j, grad_j in zip(params1, grads1):
        updates = updates + [(param_j, param_j - learning_rate / 5 * grad_j)]

    train_model = theano.function(
        [index],
        [
            cost,
            layer4.p_y_given_x,
            layer4.y_pred,
            layer0.W,
            layer1.W,  #5
            layer2.W,
            layer3.W,
            layer4.W,
            layer0.output,
            layer4.b,
            layer4.p_y_given_x,  #6
            y,
            layer4.errors(y),
            layer0.preOutput,
            layer1.preOutput,
            layer2.preOutput,  #5
            layer0.output,
            layer2.output,
            layer3.preOutput,
            layer4.preOutput,  #4
            layer4.W,
            layer4.b,
            layer4.input,
            test_set_y,  #4
            layer0.b,
            layer1.b,
            layer2.b,
            layer3.b,
            layer4.b
        ],  #5
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # Optional debugging hook: pass
    #     mode=theano.compile.MonitorMode(pre_func=inspect_inputs,
    #                                     post_func=inspect_outputs)
    # to theano.function above to trace every op's inputs and outputs.
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 500  #10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.9995  #0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    counter3 = 0
    counter4 = 0
    filterHolder = []
    validHolder = []
    trainHolder = []
    costHolder = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        costHolder = []
        for minibatch_index in range(int(n_train_batches)):

            iter = (epoch - 1) * n_train_batches + minibatch_index
            print(iter)
            print(epoch)
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)
            print('layer4.W (symbolic and current values):')
            print((layer4.W))
            print(numpy.array(cost_ij[7]))
            print(cost_ij[7].shape)
            print('test set y')
            print(numpy.array(cost_ij[23]))
            print()
            #print(cost_ij)
            #print(numpy.array(cost_ij[3])[0,0,...])
            #print(numpy.array(cost_ij[3])[1,0,...])
            #print('shape')
            #print(numpy.array(cost_ij[3])[0,0,...])
            filter0_ = numpy.array(cost_ij[3])[0, 0, ...]
            filter0 = filter0_ / (filter0_.max() / 255.0)
            #filter0 = filter0_
            filter01_ = numpy.array(cost_ij[3])[1, 0, ...]
            filter01 = filter01_ / (filter01_.max() / 255.0)
            #filter0 = filter0_
            #print('filter0.shape')
            #print(filter0.shape)
            filter1_ = numpy.array(cost_ij[4])[0, 0, ...]
            filter1 = filter1_ / (filter1_.max() / 255.0)
            #filter1 =filter1_
            filter11_ = numpy.array(cost_ij[4])[1, 0, ...]
            filter11 = filter11_ / (filter11_.max() / 255.0)
            #filter1 =filter1_
            filter2_ = numpy.array(cost_ij[5])[0, 0, ...]
            filter2 = filter2_ / (filter2_.max() / 255.0)
            filter21_ = numpy.array(cost_ij[5])[1, 0, ...]
            filter21 = filter21_ / (filter21_.max() / 255.0)
            #filter2 =filter2_
            hiddenW_ = numpy.array(cost_ij[6])[:, 0]
            hiddenW = hiddenW_ / (hiddenW_.max() / 255.0)
            #hiddenW=hiddenW_
            logRWT_ = numpy.array(cost_ij[7])[:, 0]
            logRWT = logRWT_ / (logRWT_.max() / 255.0)
            #logRWT=logRWT_
            logRWF_ = numpy.array(cost_ij[7])[:, 1]
            logRWF = logRWF_ / (logRWF_.max() / 255.0)
            #logRWF=logRWF_
            hiddenW = numpy.reshape(hiddenW[0:81], [9, 9])
            logRWT = numpy.reshape(logRWT[0:81], [9, 9])  #was [5,5]
            logRWF = numpy.reshape(logRWF[0:81], [9, 9])
            #gradientP = numpy.array(cost_ij[7])

            #print('shapes')
            #print(hiddenW.shape)
            #print(logRWT.shape)
            #filter1.resize([filter0.shape[0],filter0.shape[1]])
            #print(filter1)
            #print()
            filter1 = numpy.vstack([
                filter1,
                numpy.zeros(
                    [filter0.shape[0] - filter1.shape[0], filter1.shape[1]])
            ])
            filter1 = numpy.hstack([
                filter1,
                numpy.zeros(
                    [filter1.shape[0], filter0.shape[1] - filter1.shape[1]])
            ])
            filter2 = numpy.vstack([
                filter2,
                numpy.zeros(
                    [filter0.shape[0] - filter2.shape[0], filter2.shape[1]])
            ])
            filter2 = numpy.hstack([
                filter2,
                numpy.zeros(
                    [filter2.shape[0], filter0.shape[1] - filter2.shape[1]])
            ])
            hiddenW = numpy.vstack([
                hiddenW,
                numpy.zeros(
                    [filter0.shape[0] - hiddenW.shape[0], hiddenW.shape[1]])
            ])
            hiddenW = numpy.hstack([
                hiddenW,
                numpy.zeros(
                    [hiddenW.shape[0], filter0.shape[1] - hiddenW.shape[1]])
            ])
            logRWT = numpy.vstack([
                logRWT,
                numpy.zeros(
                    [filter0.shape[0] - logRWT.shape[0], logRWT.shape[1]])
            ])
            logRWT = numpy.hstack([
                logRWT,
                numpy.zeros(
                    [logRWT.shape[0], filter0.shape[1] - logRWT.shape[1]])
            ])
            logRWF = numpy.vstack([
                logRWF,
                numpy.zeros(
                    [filter0.shape[0] - logRWF.shape[0], logRWF.shape[1]])
            ])
            logRWF = numpy.hstack([
                logRWF,
                numpy.zeros(
                    [logRWF.shape[0], filter0.shape[1] - logRWF.shape[1]])
            ])
            totFilter = numpy.hstack(
                [filter0, filter1, filter2, hiddenW, logRWT, logRWF])
            totlayer2 = numpy.hstack(
                [filter01, filter11, filter21,
                 numpy.zeros([9, 3 * 9])])
            totFilter = numpy.vstack([totFilter, totlayer2])
            '''
            print('preOutput1')
            print(numpy.mean(abs(numpy.array(cost_ij[13]))))
            print(numpy.mean(abs(numpy.array(cost_ij[14]))))
            print(numpy.mean(abs(numpy.array(cost_ij[15]))))
            print('preOutput3')
            print(numpy.mean(abs(numpy.array(cost_ij[18]))))
            
            print('postlayer0')
            print(numpy.mean(abs(numpy.array(cost_ij[16]))))
            print('postlayer2')
            print(numpy.mean(abs(numpy.array(cost_ij[17]))))
            print('preOutput4')
            print((numpy.array(cost_ij[19])))
            print('layer4W,layer4B,layer4input')
            #print((numpy.array(cost_ij[20])))
            #print((numpy.array(cost_ij[21])))
            print((numpy.array(cost_ij[22])))
            '''
            #totFilter = numpy.array(cost_ij[8][0,0,...])
            #print(filter0)
            #plt.imshow(filter0, cmap = cm.Greys_r,interpolation="nearest")
            #plt.show()
            filterHolder.append(totFilter)  #=filter0
            costHolder.append(numpy.mean(cost_ij[12]))
            if iter > 1:
                a = 1
                '''
                #print(filterHolder[int(iter-1)][0])
                print('abs values')
                ##print(filter0_)
                ##print(filter1_)
                ##print(numpy.reshape(hiddenW_[0:81],[9,9]))
                print(numpy.reshape(logRWT_[0:81],[9,9]))
                print(numpy.array(cost_ij[9]).shape)
                print((numpy.array(cost_ij[9])[0]))
                print('end abs values')
                print('p_y_given_x')
                print(numpy.array(cost_ij[10]))
                print(numpy.array(cost_ij[11]))
                
                print(iter)
                #print(len(filterHolder))
                ##print(filterHolder[int(iter)][0:9,0:9]-filterHolder[int(iter)-1][0:9,0:9])
                ##print(filterHolder[int(iter)][0:9,9:18]-filterHolder[int(iter)-1][0:9,9:18])
                #print(filterHolder[int(iter-1)][0:9,0:9].shape)
                #print(filterHolder[int(iter-1)][2].shape)
                #print(filterHolder[int(iter-1)][3].shape)
                '''
                print('filterHolders')
                print(
                    numpy.mean(filterHolder[int(iter)][0:9, 0:9] -
                               filterHolder[int(iter) - 1][0:9, 0:9]))
                print(
                    numpy.mean(filterHolder[int(iter)][0:9, 9:18] -
                               filterHolder[int(iter) - 1][0:9, 9:18]))
                print(
                    numpy.mean(filterHolder[int(iter)][0:9, 18:27] -
                               filterHolder[int(iter) - 1][0:9, 18:27]))
                print(
                    numpy.mean(filterHolder[int(iter)][0:9, 36:45] -
                               filterHolder[int(iter) - 1][0:9, 36:45]))

                print('cost')
                print(numpy.array(cost_ij[0]))
                print('p_y_given_x')
                print(numpy.array(cost_ij[10]))
                print((numpy.array(cost_ij[11])))

            counter4 += 1
            '''
            print('layer4.Wb')
            print(layer4.W.get_value())
            print(layer4.b.get_value())
            
            
            print('layer3.Wb')
            print(layer3.W.get_value())
            print(layer3.b.get_value())
            print('layer2.Wb')
            print(layer2.W.get_value())
            print(layer2.b.get_value())
            print('layer1.Wb')
            print(layer1.W.get_value())
            print(layer1.b.get_value())
            print('layer0.Wb')
            print(layer0.W.get_value())
            print(layer0.b.get_value())
            '''
            #print(layer4.input.eval())
            #print(layer4.p_y_given_x.eval({'input':layer4.input,'SelfW':layer4.W,'SelfB':layer4.b}))

            #x_printed = theano.printing.Print('this is a very important value')(x)

            #f = theano.function([x], x * 5)
            #f_with_print = theano.function([x], x_printed * 5)
            #assert numpy.all( f_with_print([1, 2, 3]) == [5, 10, 15])

            if (iter + 1) % validation_frequency == 0:

                if (counter3) % 5 == 0:
                    a = 1
                    '''
                    filter01_ = numpy.array(cost_ij[3])[2,0,...]
                    filter01 = filter01_/(filter01_.max())
                    filterHolder.append(filter01)
                    totFilter = numpy.array(cost_ij[8][0,2,...])
                    filterHolder.append(totFilter)
                    totFilter = numpy.array(cost_ij[8][1,2,...])
                    filterHolder.append(totFilter)
                    totFilter = numpy.array(cost_ij[8][2,2,...])
                    filterHolder.append(totFilter)
                    totFilter = numpy.array(cost_ij[8][3,2,...])
                    filterHolder.append(totFilter)
                    totFilter = numpy.array(cost_ij[8][4,2,...])
                    filterHolder.append(totFilter)
                    '''
                    #totFilter = numpy.array(cost_ij[8][5,0,...])
                    #filterHolder.append(totFilter)
                counter3 += 1

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(int(n_valid_batches))
                ]
                this_validation_loss = numpy.mean(validation_losses)
                validHolder.append(this_validation_loss)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    best_params = params0 + params1

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(int(n_test_batches))
                    ]
                    test_score = numpy.mean((test_losses))
                    print(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    with open(os.path.join(basePath, 'best_modelWEB.pkl'),
                              'wb') as f:
                        pickle.dump([
                            numpy.array(cost_ij[7]),
                            numpy.array(cost_ij[28]),
                            numpy.array(cost_ij[6]),
                            numpy.array(cost_ij[27]),
                            numpy.array(cost_ij[5]),
                            numpy.array(cost_ij[26]),
                            numpy.array(cost_ij[4]),
                            numpy.array(cost_ij[25]),
                            numpy.array(cost_ij[3]),
                            numpy.array(cost_ij[24]), validHolder, trainHolder
                        ], f)

            if iter > 150000:
                break
            if patience <= iter:
                a = 1
                #done_looping = True
                #break
        trainHolder.append(sum(costHolder) / len(costHolder))
        print('TrainHolder : ')
        print(trainHolder)
        with open(os.path.join(basePath, 'final_modelWEB.pkl'), 'wb') as f:
            pickle.dump([
                numpy.array(cost_ij[7]),
                numpy.array(cost_ij[28]),
                numpy.array(cost_ij[6]),
                numpy.array(cost_ij[27]),
                numpy.array(cost_ij[5]),
                numpy.array(cost_ij[26]),
                numpy.array(cost_ij[4]),
                numpy.array(cost_ij[25]),
                numpy.array(cost_ij[3]),
                numpy.array(cost_ij[24]), validHolder, trainHolder
            ], f)
    end_time = timeit.default_timer()
    print('Optimization complete.')
    print((params0 + params1))
    print('Valid Holder')
    print(validHolder)
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    fig = plt.figure()  # make figure
    im = plt.imshow(filterHolder[0], cmap=cm.Greys_r, interpolation="nearest")

    def updatefig(j):
        # set the data in the axesimage object
        im.set_array(filterHolder[j])
        print(j)
        #print(filterHolder[j])
        # return the artists set
        return im,

    ani = animation.FuncAnimation(fig,
                                  updatefig,
                                  frames=len(filterHolder),
                                  interval=10,
                                  blit=True,
                                  repeat=True)

    #plt.imshow(filterHolder[0], cmap = cm.Greys_r, interpolation="nearest")
    #print(filterHolder)
    plt.show()
class MLPRanker(object):
    def __init__(self, verbose=True):
        if verbose: logger.debug('Build Multilayer Perceptron Ranking model...')
        # Positive input setting
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative input setting
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Standard input setting
        self.inputL = T.matrix(name='inputL', dtype=floatX)
        self.inputR = T.matrix(name='inputR', dtype=floatX)
        # Build activation function
        self.act = Activation('tanh')
        # Connect input matrices
        self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
        self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
        self.input = T.concatenate([self.inputL, self.inputR], axis=1)
        # Build hidden layer
        self.hidden_layer = HiddenLayer(self.input, (2*edim, args.hidden), act=self.act)
        self.hidden = self.hidden_layer.output
        self.hiddenP = self.hidden_layer.encode(self.inputP)
        self.hiddenN = self.hidden_layer.encode(self.inputN)
        # Dropout parameter - test
        self.thidden = (1-args.dropout) * self.hidden
        self.thiddenP = (1-args.dropout) * self.hiddenP
        self.thiddenN = (1-args.dropout) * self.hiddenN
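        # (standard, non-inverted dropout: the test-time activations above are scaled
        # by the keep probability 1 - args.dropout, while the binary masks below are
        # applied only to the training-time activations)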
        # Dropout parameter - train
        srng = T.shared_randomstreams.RandomStreams(args.seed)
        mask = srng.binomial(n=1, p=1-args.dropout, size=self.hidden.shape)
        maskP = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenP.shape)
        maskN = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenN.shape)
        self.hidden *= T.cast(mask, floatX)
        self.hiddenP *= T.cast(maskP, floatX)
        self.hiddenN *= T.cast(maskN, floatX)
        # Build linear output layer
        self.score_layer = ScoreLayer(self.hidden, args.hidden)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.hiddenP)
        self.scoreN = self.score_layer.encode(self.hiddenN)
        # Build for test
        self.toutput = self.score_layer.encode(self.thidden)
        self.tscoreP = self.score_layer.encode(self.thiddenP)
        self.tscoreN = self.score_layer.encode(self.thiddenN)
        # Stack all the parameters
        self.params = []
        self.params += self.hidden_layer.params
        self.params += self.score_layer.params
        # Build cost function
        self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0-self.scoreP+self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Count the total number of parameters in this model
        self.num_params = edim * args.hidden + args.hidden + args.hidden + 1
        # Build class method
        self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.toutput)
        self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                                         outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                           outputs=[self.tscoreP, self.tscoreN])
        if verbose:
            logger.debug('Architecture of MLP Ranker built finished, summarized below: ')
            logger.debug('Input dimension: %d' % edim)
            logger.debug('Hidden dimension: %d' % args.hidden)
            logger.debug('Total number of parameters used in the model: %d' % self.num_params)

    def update_params(self, grads, learn_rate):
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    @staticmethod
    def save(fname, model):
        with file(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        with file(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
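# Hypothetical usage sketch for MLPRanker (placeholder data; `edim`, `args` and
# `floatX` are the module-level settings the class already relies on). The compiled
# compute_cost_and_gradient returns the gradients first, in the same order as
# self.params, followed by the cost and the two scores.
#
# import numpy as np
# ranker = MLPRanker()
# batch = 32
# pl, pr, nl, nr = (np.random.randn(batch, edim).astype(floatX) for _ in range(4))
# results = ranker.compute_cost_and_gradient(pl, pr, nl, nr)
# grads = results[:len(ranker.params)]
# cost, score_p, score_n = results[len(ranker.params):]
# ranker.update_params(grads, learn_rate=0.01)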
Example #41
def evaluate_transfer_lenet5(
        learning_rate=0.1,
        alpha=1,
        n_epochs=20,
        source_dataset='../data/resize_mnist_whiten.pkl.gz',
        target_dataset='../data/usps_whiten.pkl.gz',
        training_dataset='../data/shuffled_training_data_big_new.pkl.gz',
        nkerns=[20, 50],
        batch_size=4000):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_source_data(source_dataset)

    target_datasets = load_target_data(target_dataset)

    transfer_training_datasets = load_transfer_training_data(training_dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    target_set_x, target_set_y = target_datasets
    transfer_training_set_x, transfer_training_set_y = transfer_training_datasets

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_target_batches = target_set_x.get_value(borrow=True).shape[0]
    n_transfer_training_batches = transfer_training_set_x.get_value(
        borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    n_target_batches /= batch_size
    n_transfer_training_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    y_in = T.ivector('y_in')
    ishape = (16, 16)  # the size of the (resized) input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 16*16)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 16, 16))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (16-3+1, 16-3+1) = (14, 14)
    # maxpooling reduces this further to (14/2, 14/2) = (7, 7)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 7, 7)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 16, 16),
                                filter_shape=(nkerns[0], 1, 3, 3),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (7-3+1, 7-3+1) = (5, 5)
    # maxpooling reduces this further to (5/2, 5/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 7, 7),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*2*2) = (4000, 200)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected hidden layer (tanh activation)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 2 * 2,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    x_prob = layer3.py_given_x()

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    marginal_params = layer1.params + layer0.params

    # calculate marginal MMD and its gradients
    # todo:
    #  q1: why is the outermost operation a mean rather than an L2 norm?
    #  q2: the paper reads as if the (l-1)-th layer is layer2, but the code uses layer1
    marginal_MMD = T.mean(
        T.mean(layer2_input[T.arange(batch_size / 2)]) -
        T.mean(layer2_input[T.arange(batch_size / 2, batch_size, 1)]))
    marginal_grads = T.grad(T.dot(marginal_MMD, marginal_MMD), marginal_params)
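    # A minimal NumPy sketch of the quantity this linear-kernel MMD term tracks: the
    # gap between mean features of the first half of the batch (assumed source domain)
    # and the second half (assumed target domain). The code above takes a plain mean of
    # that gap; the squared-norm variant asked about in q1 would look like:
    #
    # def linear_mmd_sq(features, batch_size):
    #     src = features[:batch_size // 2].mean(axis=0)
    #     tgt = features[batch_size // 2:].mean(axis=0)
    #     diff = src - tgt
    #     return float(numpy.dot(diff, diff))  # squared L2 norm of the mean gap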

    # the cost we minimize during training is the NLL of the model
    lost_cost = layer3.negative_log_likelihood(y)

    # calculate conditional MMD
    conditional_cost_all = T.mean(x_prob[0:batch_size/2:1,0:10:1],axis = 0)\
        - T.mean(x_prob[batch_size/2:batch_size:1,0:10:1],axis = 0)
    conditional_cost = T.dot(conditional_cost_all, conditional_cost_all)

    #add classification loss and conditional MMD all together
    cost = lost_cost + 100 * conditional_cost
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    target_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: target_set_x[index * batch_size:(index + 1) * batch_size],
            y: target_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.

    # todo: why are marginal_MMD and the main cost kept as separate loss terms, with their gradients applied in separate updates?
    marginal_updates = []
    for param_i, marginal_grad_i in zip(marginal_params, marginal_grads):
        marginal_updates.append(
            (param_i, param_i - 100 * learning_rate * marginal_grad_i))

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # todo: where is the code that replaces the target-domain labels with the latest predictions?
    transfer_model = theano.function(
        [index], [cost, conditional_cost],
        updates=updates,
        givens={
            x: transfer_training_set_x[index * batch_size:(index + 1) *
                                       batch_size],
            y: transfer_training_set_y[index * batch_size:(index + 1) *
                                       batch_size]
        })

    marginal_model = theano.function(
        [index],
        marginal_MMD,
        updates=marginal_updates,
        givens={
            x: transfer_training_set_x[index * batch_size:(index + 1) *
                                       batch_size]
        })

    y_out = layer3.get_output()

    target_predict = theano.function(
        [index],
        y_out,
        givens={x: target_set_x[index * batch_size:(index + 1) * batch_size]})

    update_target_training_label = theano.function(
        [index],
        y_out,  #updates=label_updates,
        givens={
            x: transfer_training_set_x[index * batch_size:(index + 1) *
                                       batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    batch_number = n_transfer_training_batches  #n_train_batches
    validation_frequency = min(batch_number, patience / 2)
    update_frequency = min(batch_number, patience / 2) * 10
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        for minibatch_index in xrange(batch_number):

            iter = (epoch - 1) * batch_number + minibatch_index

            cost = transfer_model(minibatch_index)
            MMD_margin = marginal_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, batch_number, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of the '
                           'model %f %%') % (epoch, minibatch_index + 1,
                                             batch_number, test_score * 100.))

                    target_losses = [
                        target_model(i) for i in xrange(n_target_batches)
                    ]
                    target_score = numpy.mean(target_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, target error of the '
                         'model %f %%') %
                        (epoch, minibatch_index + 1,
                         n_transfer_training_batches, target_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    #print('Best validation score of %f %% obtained at iteration %i,'\
    #      'with test performance %f %%' %
    #      (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    noisy_y = target_set_y.eval()
    for i in xrange(n_target_batches):
        noisy_y[i * batch_size:(i + 1) * batch_size] = target_predict(i)

    resizeValidSet = [target_set_x, noisy_y]
    fw = gzip.open("../data/predict_data_CNN_new.pkl.gz", 'wb')
    cPickle.dump(resizeValidSet, fw)
    fw.close()
def evaluate_lenet5(datasets,
                    learning_seed=0.01, n_epochs=500, 
                    batch_size=250,
                    save_folder='./cache',
                    channel_count=1):

    """ Evaluate a convnet for three dimensional image inputs.

    :type learning_seed: float
    :param learning_seed: learning rate used (factor for the stochastic
                          gradient) during initialization.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type datasets: list
    :param datasets: the (train, valid, test) shared dataset pairs (32x32 CIFAR-style images here)

    :type batch_size: integer
    :param batch_size: size for batched testing

    :type channel_count: integer
    :param channel_count: number of channels per image

    """

    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    new_rate = T.lscalar()   # The learning rate.

    # start-snippet-1
    r = T.dscalar('r')  # the learning rate as a variable.
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape 
    # (batch_size, channel_count, 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (32, 32) is the size of CIFAR images.
    layer0_input = x.reshape((batch_size, channel_count, 32, 32))


    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32+2-3+1 , 32+2-3+1) = (32, 32)
    # maxpooling reduces this further to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16)
    
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, channel_count, 32, 32),
        filter_shape=(128, channel_count, 3, 3),
        poolsize=(1, 1)
    )

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, 128, 32, 32),
        filter_shape=(128, 128, 3, 3),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (16+2-3+1, 16+2-3+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 128, 16, 16),
        filter_shape=(256, 128, 3, 3),
        poolsize=(1, 1)
    )

    layer3 = LeNetConvPoolLayer(
        rng,
        input=layer2.output,
        image_shape=(batch_size, 256, 16, 16),
        filter_shape=(256, 256, 3, 3),
        poolsize=(1, 1)
    )

    layer4 = LeNetConvPoolLayer(
        rng,
        input=layer3.output,
        image_shape=(batch_size, 256, 16, 16),
        filter_shape=(256, 256, 3, 3),
        poolsize=(1, 1)
    )

    layer5 = LeNetConvPoolLayer(
        rng,
        input=layer4.output,
        image_shape=(batch_size, 256, 16, 16),
        filter_shape=(256, 256, 3, 3),
        poolsize=(2, 2)
    )

    # Construct the third convolutional pooling layer
    # filtering reduces the image size to (8+2-3+1, 8+2-3+1) = (8, 8)
    # No maxpooling (aka maxpooling (1, 1))
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    
    layer6 = LeNetConvPoolLayer(
        rng,
        input=layer5.output,
        image_shape=(batch_size, 256, 8, 8),
        filter_shape=(512, 256, 3, 3),
        poolsize=(1, 1)
    )

    layer7 = LeNetConvPoolLayer(
        rng,
        input=layer6.output,
        image_shape=(batch_size, 512, 8, 8),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )

    layer8 = LeNetConvPoolLayer(
        rng,
        input=layer7.output,
        image_shape=(batch_size, 512, 8, 8),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )

    layer9 = LeNetConvPoolLayer(
        rng,
        input=layer8.output,
        image_shape=(batch_size, 512, 8, 8),
        filter_shape=(512, 512, 3, 3),
        poolsize=(2, 2)
    )

    # Construct the fourth convolutional block (layers 10-13):
    # filtering keeps the image size at (4+2-3+1, 4+2-3+1) = (4, 4) and no maxpooling is applied
    # 4D output tensor of the block is thus of shape (batch_size, 512, 4, 4)
    layer10 = LeNetConvPoolLayer(
        rng,
        input=layer9.output,
        image_shape=(batch_size, 512, 4, 4),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )

    layer11 = LeNetConvPoolLayer(
        rng,
        input=layer10.output,
        image_shape=(batch_size, 512, 4, 4),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )

    layer12 = LeNetConvPoolLayer(
        rng,
        input=layer11.output,
        image_shape=(batch_size, 512, 4, 4),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )

    layer13 = LeNetConvPoolLayer(
        rng,
        input=layer12.output,
        image_shape=(batch_size, 512, 4, 4),
        filter_shape=(512, 512, 3, 3),
        poolsize=(1, 1)
    )


    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # Flattening generates a matrix of shape (batch_size, 512 * 4 * 4) = (batch_size, 8192).
    layer14_input = layer13.output.flatten(2)

    # construct a fully-connected layer with ReLU activation
    layer14 = HiddenLayer(
        rng,
        input=layer14_input,
        n_in=512 * 4 * 4,
        n_out=2048,
        activation=relu
    )

    layer15 = HiddenLayer(
        rng,
        input=layer14.output,
        n_in=2048,
        n_out=1024,
        activation=relu
    )

    # output layer mapping the previous layer's activations to the
    # 10 class labels.
    layer16 = HiddenLayer(
        rng,
        input=layer15.output,
        n_in=1024,
        n_out=10,
        activation=relu
    )

    # the cost we minimize during training is the NLL of the model
    cost = layer16.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer16.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer16.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer16.params + \
            layer15.params + \
            layer14.params + \
            layer13.params + \
            layer12.params + \
            layer11.params + \
            layer10.params + \
            layer9.params  + \
            layer8.params  + \
            layer7.params  + \
            layer6.params  + \
            layer5.params  + \
            layer4.params  + \
            layer3.params  + \
            layer2.params  + \
            layer1.params  + \
            layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - r * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index, new_rate],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            r: new_rate
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    cur_learning_rate = learning_seed
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index, cur_learning_rate)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                else: # Did not get a new best validation score.
                    cur_learning_rate /= 10

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #43
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=100,
                 hidden_layers_size=None,
                 n_outs=1,
                 L1_reg=0.00,
                 L2_reg=0.0001):
        if hidden_layers_size is None:
            hidden_layers_size = [100, 100]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_size)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        self.x = T.matrix('x')
        self.y = T.vector('y')

        for i in range(self.n_layers):
            if i == 0:
                input_sizes = n_ins
            else:
                input_sizes = hidden_layers_size[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_sizes,
                                        n_out=hidden_layers_size[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_sizes,
                            n_hidden=hidden_layers_size[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        self.linearRegressionLayer = LinearRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_size[-1],
            n_out=n_outs)
        self.L1 = abs(self.sigmoid_layers[-1].W).sum() + abs(
            self.linearRegressionLayer.W).sum()
        self.L2_sqr = (self.sigmoid_layers[-1].W**
                       2).sum() + (self.linearRegressionLayer.W**2).sum()
        self.squared_errors = self.linearRegressionLayer.squared_errors(self.y)
        self.finetune_cost = self.squared_errors + L1_reg * self.L1 + L2_reg * self.L2_sqr
        self.y_pred = self.linearRegressionLayer.p_y_given_x
        self.params = self.params + self.linearRegressionLayer.params
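As a rough illustration of the finetune cost assembled above (assuming squared_errors is a mean over the minibatch; the function name below is ours), the same quantity in plain NumPy looks like this:

import numpy

def finetune_cost(y_pred, y, W_hidden, W_out, L1_reg=0.00, L2_reg=0.0001):
    # squared prediction error, as in self.squared_errors
    squared_errors = numpy.mean((y_pred - y) ** 2)
    # L1 / L2 penalties on the last hidden layer and the regression layer,
    # mirroring self.L1 and self.L2_sqr above
    L1 = numpy.abs(W_hidden).sum() + numpy.abs(W_out).sum()
    L2_sqr = (W_hidden ** 2).sum() + (W_out ** 2).sum()
    return squared_errors + L1_reg * L1 + L2_reg * L2_sqr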
Example #44
def test_SdA(finetune_lr=0.1, pretraining_epochs=0,
             pretrain_lr=0.05, training_epochs=100,
             dataset='mnist.pkl.gz', batch_size=10):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    train_set_x=train_set_x.eval()
    train_set_y=train_set_y.eval()

    train_set_x_lab=train_set_x[:1000,:]
    train_set_x_unlab=train_set_x[1000:,:]
    train_set_y_lab=train_set_y[:1000]
    train_set_y_unlab=train_set_y[1000:]

    import theano
    train_set_x_lab=theano.shared(numpy.asarray(train_set_x_lab,
                                                dtype=theano.config.floatX),
                                  borrow=True)
    train_set_y_lab=theano.shared(numpy.asarray(train_set_y_lab,
                                                dtype=theano.config.floatX),
                                  borrow=True)
    train_set_y_lab=T.cast(train_set_y_lab, 'int32')
    train_set_x_unlab=theano.shared(numpy.asarray(train_set_x_unlab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    train_set_y_unlab=theano.shared(numpy.asarray(train_set_y_unlab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    train_set_y_unlab=T.cast(train_set_y_unlab, 'int32')

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_y_lab.eval().shape[0]
    n_train_batches /= batch_size
    n_train_batches_u = train_set_y_unlab.eval().shape[0]
    n_train_batches_u /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    hidden_layer_size = 100
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[100],
        n_outs=10
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x_unlab,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [0.1, 0.2, 0.3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],##$
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    fprop = theano.function(
            [],
            sda.output,
            givens={
                sda.x: test_set_x
            },
            name='fp'
        )
    Q=fprop()
    print 'rec', ((Q-test_set_x.eval())**2).mean()
    from utils import tile_raster_images,plot_weights
    import PIL.Image as Image
    image = Image.fromarray(
        tile_raster_images(X=sda.dA_layers[0].W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    # end-snippet-4
    #######################################
    # FINETUNING THE MODEL FOR REGRESSION #
    #######################################
    if 0:  # disabled: pretrain middle layer
        print '... pre-training MIDDLE layer'

        h1 = T.matrix('x')  # hidden representation computed from the inputs
        h2 = T.matrix('y')  # hidden representation computed from the targets
        log_reg = HiddenLayer(numpy_rng, h1, hidden_layer_size, hidden_layer_size)

        if 1: # for middle layer
            learning_rate = 0.05
            fprop_inp = theano.function(
                [],
                SdA_inp.sigmoid_layers[-1].output,
                givens={
                    SdA_inp.sigmoid_layers[0].input: train_set_x
                },
                name='fprop_inp'
            )
            fprop_out = theano.function(
                [],
                SdA_out.sigmoid_layers[-1].output,
                givens={
                    SdA_out.sigmoid_layers[0].input: train_set_y
                },
                name='fprop_out'
            )
            H11=fprop_inp() 
            H21=fprop_out()
            H1=N1.predict(train_set_x.eval())
            H2=N2.predict(train_set_y.eval())

            H1=theano.shared(H1)
            H2=theano.shared(H2)
            # compute the gradients with respect to the model parameters
            logreg_cost = log_reg.mse(h2)

            gparams = T.grad(logreg_cost, log_reg.params)
    
            # compute list of fine-tuning updates
            updates = [
                (param, param - gparam * learning_rate)
                for param, gparam in zip(log_reg.params, gparams)
            ]

            train_fn_middle = theano.function(
                inputs=[],
                outputs=logreg_cost,
                updates=updates,
                givens={
                    h1: H1,
                    h2: H2
                },
                name='train_middle'
            )
        epoch = 0
        while epoch < 10:
            print epoch, train_fn_middle()
            epoch += 1
            
    
    from mlp import MLP
    net = MLP(numpy_rng, train_set_x_lab, 28*14, hidden_layer_size, 28*14, W1=sda.dA_layers[0].W, b1=sda.dA_layers[0].b, W2=None, b2=None)
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
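The patience-based early stopping above recurs in every training loop in these examples; the following self-contained sketch (function and argument names are ours, not from the original code) isolates just that bookkeeping. `validate` is any callable returning the current validation loss.

def patience_early_stopping(validate, n_train_batches, n_epochs,
                            patience=10000, patience_increase=2,
                            improvement_threshold=0.995):
    """Return the best validation loss seen before patience runs out."""
    best_loss = float('inf')
    validation_frequency = min(n_train_batches, patience // 2)
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            it = (epoch - 1) * n_train_batches + minibatch_index
            if (it + 1) % validation_frequency == 0:
                loss = validate()
                if loss < best_loss:
                    # significant improvement: allow the search to run longer
                    if loss < best_loss * improvement_threshold:
                        patience = max(patience, it * patience_increase)
                    best_loss = loss
            if patience <= it:
                return best_loss
    return best_loss

# e.g. patience_early_stopping(lambda: 0.05, n_train_batches=100, n_epochs=10)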
Example #45
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset


    :learning_rate: 梯度下降法的学习率

    :n_epochs: 最大迭代次数

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :nkerns: 每个卷积层的卷积核个数,第一层卷积核个数为 nkerns[0]=20,第二层卷积核个数
    为50个
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)  # load the data; it contains three parts

    train_set_x, train_set_y = datasets[0]  # training data
    valid_set_x, valid_set_y = datasets[1]  # validation data
    test_set_x, test_set_y = datasets[2]  # test data

    # compute how many minibatches each split can be divided into
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]  # number of training examples
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size  # number of training batches
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))
    '''Build the first layer:
    image_shape: input feature maps of size 28*28, batch_size training examples, each with 1 feature map
    filter_shape: nkerns[0]=20 convolution kernels, so each training example yields 20 feature maps here
    convolution reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    pooling reduces it further to (24/2, 24/2) = (12, 12)
    the resulting output shape of this layer is (batch_size, nkerns[0], 12, 12)'''
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))
    '''Build the second layer: the input is batch_size training images; after the first layer's
    convolution each image has nkerns[0] feature maps of size 12*12
    convolution reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    pooling reduces it further to (8/2, 8/2) = (4, 4)
    the resulting output shape of this layer is (batch_size, nkerns[1], 4, 4)'''
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)
    '''Fully-connected layer: layer2_input is a 2D matrix whose first dimension indexes the samples
    and whose second dimension holds the features each sample obtained from the convolution and
    subsampling above; HiddenLayer is a single-layer network that maps the 800 units down to 500'''
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # final layer: a logistic regression classifier that maps the 500 hidden units
    # to 10 outputs, one for each handwritten digit 0-9
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # put all the parameters in a single list; parameter lists can simply be concatenated with +
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # compute the gradients of the cost with respect to the parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):  # for each minibatch of training data

            cost_ij = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
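The givens dictionaries above merely substitute minibatch slices of the shared datasets for x and y; in plain NumPy terms (with stand-in random data, not the real MNIST arrays) the slicing is simply:

import numpy

batch_size = 500
train_x = numpy.random.rand(50000, 784).astype('float32')   # stand-in for train_set_x
train_y = numpy.random.randint(0, 10, size=50000)            # stand-in for train_set_y

index = 3   # the minibatch index passed to train_model(index)
x_batch = train_x[index * batch_size: (index + 1) * batch_size]
y_batch = train_y[index * batch_size: (index + 1) * batch_size]
assert x_batch.shape == (batch_size, 784) and y_batch.shape == (batch_size,)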
Example #46
    def __init__(self, config=None, verbose=True):
        # Construct two GrCNNEncoders for matching two sentences
        self.encoderL = ExtGrCNNEncoder(config, verbose)
        self.encoderR = ExtGrCNNEncoder(config, verbose)
        # Link the parameters of two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Build three kinds of inputs:
        # 1. inputL, inputR: used for computing the score after training
        # 2. inputPL, inputPR: used for training on positive pairs
        # 3. inputNL, inputNR: used for training on negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Linking input-output mapping
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive 
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
        # Build hidden layer
        self.hidden_layer = HiddenLayer(self.hidden, (2*config.num_hidden, config.num_mlp), act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layers
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Build cost function
        self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class methods
        self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
        self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, 
                                                                 self.inputNL, self.inputNR],
                                                         outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                           outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                            outputs=[self.hiddenP, self.hiddenN])
        self.show_inputs = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                           outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])

        if verbose:
            logger.debug('Finished building the architecture of ExtGrCNNMatchScorer, summarized below: ')
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension inside GrCNNMatchScorer pyramid: %d' % config.num_hidden)
            logger.debug('Hidden dimension MLP: %d' % config.num_mlp)
            logger.debug('Number of Gating functions: %d' % config.num_gates)
            logger.debug('There are 2 ExtGrCNNEncoders used in the model.')
            logger.debug('Total number of parameters used in the model: %d' % self.num_params)
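For reference, the margin ranking cost built above, T.mean(T.maximum(0, 1 - scoreP + scoreN)), can be checked numerically with a small NumPy sketch (the scores below are made up):

import numpy

scoreP = numpy.array([2.3, 0.4, 1.1])   # hypothetical scores for positive pairs
scoreN = numpy.array([0.5, 0.9, 1.6])   # hypothetical scores for negative pairs

# hinge ranking loss: a pair contributes nothing once its positive score
# beats its negative score by a margin of at least 1
cost = numpy.mean(numpy.maximum(0.0, 1.0 - scoreP + scoreN))
print(cost)   # 1.0: only the second and third pairs violate the margin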
Example #47
def cnn(pre_run,kind, PV, true_out ,learning_rate=0.1, n_epochs=200,
        datasets='mnist.pkl.gz',batch_size=100,
        path="", name="",input_layer={},
        hidden={},ConvPool={},out_layer={}):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)


    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    array_PV = PV
    PV = theano.shared(value=0.5*numpy.ones(PV.shape,dtype="float32"),borrow=True) 
    
    true_out = theano.shared(value=true_out,borrow=True)
    assert PV.get_value().shape[0] == train_set_x.get_value().shape[0]
    z1 = T.matrix('z1')
    z2 = T.matrix('z2')

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, int(input_layer['channel']), 
                                          int(input_layer['width']),
                                          int(input_layer['height'])))

    # Construct the convolutional pooling layers described by the ConvPool
    # configuration dict. Each entry specifies its own image_shape,
    # filter_shape, poolsize and activation (1 = sigmoid, 2 = tanh, else None).
    #
    # CP is a list storing the ConvPool layers.
    CP = []
    for i in xrange(len(ConvPool)):
        tem = 'ConvPool'+ str(i)
        if i == 0:
            activation = None
            if int(ConvPool[tem]['activation']) == 1:
                activation = T.nnet.sigmoid
            if int(ConvPool[tem]['activation']) == 2:
                activation = T.tanh
            CP.append(LeNetConvPoolLayer( rng,
                                          activation = activation,
                                          input=layer0_input,
                                          image_shape=( batch_size, 
                                                        int(ConvPool[tem]['channel']), 
                                                        int(ConvPool[tem]['width']),
                                                        int(ConvPool[tem]['height'])),
                                          filter_shape=(int(ConvPool[tem]['filters']), 
                                                        int(ConvPool[tem]['channel']), 
                                                        int(ConvPool[tem]['filter_width']), 
                                                        int(ConvPool[tem]['filter_height'])),
                                          poolsize=(    int(ConvPool[tem]['pool_width']), 
                                                        int(ConvPool[tem]['pool_height']))))
        if i != 0:
            activation = None
            if int(ConvPool[tem]['activation']) == 1:
                activation = T.nnet.sigmoid
            if int(ConvPool[tem]['activation']) == 2:
                activation = T.tanh
            CP.append(LeNetConvPoolLayer( rng,
                                          activation = activation,
                                          input=CP[-1].output,
                                          image_shape=( batch_size, 
                                                        int(ConvPool[tem]['channel']), 
                                                        int(ConvPool[tem]['width']),
                                                        int(ConvPool[tem]['height'])),
                                          filter_shape=(int(ConvPool[tem]['filters']), 
                                                        int(ConvPool[tem]['channel']), 
                                                        int(ConvPool[tem]['filter_width']), 
                                                        int(ConvPool[tem]['filter_height'])),
                                          poolsize=(    int(ConvPool[tem]['pool_width']), 
                                                        int(ConvPool[tem]['pool_height']))))


    ConvPool_output = CP[-1].output.flatten(2)

    # construct the fully-connected hidden layers described by the hidden
    # configuration dict; HL is a list storing the hidden layers.
    HL = []
    for i in xrange(len(hidden)):
        ite = len(ConvPool) + i
        tem = 'hidden_layer_'+ str(ite)

        if ite == len(ConvPool):
            activation = None
            if int(hidden[tem]['activation']) == 1:
                activation = T.nnet.sigmoid
            if int(hidden[tem]['activation']) == 2:
                activation = T.tanh
            HL.append( HiddenLayer(rng,
                                   input=ConvPool_output,
                                   n_in =int(hidden[tem]['n_in']),
                                   n_out=int(hidden[tem]['n_out']),
                                   activation=activation))

        if ite != len(ConvPool):
            activation = None
            if int(hidden[tem]['activation']) == 1:
                activation = T.nnet.sigmoid
            if int(hidden[tem]['activation']) == 2:
                activation = T.tanh
            HL.append( HiddenLayer(rng,
                                   input=HL[-1].output,
                                   n_in =int(hidden[tem]['n_in']),
                                   n_out=int(hidden[tem]['n_out']),
                                   activation=activation))

    hidden_output = HL[-1].output

    # classify the values of the fully-connected output layer
    OutLayer = HiddenLayer(rng=rng,                        \
                           input=hidden_output,            \
                           n_in=int(out_layer['n_in']),    \
                           n_out=int(out_layer['n_out']),  \
                           activation=T.nnet.sigmoid,      \
                           kind=2)

    # the cost we minimize during training is the squared loss of the output
    # layer with respect to z1 (PV) and z2 (true_out)
    cost = OutLayer.sq_loss(z1,z2)
    y_x = OutLayer.output

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        [OutLayer.errors(y),y_x],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        OutLayer.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    # (copy OutLayer.params so that extending `params` does not mutate it)
    params = list(OutLayer.params)
    for layer in reversed(HL):
        params += layer.params
    for layer in reversed(CP):
        params += layer.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        [ cost,y_x],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],            
            z1:         PV[index * batch_size: (index + 1) * batch_size],
            z2:   true_out[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    Hpy_out = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        if epoch == pre_run:
            PV.set_value(array_PV)

        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            
            tem = train_model(minibatch_index)
            cost_ij = tem[0]
            if epoch == n_epochs:
                Hpy_out.append(tem[1])

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)[0]
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            #if patience <= iter:
            #    done_looping = True
            #    break
        

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # save the test results and return the training outputs (Hpy_out) collected in the final epoch
    test_out = []
    for minibatch_index in xrange(n_test_batches):
        tem = test_model(minibatch_index)
        test_out.append(tem[1])

    test_tem = numpy.asarray(test_out).reshape((n_test_batches * batch_size,  \
                                                        int(out_layer['n_out'])))
    cPickle.dump(test_tem,open("./config-example/test_tem/"+name+".pkl","wb"))
    return Hpy_out
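This function only consumes its configuration through dictionary lookups, so a hypothetical configuration inferred from the keys accessed above (values are illustrative only, roughly matching a LeNet on 28x28 single-channel inputs, and not taken from the original code) would look like:

# Hypothetical configuration dicts for cnn(), inferred from the keys it reads.
input_layer = {'channel': 1, 'width': 28, 'height': 28}

ConvPool = {
    'ConvPool0': {'activation': 2,            # 1 = sigmoid, 2 = tanh, else None
                  'channel': 1, 'width': 28, 'height': 28,
                  'filters': 20, 'filter_width': 5, 'filter_height': 5,
                  'pool_width': 2, 'pool_height': 2},
    'ConvPool1': {'activation': 2,
                  'channel': 20, 'width': 12, 'height': 12,
                  'filters': 50, 'filter_width': 5, 'filter_height': 5,
                  'pool_width': 2, 'pool_height': 2},
}

hidden = {
    # keys are named 'hidden_layer_<len(ConvPool) + i>'
    'hidden_layer_2': {'activation': 1, 'n_in': 50 * 4 * 4, 'n_out': 500},
}

out_layer = {'n_in': 500, 'n_out': 10}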