def construct_model(params, datasets, filter_hs=[3, 4, 5], batch_size=200):
    """Rebuild the CNN population classifier from saved weights and score it.

    The network (embedding lookup -> parallel conv/pool layers -> MLP) is
    re-created from ``params`` and then run, without any training, over the
    first 90% of the batch-padded and shuffled ``datasets[0]``.

    :params: dict with keys ``"embedding"`` (2-D word-embedding matrix),
        ``"convs"`` (sequence of ``(W, b)`` pairs, one per conv layer) and
        ``"clf"`` (a single ``(W, b)`` pair for the classifier).
    :datasets: sequence whose first element is a 2-D numpy array.  Each row
        holds word indices followed by two trailing columns; only the
        second-to-last column is read here (as the population label).
    :filter_hs: only its length is used: it decides how many entries of
        ``params["convs"]`` are turned into conv layers.  It is never
        mutated.
    :batch_size: mini-batch size the graph is built for.
    :returns: dict with ``"pop_preds"`` (predictions for every evaluated
        row) and ``"pop_truth"`` (the matching labels as ints).
    """
    rng = np.random.RandomState(1234)
    float_x = theano.config.floatX

    # The last two columns of every row are labels, not word indices.
    input_height = len(datasets[0][0]) - 2
    word_embedding = params["embedding"]
    input_width = word_embedding.shape[1]
    conv_params = params["convs"]
    filter_shapes = [p[0].shape for p in conv_params]
    pool_sizes = [(input_height - s[2] + 1, input_width - s[3] + 1)
                  for s in filter_shapes]
    param_sizes = {
        "input_height": input_height,
        "input_width": input_width,
        "filter_shapes": filter_shapes,
        "pool_sizes": pool_sizes
    }
    # Single-argument form prints the same text as the old
    # `print "Param sizes: ", param_sizes` statement.
    print("Param sizes:  %s" % (param_sizes,))

    index = T.iscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    print('....Construct model')
    words = shared(word_embedding, name='embedding')
    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], words.shape[1]))

    # construct layers: one conv/pool layer per entry of filter_hs, each
    # initialised from the stored (W, b) pair at the same position
    layer1_inputs = []
    for i in range(len(filter_hs)):
        conv_W = shared(value=np.asarray(conv_params[i][0], dtype=float_x),
                        borrow=True,
                        name='conv_W')
        conv_b = shared(value=np.asarray(conv_params[i][1], dtype=float_x),
                        borrow=True,
                        name='conv_b')
        conv_layer = nn.ConvPoolLayer(rng,
                                      input=layer0_input,
                                      input_shape=(batch_size, 1,
                                                   input_height, input_width),
                                      filter_shape=filter_shapes[i],
                                      pool_size=pool_sizes[i],
                                      activation=ReLU,
                                      W=conv_W,
                                      b=conv_b)
        layer1_inputs.append(conv_layer.output.flatten(2))
    layer1_input = T.concatenate(layer1_inputs, 1)

    # population classifier
    pop_hidden_units = [300, 13]
    clf_w, clf_b = params["clf"]
    Ws = [
        shared(value=np.asarray(clf_w, dtype=float_x),
               borrow=True,
               name='logis_w')
    ]
    bs = [
        shared(value=np.asarray(clf_b, dtype=float_x),
               borrow=True,
               name='logis_b')
    ]
    pop_classifier = nn.MLPDropout(rng,
                                   input=layer1_input,
                                   layer_sizes=pop_hidden_units,
                                   dropout_rates=[0.5],
                                   activations=[ReLU],
                                   Ws=Ws,
                                   bs=bs)
    pop_loss = pop_classifier.errors(y)
    pop_pred = pop_classifier.preds

    # construct data set: pad with randomly chosen rows so that the row
    # count is a multiple of batch_size
    data = datasets[0]
    remainder = data.shape[0] % batch_size
    if remainder > 0:
        filler = np.random.permutation(data)[:batch_size - remainder]
        new_data = np.append(data, filler, axis=0)
    else:
        # The original read the undefined name `dataset` here and raised
        # NameError whenever no padding was needed.
        new_data = data
    new_data = np.random.permutation(new_data)
    # Row count is an exact multiple of batch_size at this point.
    n_batches = new_data.shape[0] // batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    train_set = new_data[:n_train_batches * batch_size, :]

    train_set_x = theano.shared(np.asarray(train_set[:, :input_height],
                                           dtype=float_x),
                                borrow=True)
    # Labels are stored as floatX and cast back to int32 in the graph.
    train_set_pop_y = T.cast(
        theano.shared(np.asarray(train_set[:, -2], dtype=float_x),
                      borrow=True), 'int32')

    print('...construct test function')
    batch = slice(index * batch_size, (index + 1) * batch_size)
    test_fn = function(inputs=[index],
                       outputs=[pop_loss, pop_pred],
                       givens={
                           x: train_set_x[batch],
                           y: train_set_pop_y[batch]
                       })

    results = [test_fn(i) for i in range(n_train_batches)]
    pop_train_perf = 1 - np.mean([r[0] for r in results])
    pop_predictions = np.concatenate([r[1] for r in results])

    rs = {
        "pop_preds": list(pop_predictions),
        "pop_truth": list(map(int, train_set[:, -2])),
    }
    print("Population Train Performance %f" % pop_train_perf)
    return rs
def run_cnn(exp_name,
            dataset,
            embedding,
            log_fn,
            perf_fn,
            k=0,
            emb_dm=100,
            batch_size=100,
            filter_hs=[1, 2, 3],
            hidden_units=[200, 100, 11],
            dropout_rate=0.5,
            shuffle_batch=True,
            n_epochs=300,
            lr_decay=0.95,
            activation=ReLU,
            sqr_norm_lim=9,
            non_static=True,
            print_freq=5):
    """
    Train and evaluate the CNN event model with word/frequency/position
    inputs and learned sentence scores.

    :exp_name: prefix for the per-epoch prediction and score files
    :dataset: ``dataset[0]`` is the training split and ``dataset[1]`` the
        evaluation split.  Each split is handed to ``shared_dataset``, which
        must return five shared variables (word, frequency, position and
        sentence inputs plus event labels).  Element 0 of a split is indexed
        as ``[document][sentence][word]``; element 4 of ``dataset[1]`` is
        read as the evaluation labels.
    :embedding: word embedding with shape (|V| * emb_dm)
    :log_fn: path of the log file (truncated on every call)
    :perf_fn: directory that receives ``<exp_name>_<epoch>.event_pred``
    :k: forwarded unchanged to ``keep_max``
    :emb_dm: word embedding dimension
    :batch_size: number of documents per mini-batch
    :filter_hs: filter height for each parallel cnn layer
    :hidden_units: layer sizes of the classifier; element 0 is the number of
        feature maps per conv layer.  The list is copied, never modified.
    :dropout_rate: dropout rate for fully connected layers
    :n_epochs: the max number of iterations
    :lr_decay, sqr_norm_lim: forwarded to ``sgd_updates_adadelta``
    :activation: activation for conv and hidden layers
    :shuffle_batch, non_static, print_freq: accepted for interface
        compatibility; not read by this function (batches are always
        shuffled, all embeddings are always trained, and evaluation runs
        every epoch).
    :returns: None.  Results are written to ``log_fn``, ``perf_fn`` and
        ``./results/``.
    """
    # Work on a copy: element 0 is overwritten below and the default list is
    # shared between calls, so in-place edits would leak into later calls.
    hidden_units = list(hidden_units)
    float_x = theano.config.floatX

    rng = np.random.RandomState(1234)
    input_height = len(dataset[0][0][0][0])
    print("--input height  %s" % (input_height,))
    num_maps = hidden_units[0]

    ###################
    # start snippet 1 #
    ###################
    print("start to construct the model ....")
    word_x = T.tensor3("word_x")
    freq_x = T.tensor3("freq_x")
    pos_x = T.tensor3("pos_x")
    sent_x = T.matrix("sent_x")
    y_event = T.ivector("y_event")

    words = shared(value=np.asarray(embedding, dtype=float_x),
                   name="embedding",
                   borrow=True)

    sym_dim = 20
    # the frequency embedding is 21 * sym_dim matrix
    freq_val = np.random.random((21, sym_dim)).astype(float_x)
    freqs = shared(value=freq_val, borrow=True, name="freqs")

    pos_val = np.random.random((21, sym_dim)).astype(float_x)
    poss = shared(value=pos_val, borrow=True, name="poss")

    # define functions to keep the padding vector (row 0) as zero
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(emb_dm, dtype=float_x)
    set_zero = function([zero_vector_tensor],
                        updates=[(words,
                                  T.set_subtensor(words[0, :],
                                                  zero_vector_tensor))])

    freq_zero_tensor = T.vector()
    freq_zero_vec = np.zeros(sym_dim, dtype=float_x)
    freq_set_zero = function([freq_zero_tensor],
                             updates=[(freqs,
                                       T.set_subtensor(freqs[0, :],
                                                       freq_zero_tensor))])

    pos_zero_tensor = T.vector()
    pos_zero_vec = np.zeros(sym_dim, dtype=float_x)
    pos_set_zero = function([pos_zero_tensor],
                            updates=[(poss,
                                      T.set_subtensor(poss[0, :],
                                                      pos_zero_tensor))])

    # Every sentence becomes one "image": (docs * sentences, 1, words, dim)
    word_x_emb = words[T.cast(word_x.flatten(), dtype="int32")].reshape(
        (word_x.shape[0] * word_x.shape[1], 1, word_x.shape[2], emb_dm))
    freq_x_emb = freqs[T.cast(freq_x.flatten(), dtype="int32")].reshape(
        (freq_x.shape[0] * freq_x.shape[1], 1, freq_x.shape[2], sym_dim))
    pos_x_emb = poss[T.cast(pos_x.flatten(), dtype="int32")].reshape(
        (pos_x.shape[0] * pos_x.shape[1], 1, pos_x.shape[2], sym_dim))

    layer0_input = T.concatenate([word_x_emb, freq_x_emb, pos_x_emb], axis=3)

    conv_layers = []
    layer1_inputs = []
    for filter_h in filter_hs:
        filter_shape = (num_maps, 1, filter_h, emb_dm + sym_dim + sym_dim)
        pool_size = (input_height - filter_h + 1, 1)
        conv_layer = nn.ConvPoolLayer(rng,
                                      input=layer0_input,
                                      input_shape=None,
                                      filter_shape=filter_shape,
                                      pool_size=pool_size,
                                      activation=activation)
        # construct multi-layer sentence vectors
        sen_vecs = conv_layer.output.reshape(
            (word_x.shape[0], 1, word_x.shape[1], num_maps))
        conv_layers.append(conv_layer)
        layer1_inputs.append(sen_vecs)

    sen_vec = T.concatenate(layer1_inputs, 3)

    # score the sentences
    theta_value = np.random.random((len(filter_hs) * num_maps, 1))
    theta = shared(value=np.asarray(theta_value, dtype=float_x),
                   name="theta",
                   borrow=True)
    weighted_sen_vecs, sen_score = keep_max(sen_vec, theta, k, sent_x)
    sen_score_cost = T.mean(T.sum(sen_score, axis=2).flatten(1))
    doc_vec = T.sum(weighted_sen_vecs, axis=2)
    layer1_input = doc_vec.flatten(2)
    final_sen_score = sen_score.flatten(2)

    ##############
    # classifier #
    ##############
    params = []
    for conv_layer in conv_layers:
        params += conv_layer.params
    params.append(theta)
    params.append(words)
    params.append(freqs)
    params.append(poss)

    gamma = as_floatX(0.001)
    beta1 = as_floatX(0.000)

    total_cost = gamma * sen_score_cost
    total_dropout_cost = gamma * sen_score_cost

    print("Construct classifier ....")
    hidden_units[0] = num_maps * len(filter_hs)
    model = nn.MLPDropout(rng,
                          input=layer1_input,
                          layer_sizes=hidden_units,
                          dropout_rates=[dropout_rate],
                          activations=[activation])
    params += model.params

    cost = model.negative_log_likelihood(y_event)
    dropout_cost = model.dropout_negative_log_likelihood(y_event)

    total_cost += cost + beta1 * model.L1
    total_dropout_cost += dropout_cost + beta1 * model.L1

    # using adadelta
    total_grad_updates = sgd_updates_adadelta(params, total_dropout_cost,
                                              lr_decay, 1e-6, sqr_norm_lim)
    total_preds = model.preds

    #####################
    # Construct Dataset #
    #####################
    print("Copy data to GPU and constrct train/valid/test func")
    (train_word_x, train_freq_x, train_pos_x, train_sent_x,
     train_event_y) = shared_dataset(dataset[0])
    (test_word_x, test_freq_x, test_pos_x, test_sent_x,
     test_event_y) = shared_dataset(dataset[1])

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    #####################
    # Train model func  #
    #####################
    index = T.iscalar()

    def _batch(var):
        # Rows of `var` that belong to mini-batch number `index`.
        return var[index * batch_size:(index + 1) * batch_size]

    train_func = function([index],
                          total_cost,
                          updates=total_grad_updates,
                          givens={
                              word_x: _batch(train_word_x),
                              freq_x: _batch(train_freq_x),
                              pos_x: _batch(train_pos_x),
                              sent_x: _batch(train_sent_x),
                              y_event: _batch(train_event_y),
                          })

    test_givens = {
        word_x: _batch(test_word_x),
        freq_x: _batch(test_freq_x),
        pos_x: _batch(test_pos_x),
        sent_x: _batch(test_sent_x),
    }
    test_pred = function([index], total_preds, givens=test_givens)
    test_sentence_est = function([index], final_sen_score, givens=test_givens)

    cpu_tst_event_y = np.asarray(dataset[1][4])
    best_score = 0.0
    epoch = 0

    # The context manager closes the log even when training raises.
    with open(log_fn, 'w') as log_file:
        print("Start to train the model.....")
        while epoch < n_epochs:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                costs.append(train_func(minibatch_index))
                # The update touched the padding rows; reset them to zero.
                set_zero(zero_vec)
                freq_set_zero(freq_zero_vec)
                pos_set_zero(pos_zero_vec)

            # do test (every epoch)
            test_event_preds = np.concatenate(
                [test_pred(i) for i in range(n_test_batches)])
            test_event_score = np.mean(
                np.equal(cpu_tst_event_y, test_event_preds))
            precision, recall, f1, _support = precision_recall_fscore_support(
                cpu_tst_event_y, test_event_preds, pos_label=1)

            pred_path = os.path.join(perf_fn,
                                     "%s_%d.event_pred" % (exp_name, epoch))
            with open(pred_path, 'w') as epf:
                for p in test_event_preds:
                    epf.write("%d\n" % int(p))

            message = "Epoch %d test event perf %f, precision [%f, %f], recall[%f %f] , f1[%f, %f], train cost %f" % (
                epoch, test_event_score, precision[0], precision[1],
                recall[0], recall[1], f1[0], f1[1], np.mean(costs))
            print(message)
            log_file.write(message + "\n")
            log_file.flush()

            # Model selection uses the F-score of the class at index 1.
            evl_score = f1[1]
            if evl_score > best_score:
                best_score = evl_score
                # save the sentence score
                test_sen_score = [
                    test_sentence_est(i) for i in range(n_test_batches)
                ]
                score_file = "./results/%s_%d_test.score" % (exp_name, epoch)
                with open(score_file, "wb") as sm:
                    cPickle.dump(test_sen_score, sm)

            end_time = timeit.default_timer()
            print("Finish one iteration using %f m" %
                  ((end_time - start_time) / 60.))
        log_file.flush()
def train_cnn_encoder(datasets,
                      word_embedding,
                      input_width=64,
                      filter_hs=[3, 4, 5],
                      hidden_units=[100, 2],
                      dropout_rate=[0.5],
                      shuffle_batch=True,
                      n_epochs=100,
                      batch_size=50,
                      lr_decay=0.95,
                      activations=[ReLU],
                      sqr_norm_lim=9,
                      non_static=True):
    """
    Jointly train a shared CNN sentence encoder on two classification tasks
    (population and event type) and report test accuracy.

    :datasets: ``datasets[0]`` is the training array and ``datasets[1]`` the
        test array.  Both are 2-D numpy arrays whose rows hold word indices
        followed by two label columns: second-to-last = population label,
        last = event-type label.
    :word_embedding: initial value of the embedding matrix; row 0 is reset
        to zero after every update (padding vector).
    :input_width: embedding dimension; also used as the filter width.
    :filter_hs: filter height for each parallel conv layer.
    :hidden_units: layer sizes of the population classifier; element 0 is
        the number of feature maps per conv layer.  The list is copied,
        never modified.
    :dropout_rate, activations: forwarded to ``nn.MLPDropout``.
    :shuffle_batch: visit the training mini-batches in random order.
    :n_epochs: number of passes over the training data.
    :batch_size: rows per mini-batch; both arrays are padded with randomly
        chosen rows of their own up to a multiple of it.
    :lr_decay, sqr_norm_lim: forwarded to ``sgd_updates_adadelta``.
    :non_static: when true the word embedding is trained as well.
    :returns: ``(test_pop_perf, test_type_perf)`` -- test accuracy measured
        at the most recent epoch whose validation accuracy matched or beat
        the best so far.  Either entry is ``None`` if it was never measured
        (for example ``n_epochs == 0``).
    """
    # Work on a copy: element 0 is overwritten below and the default list is
    # shared between calls, so in-place edits would leak into later calls.
    hidden_units = list(hidden_units)
    float_x = theano.config.floatX

    start_time = timeit.default_timer()
    rng = np.random.RandomState(1234)
    # The last two columns of every row are labels, not word indices.
    input_height = len(datasets[0][0]) - 2
    filter_width = input_width
    feature_maps = hidden_units[0]
    filter_shapes = [(feature_maps, 1, filter_h, filter_width)
                     for filter_h in filter_hs]
    pool_sizes = [(input_height - filter_h + 1,
                   input_width - filter_width + 1) for filter_h in filter_hs]

    parameters = [("Input Shape", input_height, input_width),
                  ("Filter Shape", filter_shapes),
                  ("Pool Sizes", pool_sizes),
                  ("dropout rate", dropout_rate),
                  ("hidden units", hidden_units),
                  ("shuffle_batch", shuffle_batch),
                  ("n_epochs", n_epochs),
                  ("batch size", batch_size)]
    print(parameters)

    # construct the model
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    words = shared(value=word_embedding, name="embedding")

    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(input_width, dtype=float_x)
    set_zero = function([zero_vector_tensor],
                        updates=[(words,
                                  T.set_subtensor(words[0, :],
                                                  zero_vector_tensor))])

    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], words.shape[1]))

    conv_layers = []
    layer1_inputs = []
    for filter_shape, pool_size in zip(filter_shapes, pool_sizes):
        conv_layer = nn.ConvPoolLayer(rng,
                                      input=layer0_input,
                                      input_shape=(batch_size, 1,
                                                   input_height, input_width),
                                      filter_shape=filter_shape,
                                      pool_size=pool_size,
                                      activation=ReLU)
        conv_layers.append(conv_layer)
        layer1_inputs.append(conv_layer.output.flatten(2))

    layer1_input = T.concatenate(layer1_inputs, 1)

    def _trainable_params(classifier):
        # Classifier + conv (+ embedding) parameters, gathered in a new list
        # so the classifier's own `params` attribute is left untouched.
        collected = list(classifier.params)
        for layer in conv_layers:
            collected += layer.params
        if non_static:
            collected.append(words)
        return collected

    ###################
    # Population Task #
    ###################
    hidden_units[0] = feature_maps * len(filter_hs)
    pop_classifier = nn.MLPDropout(rng,
                                   input=layer1_input,
                                   layer_sizes=hidden_units,
                                   dropout_rates=dropout_rate,
                                   activations=activations)
    pop_params = _trainable_params(pop_classifier)
    pop_cost = pop_classifier.negative_log_likelihood(y)
    pop_dropout_cost = pop_classifier.dropout_negative_log_likelihood(y)
    pop_grad_updates = sgd_updates_adadelta(pop_params, pop_dropout_cost,
                                            lr_decay, 1e-6, sqr_norm_lim)

    ###################
    # EventType Task  #
    ###################
    event_type_hidden_units = [feature_maps * len(filter_hs), 12]
    type_classifier = nn.MLPDropout(rng,
                                    input=layer1_input,
                                    layer_sizes=event_type_hidden_units,
                                    dropout_rates=dropout_rate,
                                    activations=activations)
    type_params = _trainable_params(type_classifier)
    type_cost = type_classifier.negative_log_likelihood(y)
    type_dropout_cost = type_classifier.dropout_negative_log_likelihood(y)
    type_grad_updates = sgd_updates_adadelta(type_params, type_dropout_cost,
                                             lr_decay, 1e-6, sqr_norm_lim)

    ######################
    # Construct Data Set #
    ######################
    def _pad_to_full_batches(data):
        # Append randomly chosen rows of `data` until the row count is a
        # multiple of batch_size; returns `data` itself when it already is.
        remainder = data.shape[0] % batch_size
        if remainder == 0:
            return data
        filler = np.random.permutation(data)[:batch_size - remainder]
        return np.append(data, filler, axis=0)

    # The order of the random draws below (pad train, shuffle train, pad
    # test) matches the original code so results stay reproducible.
    np.random.seed(1234)
    new_data = np.random.permutation(_pad_to_full_batches(datasets[0]))
    n_batches = new_data.shape[0] // batch_size
    n_train_batches = int(np.round(n_batches * 0.9))

    new_test_data = _pad_to_full_batches(datasets[1])
    test_set_x = new_test_data[:, :input_height]
    test_set_pop_y = np.asarray(new_test_data[:, -2], "int32")
    test_set_type_y = np.asarray(new_test_data[:, -1], "int32")

    # divide the train set into train/val sets
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    print(train_set[:, -1])

    borrow = True

    def _shared_float(values):
        return theano.shared(np.asarray(values, dtype=float_x), borrow=borrow)

    def _shared_label(values):
        # Labels are stored as floatX and cast back to int32 in the graph.
        return T.cast(_shared_float(values), 'int32')

    train_set_x = _shared_float(train_set[:, :input_height])
    train_set_pop_y = _shared_label(train_set[:, -2])
    train_set_type_y = _shared_label(train_set[:, -1])

    val_set_x = _shared_float(val_set[:, :input_height])
    val_set_pop_y = _shared_label(val_set[:, -2])
    val_set_type_y = _shared_label(val_set[:, -1])

    n_val_batches = n_batches - n_train_batches
    n_test_batches = test_set_x.shape[0] // batch_size
    print('n_test_batches: %d' % n_test_batches)

    # transform the data into shared variable for GPU computing
    test_set_x = _shared_float(test_set_x)
    test_set_pop_y = theano.shared(test_set_pop_y, borrow=True)
    test_set_type_y = theano.shared(test_set_type_y, borrow=True)

    ####################
    # Train Model Func #
    ####################
    def _batch(var):
        # Rows of `var` that belong to mini-batch number `index`.
        return var[index * batch_size:(index + 1) * batch_size]

    def _error_fn(classifier, data_x, data_y):
        return function([index],
                        classifier.errors(y),
                        givens={x: _batch(data_x), y: _batch(data_y)})

    # population model
    val_pop_model = _error_fn(pop_classifier, val_set_x, val_set_pop_y)
    train_eval_pop_model = _error_fn(pop_classifier, train_set_x,
                                     train_set_pop_y)
    real_test_pop_model = _error_fn(pop_classifier, test_set_x,
                                    test_set_pop_y)
    train_pop_model = function([index],
                               pop_cost,
                               updates=pop_grad_updates,
                               givens={
                                   x: _batch(train_set_x),
                                   y: _batch(train_set_pop_y)
                               })

    # event type model
    val_type_model = _error_fn(type_classifier, val_set_x, val_set_type_y)
    train_eval_type_model = _error_fn(type_classifier, train_set_x,
                                      train_set_type_y)
    real_test_type_model = _error_fn(type_classifier, test_set_x,
                                     test_set_type_y)
    train_type_model = function([index],
                                type_cost,
                                updates=type_grad_updates,
                                givens={
                                    x: _batch(train_set_x),
                                    y: _batch(train_set_type_y)
                                })

    # start to training the model
    print("Start training the model....")
    epoch = 0
    best_pop_val_perf = 0
    best_type_val_perf = 0
    # Defined up front so the return statement cannot hit an unbound name.
    test_pop_perf = None
    test_type_perf = None
    while epoch < n_epochs:
        epoch += 1
        if shuffle_batch:
            batch_order = np.random.permutation(range(n_train_batches))
        else:
            batch_order = range(n_train_batches)
        for minibatch_index in batch_order:
            # Progress output was only ever produced in shuffled mode.
            if shuffle_batch and minibatch_index % 10 == 0:
                print(minibatch_index)
            # Alternate the two tasks on the same mini-batch and restore
            # the zero padding vector after each update.
            train_pop_model(minibatch_index)
            set_zero(zero_vec)
            train_type_model(minibatch_index)
            set_zero(zero_vec)

        train_pop_perf = 1 - np.mean(
            [train_eval_pop_model(i) for i in range(n_train_batches)])
        train_type_perf = 1 - np.mean(
            [train_eval_type_model(i) for i in range(n_train_batches)])
        val_pop_perf = 1 - np.mean(
            [val_pop_model(i) for i in range(n_val_batches)])
        val_type_perf = 1 - np.mean(
            [val_type_model(i) for i in range(n_val_batches)])

        print('epoch %i, train pop perf %f %%, val pop perf %f' %
              (epoch, train_pop_perf * 100., val_pop_perf * 100.))
        print('epoch %i, train type perf %f %%, val type perf %f' %
              (epoch, train_type_perf * 100., val_type_perf * 100.))

        if val_pop_perf >= best_pop_val_perf:
            best_pop_val_perf = val_pop_perf
            test_pop_perf = 1 - np.mean(
                [real_test_pop_model(i) for i in range(n_test_batches)])
            print("Test POP Performance %f under Current Best Valid perf %f" %
                  (test_pop_perf, val_pop_perf))

        if val_type_perf >= best_type_val_perf:
            best_type_val_perf = val_type_perf
            test_type_perf = 1 - np.mean(
                [real_test_type_model(i) for i in range(n_test_batches)])
            print("Test Type Performance %f under Current Best Valid perf %f" %
                  (test_type_perf, val_type_perf))

        end_time = timeit.default_timer()
        print("Epoch %d finish take time %fm " %
              (epoch, (end_time - start_time) / 60.))
        start_time = timeit.default_timer()
    return test_pop_perf, test_type_perf
def run_cnn(exp_name,
            dataset,
            embedding,
            log_fn,
            perf_fn,
            emb_dm=100,
            batch_size=100,
            filter_hs=[1, 2, 3],
            hidden_units=[200, 100, 11],
            dropout_rate=0.5,
            shuffle_batch=True,
            n_epochs=300,
            lr_decay=0.95,
            activation=ReLU,
            sqr_norm_lim=9,
            non_static=True,
            sen_weight=False):
    """
    Train and Evaluate CNN event encoder model

    :exp_name: prefix for the prediction and model files that are written
    :dataset: list containing three elements[(train_x, train_y),
        (valid_x, valid_y), (test_x, test_y)]; each x is indexed as
        ``[document][sentence][word]``
    :embedding: word embedding with shape (|V| * emb_dm)
    :log_fn: path of the log file (opened in append mode)
    :perf_fn: directory that receives ``<exp_name>_<epoch>.pred``
    :emb_dm: word embedding dimension
    :batch_size: number of documents per mini-batch
    :filter_hs: filter height for each parallel cnn layer
    :hidden_units: layer sizes of the classifier; element 0 is the number of
        feature maps per conv layer.  The list is copied, never modified.
    :dropout_rate: dropout rate for fully connected layers
    :n_epochs: the max number of iterations
    :lr_decay, sqr_norm_lim: forwarded to ``sgd_updates_adadelta``
    :activation: activation for conv and hidden layers
    :non_static: when true the word embedding is trained as well
    :sen_weight: scale each sentence vector by a position-dependent weight
        before the sentence vectors are summed
    :shuffle_batch: accepted for interface compatibility; not read here
        (mini-batches are always shuffled)
    :returns: None.  Test accuracy is logged every fifth epoch and the
        parameters are pickled to ``<exp_name>_<epoch>.model`` whenever that
        accuracy improves.
    """
    # Work on a copy: element 0 is overwritten below and the default list is
    # shared between calls, so in-place edits would leak into later calls.
    hidden_units = list(hidden_units)
    float_x = theano.config.floatX

    rng = np.random.RandomState(1234)
    input_height = len(dataset[0][0][0][0])  # number of words in the sentences
    print("--input height  %s" % (input_height,))
    input_width = emb_dm
    num_maps = hidden_units[0]

    ###################
    # start snippet 1 #
    ###################
    print("start to construct the model ....")
    x = T.tensor3("x")
    y = T.ivector("y")

    words = shared(value=np.asarray(embedding, dtype=float_x),
                   name="embedding",
                   borrow=True)

    # define function to keep padding vector as zero
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(input_width, dtype=float_x)
    set_zero = function([zero_vector_tensor],
                        updates=[(words,
                                  T.set_subtensor(words[0, :],
                                                  zero_vector_tensor))])

    # the input for the sentence level conv layers:
    # every sentence becomes one "image" (docs * sentences, 1, words, emb_dm)
    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], emb_dm))

    conv_layers = []
    layer1_inputs = []
    for filter_h in filter_hs:
        filter_shape = (num_maps, 1, filter_h, emb_dm)
        pool_size = (input_height - filter_h + 1, 1)
        conv_layer = nn.ConvPoolLayer(rng,
                                      input=layer0_input,
                                      input_shape=None,
                                      filter_shape=filter_shape,
                                      pool_size=pool_size,
                                      activation=activation)
        sen_vecs = conv_layer.output.reshape(
            (x.shape[0], x.shape[1], num_maps))
        # -> (docs, num_maps, sentences)
        sen_vecs = sen_vecs.dimshuffle(0, 2, 1)

        # construct the weighted sentences
        if sen_weight:
            # NOTE(review): the numerator uses x.shape[0] (documents per
            # batch) although the weights run over x.shape[1] sentences --
            # confirm this is intended.
            s_w = T.arange(1, x.shape[1] + 1)
            s_w = (1.0 * x.shape[0] - s_w) / T.sum(s_w)
            sen_vecs = sen_vecs * s_w

        # the document vector is the sum of its sentence vectors
        doc_vec = T.sum(sen_vecs, axis=2).flatten(2)
        layer1_inputs.append(doc_vec)
        conv_layers.append(conv_layer)

    layer1_input = T.concatenate(layer1_inputs, 1)

    ##############
    # classifier #
    ##############
    print("Construct classifier ....")
    hidden_units[0] = num_maps * len(filter_hs)
    model = nn.MLPDropout(rng,
                          input=layer1_input,
                          layer_sizes=hidden_units,
                          dropout_rates=[dropout_rate],
                          activations=[activation])

    # Collect into a new list so model.params itself is not extended.
    params = list(model.params)
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params.append(words)

    cost = model.negative_log_likelihood(y)
    dropout_cost = model.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    #####################
    # Construct Dataset #
    #####################
    print("Copy data to GPU and constrct train/valid/test func")
    np.random.seed(1234)

    train_x, train_y = shared_dataset(dataset[0])
    valid_x, valid_y = shared_dataset(dataset[1])
    test_x, test_y = shared_dataset(dataset[2])

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_valid_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[2][0]) / batch_size))

    #####################
    # Train model func  #
    #####################
    index = T.iscalar()

    def _batch(var):
        # Rows of `var` that belong to mini-batch number `index`.
        return var[index * batch_size:(index + 1) * batch_size]

    train_func = function([index],
                          cost,
                          updates=grad_updates,
                          givens={x: _batch(train_x), y: _batch(train_y)})

    # NOTE(review): this applies gradient updates on the validation split,
    # i.e. the validation data is used as additional training data.
    valid_train_func = function([index],
                                cost,
                                updates=grad_updates,
                                givens={
                                    x: _batch(valid_x),
                                    y: _batch(valid_y)
                                })

    test_pred = function([index], model.preds, givens={x: _batch(test_x)})

    cpu_tst_y = np.asarray(dataset[2][1])
    best_test_score = 0.
    epoch = 0

    # The context manager closes the log even when training raises.
    with open(log_fn, 'a') as log_file:
        print("Start to train the model.....")
        while epoch < n_epochs:
            start_time = timeit.default_timer()
            epoch += 1
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                train_func(minibatch_index)
                # The update touched the padding row; reset it to zero.
                set_zero(zero_vec)

            # second pass: keep training on the validation batches
            # NOTE(review): set_zero is not called after these updates, so
            # the padding row can be non-zero at test time -- confirm.
            for valid_index in np.random.permutation(range(n_valid_batches)):
                valid_train_func(valid_index)

            if epoch % 5 == 0:
                # do test
                test_preds = np.concatenate(
                    [test_pred(i) for i in range(n_test_batches)])
                test_score = np.mean(np.equal(cpu_tst_y, test_preds))

                pred_path = os.path.join(perf_fn,
                                         "%s_%d.pred" % (exp_name, epoch))
                with open(pred_path, 'w') as epf:
                    for p in test_preds:
                        epf.write("%d\n" % int(p))

                message = "Epoch %d test perf %f" % (epoch, test_score)
                print(message)
                log_file.write(message + "\n")
                log_file.flush()

                # store the best model
                if test_score > best_test_score:
                    best_test_score = test_score
                    # save the model: one pickle record per parameter
                    model_name = "%s_%d.model" % (exp_name, epoch)
                    with open(model_name, 'wb') as bm:
                        for p in params:
                            cPickle.dump(p.get_value(), bm)

            end_time = timeit.default_timer()
            print("Finish one iteration using %f m" %
                  ((end_time - start_time) / 60.))
        log_file.flush()
def run_cnn(exp_name, dataset, embedding, log_fn, perf_fn, k=0, emb_dm=100,
            batch_size=100, filter_hs=[1, 2, 3], hidden_units=[200, 100, 11],
            dropout_rate=0.5, shuffle_batch=True, n_epochs=300, lr_decay=0.95,
            activation=ReLU, sqr_norm_lim=9, non_static=True, print_freq=5):
    """
    Train a two-head (population / type) CNN document classifier and score
    it on the test split every `print_freq` epochs.

    Word ids are embedded, every sentence goes through one ConvPoolLayer per
    filter height, the concatenated sentence vectors are scored/weighted by
    `keep_max`, reduced with a max over axis 2 into one document vector and
    fed to two MLPDropout classifiers. The sum of both dropout losses is
    optimized with `sgd_updates_adadelta`.

    :exp_name: prefix of every prediction / score file that is written
    :dataset: [train, test]; each split is indexed as (x, pop_y, type_y)
        and x is indexed as x[doc][sentence][word]
    :embedding: word embedding matrix; row 0 is reset to zero after every
        update (padding vector)
    :log_fn: path of the log file (opened in 'w' mode, i.e. truncated)
    :perf_fn: directory receiving "<exp_name>_<epoch>.pop_pred" and
        "<exp_name>_<epoch>.type_pred"
    :k: forwarded unchanged to `keep_max`
        (presumably the number of sentences kept -- TODO confirm)
    :emb_dm: embedding width used when reshaping the embedded input
    :batch_size: number of documents per minibatch
    :filter_hs: filter height of each parallel cnn layer
    :hidden_units: hidden_units[0] is the number of feature maps per filter,
        the remaining entries are the classifier layer sizes. The list is
        copied, the caller's list is never modified.
    :dropout_rate: dropout rate for the fully connected layers
    :shuffle_batch: currently unused, minibatch order is always permuted
    :n_epochs: the max number of iterations
    :lr_decay, sqr_norm_lim: forwarded to `sgd_updates_adadelta`
    :activation: activation of the conv layers and classifiers
    :non_static: when true the word embedding is trained as well
    :print_freq: evaluate on the test split every `print_freq` epochs

    Sentence scores of the test and train splits are pickled to
    "./results/<exp_name>_<epoch>_{test,train}.score" whenever the summed
    test accuracy improves or the epoch is a multiple of 15 (checked only
    on evaluation epochs). Returns None.

    NOTE(review): another `run_cnn` with a different signature is defined
    later in this file; if both live in one module the later one shadows
    this definition.
    """
    rng = np.random.RandomState(1234)
    # hidden_units[0] is overwritten below. Work on a private copy so that
    # neither the caller's list nor the shared default list changes between
    # calls (otherwise a second call would read a wrong num_maps).
    hidden_units = list(hidden_units)

    input_height = len(dataset[0][0][0][0])
    print("--input height  %s" % (input_height,))
    input_width = emb_dm
    num_maps = hidden_units[0]

    ###################
    # construct model #
    ###################
    print("start to construct the model ....")
    x = T.tensor3("x")
    y_type = T.ivector("y_type")
    y_pop = T.ivector("y_pop")

    words = shared(value=np.asarray(embedding, dtype=theano.config.floatX),
                   name="embedding", borrow=True)

    # define function to keep padding vector as zero
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(input_width, dtype=theano.config.floatX)
    set_zero = function(
        [zero_vector_tensor],
        updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    # every sentence of every document becomes one single-channel "image"
    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape((
        x.shape[0] * x.shape[1], 1, x.shape[2], emb_dm
    ))

    conv_layers = []
    layer1_inputs = []
    for filter_h in filter_hs:
        filter_shape = (num_maps, 1, filter_h, emb_dm)
        pool_size = (input_height - filter_h + 1, 1)
        conv_layer = nn.ConvPoolLayer(rng, input=layer0_input,
                                      input_shape=None,
                                      filter_shape=filter_shape,
                                      pool_size=pool_size,
                                      activation=activation)
        # regroup the per-sentence outputs by document
        sen_vecs = conv_layer.output.reshape(
            (x.shape[0], 1, x.shape[1], num_maps))
        conv_layers.append(conv_layer)
        layer1_inputs.append(sen_vecs)

    sen_vec = T.concatenate(layer1_inputs, 3)

    # score the sentences
    theta_value = np.random.random((len(filter_hs) * num_maps, 1))
    theta = shared(value=np.asarray(theta_value,
                                    dtype=theano.config.floatX),
                   name="theta", borrow=True)
    weighted_sen_vecs, sen_score = keep_max(sen_vec, theta, k)
    doc_vec = T.max(weighted_sen_vecs, axis=2)
    layer1_input = doc_vec.flatten(2)
    final_sen_score = sen_score.flatten(2)

    ##################
    # classifier pop #
    ##################
    print("Construct classifier ....")
    hidden_units[0] = num_maps * len(filter_hs)
    model = nn.MLPDropout(rng, input=layer1_input,
                          layer_sizes=hidden_units,
                          dropout_rates=[dropout_rate],
                          activations=[activation])

    # NOTE(review): `params` aliases model.params, so the extensions below
    # also grow the classifier's own parameter list (kept as in the original).
    params = model.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    params.append(theta)
    if non_static:
        params.append(words)

    cost = model.negative_log_likelihood(y_pop)
    dropout_cost = model.dropout_negative_log_likelihood(y_pop)

    ###################
    # classifier type #
    ###################
    type_hidden_units = list(hidden_units)
    type_hidden_units[-1] = 5  # the type head has a fixed output size of 5
    type_model = nn.MLPDropout(rng, input=layer1_input,
                               layer_sizes=type_hidden_units,
                               dropout_rates=[dropout_rate],
                               activations=[activation])
    params += type_model.params

    type_cost = type_model.negative_log_likelihood(y_type)
    type_dropout_cost = type_model.dropout_negative_log_likelihood(y_type)

    total_cost = cost + type_cost
    total_dropout_cost = dropout_cost + type_dropout_cost

    # both heads are trained jointly with adadelta
    total_grad_updates = sgd_updates_adadelta(params, total_dropout_cost,
                                              lr_decay, 1e-6, sqr_norm_lim)
    total_preds = [model.preds, type_model.preds]

    #####################
    # Construct Dataset #
    #####################
    print("Copy data to GPU and constrct train/valid/test func")
    np.random.seed(1234)

    train_x, train_pop_y, train_type_y = shared_dataset(dataset[0])
    test_x, test_pop_y, test_type_y = shared_dataset(dataset[1])

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    #####################
    # Train model func  #
    #####################
    index = T.iscalar()
    train_func = function(
        [index], total_cost, updates=total_grad_updates,
        givens={
            x: train_x[index * batch_size:(index + 1) * batch_size],
            y_pop: train_pop_y[index * batch_size:(index + 1) * batch_size],
            y_type: train_type_y[index * batch_size:(index + 1) * batch_size]
        })

    test_pred = function(
        [index], total_preds,
        givens={
            x: test_x[index * batch_size:(index + 1) * batch_size],
        })

    test_sentence_est = function(
        [index], final_sen_score,
        givens={
            x: test_x[index * batch_size:(index + 1) * batch_size]
        })

    train_sentence_est = function(
        [index], final_sen_score,
        givens={
            x: train_x[index * batch_size:(index + 1) * batch_size]
        })

    cpu_tst_pop_y = np.asarray(dataset[1][1])
    cpu_tst_type_y = np.asarray(dataset[1][2])

    def compute_score(true_list, pred_list):
        # accuracy: fraction of positions where prediction equals truth
        return np.mean(np.equal(true_list, pred_list))

    def write_preds(suffix, preds, epoch):
        # one integer prediction per line
        pred_path = os.path.join(perf_fn,
                                 "%s_%d.%s" % (exp_name, epoch, suffix))
        with open(pred_path, 'w') as epf:
            for p in preds:
                epf.write("%d\n" % int(p))

    def dump_scores(split, estimator, n_batches, epoch):
        # pickle the per-batch sentence scores of one split
        scores = [estimator(i) for i in xrange(n_batches)]
        score_file = "./results/%s_%d_%s.score" % (exp_name, epoch, split)
        with open(score_file, "wb") as sm:
            cPickle.dump(scores, sm)

    print("Start to train the model.....")
    total_score = 0.0
    # the context manager closes the log even if training raises
    with open(log_fn, 'w') as log_file:
        for epoch in xrange(1, n_epochs + 1):
            start_time = timeit.default_timer()
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                train_func(minibatch_index)
                # the update may have moved the padding row; reset it
                set_zero(zero_vec)

            if epoch % print_freq == 0:
                # do test
                test_pop_preds, test_type_preds = map(
                    np.concatenate,
                    zip(*[test_pred(i) for i in xrange(n_test_batches)]))
                test_pop_score = compute_score(cpu_tst_pop_y, test_pop_preds)
                test_type_score = compute_score(cpu_tst_type_y,
                                                test_type_preds)

                write_preds("pop_pred", test_pop_preds, epoch)
                write_preds("type_pred", test_type_preds, epoch)

                message = "Epoch %d test pop perf %f, type perf %f" % (
                    epoch, test_pop_score, test_type_score)
                print(message)
                log_file.write(message + "\n")
                log_file.flush()

                combined_score = test_pop_score + test_type_score
                if combined_score > total_score or epoch % 15 == 0:
                    total_score = combined_score
                    # save the sentence scores
                    dump_scores("test", test_sentence_est,
                                n_test_batches, epoch)
                    dump_scores("train", train_sentence_est,
                                n_train_batches, epoch)

            end_time = timeit.default_timer()
            print("Finish one iteration using %f m" % (
                (end_time - start_time) / 60.))
def train_cnn_encoder(datasets, word_embedding, input_width=64,
                      filter_hs=[3, 4, 5], hidden_units=[100, 2],
                      dropout_rate=[0.5], shuffle_batch=True, n_epochs=100,
                      batch_size=50, lr_decay=0.95, activations=[ReLU],
                      sqr_norm_lim=9, non_static=True):
    """
    Train a CNN sentence classifier and return its test accuracy.

    The training array is padded (with randomly chosen rows) to a multiple of
    `batch_size`, shuffled, and split by batches into roughly 90% train /
    10% validation. After every epoch the train and validation accuracy are
    printed; whenever the validation accuracy is at least as good as the
    best seen so far, the model is evaluated on the whole test array.

    :datasets: [train, test]; both are indexed as 2-D arrays whose last
        column is the label and whose remaining columns are word ids
    :word_embedding: embedding matrix; row 0 is reset to zero after every
        update (padding vector)
    :input_width: embedding width, also used as the filter width
    :filter_hs: filter height of each parallel conv layer
    :hidden_units: hidden_units[0] is the number of feature maps per filter,
        the remaining entries are the classifier layer sizes. The list is
        copied, the caller's list is never modified.
    :dropout_rate, activations: forwarded to nn.MLPDropout
    :shuffle_batch: visit the train minibatches in random order
    :n_epochs: number of training epochs
    :batch_size: rows per minibatch
    :lr_decay, sqr_norm_lim: forwarded to `sgd_updates_adadelta`
    :non_static: when true the word embedding is trained as well

    :returns: test accuracy measured at the last epoch whose validation
        accuracy matched or beat the best so far; 0 if no epoch ever
        triggered a test evaluation (e.g. n_epochs == 0 or an empty
        validation split)
    """
    rng = np.random.RandomState(1234)
    # hidden_units[0] is overwritten below. Work on a private copy so that
    # neither the caller's list nor the shared default list changes between
    # calls (otherwise a second call would read a wrong feature_maps).
    hidden_units = list(hidden_units)

    input_height = len(datasets[0][0]) - 1  # last column holds the label
    filter_width = input_width
    feature_maps = hidden_units[0]
    filter_shapes = [(feature_maps, 1, filter_h, filter_width)
                     for filter_h in filter_hs]
    pool_sizes = [(input_height - filter_h + 1,
                   input_width - filter_width + 1)
                  for filter_h in filter_hs]

    parameters = [("Input Shape", input_height, input_width),
                  ("Filter Shape", filter_shapes),
                  ("Pool Sizes", pool_sizes),
                  ("dropout rate", dropout_rate),
                  ("hidden units", hidden_units),
                  ("shuffle_batch", shuffle_batch),
                  ("n_epochs", n_epochs),
                  ("batch size", batch_size)]
    print(parameters)

    # construct the model
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    words = shared(value=word_embedding, name="embedding")

    zero_vector_tensor = T.vector()
    # Build the zero row in the dtype the compiled function expects; a plain
    # np.zeros() is float64 and is rejected when the input is float32.
    zero_vec = np.zeros(input_width, dtype=zero_vector_tensor.dtype)
    set_zero = function(
        [zero_vector_tensor],
        updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], words.shape[1]))

    conv_layers = []
    layer1_inputs = []
    for filter_shape, pool_size in zip(filter_shapes, pool_sizes):
        conv_layer = nn.ConvPoolLayer(
            rng, input=layer0_input,
            input_shape=(batch_size, 1, input_height, input_width),
            filter_shape=filter_shape,
            pool_size=pool_size, activation=ReLU)
        conv_layers.append(conv_layer)
        layer1_inputs.append(conv_layer.output.flatten(2))

    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)

    classifier = nn.MLPDropout(rng, input=layer1_input,
                               layer_sizes=hidden_units,
                               dropout_rates=dropout_rate,
                               activations=activations)

    # NOTE(review): `params` aliases classifier.params, so the extensions
    # below also grow the classifier's own list (kept as in the original).
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params.append(words)

    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay,
                                        1e-6, sqr_norm_lim)

    # pad the training data to a whole number of minibatches
    np.random.seed(1234)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]

    new_data = np.random.permutation(new_data)
    # exact after padding; `//` keeps the count an int on any Python version
    n_batches = new_data.shape[0] // batch_size
    n_train_batches = int(np.round(n_batches * 0.9))

    # divide the train set into train/val sets
    test_set_x = datasets[1][:, :input_height]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    print(train_set[:, -1])

    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :input_height], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset(
        (val_set[:, :input_height], val_set[:, -1]))
    n_val_batches = n_batches - n_train_batches

    val_model = function(
        [index], classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # error rate on the *training* minibatches (used for train perf)
    train_error_model = function(
        [index], classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    train_model = function(
        [index], cost, updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # separate graph that evaluates the whole test set in a single call
    test_pred_layers = []
    test_size = test_set_x.shape[0]
    test_layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, input_height, input_width))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))

    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = function([x, y], test_error)

    # start to training the model
    print("Start training the model....")
    best_val_perf = 0
    # defined up front so the final return cannot hit an unbound name
    test_perf = 0
    for epoch in xrange(1, n_epochs + 1):
        if shuffle_batch:
            batch_order = np.random.permutation(range(n_train_batches))
        else:
            batch_order = xrange(n_train_batches)

        for minibatch_index in batch_order:
            if shuffle_batch:
                print(minibatch_index)
            train_model(minibatch_index)
            # the update may have moved the padding row; reset it
            set_zero(zero_vec)

        train_losses = [train_error_model(i)
                        for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)

        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch %i, train perf %f %%, val perf %f' % (
            epoch, train_perf * 100., val_perf * 100.))

        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_losses = test_model_all(test_set_x, test_set_y)
            test_perf = 1 - test_losses
            print("Test Performance %f under Current Best Valid perf %f" % (
                test_perf, val_perf))

    return test_perf
def run_cnn(exp_name, dataset, embedding, log_fn, perf_fn, emb_dm=100,
            batch_size=100, filter_hs=[1, 2, 3], hidden_units=[200, 100, 11],
            dropout_rate=0.5, shuffle_batch=True, n_epochs=300, lr_decay=0.95,
            activation=ReLU, sqr_norm_lim=9, non_static=True, alpha=0.0001):
    """
    Train a CNN classifier over word, frequency and position embeddings and
    score it on the test split every 5 epochs.

    The three id matrices are embedded separately, concatenated along the
    embedding axis, passed through one ConvPoolLayer per filter height and
    classified by an MLPDropout. The dropout loss plus `alpha * model.L2`
    is optimized with `sgd_updates_adadelta`.

    :exp_name: prefix of the prediction files that are written
    :dataset: [train, test]; each split is indexed as
        (word_x, freq_x, pos_x, y)
    :embedding: word embedding matrix; row 0 is reset to zero after every
        update (padding vector)
    :log_fn: path of the log file (opened in append mode)
    :perf_fn: directory receiving "<exp_name>_<epoch>.pred"
    :emb_dm: word embedding width used when reshaping the embedded input
    :batch_size: rows per minibatch
    :filter_hs: filter height for each parallel cnn layer
    :hidden_units: hidden_units[0] is the number of feature maps per filter,
        the remaining entries are the classifier layer sizes. The list is
        copied, the caller's list is never modified.
    :dropout_rate: dropout rate for the fully connected layers
    :shuffle_batch: currently unused, minibatch order is always permuted
    :n_epochs: the max number of iterations
    :lr_decay, sqr_norm_lim: forwarded to `sgd_updates_adadelta`
    :activation: activation of the conv layers and the classifier
    :non_static: when true the word embedding is trained as well; the
        frequency and position embeddings are always trained
    :alpha: weight of the `model.L2` term added to the cost

    Returns None.

    NOTE(review): another `run_cnn` with a different signature is defined
    earlier in this file; if both live in one module this definition
    shadows the earlier one.
    """
    # hidden_units[0] is overwritten below. Work on a private copy so that
    # neither the caller's list nor the shared default list changes between
    # calls (otherwise a second call would read a wrong num_maps).
    hidden_units = list(hidden_units)

    input_height = len(dataset[0][0][0])
    print("--input height  %s" % (input_height,))
    input_width = emb_dm
    num_maps = hidden_units[0]

    ###################
    # construct model #
    ###################
    print("start to construct the model ....")
    word_x = T.matrix("word_x")
    freq_x = T.matrix("freq_x")
    pos_x = T.matrix("pos_x")
    y = T.ivector("y")

    words = shared(value=np.asarray(embedding, dtype=theano.config.floatX),
                   name="embedding", borrow=True)

    sym_dim = 20
    # the frequency embedding is a 21 * sym_dim matrix
    freq_val = np.random.random((21, sym_dim))
    freqs = shared(value=np.asarray(freq_val, dtype=theano.config.floatX),
                   borrow=True, name="freqs")

    # the position embedding is a 31 * sym_dim matrix
    poss_val = np.random.random((31, sym_dim))
    poss = shared(value=np.asarray(poss_val, dtype=theano.config.floatX),
                  borrow=True, name="poss")

    # define functions to keep the padding vectors (row 0) as zero
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(input_width, dtype=theano.config.floatX)
    set_zero = function(
        [zero_vector_tensor],
        updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    freq_zero_tensor = T.vector()
    freq_zero_vec = np.zeros(sym_dim, dtype=theano.config.floatX)
    freq_set_zero = function(
        [freq_zero_tensor],
        updates=[(freqs, T.set_subtensor(freqs[0, :], freq_zero_tensor))])

    pos_zero_tensor = T.vector()
    pos_zero_vec = np.zeros(sym_dim, dtype=theano.config.floatX)
    pos_set_zero = function(
        [pos_zero_tensor],
        updates=[(poss, T.set_subtensor(poss[0, :], pos_zero_tensor))])

    word_x_emb = words[T.cast(word_x.flatten(), dtype="int32")].reshape(
        (word_x.shape[0], 1, word_x.shape[1], emb_dm))
    freq_x_emb = freqs[T.cast(freq_x.flatten(), dtype="int32")].reshape(
        (freq_x.shape[0], 1, freq_x.shape[1], sym_dim))
    pos_x_emb = poss[T.cast(pos_x.flatten(), dtype="int32")].reshape(
        (pos_x.shape[0], 1, pos_x.shape[1], sym_dim))

    # stack the three embeddings side by side along the embedding axis
    layer0_input = T.concatenate([word_x_emb, freq_x_emb, pos_x_emb], axis=3)

    conv_layers = []
    layer1_inputs = []
    rng = np.random.RandomState()
    filter_width = emb_dm + sym_dim + sym_dim
    for filter_h in filter_hs:
        filter_shape = (num_maps, 1, filter_h, filter_width)
        pool_size = (input_height - filter_h + 1, 1)
        conv_layer = nn.ConvPoolLayer(rng, input=layer0_input,
                                      input_shape=None,
                                      filter_shape=filter_shape,
                                      pool_size=pool_size,
                                      activation=activation)
        conv_layers.append(conv_layer)
        layer1_inputs.append(conv_layer.output.flatten(2))

    layer1_input = T.concatenate(layer1_inputs, 1)

    ##############
    # classifier #
    ##############
    print("Construct classifier ....")
    hidden_units[0] = num_maps * len(filter_hs)
    model = nn.MLPDropout(rng, input=layer1_input,
                          layer_sizes=hidden_units,
                          dropout_rates=[dropout_rate],
                          activations=[activation])

    # NOTE(review): `params` aliases model.params, so the extensions below
    # also grow the classifier's own parameter list (kept as in the original).
    params = model.params
    for conv_layer in conv_layers:
        params += conv_layer.params

    # honour the non_static flag like the sibling training functions do;
    # with the default (True) the word embedding is trained as before
    if non_static:
        params.append(words)
    params.append(freqs)
    params.append(poss)

    cost = model.negative_log_likelihood(y) + alpha * model.L2
    dropout_cost = model.dropout_negative_log_likelihood(y) + alpha * model.L2

    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay,
                                        1e-6, sqr_norm_lim)

    #####################
    # Construct Dataset #
    #####################
    print("Copy data to GPU and constrct train/valid/test func")

    train_word_x, train_freq_x, train_pos_x, train_y = shared_dataset(
        dataset[0])
    test_word_x, test_freq_x, test_pos_x, test_y = shared_dataset(dataset[1])

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    #####################
    # Train model func  #
    #####################
    index = T.iscalar()
    train_func = function(
        [index], cost, updates=grad_updates,
        givens={
            word_x: train_word_x[index * batch_size:(index + 1) * batch_size],
            freq_x: train_freq_x[index * batch_size:(index + 1) * batch_size],
            pos_x: train_pos_x[index * batch_size:(index + 1) * batch_size],
            y: train_y[index * batch_size:(index + 1) * batch_size]
        })

    test_pred = function(
        [index], model.preds,
        givens={
            word_x: test_word_x[index * batch_size:(index + 1) * batch_size],
            freq_x: test_freq_x[index * batch_size:(index + 1) * batch_size],
            pos_x: test_pos_x[index * batch_size:(index + 1) * batch_size]
        })

    cpu_tst_y = np.asarray(dataset[1][3])

    def compute_score(true_list, pred_list):
        # accuracy: fraction of positions where prediction equals truth
        return np.mean(np.equal(true_list, pred_list))

    print("Start to train the model.....")
    # the context manager closes the log even if training raises
    with open(log_fn, 'a') as log_file:
        for epoch in xrange(1, n_epochs + 1):
            start_time = timeit.default_timer()
            costs = []
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                costs.append(train_func(minibatch_index))
                # the update may have moved the padding rows; reset them
                set_zero(zero_vec)
                freq_set_zero(freq_zero_vec)
                pos_set_zero(pos_zero_vec)

            if epoch % 5 == 0:
                # do test
                test_preds = np.concatenate(
                    [test_pred(i) for i in xrange(n_test_batches)])
                test_score = compute_score(cpu_tst_y, test_preds)

                pred_path = os.path.join(perf_fn,
                                         "%s_%d.pred" % (exp_name, epoch))
                with open(pred_path, 'w') as epf:
                    for p in test_preds:
                        epf.write("%d\n" % int(p))

                message = "Epoch %d test perf %f with train cost %f" % (
                    epoch, test_score, np.mean(costs))
                print(message)
                log_file.write(message + "\n")
                log_file.flush()

            end_time = timeit.default_timer()
            print("Finish one iteration using %f m" % (
                (end_time - start_time) / 60.))