def run_experiment(self, dataset, word_embedding, exp_name):
    # load parameters
    num_maps_word = self.options["num_maps_word"]
    drop_rate_word = self.options["drop_rate_word"]
    drop_rate_sentence = self.options["drop_rate_sentence"]
    word_window = self.options["word_window"]
    word_dim = self.options["word_dim"]
    k_max_word = self.options["k_max_word"]
    batch_size = self.options["batch_size"]
    rho = self.options["rho"]
    epsilon = self.options["epsilon"]
    norm_lim = self.options["norm_lim"]
    max_iteration = self.options["max_iteration"]
    k_portion = self.options["k_portion"]

    sentence_len = len(dataset[0][0][0][0])

    # compute the sentence flags and the dynamic k values
    train_flags, test_flags = construct_sentence_flag(dataset)
    train_k_value = construct_dynamic_k(train_flags, k_portion)
    test_k_value = construct_dynamic_k(test_flags, k_portion)

    train_flags = theano.shared(value=np.asarray(train_flags, dtype=theano.config.floatX), borrow=True)
    test_flags = theano.shared(value=np.asarray(test_flags, dtype=theano.config.floatX), borrow=True)
    train_k = theano.shared(value=np.asarray(train_k_value, dtype=theano.config.floatX), borrow=True)
    test_k = theano.shared(value=np.asarray(test_k_value, dtype=theano.config.floatX), borrow=True)

    # define the symbolic variables
    x = T.tensor3("x")
    y = T.ivector("y")
    sen_flags = T.matrix("flag")
    sen_k = T.matrix("sen_k")
    rng = np.random.RandomState(1234)

    words = theano.shared(value=np.asarray(word_embedding, dtype=theano.config.floatX),
                          name="embedding", borrow=True)

    # function to keep the padding embedding (index 0) at the zero vector
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vector_tensor],
                               updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))
    dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

    # compute the convolution on the word layer
    word_filter_shape = (num_maps_word, 1, word_window, word_dim)
    word_pool_size = (sentence_len - word_window + 1, 1)
    dropout_word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb, input_shape=None,
                                         filter_shape=word_filter_shape,
                                         pool_size=word_pool_size,
                                         activation=Tanh, k=k_max_word)
    sent_vec_dim = num_maps_word * k_max_word
    dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb * (1 - drop_rate_word),
                                 input_shape=None, filter_shape=word_filter_shape,
                                 pool_size=word_pool_size, activation=Tanh,
                                 k=k_max_word, W=dropout_word_conv.W, b=dropout_word_conv.b)
    sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    # construct the sentence level classifier
    n_in = sent_vec_dim
    n_out = 1
    sen_W_values = np.zeros((n_in, n_out), dtype=theano.config.floatX)
    sen_W = theano.shared(value=sen_W_values, borrow=True, name="logis_W")
    sen_b_value = nn.as_floatX(0.0)
    sen_b = theano.shared(value=sen_b_value, borrow=True, name="logis_b")

    drop_sent_prob = T.nnet.sigmoid(T.dot(dropout_sent_vec, sen_W) + sen_b)
    sent_prob = T.nnet.sigmoid(T.dot(sent_vec, sen_W * (1 - drop_rate_sentence)) + sen_b)

    # reshape the sentence probabilities to the document level
    drop_sent_prob = drop_sent_prob.reshape((x.shape[0], x.shape[1]))
    sent_prob = sent_prob.reshape((x.shape[0], x.shape[1]))

    """
    # the positive bag probability is the average of the sentence probabilities
    drop_doc_prob = T.sum(drop_sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
    doc_prob = T.sum(sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
    """

    # use the dynamic top-k maximum probabilities as the bag level probability:
    # sort the sentence probabilities and average the top k of each document
    drop_doc_prob = T.sum(T.sort(drop_sent_prob, axis=1) * sen_k, axis=1) / T.sum(sen_k, axis=1)
    doc_prob = T.sum(T.sort(sent_prob, axis=1) * sen_k, axis=1) / T.sum(sen_k, axis=1)

    drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
    doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))

    doc_preds = doc_prob > 0.5

    # instance level cost
    drop_sent_cost = T.sum(
        T.maximum(0.0, nn.as_floatX(.5) -
                  T.sgn(drop_sent_prob.reshape((x.shape[0] * x.shape[1], n_out)) - nn.as_floatX(0.6)) *
                  T.dot(dropout_sent_vec, sen_W)) *
        sen_flags.reshape((x.shape[0] * x.shape[1], n_out))) / T.sum(sen_flags)

    # constraints: the most positive instance should score at least 0.7 in positive
    # bags and at most 0.1 in negative bags; a positive bag should contain at least
    # a couple of positive instances and a negative bag should contain none.
    # count the number of positive instances per bag
    positive_count = T.sum((drop_sent_prob * sen_flags) > 0.5, axis=1)
    pos_cost = T.maximum(nn.as_floatX(0.0), nn.as_floatX(2) - positive_count)
    neg_cost = T.maximum(nn.as_floatX(0.0), positive_count)
    penal_cost = T.mean(pos_cost * y + neg_cost * (nn.as_floatX(1.0) - y))

    # add the sentence similarity constraint
    sen_sen = T.dot(dropout_sent_vec, dropout_sent_vec.T)
    sen_sqr = T.sum(dropout_sent_vec ** 2, axis=1)
    sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
    sen_sqr_right = sen_sqr.dimshuffle('x', 0)
    # pairwise squared Euclidean distance: ||a||^2 - 2*a.b + ||b||^2
    # (fixed: the cross term must use sen_sen, the pairwise dot products)
    sen_sim_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
    sen_sim_matrix = T.exp(-1 * sen_sim_matrix)
    sen_sim_prob = drop_sent_prob.reshape((x.shape[0] * x.shape[1], 1)) - drop_sent_prob.flatten()
    sen_sim_prob = sen_sim_prob ** 2
    sen_sim_flag = T.dot(sen_flags.reshape((x.shape[0] * x.shape[1], 1)),
                         sen_flags.reshape((1, x.shape[0] * x.shape[1])))
    sen_sim_cost = T.sum(sen_sim_matrix * sen_sim_prob * sen_sim_flag) / T.sum(sen_sim_flag)

    # bag level cost
    drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) * nn.as_floatX(0.6) -
                           (1 - y) * T.log(1 - drop_doc_prob) * nn.as_floatX(0.4))
    #drop_cost = drop_bag_cost * nn.as_floatX(3.0) + drop_sent_cost + nn.as_floatX(2.0) * penal_cost
    drop_cost = drop_bag_cost * nn.as_floatX(0.6) + \
        drop_sent_cost * nn.as_floatX(0.1) + \
        penal_cost * nn.as_floatX(0.5) + \
        sen_sim_cost * nn.as_floatX(0.0001)

    # collect parameters
    self.params.append(words)
    self.params += dropout_word_conv.params
    self.params.append(sen_W)
    self.params.append(sen_b)

    grad_updates = nn.sgd_updates_adadelta(self.params, drop_cost, rho, epsilon, norm_lim)

    # construct the dataset
    train_x, train_y = nn.shared_dataset(dataset[0])
    test_x, test_y = nn.shared_dataset(dataset[1])
    test_cpu_y = dataset[1][1]

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    # construct the model
    index = T.iscalar()
    train_func = theano.function([index],
                                 [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost, sen_sim_cost],
                                 updates=grad_updates,
                                 givens={
                                     x: train_x[index * batch_size:(index + 1) * batch_size],
                                     y: train_y[index * batch_size:(index + 1) * batch_size],
                                     sen_flags: train_flags[index * batch_size:(index + 1) * batch_size],
                                     sen_k: train_k[index * batch_size:(index + 1) * batch_size]
                                 })

    test_func = theano.function([index], doc_preds,
                                givens={
                                    x: test_x[index * batch_size:(index + 1) * batch_size],
                                    sen_k: test_k[index * batch_size:(index + 1) * batch_size]
                                })

    get_train_sent_prob = theano.function([index], sent_prob,
                                          givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    get_test_sent_prob = theano.function([index], sent_prob,
                                         givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    epoch = 0
    best_score = 0

    log_file = open("./log/%s.log" % exp_name, 'w')

    while epoch <= max_iteration:
        start_time = timeit.default_timer()
        epoch += 1
        costs = []

        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost_epoch = train_func(minibatch_index)
            costs.append(cost_epoch)
            set_zero(zero_vec)

        total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost, train_sim_cost = zip(*costs)
        print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f sim_cost %f\n" % (
            epoch, np.mean(total_train_cost), np.mean(train_bag_cost),
            np.mean(train_sent_cost), np.mean(train_penal_cost), np.mean(train_sim_cost))

        if epoch % 1 == 0:
            test_preds = []
            for i in xrange(n_test_batches):
                test_y_pred = test_func(i)
                test_preds.append(test_y_pred)
            test_preds = np.concatenate(test_preds)
            test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

            precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

            if beta[1] > best_score or epoch % 5 == 0:
                best_score = beta[1]
                # save the sentence probabilities
                train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                train_sens = np.concatenate(train_sens, axis=0)
                test_sens = np.concatenate(test_sens, axis=0)

                out_train_sent_file = "./results/%s_train_sent_%d.vec" % (exp_name, epoch)
                out_test_sent_file = "./results/%s_test_sent_%d.vec" % (exp_name, epoch)

                with open(out_test_sent_file, 'w') as test_f, open(out_train_sent_file, 'w') as train_f:
                    cPickle.dump(train_sens, train_f)
                    cPickle.dump(test_sens, test_f)
                print "Get best performance at iteration %d: %f" % (epoch, test_score)
                log_file.write("Get best performance at iteration %d: %f\n" % (epoch, test_score))

            end_time = timeit.default_timer()
            print "Iteration %d, precision, recall, f1" % epoch, precision, recall, beta
            log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f, pos recall %f, neg f1 %f, pos f1 %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (
                epoch, precision[0], precision[1], recall[0], recall[1], beta[0], beta[1],
                np.mean(total_train_cost), np.mean(train_bag_cost),
                np.mean(train_sent_cost), np.mean(train_penal_cost)))
            print "Using time %f m" % ((end_time - start_time) / 60.)
            log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))

        end_time = timeit.default_timer()
        print "Iteration %d using time %f m" % (epoch, (end_time - start_time) / 60.)
        log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))
        log_file.flush()

    log_file.close()
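
# Illustrative sketch (not used by the model above): what the dynamic top-k bag
# probability computes, written in plain NumPy. `sent_probs` and `k_mask` are
# hypothetical stand-ins for drop_sent_prob and the sen_k matrix produced by
# construct_dynamic_k; the mask is assumed to place ones on the k largest
# positions of each row after sorting in ascending order.
import numpy as np

def dynamic_topk_bag_prob(sent_probs, k_mask):
    """Average of the top-k sentence probabilities per document (bag)."""
    sorted_probs = np.sort(sent_probs, axis=1)   # ascending, as T.sort does
    return (sorted_probs * k_mask).sum(axis=1) / k_mask.sum(axis=1)

# Example: two documents with three sentences each, keeping the top 2 sentences.
probs = np.array([[0.1, 0.9, 0.7],
                  [0.2, 0.3, 0.1]])
k_mask = np.array([[0., 1., 1.],    # ones on the two largest sorted positions
                   [0., 1., 1.]])
print dynamic_topk_bag_prob(probs, k_mask)   # [0.8, 0.25]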
def run_experiment(self, dataset, word_embedding, exp_name):
    # load parameters
    num_maps_word = self.options["num_maps_word"]
    drop_rate_word = self.options["drop_rate_word"]
    drop_rate_sentence = self.options["drop_rate_sentence"]
    word_window = self.options["word_window"]
    word_dim = self.options["word_dim"]
    k_max_word = self.options["k_max_word"]
    batch_size = self.options["batch_size"]
    rho = self.options["rho"]
    epsilon = self.options["epsilon"]
    norm_lim = self.options["norm_lim"]
    max_iteration = self.options["max_iteration"]

    sentence_len = len(dataset[0][0][0][0])

    # compute the sentence flags
    train_flags, test_flags = construct_sentence_flag(dataset)
    train_flags = theano.shared(value=np.asarray(train_flags, dtype=theano.config.floatX), borrow=True)
    test_flags = theano.shared(value=np.asarray(test_flags, dtype=theano.config.floatX), borrow=True)

    # define the symbolic variables
    x = T.tensor3("x")
    y = T.ivector("y")
    sen_flags = T.matrix("flag")
    rng = np.random.RandomState(1234)

    words = theano.shared(value=np.asarray(word_embedding, dtype=theano.config.floatX),
                          name="embedding", borrow=True)

    # function to keep the padding embedding (index 0) at the zero vector
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vector_tensor],
                               updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))
    dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

    # compute the convolution on the word layer
    word_filter_shape = (num_maps_word, 1, word_window, word_dim)
    word_pool_size = (sentence_len - word_window + 1, 1)
    dropout_word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb, input_shape=None,
                                         filter_shape=word_filter_shape,
                                         pool_size=word_pool_size,
                                         activation=Tanh, k=k_max_word)
    sent_vec_dim = num_maps_word * k_max_word
    dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb * (1 - drop_rate_word),
                                 input_shape=None, filter_shape=word_filter_shape,
                                 pool_size=word_pool_size, activation=Tanh,
                                 k=k_max_word, W=dropout_word_conv.W, b=dropout_word_conv.b)
    sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    # construct the sentence level classifier
    n_in = sent_vec_dim
    n_out = 1
    sen_W_values = np.zeros((n_in, n_out), dtype=theano.config.floatX)
    sen_W = theano.shared(value=sen_W_values, borrow=True, name="logis_W")
    sen_b_value = nn.as_floatX(0.0)
    sen_b = theano.shared(value=sen_b_value, borrow=True, name="logis_b")

    drop_sent_prob = T.nnet.sigmoid(T.dot(dropout_sent_vec, sen_W) + sen_b)
    sent_prob = T.nnet.sigmoid(T.dot(sent_vec, sen_W * (1 - drop_rate_sentence)) + sen_b)

    # reshape the sentence probabilities to the document level
    drop_sent_prob = drop_sent_prob.reshape((x.shape[0], x.shape[1]))
    sent_prob = sent_prob.reshape((x.shape[0], x.shape[1]))

    # the positive bag probability is the flag-masked average of the sentence probabilities
    drop_doc_prob = T.sum(drop_sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
    doc_prob = T.sum(sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)

    drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
    doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))

    """
    # the positive bag probability equals 1 - product of the negative probabilities
    drop_doc_prob = T.prod(drop_sent_prob, axis=1)
    drop_doc_prob = T.set_subtensor(drop_doc_prob[:, 1], 1 - drop_doc_prob[:, 0])
    doc_prob = T.prod(sent_prob, axis=1)
    doc_prob = T.set_subtensor(doc_prob[:, 1], 1 - doc_prob[:, 0])

    # the positive bag probability is the most positive sentence probability
    drop_doc_prob = T.max(drop_sent_prob, axis=1)
    drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
    doc_prob = T.max(sent_prob, axis=1)
    doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
    """

    doc_preds = doc_prob > 0.5

    # instance level cost
    drop_sent_cost = T.sum(
        T.maximum(0.0, nn.as_floatX(.5) -
                  T.sgn(drop_sent_prob.reshape((x.shape[0] * x.shape[1], n_out)) - nn.as_floatX(0.6)) *
                  T.dot(dropout_sent_vec, sen_W)) *
        sen_flags.reshape((x.shape[0] * x.shape[1], n_out))) / T.sum(sen_flags)

    # constraints: the most positive instance should score at least 0.7 in positive
    # bags and at most 0.1 in negative bags; a positive bag should contain at least
    # a couple of positive instances and a negative bag should contain none.
    # count the number of positive instances per bag
    positive_count = T.sum((drop_sent_prob * sen_flags) > 0.5, axis=1)
    pos_cost = T.maximum(nn.as_floatX(0.0), nn.as_floatX(2) - positive_count)
    neg_cost = T.maximum(nn.as_floatX(0.0), positive_count)
    """
    most_positive_prob = T.max(drop_sent_prob, axis=1)
    pos_cost = T.maximum(0.0, nn.as_floatX(0.6) - most_positive_prob)
    neg_cost = T.maximum(0.0, most_positive_prob - nn.as_floatX(0.05))
    """
    penal_cost = T.mean(pos_cost * y + neg_cost * (nn.as_floatX(1.0) - y))

    # add the sentence similarity constraint
    sen_sen = T.dot(dropout_sent_vec, dropout_sent_vec.T)
    sen_sqr = T.sum(dropout_sent_vec ** 2, axis=1)
    sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
    sen_sqr_right = sen_sqr.dimshuffle('x', 0)
    # pairwise squared Euclidean distance: ||a||^2 - 2*a.b + ||b||^2
    # (fixed: the cross term must use sen_sen, the pairwise dot products)
    sen_sim_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
    sen_sim_matrix = T.exp(-1 * sen_sim_matrix)
    sen_sim_prob = drop_sent_prob.reshape((x.shape[0] * x.shape[1], 1)) - drop_sent_prob.flatten()
    sen_sim_prob = sen_sim_prob ** 2
    sen_sim_flag = T.dot(sen_flags.reshape((x.shape[0] * x.shape[1], 1)),
                         sen_flags.reshape((1, x.shape[0] * x.shape[1])))
    sen_sim_cost = T.sum(sen_sim_matrix * sen_sim_prob * sen_sim_flag) / T.sum(sen_sim_flag)

    # bag level cost
    drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) * nn.as_floatX(0.6) -
                           (1 - y) * T.log(1 - drop_doc_prob) * nn.as_floatX(0.4))
    #drop_cost = drop_bag_cost * nn.as_floatX(3.0) + drop_sent_cost + nn.as_floatX(2.0) * penal_cost
    drop_cost = drop_bag_cost * nn.as_floatX(0.6) + \
        drop_sent_cost * nn.as_floatX(0.1) + \
        penal_cost * nn.as_floatX(0.5) + \
        sen_sim_cost * nn.as_floatX(0.0001)

    # collect parameters
    self.params.append(words)
    self.params += dropout_word_conv.params
    self.params.append(sen_W)
    self.params.append(sen_b)

    grad_updates = nn.sgd_updates_adadelta(self.params, drop_cost, rho, epsilon, norm_lim)

    # construct the dataset
    train_x, train_y = nn.shared_dataset(dataset[0])
    test_x, test_y = nn.shared_dataset(dataset[1])
    test_cpu_y = dataset[1][1]

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    # construct the model
    index = T.iscalar()
    train_func = theano.function([index],
                                 [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost, sen_sim_cost],
                                 updates=grad_updates,
                                 givens={
                                     x: train_x[index * batch_size:(index + 1) * batch_size],
                                     y: train_y[index * batch_size:(index + 1) * batch_size],
                                     sen_flags: train_flags[index * batch_size:(index + 1) * batch_size]
                                 })

    test_func = theano.function([index], doc_preds,
                                givens={
                                    x: test_x[index * batch_size:(index + 1) * batch_size],
                                    sen_flags: test_flags[index * batch_size:(index + 1) * batch_size]
                                })

    get_train_sent_prob = theano.function([index], sent_prob,
                                          givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    get_test_sent_prob = theano.function([index], sent_prob,
                                         givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    epoch = 0
    best_score = 0

    log_file = open("./log/%s.log" % exp_name, 'w')

    while epoch <= max_iteration:
        start_time = timeit.default_timer()
        epoch += 1
        costs = []

        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost_epoch = train_func(minibatch_index)
            costs.append(cost_epoch)
            set_zero(zero_vec)

        total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost, train_sim_cost = zip(*costs)
        print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f sim_cost %f\n" % (
            epoch, np.mean(total_train_cost), np.mean(train_bag_cost),
            np.mean(train_sent_cost), np.mean(train_penal_cost), np.mean(train_sim_cost))

        if epoch % 1 == 0:
            test_preds = []
            for i in xrange(n_test_batches):
                test_y_pred = test_func(i)
                test_preds.append(test_y_pred)
            test_preds = np.concatenate(test_preds)
            test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

            precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

            if beta[1] > best_score or epoch % 5 == 0:
                best_score = beta[1]
                # save the sentence probabilities
                train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                train_sens = np.concatenate(train_sens, axis=0)
                test_sens = np.concatenate(test_sens, axis=0)

                out_train_sent_file = "./results/%s_train_sent_%d.vec" % (exp_name, epoch)
                out_test_sent_file = "./results/%s_test_sent_%d.vec" % (exp_name, epoch)

                with open(out_test_sent_file, 'w') as test_f, open(out_train_sent_file, 'w') as train_f:
                    cPickle.dump(train_sens, train_f)
                    cPickle.dump(test_sens, test_f)
                print "Get best performance at iteration %d: %f" % (epoch, test_score)
                log_file.write("Get best performance at iteration %d: %f\n" % (epoch, test_score))

            end_time = timeit.default_timer()
            print "Iteration %d, precision, recall, f1" % epoch, precision, recall, beta
            log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f, pos recall %f, neg f1 %f, pos f1 %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (
                epoch, precision[0], precision[1], recall[0], recall[1], beta[0], beta[1],
                np.mean(total_train_cost), np.mean(train_bag_cost),
                np.mean(train_sent_cost), np.mean(train_penal_cost)))
            print "Using time %f m" % ((end_time - start_time) / 60.)
            log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))

        end_time = timeit.default_timer()
        print "Iteration %d using time %f m" % (epoch, (end_time - start_time) / 60.)
        log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))
        log_file.flush()

    log_file.close()
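
# Illustrative sketch (assumption, not part of the training graph above): the
# flag-masked average that turns sentence probabilities into a bag probability.
# `sent_flags` marks real sentences with 1 and padded sentences with 0, so the
# mean is taken only over sentences that actually occur in each document.
import numpy as np

def masked_avg_bag_prob(sent_probs, sent_flags):
    return (sent_probs * sent_flags).sum(axis=1) / sent_flags.sum(axis=1)

probs = np.array([[0.9, 0.2, 0.5],
                  [0.1, 0.4, 0.0]])
flags = np.array([[1., 1., 1.],
                  [1., 1., 0.]])      # the second document has only two real sentences
print masked_avg_bag_prob(probs, flags)   # [0.53333333, 0.25]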
def run_experiment(self, dataset, word_embedding, exp_name):
    # load parameters
    num_maps_word = self.options["num_maps_word"]
    drop_rate_word = self.options["drop_rate_word"]
    word_window = self.options["word_window"]
    word_dim = self.options["word_dim"]
    k_max_word = self.options["k_max_word"]
    num_maps_sentence = self.options["num_maps_sentence"]
    drop_rate_sentence = self.options["drop_rate_sentence"]
    sentence_window = self.options["sentence_window"]
    k_max_sentence = self.options["k_max_sentence"]
    batch_size = self.options["batch_size"]
    rho = self.options["rho"]
    epsilon = self.options["epsilon"]
    norm_lim = self.options["norm_lim"]
    max_iteration = self.options["max_iteration"]

    sentence_len = len(dataset[0][0][0][0])
    sentence_num = len(dataset[0][0][0])

    # define the symbolic variables
    x = T.tensor3("x")
    y = T.ivector("y")
    rng = np.random.RandomState(1234)

    words = theano.shared(value=np.asarray(word_embedding, dtype=theano.config.floatX),
                          name="embedding", borrow=True)

    # function to keep the padding embedding (index 0) at the zero vector
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vector_tensor],
                               updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))
    dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

    # compute the convolution on the word layer
    word_filter_shape = (num_maps_word, 1, word_window, word_dim)
    word_pool_size = (sentence_len - word_window + 1, 1)
    dropout_word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb, input_shape=None,
                                         filter_shape=word_filter_shape,
                                         pool_size=word_pool_size,
                                         activation=Tanh, k=k_max_word)
    sent_vec_dim = num_maps_word * k_max_word
    dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim))
    dropout_sent_vec = nn.dropout_from_layer(rng, dropout_sent_vec, drop_rate_sentence)

    word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb * (1 - drop_rate_word),
                                 input_shape=None, filter_shape=word_filter_shape,
                                 pool_size=word_pool_size, activation=Tanh,
                                 k=k_max_word, W=dropout_word_conv.W, b=dropout_word_conv.b)
    sent_vec = word_conv.output.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim))

    # construct the convolution layer on sentences
    sent_filter_shape = (num_maps_sentence, 1, sentence_window, sent_vec_dim)
    sent_pool_size = (sentence_num - sentence_window + 1, 1)
    dropout_sent_conv = nn.ConvPoolLayer(rng, input=dropout_sent_vec, input_shape=None,
                                         filter_shape=sent_filter_shape,
                                         pool_size=sent_pool_size,
                                         activation=Tanh, k=k_max_sentence)

    sent_conv = nn.ConvPoolLayer(rng, input=sent_vec * (1 - drop_rate_sentence),
                                 input_shape=None, filter_shape=sent_filter_shape,
                                 pool_size=sent_pool_size, activation=Tanh,
                                 k=k_max_sentence, W=dropout_sent_conv.W, b=dropout_sent_conv.b)

    dropout_doc_vec = dropout_sent_conv.output.flatten(2)
    doc_vec = sent_conv.output.flatten(2)
    doc_vec_dim = num_maps_sentence * k_max_sentence

    # construct the document classifier
    dropout_logistic_layer = nn.LogisticRegressionLayer(input=dropout_doc_vec,
                                                        n_in=doc_vec_dim, n_out=2)
    logistic_layer = nn.LogisticRegressionLayer(input=doc_vec, n_in=doc_vec_dim, n_out=2,
                                                W=dropout_logistic_layer.W,
                                                b=dropout_logistic_layer.b)

    dropout_cost = dropout_logistic_layer.negative_log_likelihood(y)
    cost = logistic_layer.negative_log_likelihood(y)
    preds = logistic_layer.y_pred
    errors = logistic_layer.errors(y)

    # collect parameters
    self.params.append(words)
    self.params += dropout_word_conv.params
    self.params += dropout_sent_conv.params
    self.params += dropout_logistic_layer.params

    grad_updates = nn.sgd_updates_adadelta(self.params, dropout_cost, rho, epsilon, norm_lim)

    # construct the dataset
    train_x, train_y = nn.shared_dataset(dataset[0])
    test_x, test_y = nn.shared_dataset(dataset[1])
    test_cpu_y = dataset[1][1]

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    # construct the model
    index = T.iscalar()
    train_func = theano.function([index], dropout_cost, updates=grad_updates,
                                 givens={
                                     x: train_x[index * batch_size:(index + 1) * batch_size],
                                     y: train_y[index * batch_size:(index + 1) * batch_size]
                                 })

    test_func = theano.function([index], preds,
                                givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    get_train_sentvec = theano.function([index], sent_vec,
                                        givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    get_test_sentvec = theano.function([index], sent_vec,
                                       givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    epoch = 0
    best_score = 0

    raw_train_x = dataset[0][0]
    raw_test_x = dataset[1][0]
    # count the non-padded sentences in each document
    number_train_sens = []
    number_test_sens = []
    for doc in raw_train_x:
        sen_num = 0
        for sen in doc:
            if np.any(sen):
                sen_num += 1
        number_train_sens.append(sen_num)

    for doc in raw_test_x:
        sen_num = 0
        for sen in doc:
            if np.any(sen):
                sen_num += 1
        number_test_sens.append(sen_num)

    log_file = open("./log/%s.log" % exp_name, 'w')

    while epoch <= max_iteration:
        start_time = timeit.default_timer()
        epoch += 1
        costs = []

        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost_epoch = train_func(minibatch_index)
            costs.append(cost_epoch)
            set_zero(zero_vec)

        if epoch % 5 == 0:
            test_preds = []
            for i in xrange(n_test_batches):
                test_y_pred = test_func(i)
                test_preds.append(test_y_pred)
            test_preds = np.concatenate(test_preds)
            test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

            precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

            if test_score > best_score:
                best_score = test_score
                # save the sentence vectors
                train_sens = [get_train_sentvec(i) for i in range(n_train_batches)]
                test_sens = [get_test_sentvec(i) for i in range(n_test_batches)]

                train_sens = np.concatenate(train_sens, axis=0)
                test_sens = np.concatenate(test_sens, axis=0)

                out_train_sent_file = "./results/%s_train_sent.vec" % exp_name
                out_test_sent_file = "./results/%s_test_sent.vec" % exp_name

                with open(out_train_sent_file, 'w') as train_f, open(out_test_sent_file, 'w') as test_f:
                    for i in range(len(train_sens)):
                        tr_doc_vect = train_sens[i][0][:number_train_sens[i]]
                        train_f.write(json.dumps(tr_doc_vect.tolist()) + "\n")

                    for i in range(len(test_sens)):
                        te_doc_vect = test_sens[i][0][:number_test_sens[i]]
                        test_f.write(json.dumps(te_doc_vect.tolist()) + "\n")

                print "Get best performance at iteration %d" % epoch
                log_file.write("Get best performance at iteration %d\n" % epoch)

            end_time = timeit.default_timer()
            print "Iteration %d, precision, recall, support" % epoch, precision, recall, support
            log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f, pos recall %f\n" % (
                epoch, precision[0], precision[1], recall[0], recall[1]))
            print "Using time %f m" % ((end_time - start_time) / 60.)
            log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))

        end_time = timeit.default_timer()
        print "Iteration %d using time %f m" % (epoch, (end_time - start_time) / 60.)
        log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))
        log_file.flush()

    log_file.close()
def run_cnn(exp_name, dataset, embedding, log_fn, perf_fn,
            emb_dm=100, batch_size=100, filter_hs=[1, 2, 3],
            hidden_units=[200, 100, 11], dropout_rate=0.5,
            shuffle_batch=True, n_epochs=300, lr_decay=0.95,
            activation=ReLU, sqr_norm_lim=9, non_static=True,
            print_freq=5):
    """
    Train and evaluate the CNN event encoder model

    :dataset: list containing three elements [(train_x, train_y),
              (valid_x, valid_y), (test_x, test_y)]
    :embedding: word embedding with shape (|V| * emb_dm)
    :filter_hs: filter height for each parallel cnn layer
    :dropout_rate: dropout rate for the fully connected layers
    :n_epochs: the max number of iterations
    """
    start_time = timeit.default_timer()
    rng = np.random.RandomState(1234)

    input_height = len(dataset[0][0][0][0])  # number of words in a sentence
    num_sens = len(dataset[0][0][0])         # number of sentences per document
    print "--input height ", input_height
    input_width = emb_dm
    num_maps = hidden_units[0]

    ###################
    # start snippet 1 #
    ###################
    print "start to construct the model ...."
    x = T.tensor3("x")
    y = T.ivector("y")

    words = shared(value=np.asarray(embedding, dtype=theano.config.floatX),
                   name="embedding", borrow=True)

    # define a function that keeps the padding vector at zero
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(input_width, dtype=theano.config.floatX)
    set_zero = function([zero_vector_tensor],
                        updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    # the input for the sentence level conv layers
    layer0_input = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], emb_dm))

    conv_layers = []

    filter_shape = (num_maps, 1, filter_hs[0], emb_dm)
    pool_size = (input_height - filter_hs[0] + 1, 1)
    conv_layer = nn.ConvPoolLayer(rng, input=layer0_input, input_shape=None,
                                  filter_shape=filter_shape,
                                  pool_size=pool_size, activation=activation)
    sen_vecs = conv_layer.output.reshape((x.shape[0] * x.shape[1], num_maps))
    conv_layers.append(conv_layer)

    # compute the preactivation for each sentence
    layer_sizes = zip(hidden_units, hidden_units[1:])
    full_layer_input = sen_vecs
    dropout_input = sen_vecs
    hidden_outs = []
    drophidden_outs = []
    hidden_layers = []
    dropout_layers = []
    droprate = 0.5
    for lay_size in layer_sizes[:-1]:
        U_value = np.random.random(lay_size).astype(theano.config.floatX)
        b_value = np.zeros((lay_size[1],), dtype=theano.config.floatX)
        U = theano.shared(U_value, borrow=True, name="U")
        b = theano.shared(b_value, borrow=True, name="b")
        hiddenLayer = nn.HiddenLayer(rng, full_layer_input, lay_size[0],
                                     lay_size[1], ReLU, U * (1 - droprate), b)
        dropHiddenLayer = nn.DropoutHiddenLayer(rng, dropout_input, lay_size[0],
                                                lay_size[1], ReLU, droprate, U, b)

        hidden_layers.append(hiddenLayer)
        dropout_layers.append(dropHiddenLayer)

        hidden_out = hiddenLayer.output
        drophidden_out = dropHiddenLayer.output

        hidden_outs.append(hidden_out)
        drophidden_outs.append(drophidden_out)

        full_layer_input = hidden_out
        dropout_input = drophidden_out

    # get the max value for each class
    n_in, n_out = layer_sizes[-1]
    W_value = np.random.random((n_in, n_out)).astype(theano.config.floatX)
    b_value = np.zeros((n_out,), dtype=theano.config.floatX)
    W = theano.shared(W_value, borrow=True, name="logis_W")
    b = theano.shared(b_value, borrow=True, name="logis_b")

    full_act = T.dot(hidden_outs[-1], W * (1 - droprate)) + b
    dropout_act = nn.dropout_from_layer(rng, T.dot(drophidden_outs[-1], W) + b, droprate)

    # compute the sentence probabilities
    sen_full_probs = T.nnet.softmax(full_act)
    sen_dropout_probs = T.nnet.softmax(dropout_act)

    # compute the sentence similarity (RBF kernel over pairwise squared distances)
    sen_sen = T.dot(sen_vecs, sen_vecs.T)
    sen_sqr = T.sum(sen_vecs ** 2, axis=1)
    sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
    sen_sqr_right = sen_sqr.dimshuffle('x', 0)
    sen_smi_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
    sen_smi_matrix = T.exp(-1 * sen_smi_matrix)

    # compute the squared difference between sentence probability distributions
    prob_prob_full = T.dot(sen_full_probs, sen_full_probs.T)
    prob_sqr_full = T.sum(sen_full_probs ** 2, axis=1)
    prob_sqr_left_full = prob_sqr_full.dimshuffle(0, 'x')
    prob_sqr_right_full = prob_sqr_full.dimshuffle('x', 0)
    prob_delta_full = prob_sqr_left_full - 2 * prob_prob_full + prob_sqr_right_full
    sen_cost_full = T.sum(sen_smi_matrix * prob_delta_full)

    prob_prob_drop = T.dot(sen_dropout_probs, sen_dropout_probs.T)
    prob_sqr_drop = T.sum(sen_dropout_probs ** 2, axis=1)
    prob_sqr_left_drop = prob_sqr_drop.dimshuffle(0, 'x')
    prob_sqr_right_drop = prob_sqr_drop.dimshuffle('x', 0)
    prob_delta_drop = prob_sqr_left_drop - 2 * prob_prob_drop + prob_sqr_right_drop
    sen_cost_drop = T.sum(sen_smi_matrix * prob_delta_drop)

    # transform the sentence probabilities to document probabilities by averaging
    doc_full_probs = sen_full_probs.reshape((x.shape[0], x.shape[1], n_out))
    doc_full_probs = T.mean(doc_full_probs, axis=1)
    doc_dropout_probs = sen_dropout_probs.reshape((x.shape[0], x.shape[1], n_out))
    doc_dropout_probs = T.mean(doc_dropout_probs, axis=1)

    doc_full_y_pred = T.argmax(doc_full_probs, axis=1)
    doc_dropout_y_pred = T.argmax(doc_dropout_probs, axis=1)

    full_negative_likelihood = T.sum(-T.log(doc_full_probs)[T.arange(y.shape[0]), y])
    dropout_negative_likelihood = T.sum(-T.log(doc_dropout_probs)[T.arange(y.shape[0]), y])

    full_errors = T.mean(T.neq(doc_full_y_pred, y))

    gamma = 2
    full_cost = full_negative_likelihood + gamma * sen_cost_full
    dropout_cost = dropout_negative_likelihood + gamma * sen_cost_drop

    # collect parameters
    params = []
    for conv_layer in conv_layers:
        params += conv_layer.params

    for dropout_layer in dropout_layers:
        params += dropout_layer.params

    params.append(W)
    params.append(b)

    if non_static:
        params.append(words)

    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)

    #####################
    # Construct Dataset #
    #####################
    print "Copy data to GPU and construct train/valid/test func"
    np.random.seed(1234)

    train_x, train_y = shared_dataset(dataset[0])
    valid_x, valid_y = shared_dataset(dataset[1])
    test_x, test_y = shared_dataset(dataset[2])

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_valid_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[2][0]) / batch_size))

    #####################
    # Train model func  #
    #####################
    index = T.iscalar()
    train_func = function([index], full_cost, updates=grad_updates,
                          givens={
                              x: train_x[index * batch_size:(index + 1) * batch_size],
                              y: train_y[index * batch_size:(index + 1) * batch_size]
                          })

    train_error = function([index], full_errors,
                           givens={
                               x: train_x[index * batch_size:(index + 1) * batch_size],
                               y: train_y[index * batch_size:(index + 1) * batch_size]
                           })

    # note: this function also applies the gradient updates on the validation batches
    valid_train_func = function([index], [full_negative_likelihood, sen_cost_full],
                                updates=grad_updates,
                                givens={
                                    x: valid_x[index * batch_size:(index + 1) * batch_size],
                                    y: valid_y[index * batch_size:(index + 1) * batch_size]
                                })

    test_pred = function([index], doc_full_y_pred,
                         givens={
                             x: test_x[index * batch_size:(index + 1) * batch_size],
                         })

    # apply the early stopping strategy
    patience = 100
    patience_increase = 2
    improvement_threshold = 1.005

    n_valid = len(dataset[1][0])
    n_test = len(dataset[2][0])

    epoch = 0
    best_params = None
    best_validation_score = 0.
    test_perf = 0

    done_loop = False

    log_file = open(log_fn, 'w')

    print "Start to train the model....."
    cpu_trn_y = np.asarray(dataset[0][1])
    cpu_val_y = np.asarray(dataset[1][1])
    cpu_tst_y = np.asarray(dataset[2][1])

    def compute_score(true_list, pred_list):
        mat = np.equal(true_list, pred_list)
        score = np.mean(mat)
        return score

    best_test_score = 0.
    while (epoch < n_epochs) and not done_loop:
        start_time = timeit.default_timer()
        epoch += 1
        costs = []

        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost_epoch = train_func(minibatch_index)
            costs.append(cost_epoch)
            set_zero(zero_vec)

        # do validation
        valid_cost, valid_sen_cost = zip(*[valid_train_func(i)
                                           for i in np.random.permutation(xrange(n_valid_batches))])

        if epoch % print_freq == 0:
            # do test
            test_preds = np.concatenate([test_pred(i) for i in xrange(n_test_batches)])
            test_score = compute_score(cpu_tst_y, test_preds)

            with open(os.path.join(perf_fn, "%s_%d.pred" % (exp_name, epoch)), 'w') as epf:
                for p in test_preds:
                    epf.write("%d\n" % int(p))

            message = "Epoch %d test perf %f train cost %f, valid_sen_cost %f, valid_doc_cost %f" % (
                epoch, test_score, np.mean(costs), np.mean(valid_sen_cost), np.mean(valid_cost))
            print message
            log_file.write(message + "\n")
            log_file.flush()

            """
            # store the best model
            if (test_score > best_test_score) or (epoch % 25 == 0):
                best_test_score = test_score
                # save the model
                model_name = "%s_%d.model" % (exp_name, epoch)
                with open(model_name, 'wb') as bm:
                    for p in params:
                        cPickle.dump(p.get_value(), bm)
            """

        end_time = timeit.default_timer()
        print "Finish one iteration using %f m" % ((end_time - start_time) / 60.)
        log_file.flush()

    log_file.close()
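
# Illustrative sketch of the sentence-similarity penalty used above (assumption:
# this mirrors sen_smi_matrix / prob_delta in plain NumPy, it is not imported by
# run_cnn). Sentences that are close in the convolutional feature space receive a
# large RBF weight, so the penalty pushes their predicted class distributions to agree.
import numpy as np

def sentence_similarity_cost(sen_vecs, sen_probs):
    sqr = np.sum(sen_vecs ** 2, axis=1)
    # pairwise squared Euclidean distances: ||a||^2 - 2*a.b + ||b||^2
    dist = sqr[:, None] - 2 * sen_vecs.dot(sen_vecs.T) + sqr[None, :]
    sim = np.exp(-dist)                                   # RBF similarity weights
    psqr = np.sum(sen_probs ** 2, axis=1)
    # pairwise squared differences between the probability distributions
    prob_delta = psqr[:, None] - 2 * sen_probs.dot(sen_probs.T) + psqr[None, :]
    return np.sum(sim * prob_delta)

vecs = np.random.rand(4, 10)
probs = np.random.rand(4, 3)
probs /= probs.sum(axis=1, keepdims=True)
print sentence_similarity_cost(vecs, probs)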
def run_experiment(self, dataset, word_embedding, exp_name):
    # load parameters
    num_maps_word = self.options["num_maps_word"]
    drop_rate_word = self.options["drop_rate_word"]
    drop_rate_sentence = self.options["drop_rate_sentence"]
    word_window = self.options["word_window"]
    word_dim = self.options["word_dim"]
    k_max_word = self.options["k_max_word"]
    batch_size = self.options["batch_size"]
    rho = self.options["rho"]
    epsilon = self.options["epsilon"]
    norm_lim = self.options["norm_lim"]
    max_iteration = self.options["max_iteration"]
    k = self.options["k_max"]

    sentence_len = len(dataset[0][0][0][0])
    sentence_num = len(dataset[0][0][0])

    # define the symbolic variables
    x = T.tensor3("x")
    y = T.ivector("y")
    rng = np.random.RandomState(1234)

    words = theano.shared(value=np.asarray(word_embedding, dtype=theano.config.floatX),
                          name="embedding", borrow=True)

    # function to keep the padding embedding (index 0) at the zero vector
    zero_vector_tensor = T.vector()
    zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vector_tensor],
                               updates=[(words, T.set_subtensor(words[0, :], zero_vector_tensor))])

    x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))
    dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

    # compute the convolution on the word layer
    word_filter_shape = (num_maps_word, 1, word_window, word_dim)
    word_pool_size = (sentence_len - word_window + 1, 1)
    dropout_word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb, input_shape=None,
                                         filter_shape=word_filter_shape,
                                         pool_size=word_pool_size,
                                         activation=Tanh, k=k_max_word)
    sent_vec_dim = num_maps_word * k_max_word
    dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    word_conv = nn.ConvPoolLayer(rng, input=dropout_x_emb * (1 - drop_rate_word),
                                 input_shape=None, filter_shape=word_filter_shape,
                                 pool_size=word_pool_size, activation=Tanh,
                                 k=k_max_word, W=dropout_word_conv.W, b=dropout_word_conv.b)
    sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

    # score each sentence with theta and keep only the top k sentences per document
    theta_value = np.random.random((sent_vec_dim, 1))
    theta = theano.shared(value=np.asarray(theta_value, dtype=theano.config.floatX),
                          name="theta", borrow=True)

    weighted_drop_sent_vec, weighted_sen_score = keep_max(
        dropout_sent_vec.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim)), theta, k)
    drop_doc_vec = T.sum(weighted_drop_sent_vec, axis=2).flatten(2)

    weighted_sent_vec, sen_score = keep_max(
        sent_vec.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim)), theta, k)
    doc_vec = T.sum(weighted_sent_vec, axis=2).flatten(2)

    # we need to constrain the number of positive sentences in positive bags
    # NOTE: this version is incomplete -- the classifier and the cost terms
    # (sen_W, sen_b, sent_prob, doc_preds, drop_cost, drop_bag_cost,
    # drop_sent_cost, penal_cost) are referenced below but never defined here.

    # collect parameters
    self.params.append(words)
    self.params += dropout_word_conv.params
    self.params.append(sen_W)
    self.params.append(sen_b)

    grad_updates = nn.sgd_updates_adadelta(self.params, drop_cost, rho, epsilon, norm_lim)

    # construct the dataset
    train_x, train_y = nn.shared_dataset(dataset[0])
    test_x, test_y = nn.shared_dataset(dataset[1])
    test_cpu_y = dataset[1][1]

    n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
    n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

    # construct the model
    index = T.iscalar()
    train_func = theano.function([index],
                                 [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost],
                                 updates=grad_updates,
                                 givens={
                                     x: train_x[index * batch_size:(index + 1) * batch_size],
                                     y: train_y[index * batch_size:(index + 1) * batch_size]
                                 })

    test_func = theano.function([index], doc_preds,
                                givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    get_train_sent_prob = theano.function([index], sent_prob,
                                          givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    get_test_sent_prob = theano.function([index], sent_prob,
                                         givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

    epoch = 0
    best_score = 0

    raw_train_x = dataset[0][0]
    raw_test_x = dataset[1][0]
    # get the sentence number for each document
    number_train_sens = []
    number_test_sens = []

    log_file = open("./log/%s.log" % exp_name, 'w')

    while epoch <= max_iteration:
        start_time = timeit.default_timer()
        epoch += 1
        costs = []

        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost_epoch = train_func(minibatch_index)
            costs.append(cost_epoch)
            set_zero(zero_vec)

        total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost = zip(*costs)
        print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (
            epoch, np.mean(total_train_cost), np.mean(train_bag_cost),
            np.mean(train_sent_cost), np.mean(train_penal_cost))

        if epoch % 5 == 0:
            test_preds = []
            for i in xrange(n_test_batches):
                test_y_pred = test_func(i)
                test_preds.append(test_y_pred)
            test_preds = np.concatenate(test_preds)
            test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

            precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

            if test_score > best_score:
                best_score = test_score
                # save the sentence probabilities
                train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                train_sens = np.concatenate(train_sens, axis=0)
                test_sens = np.concatenate(test_sens, axis=0)

                out_train_sent_file = "./results/%s_train_sent.vec" % exp_name
                out_test_sent_file = "./results/%s_test_sent.vec" % exp_name

                with open(out_train_sent_file, 'w') as train_f, open(out_test_sent_file, 'w') as test_f:
                    cPickle.dump(train_sens, train_f)
                    cPickle.dump(test_sens, test_f)
                print "Get best performance at iteration %d: %f" % (epoch, test_score)
                log_file.write("Get best performance at iteration %d: %f\n" % (epoch, test_score))

            end_time = timeit.default_timer()
            print "Iteration %d, precision, recall, support" % epoch, precision, recall, support
            log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f, pos recall %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (
                epoch, precision[0], precision[1], recall[0], recall[1],
                np.mean(total_train_cost), np.mean(train_bag_cost),
                np.mean(train_sent_cost), np.mean(train_penal_cost)))
            print "Using time %f m" % ((end_time - start_time) / 60.)
            log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))

        end_time = timeit.default_timer()
        print "Iteration %d using time %f m" % (epoch, (end_time - start_time) / 60.)
        log_file.write("Using time %f m\n" % ((end_time - start_time) / 60.))
        log_file.flush()

    log_file.close()
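
# Illustrative NumPy sketch of what the keep_max pooling above is assumed to do
# (an assumption -- keep_max itself is defined elsewhere in this repository):
# score each sentence vector with theta, keep the k highest-scoring sentences per
# document, zero out the rest, and sum the kept, score-weighted sentence vectors
# into a document vector.
import numpy as np

def keep_max_sketch(sent_vecs, theta, k):
    """sent_vecs: (num_docs, num_sens, dim); theta: (dim, 1)."""
    scores = sent_vecs.dot(theta)[:, :, 0]                 # (num_docs, num_sens)
    kept = np.zeros_like(sent_vecs)
    for d in range(sent_vecs.shape[0]):
        top = np.argsort(scores[d])[-k:]                   # indices of the k best sentences
        kept[d, top] = sent_vecs[d, top] * scores[d, top, None]
    doc_vecs = kept.sum(axis=1)                            # (num_docs, dim)
    return doc_vecs, scores

vecs = np.random.rand(2, 5, 8)
theta = np.random.rand(8, 1)
doc_vecs, scores = keep_max_sketch(vecs, theta, k=2)
print doc_vecs.shape    # (2, 8)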