def run_experiment(self, dataset, word_embedding, exp_name):

        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]
        k_portion = self.options["k_portion"]

        sentence_len = len(dataset[0][0][0][0])

        # compute the sentence flags
        train_flags, test_flags = construct_sentence_flag(dataset)
        train_k_value = construct_dynamic_k(train_flags, k_portion)
        test_k_value = construct_dynamic_k(test_flags, k_portion)
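        # construct_sentence_flag / construct_dynamic_k are external helpers not
        # shown in this snippet; the flags presumably mark real (non-padded)
        # sentences, and the k values presumably give each document a per-sentence
        # weight vector covering roughly a k_portion fraction of its sentences,
        # used for the dynamic top-k pooling below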
       
        train_flags = theano.shared(value=np.asarray(train_flags, dtype=theano.config.floatX), borrow=True)
        test_flags = theano.shared(value=np.asarray(test_flags, dtype=theano.config.floatX), borrow=True) 
        train_k = theano.shared(value=np.asarray(train_k_value, dtype=theano.config.floatX), borrow=True)
        test_k = theano.shared(value=np.asarray(test_k_value, dtype=theano.config.floatX), borrow=True)

        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        sen_flags = T.matrix("flag")
        sen_k = T.matrix("sen_k")
        rng = np.random.RandomState(1234)

        words = theano.shared(value=np.asarray(word_embedding,
            dtype=theano.config.floatX),
            name="embedding", borrow=True)
        zero_vector_tensor = T.vector()
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function([zero_vector_tensor], updates=[(words, T.set_subtensor(words[0,:], zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape((x.shape[0]*x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng,
                input=dropout_x_emb,
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word)
        sent_vec_dim = num_maps_word*k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

        word_conv = nn.ConvPoolLayer(rng,
                input=dropout_x_emb*(1 - drop_rate_word),
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word,
                W=dropout_word_conv.W,
                b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

        # construct sentence level classifier
        n_in = sent_vec_dim
        n_out = 1
        sen_W_values = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        sen_W = theano.shared(value=sen_W_values, borrow=True, name="logis_W")
        sen_b_value = nn.as_floatX(0.0)
        sen_b = theano.shared(value=sen_b_value, borrow=True, name="logis_b")

        drop_sent_prob = T.nnet.sigmoid(T.dot(dropout_sent_vec, sen_W) + sen_b)
        sent_prob = T.nnet.sigmoid(T.dot(sent_vec, sen_W*(1-drop_rate_sentence)) + sen_b)

        # reshape the sentence probabilities to the document level
        drop_sent_prob = drop_sent_prob.reshape((x.shape[0], x.shape[1]))
        sent_prob = sent_prob.reshape((x.shape[0], x.shape[1]))

        """
        # the pos probability bag label is the avg of the probs
        drop_doc_prob = T.sum(drop_sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
        doc_prob = T.sum(sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
        """
        # use the dynamic top-k mean probability as the bag-level probability,
        # weighting the sorted sentence probabilities by the per-document k values
        drop_doc_prob = T.sum(T.sort(drop_sent_prob, axis=1) * sen_k, axis=1) / T.sum(sen_k, axis=1)
        doc_prob = T.sum(T.sort(sent_prob, axis=1) * sen_k, axis=1) / T.sum(sen_k, axis=1)
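        # T.sort is ascending, so sen_k is expected to place its weight on the
        # last (largest-probability) positions; the bag probability is then a
        # weighted mean of the top-k sentence probabilities (a standalone NumPy
        # sketch of this pooling follows this function)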
        

        drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))
        doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))

        doc_preds = doc_prob > 0.5

        # instance level cost: a hinge loss on the sentence scores, masked by the
        # sentence flags so that padded sentences do not contribute
        drop_sent_cost = T.sum(T.maximum(0.0, nn.as_floatX(.5) - T.sgn(drop_sent_prob.reshape((x.shape[0]*x.shape[1], n_out)) - nn.as_floatX(0.6)) * T.dot(dropout_sent_vec, sen_W)) * sen_flags.reshape((x.shape[0]*x.shape[1], n_out))) / T.sum(sen_flags)

        # intent: the most positive instance should score high (around 0.7) in
        # positive bags and low (around 0.1) in negative bags; concretely, each
        # positive bag should contain at least two sentences predicted positive
        # and negative bags should contain none

        # count the sentences predicted positive in each bag
        positive_count = T.sum((drop_sent_prob * sen_flags) > 0.5, axis=1)
        pos_cost = T.maximum(nn.as_floatX(0.0), nn.as_floatX(2) - positive_count)
        neg_cost = T.maximum(nn.as_floatX(0.0), positive_count)
        penal_cost = T.mean(pos_cost * y + neg_cost * (nn.as_floatX(1.0) - y))
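        # e.g. a positive bag (y = 1) with no sentence above 0.5 pays a penalty
        # of 2, one with a single positive sentence pays 1, and one with two or
        # more pays nothing; a negative bag pays 1 for every sentence predicted
        # positive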

        # add the sentence similarity constraints
        sen_sen = T.dot(dropout_sent_vec, dropout_sent_vec.T)
        sen_sqr = T.sum(dropout_sent_vec ** 2, axis=1)
        sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
        sen_sqr_right = sen_sqr.dimshuffle('x', 0)
        # pairwise squared distances: ||s_i - s_j||^2 = ||s_i||^2 - 2*s_i.s_j + ||s_j||^2
        sen_sim_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
        sen_sim_matrix = T.exp(-1 * sen_sim_matrix)

        sen_sim_prob = drop_sent_prob.reshape((x.shape[0]*x.shape[1], 1)) - drop_sent_prob.flatten()
        sen_sim_prob = sen_sim_prob ** 2

        sen_sim_flag = T.dot(sen_flags.reshape((x.shape[0]*x.shape[1],1)), sen_flags.reshape((1,x.shape[0]*x.shape[1])))

        sen_sim_cost = T.sum(sen_sim_matrix * sen_sim_prob * sen_sim_flag) / T.sum(sen_sim_flag)
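        # i.e. sen_sim_cost = sum_ij exp(-||s_i - s_j||^2) * (p_i - p_j)^2 * f_i * f_j
        #                     / sum_ij f_i * f_j
        # sentences with similar representations are pushed toward similar
        # probabilities; pairs involving padded sentences (flag 0) are masked out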


        # bag level cost
        drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) * nn.as_floatX(0.6) - (1 - y) * T.log(1 - drop_doc_prob) * nn.as_floatX(0.4))
        #drop_cost = drop_bag_cost * nn.as_floatX(3.0) + drop_sent_cost + nn.as_floatX(2.0) * penal_cost
        drop_cost = drop_bag_cost * nn.as_floatX(0.6) + drop_sent_cost * nn.as_floatX(0.1) + penal_cost * nn.as_floatX(0.5)  + sen_sim_cost * nn.as_floatX(0.0001)
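        # the scalar factors above set the relative weights of the bag
        # cross-entropy, the instance hinge cost, the positive-count penalty and
        # the (lightly weighted) sentence-similarity regulariser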


        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params.append(sen_W)
        self.params.append(sen_b)

        grad_updates = nn.sgd_updates_adadelta(self.params,
                drop_cost,
                rho,
                epsilon,
                norm_lim)

        # construct the shared train / test datasets; minibatch order is shuffled
        # at every epoch in the training loop below
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construct the model
        index = T.iscalar()
        train_func = theano.function([index], [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost, sen_sim_cost], updates=grad_updates,
                givens={
                    x: train_x[index*batch_size:(index+1)*batch_size],
                    y: train_y[index*batch_size:(index+1)*batch_size],
                    sen_flags: train_flags[index*batch_size:(index+1)*batch_size],
                    sen_k: train_k[index*batch_size:(index+1)*batch_size]
                    })

        test_func = theano.function([index], doc_preds,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size],
                    sen_k:test_k[index*batch_size:(index+1)*batch_size]
                    })

        get_train_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:train_x[index*batch_size:(index+1)*batch_size]
                    })

        get_test_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        epoch = 0
        best_score = 0


        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_func(minibatch_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost, train_sim_cost = zip(*costs)
            print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f sim cost %f\n" %  (epoch, np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost), np.mean(train_sim_cost))

            if epoch % 1 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

                if beta[1] > best_score or epoch % 5 == 0:
                    best_score = max(best_score, beta[1])
                    # save the sentence vectors
                    train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                    test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent_%d.vec" % (exp_name, epoch)
                    out_test_sent_file = "./results/%s_test_sent_%d.vec" % (exp_name, epoch)

                    with open(out_test_sent_file, 'w') as test_f, open(out_train_sent_file, 'w') as train_f:
                        cPickle.dump(train_sens, train_f)
                        cPickle.dump(test_sens, test_f)
                    print "Get best performace at %d iteration %f" % (epoch, test_score)
                    log_file.write("Get best performance at %d iteration %f \n" % (epoch, test_score))

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, f1" % epoch, precision, recall, beta
                log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f , neg f1 %f, pos f1 %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (epoch, precision[0], precision[1], recall[0], recall[1], beta[0], beta[1], np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost)))
                print "Using time %f m" % ((end_time -start_time)/60.)
                log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % ( epoch, (end_time -start_time)/60.)
            log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            log_file.flush()

        log_file.close()
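# A minimal, self-contained NumPy sketch (not part of the original snippets) of
# the dynamic top-k bag pooling used above: sentence probabilities are sorted
# ascending and averaged against a weight vector whose mass sits on the last
# (largest) k positions. The simple 0/1 weight construction below is an
# assumption standing in for construct_dynamic_k.
import numpy as np

def dynamic_top_k_prob(sent_probs, k):
    """Mean of the k largest sentence probabilities in one document."""
    sorted_probs = np.sort(sent_probs)      # ascending, matching T.sort
    k_weights = np.zeros_like(sorted_probs)
    k_weights[-k:] = 1.0                    # weight only the top k entries
    return np.sum(sorted_probs * k_weights) / np.sum(k_weights)

print(dynamic_top_k_prob(np.array([0.1, 0.9, 0.4, 0.8]), k=2))  # 0.85 == (0.9 + 0.8) / 2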
    def run_experiment(self, dataset, word_embedding, exp_name):

        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]

        sentence_len = len(dataset[0][0][0][0])

        # compute the sentence flags
        train_flags, test_flags = construct_sentence_flag(dataset)
        train_flags = theano.shared(value=np.asarray(train_flags, dtype=theano.config.floatX), borrow=True)
        test_flags = theano.shared(value=np.asarray(test_flags, dtype=theano.config.floatX), borrow=True)


        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        sen_flags = T.matrix("flag")
        rng = np.random.RandomState(1234)

        words = theano.shared(value=np.asarray(word_embedding,
            dtype=theano.config.floatX),
            name="embedding", borrow=True)
        zero_vector_tensor = T.vector()
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function([zero_vector_tensor], updates=[(words, T.set_subtensor(words[0,:], zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape((x.shape[0]*x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng,
                input=dropout_x_emb,
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word)
        sent_vec_dim = num_maps_word*k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

        word_conv = nn.ConvPoolLayer(rng,
                input=dropout_x_emb*(1 - drop_rate_word),
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word,
                W=dropout_word_conv.W,
                b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

        # construct sentence level classifier
        n_in = sent_vec_dim
        n_out = 1
        sen_W_values = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        sen_W = theano.shared(value=sen_W_values, borrow=True, name="logis_W")
        sen_b_value = nn.as_floatX(0.0)
        sen_b = theano.shared(value=sen_b_value, borrow=True, name="logis_b")

        drop_sent_prob = T.nnet.sigmoid(T.dot(dropout_sent_vec, sen_W) + sen_b)
        sent_prob = T.nnet.sigmoid(T.dot(sent_vec, sen_W*(1-drop_rate_sentence)) + sen_b)

        # reshape the sentence probabilities to the document level
        drop_sent_prob = drop_sent_prob.reshape((x.shape[0], x.shape[1]))
        sent_prob = sent_prob.reshape((x.shape[0], x.shape[1]))
        # the pos probability bag label is the avg of the probs
        drop_doc_prob = T.sum(drop_sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)
        doc_prob = T.sum(sent_prob * sen_flags, axis=1) / T.sum(sen_flags, axis=1)

        drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))
        doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))
        """
        # the pos probability bag label equals to 1 - all negative
        drop_doc_prob = T.prod(drop_sent_prob, axis=1)
        drop_doc_prob = T.set_subtensor(drop_doc_prob[:,1], 1 - drop_doc_prob[:,0])

        doc_prob = T.prod(sent_prob, axis=1)
        doc_prob = T.set_subtensor(doc_prob[:,1], 1 - doc_prob[:,0])

        # the pos probability bag label is the most positive probability
        drop_doc_prob = T.max(drop_sent_prob, axis=1)
        drop_doc_prob = T.clip(drop_doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))
        doc_prob = T.max(sent_prob, axis=1)
        doc_prob = T.clip(doc_prob, nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7 ))
        """

        doc_preds = doc_prob > 0.5

        # instance level cost: a hinge loss on the sentence scores, masked by the
        # sentence flags so that padded sentences do not contribute
        drop_sent_cost = T.sum(T.maximum(0.0, nn.as_floatX(.5) - T.sgn(drop_sent_prob.reshape((x.shape[0]*x.shape[1], n_out)) - nn.as_floatX(0.6)) * T.dot(dropout_sent_vec, sen_W)) * sen_flags.reshape((x.shape[0]*x.shape[1], n_out))) / T.sum(sen_flags)

        # intent: the most positive instance should score high (around 0.7) in
        # positive bags and low (around 0.1) in negative bags; concretely, each
        # positive bag should contain at least two sentences predicted positive
        # and negative bags should contain none

        # count the sentences predicted positive in each bag
        positive_count = T.sum((drop_sent_prob * sen_flags) > 0.5, axis=1)
        pos_cost = T.maximum(nn.as_floatX(0.0), nn.as_floatX(2) - positive_count)
        neg_cost = T.maximum(nn.as_floatX(0.0), positive_count)
        
        """
        most_positive_prob = T.max(drop_sent_prob, axis=1)
        pos_cost = T.maximum(0.0, nn.as_floatX(0.6) - most_positive_prob)
        neg_cost = T.maximum(0.0, most_positive_prob - nn.as_floatX(0.05))
        """
        penal_cost = T.mean(pos_cost * y + neg_cost * (nn.as_floatX(1.0) - y))

        # add the sentence similarity constraints
        sen_sen = T.dot(dropout_sent_vec, dropout_sent_vec.T)
        sen_sqr = T.sum(dropout_sent_vec ** 2, axis=1)
        sen_sqr_left = sen_sqr.dimshuffle(0, 'x')
        sen_sqr_right = sen_sqr.dimshuffle('x', 0)
        # pairwise squared distances: ||s_i - s_j||^2 = ||s_i||^2 - 2*s_i.s_j + ||s_j||^2
        sen_sim_matrix = sen_sqr_left - 2 * sen_sen + sen_sqr_right
        sen_sim_matrix = T.exp(-1 * sen_sim_matrix)

        sen_sim_prob = drop_sent_prob.reshape((x.shape[0]*x.shape[1], 1)) - drop_sent_prob.flatten()
        sen_sim_prob = sen_sim_prob ** 2

        sen_sim_flag = T.dot(sen_flags.reshape((x.shape[0]*x.shape[1],1)), sen_flags.reshape((1,x.shape[0]*x.shape[1])))

        sen_sim_cost = T.sum(sen_sim_matrix * sen_sim_prob * sen_sim_flag) / T.sum(sen_sim_flag)


        # bag level cost
        drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) * nn.as_floatX(0.6) - (1 - y) * T.log(1 - drop_doc_prob) * nn.as_floatX(0.4))
        #drop_cost = drop_bag_cost * nn.as_floatX(3.0) + drop_sent_cost + nn.as_floatX(2.0) * penal_cost
        drop_cost = drop_bag_cost * nn.as_floatX(0.6) + drop_sent_cost * nn.as_floatX(0.1) + penal_cost * nn.as_floatX(0.5)  + sen_sim_cost * nn.as_floatX(0.0001)


        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params.append(sen_W)
        self.params.append(sen_b)

        grad_updates = nn.sgd_updates_adadelta(self.params,
                drop_cost,
                rho,
                epsilon,
                norm_lim)

        # construct the shared train / test datasets; minibatch order is shuffled
        # at every epoch in the training loop below
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construct the model
        index = T.iscalar()
        train_func = theano.function([index], [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost, sen_sim_cost], updates=grad_updates,
                givens={
                    x: train_x[index*batch_size:(index+1)*batch_size],
                    y: train_y[index*batch_size:(index+1)*batch_size],
                    sen_flags: train_flags[index*batch_size:(index+1)*batch_size]
                    })

        test_func = theano.function([index], doc_preds,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size],
                    sen_flags: test_flags[index*batch_size:(index+1)*batch_size]
                    })

        get_train_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:train_x[index*batch_size:(index+1)*batch_size]
                    })

        get_test_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        epoch = 0
        best_score = 0


        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_func(minibatch_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost, train_sim_cost = zip(*costs)
            print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f sim cost %f\n" %  (epoch, np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost), np.mean(train_sim_cost))

            if epoch % 1 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

                if beta[1] > best_score or epoch % 5 == 0:
                    best_score = max(best_score, beta[1])
                    # save the sentence vectors
                    train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                    test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent_%d.vec" % (exp_name, epoch)
                    out_test_sent_file = "./results/%s_test_sent_%d.vec" % (exp_name, epoch)

                    with open(out_test_sent_file, 'w') as test_f, open(out_train_sent_file, 'w') as train_f:
                        cPickle.dump(train_sens, train_f)
                        cPickle.dump(test_sens, test_f)
                    print "Get best performace at %d iteration %f" % (epoch, test_score)
                    log_file.write("Get best performance at %d iteration %f \n" % (epoch, test_score))

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, f1" % epoch, precision, recall, beta
                log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f , neg f1 %f, pos f1 %f, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (epoch, precision[0], precision[1], recall[0], recall[1], beta[0], beta[1], np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost)))
                print "Using time %f m" % ((end_time -start_time)/60.)
                log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % ( epoch, (end_time -start_time)/60.)
            log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            log_file.flush()

        log_file.close()
    def run_experiment(self, dataset, word_embedding, exp_name):
        
        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        num_maps_sentence = self.options["num_maps_sentence"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        sentence_window = self.options["sentence_window"]
        k_max_sentence = self.options["k_max_sentence"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]

        sentence_len = len(dataset[0][0][0][0])
        sentence_num = len(dataset[0][0][0])
        
        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        rng = np.random.RandomState(1234)
        
        words = theano.shared(value=np.asarray(word_embedding,
            dtype=theano.config.floatX),
            name="embedding", borrow=True)
        zero_vector_tensor = T.vector() 
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function([zero_vector_tensor], updates=[(words, T.set_subtensor(words[0,:], zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape((x.shape[0]*x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng, 
                input=dropout_x_emb,
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word)
        sent_vec_dim = num_maps_word*k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim))
        dropout_sent_vec = nn.dropout_from_layer(rng, dropout_sent_vec, drop_rate_sentence)

        word_conv = nn.ConvPoolLayer(rng, 
                input=dropout_x_emb*(1 - drop_rate_word),
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word,
                W=dropout_word_conv.W,
                b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim))

        # construct the convolution layer on sentences
        sent_filter_shape = (num_maps_sentence, 1, sentence_window, sent_vec_dim)
        sent_pool_size = (sentence_num - sentence_window + 1, 1)
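        # the sentence-level conv-pool slides over the per-sentence vectors
        # produced above, stacked as a (batch, 1, num_sentences, sent_vec_dim)
        # "image", and keeps the k_max_sentence strongest responses per feature map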
        dropout_sent_conv = nn.ConvPoolLayer(rng,
                input=dropout_sent_vec,
                input_shape=None,
                filter_shape=sent_filter_shape,
                pool_size=sent_pool_size,
                activation=Tanh,
                k=k_max_sentence)

        sent_conv = nn.ConvPoolLayer(rng,
                input=sent_vec*(1 - drop_rate_sentence),
                input_shape=None,
                filter_shape=sent_filter_shape,
                pool_size=sent_pool_size,
                activation=Tanh,
                k=k_max_sentence,
                W=dropout_sent_conv.W,
                b=dropout_sent_conv.b)
        
        dropout_doc_vec = dropout_sent_conv.output.flatten(2)
        doc_vec = sent_conv.output.flatten(2)
        doc_vec_dim = num_maps_sentence * k_max_sentence

        # construct classifier
        dropout_logistic_layer = nn.LogisticRegressionLayer(
                input=dropout_doc_vec,
                n_in=doc_vec_dim,
                n_out=2)

        logistic_layer = nn.LogisticRegressionLayer(
                input=doc_vec,
                n_in=doc_vec_dim,
                n_out=2,
                W=dropout_logistic_layer.W,
                b=dropout_logistic_layer.b)

        
        dropout_cost = dropout_logistic_layer.negative_log_likelihood(y)
        cost = logistic_layer.negative_log_likelihood(y)

        preds = logistic_layer.y_pred
        errors = logistic_layer.errors(y)

        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params += dropout_sent_conv.params
        self.params += dropout_logistic_layer.params
        
        grad_updates = nn.sgd_updates_adadelta(self.params,
                dropout_cost,
                rho,
                epsilon,
                norm_lim)

        # construct the dataset
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construct the model
        index = T.iscalar()
        train_func = theano.function([index], dropout_cost, updates=grad_updates,
                givens={
                    x: train_x[index*batch_size:(index+1)*batch_size],
                    y: train_y[index*batch_size:(index+1)*batch_size]
                    })

        test_func = theano.function([index], preds,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        get_train_sentvec = theano.function([index], sent_vec,
                givens={
                    x:train_x[index*batch_size:(index+1)*batch_size]
                    })

        get_test_sentvec = theano.function([index], sent_vec,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        epoch = 0
        best_score = 0
        raw_train_x = dataset[0][0]
        raw_test_x = dataset[1][0]
        # get the sentence number for each document
        number_train_sens = []
        number_test_sens = []

        for doc in raw_train_x:
            sen_num = 0
            for sen in doc:
                if np.any(sen):
                    sen_num += 1
            number_train_sens.append(sen_num)
        
        for doc in raw_test_x:
            sen_num = 0
            for sen in doc:
                if np.any(sen):
                    sen_num += 1
            number_test_sens.append(sen_num)
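        # number_train_sens / number_test_sens hold the count of real (non-padded)
        # sentences per document (an all-zero sentence row is padding, hence the
        # np.any test); they are used below to trim the padded rows before the
        # sentence vectors are written out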

        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_func(minibatch_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            if epoch % 5 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

                if test_score > best_score:
                    best_score = test_score
                    # save the sentence vectors
                    train_sens = [get_train_sentvec(i) for i in range(n_train_batches)]
                    test_sens = [get_test_sentvec(i) for i in range(n_test_batches)]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent.vec" % exp_name
                    out_test_sent_file = "./results/%s_test_sent.vec" % exp_name

                    with open(out_train_sent_file, 'w') as train_f, open(out_test_sent_file, 'w') as test_f:
                        for i in range(len(train_sens)):
                            tr_doc_vect = train_sens[i][0][:number_train_sens[i]]
                            train_f.write(json.dumps(tr_doc_vect.tolist()) + "\n")

                        for i in range(len(test_sens)):
                            te_doc_vect = test_sens[i][0][:number_test_sens[i]]
                            test_f.write(json.dumps(te_doc_vect.tolist()) + "\n")
                    print "Get best performace at %d iteration" % epoch
                    log_file.write("Get best performance at %d iteration\n" % epoch)

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, support" % epoch, precision, recall, support
                log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f \n" % (epoch, precision[0], precision[1], recall[0], recall[1]))
                print "Using time %f m" % ((end_time -start_time)/60.)
                log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % ( epoch, (end_time -start_time)/60.)
            log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            log_file.flush()

        log_file.close()
    def run_experiment(self, dataset, word_embedding, exp_name):

        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        num_maps_sentence = self.options["num_maps_sentence"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        sentence_window = self.options["sentence_window"]
        k_max_sentence = self.options["k_max_sentence"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]

        sentence_len = len(dataset[0][0][0][0])
        sentence_num = len(dataset[0][0][0])

        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        rng = np.random.RandomState(1234)

        words = theano.shared(value=np.asarray(word_embedding,
                                               dtype=theano.config.floatX),
                              name="embedding",
                              borrow=True)
        zero_vector_tensor = T.vector()
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function(
            [zero_vector_tensor],
            updates=[(words, T.set_subtensor(words[0, :],
                                             zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape(
            (x.shape[0] * x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng,
                                             input=dropout_x_emb,
                                             input_shape=None,
                                             filter_shape=word_filter_shape,
                                             pool_size=word_pool_size,
                                             activation=Tanh,
                                             k=k_max_word)
        sent_vec_dim = num_maps_word * k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape(
            (x.shape[0], 1, x.shape[1], sent_vec_dim))
        dropout_sent_vec = nn.dropout_from_layer(rng, dropout_sent_vec,
                                                 drop_rate_sentence)

        word_conv = nn.ConvPoolLayer(rng,
                                     input=dropout_x_emb *
                                     (1 - drop_rate_word),
                                     input_shape=None,
                                     filter_shape=word_filter_shape,
                                     pool_size=word_pool_size,
                                     activation=Tanh,
                                     k=k_max_word,
                                     W=dropout_word_conv.W,
                                     b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape(
            (x.shape[0], 1, x.shape[1], sent_vec_dim))

        # construct the convolution layer on sentences
        sent_filter_shape = (num_maps_sentence, 1, sentence_window,
                             sent_vec_dim)
        sent_pool_size = (sentence_num - sentence_window + 1, 1)
        dropout_sent_conv = nn.ConvPoolLayer(rng,
                                             input=dropout_sent_vec,
                                             input_shape=None,
                                             filter_shape=sent_filter_shape,
                                             pool_size=sent_pool_size,
                                             activation=Tanh,
                                             k=k_max_sentence)

        sent_conv = nn.ConvPoolLayer(rng,
                                     input=sent_vec * (1 - drop_rate_sentence),
                                     input_shape=None,
                                     filter_shape=sent_filter_shape,
                                     pool_size=sent_pool_size,
                                     activation=Tanh,
                                     k=k_max_sentence,
                                     W=dropout_sent_conv.W,
                                     b=dropout_sent_conv.b)

        dropout_doc_vec = dropout_sent_conv.output.flatten(2)
        doc_vec = sent_conv.output.flatten(2)
        doc_vec_dim = num_maps_sentence * k_max_sentence

        # construct classifier
        dropout_logistic_layer = nn.LogisticRegressionLayer(
            input=dropout_doc_vec, n_in=doc_vec_dim, n_out=2)

        logistic_layer = nn.LogisticRegressionLayer(input=doc_vec,
                                                    n_in=doc_vec_dim,
                                                    n_out=2,
                                                    W=dropout_logistic_layer.W,
                                                    b=dropout_logistic_layer.b)

        dropout_cost = dropout_logistic_layer.negative_log_likelihood(y)
        cost = logistic_layer.negative_log_likelihood(y)

        preds = logistic_layer.y_pred
        errors = logistic_layer.errors(y)

        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params += dropout_sent_conv.params
        self.params += dropout_logistic_layer.params

        grad_updates = nn.sgd_updates_adadelta(self.params, dropout_cost, rho,
                                               epsilon, norm_lim)

        # construct the dataset
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construct the model
        index = T.iscalar()
        train_func = theano.function(
            [index],
            dropout_cost,
            updates=grad_updates,
            givens={
                x: train_x[index * batch_size:(index + 1) * batch_size],
                y: train_y[index * batch_size:(index + 1) * batch_size]
            })

        test_func = theano.function(
            [index],
            preds,
            givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

        get_train_sentvec = theano.function(
            [index],
            sent_vec,
            givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

        get_test_sentvec = theano.function(
            [index],
            sent_vec,
            givens={x: test_x[index * batch_size:(index + 1) * batch_size]})

        epoch = 0
        best_score = 0
        raw_train_x = dataset[0][0]
        raw_test_x = dataset[1][0]
        # get the sentence number for each document
        number_train_sens = []
        number_test_sens = []

        for doc in raw_train_x:
            sen_num = 0
            for sen in doc:
                if np.any(sen):
                    sen_num += 1
            number_train_sens.append(sen_num)

        for doc in raw_test_x:
            sen_num = 0
            for sen in doc:
                if np.any(sen):
                    sen_num += 1
            number_test_sens.append(sen_num)

        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_func(minibatch_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            if epoch % 5 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(
                    test_cpu_y, test_preds, pos_label=1)

                if test_score > best_score:
                    best_score = test_score
                    # save the sentence vectors
                    train_sens = [
                        get_train_sentvec(i) for i in range(n_train_batches)
                    ]
                    test_sens = [
                        get_test_sentvec(i) for i in range(n_test_batches)
                    ]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent.vec" % exp_name
                    out_test_sent_file = "./results/%s_test_sent.vec" % exp_name

                    with open(out_train_sent_file,
                              'w') as train_f, open(out_test_sent_file,
                                                    'w') as test_f:
                        for i in range(len(train_sens)):
                            tr_doc_vect = train_sens[i][
                                0][:number_train_sens[i]]
                            train_f.write(
                                json.dumps(tr_doc_vect.tolist()) + "\n")

                        for i in range(len(test_sens)):
                            te_doc_vect = test_sens[i][0][:number_test_sens[i]]
                            test_f.write(
                                json.dumps(te_doc_vect.tolist()) + "\n")
                    print "Get best performace at %d iteration" % epoch
                    log_file.write("Get best performance at %d iteration\n" %
                                   epoch)

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, support" % epoch, precision, recall, support
                log_file.write(
                    "Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f \n"
                    %
                    (epoch, precision[0], precision[1], recall[0], recall[1]))
                print "Using time %f m" % ((end_time - start_time) / 60.)
                log_file.write("Uing time %f m\n" %
                               ((end_time - start_time) / 60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % (epoch,
                                                    (end_time - start_time) /
                                                    60.)
            log_file.write("Uing time %f m\n" %
                           ((end_time - start_time) / 60.))
            log_file.flush()

        log_file.close()
    def run_experiment(self, dataset, word_embedding, exp_name):
        
        # load parameters
        num_maps_word = self.options["num_maps_word"]
        drop_rate_word = self.options["drop_rate_word"]
        drop_rate_sentence = self.options["drop_rate_sentence"]
        word_window = self.options["word_window"]
        word_dim = self.options["word_dim"]
        k_max_word = self.options["k_max_word"]
        batch_size = self.options["batch_size"]
        rho = self.options["rho"]
        epsilon = self.options["epsilon"]
        norm_lim = self.options["norm_lim"]
        max_iteration = self.options["max_iteration"]
        k = self.options["k_max"]

        sentence_len = len(dataset[0][0][0][0])
        sentence_num = len(dataset[0][0][0])
        
        # define the parameters
        x = T.tensor3("x")
        y = T.ivector("y")
        rng = np.random.RandomState(1234)
        
        words = theano.shared(value=np.asarray(word_embedding,
            dtype=theano.config.floatX),
            name="embedding", borrow=True)
        zero_vector_tensor = T.vector() 
        zero_vec = np.zeros(word_dim, dtype=theano.config.floatX)
        set_zero = theano.function([zero_vector_tensor], updates=[(words, T.set_subtensor(words[0,:], zero_vector_tensor))])

        x_emb = words[T.cast(x.flatten(), dtype="int32")].reshape((x.shape[0]*x.shape[1], 1, x.shape[2], words.shape[1]))

        dropout_x_emb = nn.dropout_from_layer(rng, x_emb, drop_rate_word)

        # compute convolution on words layer
        word_filter_shape = (num_maps_word, 1, word_window, word_dim)
        word_pool_size = (sentence_len - word_window + 1, 1)
        dropout_word_conv = nn.ConvPoolLayer(rng, 
                input=dropout_x_emb,
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word)
        sent_vec_dim = num_maps_word*k_max_word
        dropout_sent_vec = dropout_word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))

        word_conv = nn.ConvPoolLayer(rng, 
                input=dropout_x_emb*(1 - drop_rate_word),
                input_shape=None,
                filter_shape=word_filter_shape,
                pool_size=word_pool_size,
                activation=Tanh,
                k=k_max_word,
                W=dropout_word_conv.W,
                b=dropout_word_conv.b)
        sent_vec = word_conv.output.reshape((x.shape[0] * x.shape[1], sent_vec_dim))
        
        theta_value = np.random.random((sent_vec_dim,1))
        theta = theano.shared(value=np.asarray(theta_value, dtype=theano.config.floatX), name="theta", borrow=True)
        weighted_drop_sent_vec, weighted_sen_score = keep_max(dropout_sent_vec.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim)), theta, k)
        drop_doc_vec = T.sum(weighted_drop_sent_vec, axis=2).flatten(2)
        
        weighted_sent_vec, sen_score = keep_max(sent_vec.reshape((x.shape[0], 1, x.shape[1], sent_vec_dim)), theta, k)
        doc_vec = T.sum(weighted_sent_vec, axis=2).flatten(2)
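        # keep_max is an external helper not shown in this snippet; it presumably
        # scores each sentence vector against theta and keeps only the k
        # highest-scoring sentences, so the sums above build the document vector
        # from the top-k sentences only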
        # we need to constrain the number of positive sentences in positive bags
        # (and penalise positive sentences in negative bags), as in the earlier
        # flag-based variants.
        #
        # NOTE: the sentence-level classifier and the cost / prediction symbols
        # used below (sen_W, sen_b, sent_prob, doc_preds, drop_bag_cost,
        # drop_sent_cost, penal_cost, drop_cost) are not defined in this snippet.
        # The block below is a hedged reconstruction modelled on the first
        # example so that the rest of the function is runnable; it is an
        # assumption, not the original code.
        n_out = 1
        sen_W = theano.shared(value=np.zeros((sent_vec_dim, n_out), dtype=theano.config.floatX),
                borrow=True, name="logis_W")
        sen_b = theano.shared(value=nn.as_floatX(0.0), borrow=True, name="logis_b")

        # per-sentence probabilities (dropout and rescaled versions)
        drop_sent_prob = T.nnet.sigmoid(T.dot(dropout_sent_vec, sen_W) + sen_b).reshape((x.shape[0], x.shape[1]))
        sent_prob = T.nnet.sigmoid(T.dot(sent_vec, sen_W * (1 - drop_rate_sentence)) + sen_b).reshape((x.shape[0], x.shape[1]))

        # bag-level probabilities from the weighted (top-k) document vectors
        drop_doc_prob = T.clip(T.nnet.sigmoid(T.dot(drop_doc_vec, sen_W) + sen_b).flatten(),
                nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
        doc_prob = T.clip(T.nnet.sigmoid(T.dot(doc_vec, sen_W * (1 - drop_rate_sentence)) + sen_b).flatten(),
                nn.as_floatX(1e-7), nn.as_floatX(1 - 1e-7))
        doc_preds = doc_prob > 0.5

        # bag cross-entropy, instance hinge cost and positive-count penalty,
        # following the structure of the first example
        drop_bag_cost = T.mean(-y * T.log(drop_doc_prob) - (1 - y) * T.log(1 - drop_doc_prob))
        drop_sent_cost = T.mean(T.maximum(0.0, nn.as_floatX(.5)
                - T.sgn(drop_sent_prob - nn.as_floatX(0.6))
                * T.dot(dropout_sent_vec, sen_W).reshape((x.shape[0], x.shape[1]))))
        positive_count = T.sum(drop_sent_prob > 0.5, axis=1)
        penal_cost = T.mean(T.maximum(nn.as_floatX(0.0), nn.as_floatX(2) - positive_count) * y
                + T.maximum(nn.as_floatX(0.0), positive_count) * (nn.as_floatX(1.0) - y))
        drop_cost = drop_bag_cost + drop_sent_cost * nn.as_floatX(0.1) + penal_cost * nn.as_floatX(0.5)
        # collect parameters
        self.params.append(words)
        self.params += dropout_word_conv.params
        self.params.append(sen_W)
        self.params.append(sen_b)
        
        grad_updates = nn.sgd_updates_adadelta(self.params,
                drop_cost,
                rho,
                epsilon,
                norm_lim)

        # construct the dataset
        train_x, train_y = nn.shared_dataset(dataset[0])
        test_x, test_y = nn.shared_dataset(dataset[1])
        test_cpu_y = dataset[1][1]

        n_train_batches = int(np.ceil(1.0 * len(dataset[0][0]) / batch_size))
        n_test_batches = int(np.ceil(1.0 * len(dataset[1][0]) / batch_size))

        # construct the model
        index = T.iscalar()
        train_func = theano.function([index], [drop_cost, drop_bag_cost, drop_sent_cost, penal_cost], updates=grad_updates,
                givens={
                    x: train_x[index*batch_size:(index+1)*batch_size],
                    y: train_y[index*batch_size:(index+1)*batch_size]
                    })

        test_func = theano.function([index], doc_preds,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        get_train_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:train_x[index*batch_size:(index+1)*batch_size]
                    })

        get_test_sent_prob = theano.function([index], sent_prob,
                givens={
                    x:test_x[index*batch_size:(index+1)*batch_size]
                    })

        epoch = 0
        best_score = 0
        raw_train_x = dataset[0][0]
        raw_test_x = dataset[1][0]
        # get the sentence number for each document
        number_train_sens = []
        number_test_sens = []


        log_file = open("./log/%s.log" % exp_name, 'w')

        while epoch <= max_iteration:
            start_time = timeit.default_timer()
            epoch += 1
            costs = []

            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_func(minibatch_index)
                costs.append(cost_epoch)
                set_zero(zero_vec)

            total_train_cost, train_bag_cost, train_sent_cost, train_penal_cost = zip(*costs)
            print "Iteration %d, total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" %  (epoch, np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost))

            if epoch % 5 == 0:
                test_preds = []
                for i in xrange(n_test_batches):
                    test_y_pred = test_func(i)
                    test_preds.append(test_y_pred)
                test_preds = np.concatenate(test_preds)
                test_score = 1 - np.mean(np.not_equal(test_cpu_y, test_preds))

                precision, recall, beta, support = precision_recall_fscore_support(test_cpu_y, test_preds, pos_label=1)

                if test_score > best_score:
                    best_score = test_score
                    # save the sentence vectors
                    train_sens = [get_train_sent_prob(i) for i in range(n_train_batches)]
                    test_sens = [get_test_sent_prob(i) for i in range(n_test_batches)]

                    train_sens = np.concatenate(train_sens, axis=0)
                    test_sens = np.concatenate(test_sens, axis=0)

                    out_train_sent_file = "./results/%s_train_sent.vec" % exp_name
                    out_test_sent_file = "./results/%s_test_sent.vec" % exp_name

                    with open(out_train_sent_file, 'w') as train_f, open(out_test_sent_file, 'w') as test_f:
                        cPickle.dump(train_sens, train_f)
                        cPickle.dump(test_sens, test_f)
                    print "Get best performace at %d iteration %f" % (epoch, test_score)
                    log_file.write("Get best performance at %d iteration %f \n" % (epoch, test_score))

                end_time = timeit.default_timer()
                print "Iteration %d , precision, recall, support" % epoch, precision, recall, support
                log_file.write("Iteration %d, neg precision %f, pos precision %f, neg recall %f pos recall %f , total_cost %f bag_cost %f sent_cost %f penal_cost %f\n" % (epoch, precision[0], precision[1], recall[0], recall[1], np.mean(total_train_cost), np.mean(train_bag_cost), np.mean(train_sent_cost), np.mean(train_penal_cost)))
                print "Using time %f m" % ((end_time -start_time)/60.)
                log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            end_time = timeit.default_timer()
            print "Iteration %d Using time %f m" % ( epoch, (end_time -start_time)/60.)
            log_file.write("Uing time %f m\n" % ((end_time - start_time)/60.))
            log_file.flush()

        log_file.close()