# Third-party imports used by this module. The project-local helpers
# (DocEmbeddingNN, HiddenLayer, LogisticRegression, CorpusReader,
# transToTensor, loadParamsVal, saveParamsVal) are assumed to be defined or
# imported elsewhere in the project.
import numpy
import theano
import theano.tensor as T
from sklearn.metrics import roc_curve, auc, accuracy_score


def work(mode, data_name, test_dataname):
	print "mode: ", mode
	print "data_name: ", data_name
	print "Started!"
	
	data_names = data_name.split(":")
	data_count = len(data_names)
	print "Train dataset:"
	for i in xrange(data_count):
		print "%d: %s" % (i, data_names[i])
		
	print "Test dataset:"
	test_data_names = test_dataname.split(":")
	test_data_count = len(test_data_names)
	for i in xrange(test_data_count):
		print "%d: %s" % (i, test_data_names[i])
	
	if test_data_count != data_count:
		raise Exception("The amount of test and train dataset must be the same.")
	
	rng = numpy.random.RandomState(23455)
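	# Symbolic inputs: per-document sentence counts, per-sentence word counts,
	# the corpus as one matrix of word embeddings, and per-document labels.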
	docSentenceCount = T.ivector("docSentenceCount")
	sentenceWordCount = T.ivector("sentenceWordCount")
	corpus = T.matrix("corpus")
	docLabel = T.ivector('docLabel')
	
	hidden_layer_w = None
	hidden_layer_b = None
	logistic_layer_w = None
	logistic_layer_b = None
	layer0 = list()
	layer1 = list()
	layer2 = list()
	local_params = list()
	# for list-type data
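	# Build one layer stack per dataset. The HiddenLayer and LogisticRegression
	# weights are threaded through the loop (W and b are captured after the
	# first construction), so the top layers are shared across datasets while
	# each dataset keeps its own DocEmbeddingNN parameters.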
	for i in xrange(data_count):
		layer0.append(DocEmbeddingNN(corpus, docSentenceCount, sentenceWordCount, rng, wordEmbeddingDim=200, \
														 sentenceLayerNodesNum=100, \
														 sentenceLayerNodesSize=[5, 200], \
														 docLayerNodesNum=100, \
														 docLayerNodesSize=[3, 100]))

		layer1.append(HiddenLayer(
			rng,
			input=layer0[i].output,
			n_in=layer0[i].outputDimension,
			n_out=100,
			activation=T.tanh,
			W=hidden_layer_w,
			b=hidden_layer_b
		))
		
		hidden_layer_w = layer1[i].W
		hidden_layer_b = layer1[i].b
	
		layer2.append(LogisticRegression(input=layer1[i].output, n_in=100, n_out=2, W=logistic_layer_w, b=logistic_layer_b))
		logistic_layer_w = layer2[i].W
		logistic_layer_b = layer2[i].b
		
		local_params.append(layer2[i].params + layer1[i].params + layer0[i].params)

	# construct the parameter array.
	params = layer2[0].params + layer1[0].params
	
	for i in xrange(data_count):
		params += layer0[i].params
		
	# Optionally load the parameters saved last time.
	
	
	para_path = "data/" + data_name + "/model/scnn.model"
	traintext = ["data/" + data_names[i] + "/train/text"  for i in xrange(data_count)]
	trainlabel = ["data/" + data_names[i] + "/train/label"  for i in xrange(data_count)]
	testtext = ["data/" + test_data_names[i] + "/test/text"  for i in xrange(data_count)]
	testlabel =  ["data/" + test_data_names[i] + "/test/label"  for i in xrange(data_count)]
	
	loadParamsVal(para_path, params)

	if(mode == "train"):
		train_model = list()
		valid_model = list()
		print "Loading train data."
		batchSize = 10
		learning_rate = 0.1
		n_batches = list()
		
		print "Loading test data."
 		
		for i in xrange(data_count):
			cr_train = CorpusReader(minDocSentenceNum=5, minSentenceWordNum=5, dataset=traintext[i], labelset=trainlabel[i])
			docMatrixes, docSentenceNums, sentenceWordNums, ids, labels = cr_train.getCorpus([0, 100000])
			
			docMatrixes = transToTensor(docMatrixes, theano.config.floatX)
			docSentenceNums = transToTensor(docSentenceNums, numpy.int32)
			sentenceWordNums = transToTensor(sentenceWordNums, numpy.int32)
			labels = transToTensor(labels, numpy.int32)
			
			index = T.lscalar("index")
			
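			# docSentenceNums appears to hold cumulative sentence offsets (hence
			# the extra boundary element in the batch slice below), so the
			# document count is len - 1 and this computes ceil(count / batchSize).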
			n_batches.append((len(docSentenceNums.get_value()) - 1 - 1) / batchSize + 1)
			print "Dataname: %s" % data_names[i]
			print "Train set size is ", len(docMatrixes.get_value())
			print "Batch size is ", batchSize
			print "Number of training batches  is ", n_batches[i]
			error = layer2[i].errors(docLabel)
			cost = layer2[i].negative_log_likelihood(docLabel)
			
			grads = T.grad(cost, local_params[i])
		
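			# Plain SGD: step each parameter against its gradient at a fixed rate.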
			updates = [
				(param_i, param_i - learning_rate * grad_i)
				for param_i, grad_i in zip(local_params[i], grads)
			]
			print "Compiling train computing graph."
			
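			# The full corpus lives in a shared variable; each call slices out one
			# minibatch by index. The docSentenceCount slice takes batchSize + 1
			# entries because consecutive offsets bound each document.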
			train_model.append(theano.function(
		 		[index],
		 		[cost, error, layer2[i].y_pred, docLabel],
		 		updates=updates,
		 		givens={
								corpus: docMatrixes,
								docSentenceCount: docSentenceNums[index * batchSize: (index + 1) * batchSize + 1],
								sentenceWordCount: sentenceWordNums,
								docLabel: labels[index * batchSize: (index + 1) * batchSize]
							}
	 		))
			print "Compiled."
			
			print "Load test dataname: %s" % test_data_names[i]
			cr_test = CorpusReader(minDocSentenceNum=5, minSentenceWordNum=5, dataset=testtext[i], labelset=testlabel[i])
			validDocMatrixes, validDocSentenceNums, validSentenceWordNums, validIds, validLabels = cr_test.getCorpus([0, 1000])
			validDocMatrixes = transToTensor(validDocMatrixes, theano.config.floatX)
			validDocSentenceNums = transToTensor(validDocSentenceNums, numpy.int32)
			validSentenceWordNums = transToTensor(validSentenceWordNums, numpy.int32)
			validLabels = transToTensor(validLabels, numpy.int32)
			print "Validating set size is ", len(validDocMatrixes.get_value())
			print "Data loaded."
			
			print "Compiling test computing graph."
			valid_model.append(theano.function(
		 		[],
		 		[cost, error, layer2[i].y_pred, docLabel, T.transpose(layer2[i].p_y_given_x)[1]],
		 		givens={
								corpus: validDocMatrixes,
								docSentenceCount: validDocSentenceNums,
								sentenceWordCount: validSentenceWordNums,
								docLabel: validLabels
						}
		 	))
			print "Compiled."
		# for list-type data

		print "Start to train."
		epoch = 0
		n_epochs = 2000
		ite = 0
		
		# ####Validate the model####
		for dataset_index in xrange(data_count):
			costNum, errorNum, pred_label, real_label, pred_prob = valid_model[dataset_index]()
			print "Valid current model: ", data_names[dataset_index]
			print "Cost: ", costNum
			print "Error: ", errorNum
# 			print "Valid Pred: ", pred_label
# 			print "pred_prob: ", pred_prob
	 		
			fpr, tpr, _ = roc_curve(real_label, pred_prob)
			roc_auc = auc(fpr, tpr)
			print "data_name: ", data_name
			print "test_dataname: ", test_dataname
			print "ROC: ", roc_auc
			
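		# Interleave minibatches from all datasets within each epoch; datasets
		# with fewer batches skip the out-of-range indices.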
		while (epoch < n_epochs):
			epoch = epoch + 1
			#######################
			for i in xrange(max(n_batches)):
				for dataset_index in xrange(data_count):
					if i >= n_batches[dataset_index]:
						continue
					# for list-type data
					costNum, errorNum, pred_label, real_label = train_model[dataset_index](i)
					ite = ite + 1
					# for padding data
					# costNum, errorNum = train_model(docMatrixes, labels)
					# del docMatrixes, docSentenceNums, sentenceWordNums, labels
					# print ".",
					if(ite % 10 == 0):
						print
						print "Dataset name: ", data_names[dataset_index]
						print "@iter: ", ite
						print "Cost: ", costNum
						print "Error: ", errorNum

			# Validate the model
			for dataset_index in xrange(data_count):
				costNum, errorNum, pred_label, real_label, pred_prob = valid_model[dataset_index]()
				print "Valid current model: ", data_names[dataset_index]
				print "Cost: ", costNum
				print "Error: ", errorNum
				# print "Valid Pred: ", pred_label
				# print "pred_prob: ", pred_prob

				fpr, tpr, _ = roc_curve(real_label, pred_prob)
				roc_auc = auc(fpr, tpr)
				print "data_name: ", data_name
				print "test_dataname: ", test_dataname
				print "ROC: ", roc_auc

			# Save the model once per epoch; the parameters are shared across
			# datasets, so one snapshot covers them all.
			print "Saving parameters."
			saveParamsVal(para_path, params)
			print "Saved."
# 	elif(mode == "deploy"):
# 		print "Compiling computing graph."
# 		output_model = theano.function(
# 	 		[corpus, docSentenceCount, sentenceWordCount],
# 	 		[layer2.y_pred]
# 	 	)
# 		print "Compiled."
# 		cr = CorpusReader(minDocSentenceNum=5, minSentenceWordNum=5, dataset="data/train_valid/split")
# 		count = 21000
# 		while(count <= 21000):
# 			docMatrixes, docSentenceNums, sentenceWordNums, ids = cr.getCorpus([count, count + 100])
# 			docMatrixes = numpy.matrix(
# 			            docMatrixes,
# 			            dtype=theano.config.floatX
# 			        )
# 			docSentenceNums = numpy.array(
# 			            docSentenceNums,
# 			            dtype=numpy.int32
# 			        )
# 			sentenceWordNums = numpy.array(
# 			            sentenceWordNums,
# 			            dtype=numpy.int32
# 			        )
# 			print "start to predict."
# 			pred_y = output_model(docMatrixes, docSentenceNums, sentenceWordNums)
# 			print "End predicting."
# 			print "Writing resfile."
# 	# 		print zip(ids, pred_y[0])
# 			f = file("data/test/res/res" + str(count), "w")
# 			f.write(str(zip(ids, pred_y[0])))
# 			f.close()
# 			print "Written." + str(count)
# 			count += 100
		
		
	print "All finished!"
def work(mode, data_name, test_dataname, pooling_mode="average_exc_pad"):
    print "mode: ", mode
    print "data_name: ", data_name
    print "pooling_mode: ", pooling_mode
    print "Started!"

    data_names = data_name.split(":")
    data_count = len(data_names)
    print "Train dataset:"
    for i in xrange(data_count):
        print "%d: %s" % (i, data_names[i])

    print "Test dataset:"
    test_data_names = test_dataname.split(":")
    test_data_count = len(test_data_names)
    for i in xrange(test_data_count):
        print "%d: %s" % (i, test_data_names[i])

    if test_data_count != data_count:
        raise Exception(
            "The number of test and train datasets must be the same.")

    rng = numpy.random.RandomState(23455)
    docSentenceCount = T.ivector("docSentenceCount")
    sentenceWordCount = T.ivector("sentenceWordCount")
    corpus = T.matrix("corpus")
    docLabel = T.ivector('docLabel')

    sentenceW = None
    sentenceB = None
    docW = None
    docB = None

    hidden_layer_w = None
    hidden_layer_b = None
    logistic_layer_w = None
    logistic_layer_b = None
    layer0 = list()
    layer1 = list()
    layer2 = list()
    local_params = list()
    # for list-type data
    for i in xrange(data_count):
        layer0.append(DocEmbeddingNN(corpus, docSentenceCount, sentenceWordCount, rng, wordEmbeddingDim=200, \
                     sentenceLayerNodesNum=50, \
                     sentenceLayerNodesSize=[5, 200], \
                     docLayerNodesNum=10, \
                     docLayerNodesSize=[3, 50],
                     sentenceW=sentenceW,
                     sentenceB=sentenceB,
                     docW=docW,
                     docB=docB,
                     pooling_mode=pooling_mode))

        sentenceW = layer0[i].sentenceW
        sentenceB = layer0[i].sentenceB
        docW = layer0[i].docW
        docB = layer0[i].docB

        layer1.append(
            HiddenLayer(rng,
                        input=layer0[i].output,
                        n_in=layer0[i].outputDimension,
                        n_out=10,
                        activation=T.tanh,
                        W=hidden_layer_w,
                        b=hidden_layer_b))

        hidden_layer_w = layer1[i].W
        hidden_layer_b = layer1[i].b

        layer2.append(
            LogisticRegression(input=layer1[i].output,
                               n_in=10,
                               n_out=2,
                               W=logistic_layer_w,
                               b=logistic_layer_b))
        # 		logistic_layer_w = layer2[i].W
        # 		logistic_layer_b = layer2[i].b

        local_params.append(layer2[i].params)

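    # The embedding and hidden layers are shared across datasets; each dataset
    # trains only its own logistic layer locally.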
    share_params = list(layer0[0].params + layer1[0].params)
    # construct the parameter array.
    params = list(layer0[0].params) + layer1[0].params

    for i in xrange(data_count):
        params += layer2[i].params


    para_path = "data/" + data_name + "/share_hidden_low_model/" + pooling_mode + ".model"
    traintext = [
        "data/" + data_names[i] + "/train/text" for i in xrange(data_count)
    ]
    trainlabel = [
        "data/" + data_names[i] + "/train/label" for i in xrange(data_count)
    ]
    testtext = [
        "data/" + test_data_names[i] + "/test/text" for i in xrange(data_count)
    ]
    testlabel = [
        "data/" + test_data_names[i] + "/test/label"
        for i in xrange(data_count)
    ]

    # Optionally load the parameters saved last time.
    loadParamsVal(para_path, params)

    if (mode == "train" or mode == "test"):
        train_model = list()
        valid_model = list()
        print "Loading train data."
        batchSize = 10
        share_learning_rate = 0.1
        local_learning_rate = 0.1
        n_batches = list()

        print "Loading test data."

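        # Pool predictions across all datasets so overall error and ROC can be
        # reported alongside the per-dataset numbers.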
        all_pred_label = list()
        all_real_label = list()
        all_pred_prob = list()
        for i in xrange(data_count):
            cr_train = CorpusReader(minDocSentenceNum=5,
                                    minSentenceWordNum=5,
                                    dataset=traintext[i],
                                    labelset=trainlabel[i])
            docMatrixes, docSentenceNums, sentenceWordNums, ids, labels, _, _ = cr_train.getCorpus(
                [0, 100000])

            docMatrixes = transToTensor(docMatrixes, theano.config.floatX)
            docSentenceNums = transToTensor(docSentenceNums, numpy.int32)
            sentenceWordNums = transToTensor(sentenceWordNums, numpy.int32)
            labels = transToTensor(labels, numpy.int32)

            index = T.lscalar("index")

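            # docSentenceNums appears to hold cumulative sentence offsets, so
            # the document count is len - 1; this computes ceil(count / batchSize).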
            n_batches.append((len(docSentenceNums.get_value()) - 1 - 1) /
                             batchSize + 1)
            print "Dataname: %s" % data_names[i]
            print "Train set size is ", len(docMatrixes.get_value())
            print "Batch size is ", batchSize
            print "Number of training batches  is ", n_batches[i]
            error = layer2[i].errors(docLabel)
            cost = layer2[i].negative_log_likelihood(docLabel)

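            # The shared low layers and the dataset-local logistic layer are
            # updated with separate learning rates.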
            share_grads = T.grad(cost, share_params)
            share_updates = [
                (param_i, param_i - share_learning_rate * grad_i)
                for param_i, grad_i in zip(share_params, share_grads)
            ]

            grads = T.grad(cost, local_params[i])
            local_updates = [
                (param_i, param_i - local_learning_rate * grad_i)
                for param_i, grad_i in zip(local_params[i], grads)
            ]
            updates = share_updates + local_updates
            print "Compiling train computing graph."
            if mode == "train":
                train_model.append(
                    theano.function(
                        [index], [cost, error, layer2[i].y_pred, docLabel],
                        updates=updates,
                        givens={
                            corpus:
                            docMatrixes,
                            docSentenceCount:
                            docSentenceNums[index *
                                            batchSize:(index + 1) * batchSize +
                                            1],
                            sentenceWordCount:
                            sentenceWordNums,
                            docLabel:
                            labels[index * batchSize:(index + 1) * batchSize]
                        }))
            print "Compiled."

            print "Load test dataname: %s" % test_data_names[i]
            cr_test = CorpusReader(minDocSentenceNum=5,
                                   minSentenceWordNum=5,
                                   dataset=testtext[i],
                                   labelset=testlabel[i])
            validDocMatrixes, validDocSentenceNums, validSentenceWordNums, validIds, validLabels, _, _ = cr_test.getCorpus(
                [0, 1000])
            validDocMatrixes = transToTensor(validDocMatrixes,
                                             theano.config.floatX)
            validDocSentenceNums = transToTensor(validDocSentenceNums,
                                                 numpy.int32)
            validSentenceWordNums = transToTensor(validSentenceWordNums,
                                                  numpy.int32)
            validLabels = transToTensor(validLabels, numpy.int32)
            print "Validating set size is ", len(validDocMatrixes.get_value())
            print "Data loaded."

            print "Compiling test computing graph."
            valid_model.append(
                theano.function(
                    [], [
                        cost, error, layer2[i].y_pred, docLabel,
                        T.transpose(layer2[i].p_y_given_x)[1]
                    ],
                    givens={
                        corpus: validDocMatrixes,
                        docSentenceCount: validDocSentenceNums,
                        sentenceWordCount: validSentenceWordNums,
                        docLabel: validLabels
                    }))
            print "Compiled."
            costNum, errorNum, pred_label, real_label, pred_prob = valid_model[
                i]()

            all_pred_label.extend(pred_label)
            all_real_label.extend(real_label)
            all_pred_prob.extend(pred_prob)

            print "Valid current model :", data_names[i]
            print "Cost: ", costNum
            print "Error: ", errorNum

            fpr, tpr, _ = roc_curve(real_label, pred_prob)
            roc_auc = auc(fpr, tpr)
            print "data_name: ", data_name
            print "ROC: ", roc_auc
            fpr, tpr, threshold = roc_curve(real_label, pred_label)
            if 1 in threshold:
                index_of_one = list(threshold).index(1)
                print "TPR: ", tpr[index_of_one]
                print "FPR: ", fpr[index_of_one]
                print "AR: ", (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
                print "threshold: ", threshold[index_of_one]

        print "Valid current model :", data_names
        errorNum = 1 - accuracy_score(all_real_label, all_pred_label)
        print "Error: ", errorNum

        fpr, tpr, _ = roc_curve(all_real_label, all_pred_prob)
        if mode == "test":
            print "tpr_all: ", tpr
            print "fpr_all: ", fpr
        roc_auc = auc(fpr, tpr)
        print "data_name: ", data_name
        print "ROC: ", roc_auc
        fpr, tpr, threshold = roc_curve(all_real_label, all_pred_label)
        if 1 in threshold:
            index_of_one = list(threshold).index(1)
            print "TPR: ", tpr[index_of_one]
            print "FPR: ", fpr[index_of_one]
            print "AR: ", (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
            print "threshold: ", threshold[index_of_one]

        if mode == "test":
            return

        print "Start to train."
        epoch = 0
        n_epochs = 10
        ite = 0

        while (epoch < n_epochs):
            epoch = epoch + 1
            #######################
            for i in xrange(max(n_batches)):
                for dataset_index in xrange(data_count):
                    if i >= n_batches[dataset_index]:
                        continue
                    # for list-type data
                    costNum, errorNum, pred_label, real_label = train_model[
                        dataset_index](i)
                    ite = ite + 1
                    # for padding data
                    if (ite % 10 == 0):
                        print
                        print "Dataset name: ", data_names[dataset_index]
                        print "@iter: ", ite
                        print "Cost: ", costNum
                        print "Error: ", errorNum

            # Validate the model
            all_pred_label = list()
            all_real_label = list()
            all_pred_prob = list()
            for dataset_index in xrange(data_count):
                costNum, errorNum, pred_label, real_label, pred_prob = valid_model[
                    dataset_index]()

                all_pred_label.extend(pred_label)
                all_real_label.extend(real_label)
                all_pred_prob.extend(pred_prob)

                print "Valid current model :", data_names[dataset_index]
                print "Cost: ", costNum
                print "Error: ", errorNum

                fpr, tpr, _ = roc_curve(real_label, pred_prob)
                roc_auc = auc(fpr, tpr)
                print "data_name: ", data_name
                print "ROC: ", roc_auc

                fpr, tpr, threshold = roc_curve(real_label, pred_label)
                if 1 in threshold:
                    index_of_one = list(threshold).index(1)
                    print "TPR: ", tpr[index_of_one]
                    print "FPR: ", fpr[index_of_one]
                    print "AR: ", (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
                    print "threshold: ", threshold[index_of_one]

            print "Valid current model :", data_names
            errorNum = 1 - accuracy_score(all_real_label, all_pred_label)
            print "Error: ", errorNum

            fpr, tpr, _ = roc_curve(all_real_label, all_pred_prob)
            roc_auc = auc(fpr, tpr)
            print "data_name: ", data_name
            print "ROC: ", roc_auc
            fpr, tpr, threshold = roc_curve(all_real_label, all_pred_label)
            if 1 in threshold:
                index_of_one = list(threshold).index(1)
                print "TPR: ", tpr[index_of_one]
                print "FPR: ", fpr[index_of_one]
                print "AR: ", (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
                print "threshold: ", threshold[index_of_one]
            # Save model
            print "Saving parameters."
            saveParamsVal(para_path, params)
            print "Saved."
def work(mode, data_name, test_dataname, pooling_mode):
    print "mode: ", mode
    print "data_name: ", data_name
    print "Started!"

    data_names = data_name.split(":")
    data_count = len(data_names)
    print "Train dataset:"
    for i in xrange(data_count):
        print "%d: %s" % (i, data_names[i])

    print "Test dataset:"
    test_data_names = test_dataname.split(":")
    test_data_count = len(test_data_names)
    for i in xrange(test_data_count):
        print "%d: %s" % (i, test_data_names[i])

    if test_data_count != data_count:
        raise Exception(
            "The number of test and train datasets must be the same.")

    rng = numpy.random.RandomState(23455)
    docSentenceCount = T.ivector("docSentenceCount")
    sentenceWordCount = T.ivector("sentenceWordCount")
    corpus = T.matrix("corpus")
    docLabel = T.ivector('docLabel')

    hidden_layer_w = None
    hidden_layer_b = None
    logistic_layer_w = None
    logistic_layer_b = None
    layer0 = list()
    layer1 = list()
    layer2 = list()
    local_params = list()
    # for list-type data
    for i in xrange(data_count):
        # Pass pooling_mode through so the function argument takes effect.
        layer0.append(DocEmbeddingNN(corpus, docSentenceCount, sentenceWordCount, rng, wordEmbeddingDim=200, \
                     sentenceLayerNodesNum=100, \
                     sentenceLayerNodesSize=[5, 200], \
                     docLayerNodesNum=100, \
                     docLayerNodesSize=[3, 100], \
                     pooling_mode=pooling_mode))

        layer1.append(
            HiddenLayer(rng,
                        input=layer0[i].output,
                        n_in=layer0[i].outputDimension,
                        n_out=100,
                        activation=T.tanh,
                        W=hidden_layer_w,
                        b=hidden_layer_b))

        hidden_layer_w = layer1[i].W
        hidden_layer_b = layer1[i].b

        layer2.append(
            LogisticRegression(input=layer1[i].output,
                               n_in=100,
                               n_out=2,
                               W=logistic_layer_w,
                               b=logistic_layer_b))
        logistic_layer_w = layer2[i].W
        logistic_layer_b = layer2[i].b

        local_params.append(layer0[i].params)

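    # Sharing here is inverted relative to the previous variant: the hidden and
    # logistic layers are shared, while each DocEmbeddingNN is dataset-local.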
    share_params = layer2[0].params + layer1[0].params
    # construct the parameter array.
    params = layer2[0].params + layer1[0].params

    for i in xrange(data_count):
        params += layer0[i].params



    para_path = "data/" + data_name + "/model/scnn.model"
    traintext = [
        "data/" + data_names[i] + "/train/text" for i in xrange(data_count)
    ]
    trainlabel = [
        "data/" + data_names[i] + "/train/label" for i in xrange(data_count)
    ]
    testtext = [
        "data/" + test_data_names[i] + "/test/text" for i in xrange(data_count)
    ]
    testlabel = [
        "data/" + test_data_names[i] + "/test/label"
        for i in xrange(data_count)
    ]

    # Optionally load the parameters saved last time.
    loadParamsVal(para_path, params)

    if (mode == "train"):
        train_model = list()
        valid_model = list()
        print "Loading train data."
        batchSize = 10
        share_learning_rate = 0.01
        local_learning_rate = 0.1
        n_batches = list()

        for i in xrange(data_count):
            cr_train = CorpusReader(minDocSentenceNum=5,
                                    minSentenceWordNum=5,
                                    dataset=traintext[i],
                                    labelset=trainlabel[i])
            docMatrixes, docSentenceNums, sentenceWordNums, ids, labels = cr_train.getCorpus(
                [0, 100000])

            docMatrixes = transToTensor(docMatrixes, theano.config.floatX)
            docSentenceNums = transToTensor(docSentenceNums, numpy.int32)
            sentenceWordNums = transToTensor(sentenceWordNums, numpy.int32)
            labels = transToTensor(labels, numpy.int32)

            index = T.lscalar("index")

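            # As above, docSentenceNums appears to hold cumulative offsets, so
            # this computes ceil(document_count / batchSize).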
            n_batches.append((len(docSentenceNums.get_value()) - 1 - 1) /
                             batchSize + 1)
            print "Dataname: %s" % data_names[i]
            print "Train set size is ", len(docMatrixes.get_value())
            print "Batch size is ", batchSize
            print "Number of training batches  is ", n_batches[i]
            error = layer2[i].errors(docLabel)
            cost = layer2[i].negative_log_likelihood(docLabel)

            share_grads = T.grad(cost, share_params)
            share_updates = [
                (param_i, param_i - share_learning_rate * grad_i)
                for param_i, grad_i in zip(share_params, share_grads)
            ]

            grads = T.grad(cost, local_params[i])
            local_updates = [
                (param_i, param_i - local_learning_rate * grad_i)
                for param_i, grad_i in zip(local_params[i], grads)
            ]
            updates = share_updates + local_updates
            print "Compiling train computing graph."

            train_model.append(
                theano.function(
                    [index], [cost, error, layer2[i].y_pred, docLabel],
                    updates=updates,
                    givens={
                        corpus:
                        docMatrixes,
                        docSentenceCount:
                        docSentenceNums[index *
                                        batchSize:(index + 1) * batchSize + 1],
                        sentenceWordCount:
                        sentenceWordNums,
                        docLabel:
                        labels[index * batchSize:(index + 1) * batchSize]
                    }))
            print "Compiled."

        print "Start to train."
        epoch = 0
        n_epochs = 10
        ite = 0

        # ####Validate the model####
        for dataset_index in xrange(data_count):
            costNum, errorNum, pred_label, real_label, pred_prob = valid_model[
                dataset_index]()
            print "Valid current model :", data_names[dataset_index]
            print "Cost: ", costNum
            print "Error: ", errorNum

            fpr, tpr, _ = roc_curve(real_label, pred_prob)
            roc_auc = auc(fpr, tpr)
            print "data_name: ", data_name
            print "ROC: ", roc_auc

    print "All finished!"