else:
            train_set = new_data[:,:]
            train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))

    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})   
    #theano.printing.debugprint(train_model)
            
    #f_scores=[classifier.f_score(y,i+1) for i in xrange(hidden_units[-1]-1)]
<<<<<<< HEAD
    f_scores =(classifier.f_score(y,i+1)[0] for i in xrange(5))
=======
    f_scores =(classifier.f_score(y,i+1)[0] for i in xrange(22))
>>>>>>> fcaa5859445351e40525e8d9eee56180be4b5d04
    f_scores = tuple(f_scores)
    '''    
    fenzi=0
    fenmu=0    
    for item in f_scores:
        f_score,precision,recall=item
        fenzi+=(precision*recall)
        fenmu+=(precision+recall)
    '''
    #fenzi_printed=theano.printing.Print('fenzi:')(fenzi)
    #micro_avg_f_score=2.*fenzi / (fenmu+0.000001)
    test_model=theano.function([],f_scores,
def train_nn(datasets,
             wordvec,
             word_size=200,
             hidden_units=[2000,1000,2],
             dropout_rate=[0,0,0],
             shuffle_batch=True,
             n_epochs=3000,
             batch_size=256,
             init_learning_rate=0.4,
             adadelta=True,
             lr_decay=0.95,
             sqr_norm_lim=9,
             activations=[Tanh,Tanh,Tanh],
             non_static=True,
             use_valid_set=False,
             proportion=1):
    rng = numpy.random.RandomState(3435)
    #print np.shape(wordvec)
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,5))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words=theano.shared(value=wordvec,name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size,dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], 
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])
    x=T.matrix('x')
    y=T.ivector('y')
    index=T.lscalar('index')
    
    layer0_input= Words[T.cast(x.flatten(),dtype="int32")].\
                  reshape((x.shape[0],x.shape[1]*Words.shape[1]))
    #input_printed=theano.printing.Print('layer0_input:')(layer0_input)
    classifier = MLPDropout(rng, 
                            input=layer0_input, 
                            layer_sizes=hidden_units,
                            activations=activations, 
                            dropout_rates=dropout_rate)
    params = classifier.params
    
    if non_static:
        params.append(Words)
        
    cost = classifier.negative_log_likelihood(y) 
    dropout_cost = classifier.dropout_negative_log_likelihood(y)  
    if adadelta:
        grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                            lr_decay, 1e-6, sqr_norm_lim)
    else:
        grad_updates = sgd_updates(params,dropout_cost,init_learning_rate)
    #print params
    numpy.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = numpy.random.permutation(datasets[0]) ## shuffle  
        extra_data = train_set[:extra_data_num] # the batch
        new_data=numpy.append(datasets[0],extra_data,axis=0) #使得训练集个数正好是batch_size的整数倍
    else:
        new_data = datasets[0]
    
    new_data = numpy.random.permutation(new_data) #train data
    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches*proportion))
    
    if len(datasets)==3:
        use_valid_set=True
        train_set = new_data
        val_set = datasets[1]
        train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))
        val_set_x, val_set_y = shared_dataset((val_set[:,1:],val_set[:,0]))
        test_set_x = datasets[2][:,1:] 
        test_set_y = numpy.asarray(datasets[2][:,0],"int32")
    else:
        test_set_x = datasets[1][:,1:] 
        test_set_y = numpy.asarray(datasets[1][:,0],"int32")
        if use_valid_set:
            train_set = new_data[:n_train_batches*batch_size,:]
            val_set = new_data[n_train_batches*batch_size:,:]     
            train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))
            val_set_x, val_set_y = shared_dataset((val_set[:,1:],val_set[:,0]))
        else:
            train_set = new_data[:,:]
            train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))

    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})   
    #theano.printing.debugprint(train_model)
            
    #f_scores=[classifier.f_score(y,i+1) for i in xrange(hidden_units[-1]-1)]
    f_scores =(classifier.f_score(y,i+1)[0] for i in xrange(1))
    f_scores = tuple(f_scores)
    '''    
    fenzi=0
    fenmu=0    
    for item in f_scores:
        f_score,precision,recall=item
        fenzi+=(precision*recall)
        fenmu+=(precision+recall)
    '''
    #fenzi_printed=theano.printing.Print('fenzi:')(fenzi)
    #micro_avg_f_score=2.*fenzi / (fenmu+0.000001)
    test_model=theano.function([],f_scores,
                               givens={
                               x:test_set_x,
                               y:test_set_y})
    '''
    valid_model=theano.function([],classifier.errors(y),
                                givens={
                                x:test_set_x,
                                y:test_set_y})
    '''
    #theano.printing.debugprint(test_model)
    #print micro_avg_f_score.owner.inputs
    #test_y_pred = classifier.f_score()
    #test_model_all = theano.function([x,y], test_error)   

    epoch=0
    while (epoch < n_epochs):        
        epoch = epoch + 1
        if shuffle_batch:
            cost=[]
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            print 'epoch:%d, cost value:%f, '%(epoch,numpy.mean(cost)),
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                print cost_epoch
                set_zero(zero_vec)
        #print test_model()
        f_scores = test_model()
        f_scores = tuple(f_scores)
        print '%.2f,'*1%(f_scores)
        
    layer0_input= Words[T.cast(test_set_x.flatten(),dtype="int32")].\
                  reshape((test_set_x.shape[0],test_set_x.shape[1]*Words.shape[1]))
    t_pred=classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(),'pred.txt')
    write_matrix_to_file(test_set_y,'real.txt')