Example #1
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                print cost_epoch
                set_zero(zero_vec)
        #print test_model()
        f_scores = test_model()
        f_scores = tuple(f_scores)
        print '%.2f,' * len(f_scores) % f_scores
        
    layer0_input= Words[T.cast(test_set_x.flatten(),dtype="int32")].\
                  reshape((test_set_x.shape[0],test_set_x.shape[1]*Words.shape[1]))
    t_pred=classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(),'pred.txt')
    write_matrix_to_file(test_set_y,'real.txt')
    #print Words.get_value()

def shared_dataset(data_xy, borrow=True):

        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        one is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
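        # The original listing is cut off here; the lines below are a sketch of
        # the standard Theano-tutorial completion of shared_dataset (an
        # assumption, not part of the source).
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # Labels are stored as floatX on the GPU and cast back to int32 when used,
        # which matches the int label vector y = T.ivector('y') elsewhere.
        return shared_x, T.cast(shared_y, 'int32')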
Example #2
def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[9,11,13],
               hidden_units=[100,100,claz_count],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=10000,
               batch_size=256,
               lr_decay=0.95,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh], # activations for the MLPDropout layers
               non_static=True,
               proportion=1):
    rng = numpy.random.RandomState(3435)
    
    sen_length = len(datasets[0][0])-1  # sentence length
    filter_w = word_size # filter width
    feature_maps = hidden_units[0]
    filter_shapes = [] # one filter shape (the W parameter) per window size
    pool_sizes = []
    for filter_h in window_sizes: # filter height
        filter_shapes.append((feature_maps, 1, filter_h,filter_w))
        pool_sizes.append((sen_length-filter_h+1, 1))
    parameters = [("image shape",sen_length,word_size),("filter shape",filter_shapes), ("hidden_units",hidden_units),
                  ("dropout", dropout_rate), ("batch_size",batch_size),("non_static", non_static),
                    ("learn_decay",lr_decay), ("conv_non_linear", conv_non_linear), ("non_static", non_static)
                    ,("sqr_norm_lim",sqr_norm_lim),("shuffle_batch",shuffle_batch)]
    print parameters  

    #print wordvec
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,50))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words=theano.shared(value=wordvec,name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size,dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], 
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])
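    # set_zero writes a zero vector into row 0 of the embedding matrix (the
    # padding word); it is called after every gradient step so padding stays zero.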

    x=T.matrix('x')
    y=T.ivector('y')
    index=T.lscalar('index')
    
    layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                        reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    #theano.printing.debugprint(layer0_input)
    conv_layers=[]
    layer1_inputs=[]
    for i in xrange(len(window_sizes)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,image_shape=(batch_size, 1, sen_length, word_size),
                                filter_shape=filter_shape, poolsize=pool_size, non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs,1)
    hidden_units[0] = feature_maps*len(window_sizes)
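    # After concatenating the pooled outputs of all window sizes, the MLP input
    # width is feature_maps * len(window_sizes), so layer_sizes[0] is overwritten.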
    #print hidden_units
    classifier = MLPDropout(rng, 
                            input=layer1_input, 
                            layer_sizes=hidden_units,
                            activations=activations, 
                            dropout_rates=dropout_rate)
    params = classifier.params   
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]

    cost = classifier.negative_log_likelihood(y) 
    dropout_cost = classifier.dropout_negative_log_likelihood(y)           
    grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                        lr_decay, 1e-6, sqr_norm_lim)
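    # Gradients flow through the dropout cost; the plain `cost` above (no dropout)
    # is only what train_model reports per minibatch.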
    
    numpy.random.seed(3435)

    test_set = datasets[1]
    train_set = datasets[0]
        
    if train_set.shape[0] % batch_size > 0:
        extra_data_num = batch_size - train_set.shape[0] % batch_size
        train_set = numpy.random.permutation(train_set)
        extra_data = train_set[:extra_data_num] # examples reused as padding
        new_data=numpy.append(train_set,extra_data,axis=0) # pad so the training set size is an exact multiple of batch_size
    else:
        new_data = train_set
    #train_set = numpy.random.permutation(new_data)
    train_set_x = new_data[:,1:]
    test_set_x = test_set[:,1:]
    train_set_x, train_set_y = shared_dataset((train_set_x,new_data[:,0]))
    test_set_x, test_set_y = shared_dataset((test_set_x,test_set[:,0]))
    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))
    
    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})
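    # givens substitutes the index-th minibatch slice of the shared training data
    # for x and y, so each call to train_model only passes an integer into the graph.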
    #theano.printing.debugprint(train_model)


    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                              reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(inputs=[x,y], outputs=[test_error,test_y_pred])   
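    # The test graph is rebuilt for the full test set as a single batch of size
    # test_size, so test_model_all takes x and y directly instead of a batch index.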
       
    epoch=0
    max_f1_score = 0.25
    while (epoch < n_epochs):
        epoch+=1        
        if shuffle_batch:
            cost=[]
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            error,prediction=test_model_all(x=test_set_x.get_value(borrow=True),\
                                            y=test_set_y.eval())
            precision,recall,f1_score=evaluate.evaluate_multi_class_seedev(prediction=prediction,
                                                                          answer=test_set_y.eval(),
                                                                          claz_count=claz_count)
            #print 'epoch:%d,error:%.3f,micro_f_score:%.2f,macro_f_score:%.2f'%(epoch,error,micro_f_score,macro_f_score)
            print 'epoch:%d,error:%.3f,precision:%.4f,  recall:%.4f,  f1_score:%.4f'%(epoch,error,precision,recall,f1_score)
            if f1_score > max_f1_score:
                max_f1_score = f1_score
                write_matrix_to_file(prediction,'pred_entity.txt')
        else:
            cost=[]
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
#            error,prediction=test_model_all(x=test_set_x.get_value(borrow=True),y=test_set_y.eval())
#            micro_f_score,macro_f_score=evaluate.simple_evaluate(prediction=prediction,
#                                                                 answer=test_set_y.eval())
            print 'epoch:%d, cost value:%f'%(epoch,numpy.mean(cost))
def train_nn(datasets,
             wordvec,
             word_size=200,
             hidden_units=[2000,1000,2],
             dropout_rate=[0,0,0],
             shuffle_batch=True,
             n_epochs=3000,
             batch_size=256,
             init_learning_rate=0.4,
             adadelta=True,
             lr_decay=0.95,
             sqr_norm_lim=9,
             activations=[Tanh,Tanh,Tanh],
             non_static=True,
             use_valid_set=False,
             proportion=1):
    rng = numpy.random.RandomState(3435)
    #print np.shape(wordvec)
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,5))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words=theano.shared(value=wordvec,name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size,dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], 
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])
    x=T.matrix('x')
    y=T.ivector('y')
    index=T.lscalar('index')
    
    layer0_input= Words[T.cast(x.flatten(),dtype="int32")].\
                  reshape((x.shape[0],x.shape[1]*Words.shape[1]))
    #input_printed=theano.printing.Print('layer0_input:')(layer0_input)
    classifier = MLPDropout(rng, 
                            input=layer0_input, 
                            layer_sizes=hidden_units,
                            activations=activations, 
                            dropout_rates=dropout_rate)
    params = classifier.params
    
    if non_static:
        params.append(Words)
        
    cost = classifier.negative_log_likelihood(y) 
    dropout_cost = classifier.dropout_negative_log_likelihood(y)  
    if adadelta:
        grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                            lr_decay, 1e-6, sqr_norm_lim)
    else:
        grad_updates = sgd_updates(params,dropout_cost,init_learning_rate)
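    # Assumption based on the call signatures above: sgd_updates_adadelta uses
    # lr_decay as the Adadelta rho (with epsilon 1e-6) and sqr_norm_lim for
    # max-norm rescaling of the weights, while sgd_updates is plain SGD driven
    # by init_learning_rate; both differentiate the dropout cost.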
    #print params
    numpy.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = numpy.random.permutation(datasets[0]) ## shuffle  
        extra_data = train_set[:extra_data_num] # examples reused as padding
        new_data=numpy.append(datasets[0],extra_data,axis=0) # pad so the training set size is an exact multiple of batch_size
    else:
        new_data = datasets[0]
    
    new_data = numpy.random.permutation(new_data) #train data
    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches*proportion))
    
    if len(datasets)==3:
        use_valid_set=True
        train_set = new_data
        val_set = datasets[1]
        train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))
        val_set_x, val_set_y = shared_dataset((val_set[:,1:],val_set[:,0]))
        test_set_x = datasets[2][:,1:] 
        test_set_y = numpy.asarray(datasets[2][:,0],"int32")
    else:
        test_set_x = datasets[1][:,1:] 
        test_set_y = numpy.asarray(datasets[1][:,0],"int32")
        if use_valid_set:
            train_set = new_data[:n_train_batches*batch_size,:]
            val_set = new_data[n_train_batches*batch_size:,:]     
            train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))
            val_set_x, val_set_y = shared_dataset((val_set[:,1:],val_set[:,0]))
        else:
            train_set = new_data[:,:]
            train_set_x, train_set_y = shared_dataset((train_set[:,1:],train_set[:,0]))

    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})   
    #theano.printing.debugprint(train_model)
            
    #f_scores=[classifier.f_score(y,i+1) for i in xrange(hidden_units[-1]-1)]
    f_scores =(classifier.f_score(y,i+1)[0] for i in xrange(1))
    f_scores = tuple(f_scores)
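    # Only the F-score of class 1 is compiled into test_model here (i ranges over
    # xrange(1)); widening the range would report per-class scores for more labels.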
    '''    
    fenzi=0
    fenmu=0    
    for item in f_scores:
        f_score,precision,recall=item
        fenzi+=(precision*recall)
        fenmu+=(precision+recall)
    '''
    #fenzi_printed=theano.printing.Print('fenzi:')(fenzi)
    #micro_avg_f_score=2.*fenzi / (fenmu+0.000001)
    test_model=theano.function([],f_scores,
                               givens={
                               x:test_set_x,
                               y:test_set_y})
    '''
    valid_model=theano.function([],classifier.errors(y),
                                givens={
                                x:test_set_x,
                                y:test_set_y})
    '''
    #theano.printing.debugprint(test_model)
    #print micro_avg_f_score.owner.inputs
    #test_y_pred = classifier.f_score()
    #test_model_all = theano.function([x,y], test_error)   

    epoch=0
    while (epoch < n_epochs):        
        epoch = epoch + 1
        if shuffle_batch:
            cost=[]
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            print 'epoch:%d, cost value:%f, '%(epoch,numpy.mean(cost)),
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                print cost_epoch
                set_zero(zero_vec)
        #print test_model()
        f_scores = test_model()
        f_scores = tuple(f_scores)
        print '%.2f,' * len(f_scores) % f_scores
        
    layer0_input= Words[T.cast(test_set_x.flatten(),dtype="int32")].\
                  reshape((test_set_x.shape[0],test_set_x.shape[1]*Words.shape[1]))
    t_pred=classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(),'pred.txt')
    write_matrix_to_file(test_set_y,'real.txt')
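
# Usage sketch (not part of the original listing): train_nn expects `datasets`
# as a list of integer matrices whose first column is the class label and whose
# remaining columns are word indices into `wordvec`, a float32 embedding matrix
# with the padding word in row 0. `load_corpus` is a hypothetical loader named
# here only to illustrate the expected shapes.
if __name__ == '__main__':
    train_data, test_data, wordvec = load_corpus('corpus.pkl')  # hypothetical helper
    n_words = train_data.shape[1] - 1   # word indices per example
    word_size = wordvec.shape[1]        # embedding dimensionality
    train_nn(datasets=[train_data, test_data],
             wordvec=wordvec,
             word_size=word_size,
             # layer_sizes[0] must equal the flattened input width n_words*word_size
             hidden_units=[n_words * word_size, 1000, 2])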