Example 1
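The function below uses several names defined elsewhere in its module; the imports here are a hedged reconstruction so the example reads standalone, and the helper names listed in the comment are assumptions about that surrounding module, not part of the original listing.

import csv

import numpy
import theano
import theano.tensor as T
from numpy import asarray

# Assumed to be provided by the surrounding module (not shown here):
# LeNetConvPoolLayer, MLPDropout, Tanh, shared_dataset, sgd_updates_adadelta, evaluate
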
def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[7,8,9],
               hidden_units=[100,200,23],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=200,
               batch_size=128,
               lr_decay=0.99,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh],   # hidden-layer activations for the dropout MLP
               non_static=True,
               proportion=1):
    rng = numpy.random.RandomState(3435)
    
    sen_length = len(datasets[0][0])-1  # sentence length (column 0 holds the label)
    filter_w = word_size   # filter width
    feature_maps = hidden_units[0]
    filter_shapes = [] #filter:param W
    pool_sizes = []
    for filter_h in window_sizes: # filter height
        filter_shapes.append((feature_maps, 1, filter_h,filter_w))
        pool_sizes.append((sen_length-filter_h+1, 1))
    parameters = [("image shape",sen_length,word_size),("filter shape",filter_shapes), ("hidden_units",hidden_units),
                  ("dropout", dropout_rate), ("batch_size",batch_size),("non_static", non_static),
                    ("learn_decay",lr_decay), ("conv_non_linear", conv_non_linear), ("non_static", non_static)
                    ,("sqr_norm_lim",sqr_norm_lim),("shuffle_batch",shuffle_batch)]
    print parameters  

    #print wordvec
    Words = theano.shared(value=wordvec, name='Words')  # shared embedding matrix
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    # keep the padding embedding (row 0 of Words) at zero after every update
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])
    x=T.matrix('x')
    y=T.ivector('y')
    index=T.lscalar('index')
    
    # embedding lookup, reshaped to (batch, 1, sen_length, word_size) for the conv layers
    layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                        reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    #theano.printing.debugprint(layer0_input)
    conv_layers=[]
    layer1_inputs=[]
    for i in xrange(len(window_sizes)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,image_shape=(batch_size, 1, sen_length, word_size),
                                filter_shape=filter_shape, poolsize=pool_size, non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs,1)      # pooled features from all window sizes
    hidden_units[0] = feature_maps*len(window_sizes)   # matching input width for the MLP
    #print hidden_units
    classifier = MLPDropout(rng, 
                            input=layer1_input, 
                            layer_sizes=hidden_units,
                            activations=activations, 
                            dropout_rates=dropout_rate)
    params = classifier.params   
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params+=[Words]

    cost = classifier.negative_log_likelihood(y) 
    dropout_cost = classifier.dropout_negative_log_likelihood(y)           
    grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                        lr_decay, 1e-6, sqr_norm_lim)
    
    numpy.random.seed(3435)

    test_set = datasets[2]
    dev_set = datasets[1]
    train_set = datasets[0]
        
    if train_set.shape[0] % batch_size > 0:
        extra_data_num = batch_size - train_set.shape[0] % batch_size
        train_set = numpy.random.permutation(train_set) ## shuffle  
        extra_data = train_set[:extra_data_num] # the batch
        new_data=numpy.append(train_set,extra_data,axis=0)  # pad so the training set size is an exact multiple of batch_size
    else:
        new_data = train_set
    train_set = numpy.random.permutation(new_data)
    train_set_x = train_set[:,1:]
    dev_set_x = dev_set[:,1:]
    test_set_x = test_set[:,1:]
    train_set_x, train_set_y = shared_dataset((train_set_x,train_set[:,0]))
    dev_set_x, dev_set_y = shared_dataset((dev_set_x, dev_set[:,0]))
    test_set_x, test_set_y = shared_dataset((test_set_x, test_set[:,0]))

    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})   
    #theano.printing.debugprint(train_model)
    
    dev_pred_layers = []
    dev_size = dev_set_x.shape[0].eval()
    dev_layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                              reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    for conv_layer in conv_layers:
        dev_layer0_output = conv_layer.predict(dev_layer0_input, dev_size)
        dev_pred_layers.append(dev_layer0_output.flatten(2))
    dev_layer1_input = T.concatenate(dev_pred_layers, 1)
    dev_y_pred = classifier.predict(dev_layer1_input)
    dev_error = T.mean(T.neq(dev_y_pred, y))
    dev_model_all = theano.function(inputs=[x,y], outputs=[dev_error,dev_y_pred])
    
    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                              reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_model_all = theano.function(inputs=[x], outputs=[test_y_pred]) 

    macro=[]
    micro=[]
    epoch=0
    # li: output - collect the gold dev labels so they can be written out with the per-epoch predictions
    answer=dev_set_y.eval().tolist()
    answer_list=[]
    for line in answer:
        answer_list.append(int(line))
    Li_out = []
    Li_out.append(answer_list)
    # li: output
    test_out = []
    while (epoch < n_epochs):
        epoch+=1  
        predict_list=[]
        if shuffle_batch:
            cost=[]
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            error,prediction=dev_model_all(x=dev_set_x.get_value(borrow=True),y=dev_set_y.eval())
            prediction1=test_model_all(x=test_set_x.get_value(borrow=True))
            for line in prediction1[0]:
                predict_list.append(int(line))            
            
            micro_f_score,macro_f_score=evaluate.simple_evaluate(prediction=prediction,
                                                                 answer=dev_set_y.eval(),out=Li_out)
            print 'epoch:%d,error:%f,micro_f_score:%f,macro_f_score:%f'\
                    %(epoch,error,micro_f_score,macro_f_score)
            macro.append(macro_f_score)
            micro.append(micro_f_score)
            
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
            error,prediction=dev_model_all(x=dev_set_x.get_value(borrow=True),y=dev_set_y.eval())
            prediction1=test_model_all(x=test_set_x.get_value(borrow=True))
            for line in prediction1[0]:
                predict_list.append(int(line))
                    
            micro_f_score,macro_f_score=evaluate.simple_evaluate(prediction=prediction,
                                                                 answer=dev_set_y.eval(),out=Li_out)
            print 'epoch:%d,error:%f,micro_f_score:%f,macro_f_score:%f'\
                    %(epoch,error,micro_f_score,macro_f_score)
            macro.append(macro_f_score)
            micro.append(micro_f_score)
        test_out.append(predict_list)

    print 'max micro value:%f'%(numpy.max(micro))
    print 'max macro value:%f'%(numpy.max(macro))
    
    # li: output - dump dev answers/predictions and test predictions to CSV
    
    csv_writer=csv.writer(open('/home/lihaorui/2016/li-dev.csv','wb'))
    Li_out = asarray(Li_out)
    for i in range(len(Li_out)):
        csv_writer.writerow(Li_out[i])
        
    csv_writer0=csv.writer(open('/home/lihaorui/2016/li-test.csv','wb'))
    test_out = asarray(test_out)
    for i in range(len(test_out)):
        csv_writer0.writerow(test_out[i])
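A minimal, hypothetical driver for Example 1, assuming the datasets are pre-built integer index matrices with the label in column 0 (as the slicing above implies) and that row 0 of wordvec is the padding vector kept at zero by set_zero; the file names and loading calls are placeholders, not part of the original code.

def run_example_1():
    # Assumed layout: each row is [label, w_idx_1, ..., w_idx_L], with all splits
    # padded to the same sentence length; the .npy file names are placeholders.
    train = numpy.load('train_idx.npy')
    dev = numpy.load('dev_idx.npy')
    test = numpy.load('test_idx.npy')
    # Embedding matrix with one row per vocabulary entry; row 0 is the padding row
    # that set_zero() resets after every minibatch.
    wordvec = numpy.asarray(numpy.load('wordvec.npy'), dtype=theano.config.floatX)
    train_conv([train, dev, test], wordvec, word_size=wordvec.shape[1])
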
Example 2
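As in Example 1, the imports below are a hedged reconstruction, and the names in the comment (including the claz_count class-count constant) are assumed to live in the surrounding module rather than in this listing.

import numpy
import theano
import theano.tensor as T

# Assumed module-level helpers/constants (not shown):
# LeNetConvPoolLayer, MLPDropout, Tanh, shared_dataset, sgd_updates_adadelta,
# evaluate, write_matrix_to_file, claz_count
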
def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[9,11,13],
               hidden_units=[100,100,claz_count],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=10000,
               batch_size=256,
               lr_decay=0.95,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh],   # hidden-layer activations for the dropout MLP
               non_static=True,
               proportion=1):
    rng = numpy.random.RandomState(3435)
    
    sen_length = len(datasets[0][0])-1  # sentence length
    filter_w = word_size # filter width
    feature_maps = hidden_units[0]
    filter_shapes = [] #filter:param W
    pool_sizes = []
    for filter_h in window_sizes: # filter height
        filter_shapes.append((feature_maps, 1, filter_h,filter_w))
        pool_sizes.append((sen_length-filter_h+1, 1))
    parameters = [("image shape",sen_length,word_size),("filter shape",filter_shapes), ("hidden_units",hidden_units),
                  ("dropout", dropout_rate), ("batch_size",batch_size),("non_static", non_static),
                    ("learn_decay",lr_decay), ("conv_non_linear", conv_non_linear), ("non_static", non_static)
                    ,("sqr_norm_lim",sqr_norm_lim),("shuffle_batch",shuffle_batch)]
    print parameters  

    #print wordvec
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,50))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words=theano.shared(value=wordvec,name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size,dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], 
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])

    x=T.matrix('x')
    y=T.ivector('y')
    index=T.lscalar('index')
    
    layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                        reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    #theano.printing.debugprint(layer0_input)
    conv_layers=[]
    layer1_inputs=[]
    for i in xrange(len(window_sizes)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,image_shape=(batch_size, 1, sen_length, word_size),
                                filter_shape=filter_shape, poolsize=pool_size, non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs,1)
    hidden_units[0] = feature_maps*len(window_sizes)
    #print hidden_units
    classifier = MLPDropout(rng, 
                            input=layer1_input, 
                            layer_sizes=hidden_units,
                            activations=activations, 
                            dropout_rates=dropout_rate)
    params = classifier.params   
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]

    cost = classifier.negative_log_likelihood(y) 
    dropout_cost = classifier.dropout_negative_log_likelihood(y)           
    grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                        lr_decay, 1e-6, sqr_norm_lim)
    
    numpy.random.seed(3435)

    test_set = datasets[1]
    train_set = datasets[0]
        
    if train_set.shape[0] % batch_size > 0:
        extra_data_num = batch_size - train_set.shape[0] % batch_size
        train_set = numpy.random.permutation(train_set)
        extra_data = train_set[:extra_data_num] # the batch
        new_data=numpy.append(train_set,extra_data,axis=0)  # pad so the training set size is an exact multiple of batch_size
    else:
        new_data = train_set
    #train_set = numpy.random.permutation(new_data)
    train_set_x = new_data[:,1:]
    test_set_x = test_set[:,1:]
    train_set_x, train_set_y = shared_dataset((train_set_x,new_data[:,0]))
    test_set_x, test_set_y = shared_dataset((test_set_x,test_set[:,0]))
    n_batches = new_data.shape[0]/batch_size #batch num
    n_train_batches = int(numpy.round(n_batches))
    
    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})
    #theano.printing.debugprint(train_model)


    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(),dtype="int32")].\
                              reshape((x.shape[0],1,x.shape[1],Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(inputs=[x,y], outputs=[test_error,test_y_pred])   
       
    epoch=0
    max_f1_score = 0.25
    while (epoch < n_epochs):
        epoch+=1        
        if shuffle_batch:
            cost=[]
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            error,prediction=test_model_all(x=test_set_x.get_value(borrow=True),\
                                            y=test_set_y.eval())
            precision,recall,f1_score=evaluate.evaluate_multi_class_seedev(prediction=prediction,
                                                                          answer=test_set_y.eval(),
                                                                          claz_count=claz_count)
            #print 'epoch:%d,error:%.3f,micro_f_score:%.2f,macro_f_score:%.2f'%(epoch,error,micro_f_score,macro_f_score)
            print 'epoch:%d,error:%.3f,precision:%.4f,  recall:%.4f,  f1_score:%.4f'%(epoch,error,precision,recall,f1_score)
            if f1_score > max_f1_score:
                max_f1_score = f1_score
                write_matrix_to_file(prediction,'pred_entity.txt')
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
#            error,prediction=test_model_all(x=test_set_x.get_value(borrow=True),y=test_set_y.eval())
#            micro_f_score,macro_f_score=evaluate.simple_evaluate(prediction=prediction,
#                                                                 answer=test_set_y.eval())
            print 'epoch:%d'%(epoch)  # evaluation is commented out in this branch, so only the epoch is reported
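Both examples call a shared_dataset helper and pass a Tanh activation that are not part of the listings. The sketch below follows the standard Theano idiom that is consistent with how they are used above (shared floatX inputs, int32 labels obtained via T.cast); it is an assumption, and the project's actual helpers may differ.

def Tanh(x):
    # activation handed to MLPDropout in both examples
    return T.tanh(x)

def shared_dataset(data_xy, borrow=True):
    # Keep the data in Theano shared variables so minibatches can be sliced
    # through the `givens` mechanism used when compiling train_model.
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    # y is consumed as T.ivector('y'), so expose an int32 view of the labels
    return shared_x, T.cast(shared_y, 'int32')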