        for minibatch_index in xrange(n_train_batches):
            cost_epoch = train_model(minibatch_index)
            print cost_epoch
            set_zero(zero_vec)
        #print test_model()
        f_scores = test_model()
        f_scores = tuple(f_scores)
        print '%.2f,' * len(f_scores) % f_scores  # one value per returned f-score
    layer0_input = Words[T.cast(test_set_x.flatten(), dtype="int32")].\
        reshape((test_set_x.shape[0], test_set_x.shape[1]*Words.shape[1]))
    t_pred = classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(), 'pred.txt')
    write_matrix_to_file(test_set_y, 'real.txt')
    #print Words.get_value()


def shared_dataset(data_xy, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch every
    time it is needed (the default behaviour if the data is not in a
    shared variable) would lead to a large decrease in performance.
    """
    data_x, data_y = data_xy
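    # NOTE: the rest of this function is a sketch assuming the standard
    # Theano-tutorial pattern the docstring above comes from: keep both
    # arrays in floatX shared variables (so they can live in GPU memory)
    # and cast the labels back to int32, since callers use them as a
    # T.ivector target.
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')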
def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[9, 11, 13],
               hidden_units=[100, 100, claz_count],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=10000,
               batch_size=256,
               lr_decay=0.95,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh],  # dropout
               non_static=True,
               proportion=1):
    rng = numpy.random.RandomState(3435)
    sen_length = len(datasets[0][0])-1  # sentence length
    filter_w = word_size                # filter width
    feature_maps = hidden_units[0]
    filter_shapes = []                  # filter: param W
    pool_sizes = []
    for filter_h in window_sizes:       # filter height
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((sen_length-filter_h+1, 1))
    parameters = [("image shape", sen_length, word_size), ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay), ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim), ("shuffle_batch", shuffle_batch)]
    print parameters
    #print wordvec
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,50))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    # reset the padding word (row 0 of Words) to the zero vector after each update
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))])
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].\
        reshape((x.shape[0], 1, x.shape[1], Words.shape[1]))
    #theano.printing.debugprint(layer0_input)
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(window_sizes)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,
                                        image_shape=(batch_size, 1, sen_length, word_size),
                                        filter_shape=filter_shape, poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps*len(window_sizes)
    #print hidden_units
    classifier = MLPDropout(rng, input=layer1_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)

    numpy.random.seed(3435)
    test_set = datasets[1]
    train_set = datasets[0]
    if train_set.shape[0] % batch_size > 0:
        extra_data_num = batch_size - train_set.shape[0] % batch_size
        train_set = numpy.random.permutation(train_set)
        extra_data = train_set[:extra_data_num]  # the extra batch
        # pad the training set so its size is an exact multiple of batch_size
        new_data = numpy.append(train_set, extra_data, axis=0)
    else:
        new_data = train_set
    #train_set = numpy.random.permutation(new_data)
    train_set_x = new_data[:, 1:]
    test_set_x = test_set[:, 1:]
    train_set_x, train_set_y = shared_dataset((train_set_x, new_data[:, 0]))
    test_set_x, test_set_y = shared_dataset((test_set_x, test_set[:, 0]))
    n_batches = new_data.shape[0]/batch_size  # number of batches
    n_train_batches = int(numpy.round(n_batches))
    train_model = theano.function([index], cost, updates=grad_updates,
        givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})
    #theano.printing.debugprint(train_model)
    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].\
        reshape((x.shape[0], 1, x.shape[1], Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(inputs=[x, y], outputs=[test_error, test_y_pred])

    epoch = 0
    max_f1_score = 0.25
    while (epoch < n_epochs):
        epoch += 1
        if shuffle_batch:
            cost = []
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            error, prediction = test_model_all(x=test_set_x.get_value(borrow=True),
                                               y=test_set_y.eval())
            precision, recall, f1_score = evaluate.evaluate_multi_class_seedev(
                prediction=prediction, answer=test_set_y.eval(), claz_count=claz_count)
            #print 'epoch:%d,error:%.3f,micro_f_score:%.2f,macro_f_score:%.2f'%(epoch,error,micro_f_score,macro_f_score)
            print 'epoch:%d,error:%.3f,precision:%.4f, recall:%.4f, f1_score:%.4f'%(epoch, error, precision, recall, f1_score)
            if f1_score > max_f1_score:
                max_f1_score = f1_score
                write_matrix_to_file(prediction, 'pred_entity.txt')
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
            # evaluate on the test set so that `error` is defined for the epoch report
            error, prediction = test_model_all(x=test_set_x.get_value(borrow=True),
                                               y=test_set_y.eval())
            # micro_f_score,macro_f_score=evaluate.simple_evaluate(prediction=prediction,
            #                                                      answer=test_set_y.eval())
            print 'epoch:%d,error:%f'%(epoch, error)
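
# Hypothetical usage sketch for train_conv (not part of the original code).
# It assumes each dataset row is [label, idx_1, ..., idx_n] with indices into
# the rows of `wordvec`, that row 0 of `wordvec` is the padding word kept at
# zero by set_zero, and that the module-level `claz_count`, `evaluate` and
# `write_matrix_to_file` referenced by train_conv are defined elsewhere.
# Random data is used only to illustrate the expected shapes.
def _demo_train_conv():
    vocab_size, word_size, sen_len = 5000, 150, 30
    wordvec = numpy.asarray(numpy.random.uniform(-0.25, 0.25, (vocab_size, word_size)),
                            dtype=theano.config.floatX)
    wordvec[0, :] = 0.  # padding row stays zero

    def random_split(n):
        # random [label, idx_1, ..., idx_n] rows
        labels = numpy.random.randint(0, claz_count, (n, 1))
        indices = numpy.random.randint(1, vocab_size, (n, sen_len))
        return numpy.asarray(numpy.hstack([labels, indices]),
                             dtype=theano.config.floatX)

    datasets = [random_split(1024), random_split(256)]
    train_conv(datasets, wordvec, word_size=word_size,
               window_sizes=[3, 4, 5], batch_size=256, n_epochs=20)
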
def train_nn(datasets,
             wordvec,
             word_size=200,
             hidden_units=[2000, 1000, 2],
             dropout_rate=[0, 0, 0],
             shuffle_batch=True,
             n_epochs=3000,
             batch_size=256,
             init_learning_rate=0.4,
             adadelta=True,
             lr_decay=0.95,
             sqr_norm_lim=9,
             activations=[Tanh, Tanh, Tanh],
             non_static=True,
             use_valid_set=False,
             proportion=1):
    rng = numpy.random.RandomState(3435)
    #print np.shape(wordvec)
    #count = np.shape(wordvec)[0]
    #wordvec=np.random.uniform(-0.25,0.25,(count,5))
    #wordvec=numpy.asarray(wordvec,dtype=theano.config.floatX)
    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    # reset the padding word (row 0 of Words) to the zero vector after each update
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))])
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].\
        reshape((x.shape[0], x.shape[1]*Words.shape[1]))
    #input_printed=theano.printing.Print('layer0_input:')(layer0_input)
    classifier = MLPDropout(rng, input=layer0_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)
    params = classifier.params
    if non_static:
        params.append(Words)
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    if adadelta:
        grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)
    else:
        grad_updates = sgd_updates(params, dropout_cost, init_learning_rate)
    #print params

    numpy.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = numpy.random.permutation(datasets[0])  # shuffle
        extra_data = train_set[:extra_data_num]             # the extra batch
        # pad the training set so its size is an exact multiple of batch_size
        new_data = numpy.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = numpy.random.permutation(new_data)  # train data
    n_batches = new_data.shape[0]/batch_size       # number of batches
    n_train_batches = int(numpy.round(n_batches*proportion))
    if len(datasets) == 3:
        use_valid_set = True
        train_set = new_data
        val_set = datasets[1]
        train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
        val_set_x, val_set_y = shared_dataset((val_set[:, 1:], val_set[:, 0]))
        test_set_x = datasets[2][:, 1:]
        test_set_y = numpy.asarray(datasets[2][:, 0], "int32")
    else:
        test_set_x = datasets[1][:, 1:]
        test_set_y = numpy.asarray(datasets[1][:, 0], "int32")
        if use_valid_set:
            train_set = new_data[:n_train_batches*batch_size, :]
            val_set = new_data[n_train_batches*batch_size:, :]
            train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
            val_set_x, val_set_y = shared_dataset((val_set[:, 1:], val_set[:, 0]))
        else:
            train_set = new_data[:, :]
            train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
            n_batches = new_data.shape[0]/batch_size  # number of batches
            n_train_batches = int(numpy.round(n_batches))
    train_model = theano.function([index], cost, updates=grad_updates,
        givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})
    #theano.printing.debugprint(train_model)
    #f_scores=[classifier.f_score(y,i+1) for i in xrange(hidden_units[-1]-1)]
    f_scores = (classifier.f_score(y, i+1)[0] for i in xrange(1))
    f_scores = tuple(f_scores)
    '''
    fenzi=0
    fenmu=0
    for item in f_scores:
        f_score,precision,recall=item
        fenzi+=(precision*recall)
        fenmu+=(precision+recall)
    '''
    #fenzi_printed=theano.printing.Print('fenzi:')(fenzi)
    #micro_avg_f_score=2.*fenzi / (fenmu+0.000001)
    test_model = theano.function([], f_scores,
        givens={
            x: test_set_x,
            y: test_set_y})
    '''
    valid_model=theano.function([],classifier.errors(y),
        givens={
            x:test_set_x,
            y:test_set_y})
    '''
    #theano.printing.debugprint(test_model)
    #print micro_avg_f_score.owner.inputs
    #test_y_pred = classifier.f_score()
    #test_model_all = theano.function([x,y], test_error)

    epoch = 0
    while (epoch < n_epochs):
        epoch = epoch + 1
        if shuffle_batch:
            cost = []
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)
            print 'epoch:%d, cost value:%f, '%(epoch, numpy.mean(cost)),
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                print cost_epoch
                set_zero(zero_vec)
        #print test_model()
        f_scores = test_model()
        f_scores = tuple(f_scores)
        print '%.2f,'*1%(f_scores)
    layer0_input = Words[T.cast(test_set_x.flatten(), dtype="int32")].\
        reshape((test_set_x.shape[0], test_set_x.shape[1]*Words.shape[1]))
    t_pred = classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(), 'pred.txt')
    write_matrix_to_file(test_set_y, 'real.txt')
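
# Hypothetical usage sketch for train_nn (not part of the original code).
# It assumes each dataset row is [label, idx_1, ..., idx_n] with binary
# labels {0, 1}, and that the first entry of hidden_units is the flattened
# input width sen_len * word_size, so the default hidden_units=[2000,1000,2]
# is matched here with sen_len=10 and word_size=200. Random data is used
# only to illustrate the expected shapes.
def _demo_train_nn():
    vocab_size, word_size, sen_len = 5000, 200, 10
    wordvec = numpy.asarray(numpy.random.uniform(-0.25, 0.25, (vocab_size, word_size)),
                            dtype=theano.config.floatX)
    wordvec[0, :] = 0.  # padding row stays zero

    def random_split(n):
        # random [label, idx_1, ..., idx_n] rows
        labels = numpy.random.randint(0, 2, (n, 1))
        indices = numpy.random.randint(1, vocab_size, (n, sen_len))
        return numpy.asarray(numpy.hstack([labels, indices]),
                             dtype=theano.config.floatX)

    datasets = [random_split(1024), random_split(256)]
    train_nn(datasets, wordvec, word_size=word_size, n_epochs=50)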