def load_train_model(self, params):
    """Load G2P model for continuing training."""
    # Check for saved model.
    if not os.path.exists(os.path.join(self.model_dir, 'checkpoint')):
        raise RuntimeError("Model not found in %s" % self.model_dir)
    # Load model parameters.
    params.num_layers, params.size = data_utils.load_params(self.model_dir)
    # Prepare data and G2P model.
    self.__prepare_model(params)
    # Restore model.
    print("Reading model parameters from %s" % self.model_dir)
    self.model.saver.restore(self.session,
                             os.path.join(self.model_dir, "model"))
def __init__(self, train_file=None, valid_file=None, test_file=None):
    """Create G2P model and initialize or load parameters in session."""
    self.test_file = test_file

    # Preliminary actions before model creation.
    if FLAGS.train:
        # Save model parameters so they can be reloaded later.
        num_layers, size = data_utils.save_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = FLAGS.batch_size
        # Prepare G2P data.
        print("Preparing G2P data")
        train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,\
            self.ph_vocab = data_utils.prepare_g2p_data(FLAGS.model, train_file,
                                                        valid_file)
        # Read data into buckets and compute their sizes.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    else:
        # Load model parameters.
        num_layers, size = data_utils.load_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = 1  # We decode one word at a time.
        # Load vocabularies.
        self.gr_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.grapheme"))
        self.ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"))
        self.rev_ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"), reverse=True)

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, size, num_layers,
        FLAGS.max_gradient_norm, batch_size, FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor, forward_only=not FLAGS.train)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    self.__create_model()
def __init__(self, train_dic=None, valid_dic=None, test_dic=None):
    """Create G2P model and initialize or load parameters in session."""
    self.test_dic = test_dic

    # Preliminary actions before model creation.
    if FLAGS.train:
        # Save model parameters so they can be reloaded later.
        num_layers, size = data_utils.save_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = FLAGS.batch_size
        # Prepare G2P data.
        print("Preparing G2P data")
        train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,\
            self.ph_vocab = data_utils.prepare_g2p_data(FLAGS.model, train_dic,
                                                        valid_dic)
        # Read data into buckets and compute their sizes.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    else:
        # Load model parameters.
        num_layers, size = data_utils.load_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = 1  # We decode one word at a time.
        # Load vocabularies.
        self.gr_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.grapheme"))
        self.ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"))
        self.rev_ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"), reverse=True)

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, size, num_layers,
        FLAGS.max_gradient_norm, batch_size, FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor, forward_only=not FLAGS.train)
    self.__create_model()
def load_decode_model(self):
    """Load G2P model and initialize or load parameters in session."""
    if not os.path.exists(os.path.join(self.model_dir, 'checkpoint')):
        raise RuntimeError("Model not found in %s" % self.model_dir)

    self.batch_size = 1  # We decode one word at a time.
    # Load model parameters.
    num_layers, size = data_utils.load_params(self.model_dir)

    # Load vocabularies.
    print("Loading vocabularies from %s" % self.model_dir)
    self.gr_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.grapheme"))
    self.ph_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.phoneme"))
    self.rev_ph_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.phoneme"), reverse=True)

    self.session = tf.Session()

    # Restore model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, size, num_layers,
        0, self.batch_size, 0, 0, forward_only=True)
    self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    # Check for saved models and restore them.
    print("Reading model parameters from %s" % self.model_dir)
    self.model.saver.restore(self.session,
                             os.path.join(self.model_dir, "model"))
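# --- Usage sketch (not from the original sources above) ---
# The G2P methods above guard on the 'checkpoint' index file that tf.train.Saver
# writes next to the weights, then restore from "<model_dir>/model". Below is a
# minimal, self-contained illustration of that save/guard/restore cycle under
# TF 1.x; the demo variable and /tmp directory are assumptions for the example.
import os
import tensorflow as tf

demo_dir = "/tmp/saver_demo"
v = tf.Variable(42.0, name="v")
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    os.makedirs(demo_dir, exist_ok=True)
    # Writes model.* plus the 'checkpoint' file the snippets test for.
    saver.save(sess, os.path.join(demo_dir, "model"))

if not os.path.exists(os.path.join(demo_dir, "checkpoint")):
    raise RuntimeError("Model not found in %s" % demo_dir)
with tf.Session() as sess:
    saver.restore(sess, os.path.join(demo_dir, "model"))
    print(sess.run(v))  # 42.0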
def __init__(self, task_name='word2vec-word', init=False, expand=True):
    tasks = {
        'word2vec-lemma': {'emb_file': config.word_embed_file, 'word_type': 'lemma'},
        'word2vec-word': {'emb_file': config.word_embed_file, 'word_type': 'word'},
        'fasttext-lemma': {'emb_file': config.fasttext_file, 'word_type': 'lemma'},
        'fasttext-word': {'emb_file': config.fasttext_file, 'word_type': 'word'},
        'paragram-lemma': {'emb_file': config.paragram_file, 'word_type': 'lemma'},
        'paragram-word': {'emb_file': config.paragram_file, 'word_type': 'word'},
    }
    task = tasks[task_name]

    global WORD_TYPE
    WORD_TYPE = task['word_type']

    self.train_file = config.train_exp_file if expand else config.train_file
    self.dev_file = config.dev_file
    self.test_file = config.test_file
    self.word_embed_file = task['emb_file']
    self.word_dim = config.word_dim
    self.max_len = config.max_sent_len
    self.num_class = config.num_class

    self.w2i_file, self.we_file = config.get_w2i_we_file(task_name)
    utils.check_file_exist(self.w2i_file)
    utils.check_file_exist(self.we_file)

    if init:
        word_vocab = self.build_vocab()
        self.word_vocab, self.embed = data_utils.load_word_embedding(
            word_vocab, self.word_embed_file, self.word_dim)
        data_utils.save_params(self.word_vocab, self.w2i_file)
        data_utils.save_params(self.embed, self.we_file)
    else:
        self.word_vocab = data_utils.load_params(self.w2i_file)
        self.embed = data_utils.load_params(self.we_file)

    print("vocab size: %d" % len(self.word_vocab), "we shape: ", self.embed.shape)

    self.train_data = Dataset(self.train_file, self.word_vocab, self.max_len,
                              self.num_class)
    self.dev_data = Dataset(self.dev_file, self.word_vocab, self.max_len,
                            self.num_class)
    if self.test_file:
        self.test_data = Dataset(self.test_file, self.word_vocab, self.max_len,
                                 self.num_class)
def load(self, load_file=None):
    if load_file is None:
        load_file = DataSet.data_pkl
    self.we, self.ce, self.train, self.dev, self.test = data_utils.load_params(
        load_file)
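# --- Hedged sketch (an assumption; data_utils is not shown in these snippets) ---
# Several of these classes round-trip vocabularies, embeddings, or whole tuples
# through data_utils.save_params / load_params with a single path argument (the
# G2P variants above use a different, directory-based signature). A thin pickle
# wrapper like the one below would behave the way those calls are used; the real
# data_utils implementations may differ.
import pickle

def save_params(obj, path):
    with open(path, "wb") as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_params(path):
    with open(path, "rb") as f:
        return pickle.load(f)

# Round-trip a tuple the way DataSet.load above unpacks one:
save_params(({"hi": 0}, None, [1, 2], [3], [4]), "/tmp/data_demo.pkl")
we, ce, train, dev, test = load_params("/tmp/data_demo.pkl")
print(len(train), len(dev), len(test))  # 2 1 1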
annotation += str(ang) + '_'
rotated_set = get_rotated_sets(trX, trY, ang)
the_set = (np.concatenate((the_set[0], rotated_set[0])),
           np.concatenate((the_set[1], rotated_set[1])))
trX, trY = shuffle_in_unison(the_set[0], the_set[1])

print("noMNIST loaded" if noMNIST else "MNIST loaded")

trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

X = T.ftensor4()
Y = T.fmatrix()

if loadparams:
    epoch = epoch_from_filename(paramsfilename)
    w, w2, w3, w4, w_o = load_params(paramsfilename)
else:
    w = init_weights((32, 1, 3, 3))
    w2 = init_weights((64, 32, 3, 3))
    w3 = init_weights((128, 64, 3, 3))
    w4 = init_weights((128 * 3 * 3, 625))
    w_o = init_weights((625, 10))

noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5)
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.)
y_x = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w, w2, w3, w4, w_o]
def __init__(self, init=False, FLAGS=None):
    self.FLAGS = FLAGS
    self.train_file = config.train_file
    self.dev_file = config.dev_new_file  # Test set is added afterwards.
    self.test_file = config.test_file_final

    if FLAGS.embed == "SWM":
        self.word_embed_file = config.word_embed_SWM
    elif FLAGS.embed == "google":
        self.word_embed_file = config.word_embed_google
    elif FLAGS.embed == 'w2v':
        self.word_embed_file = config.word_embed_w2v
    elif FLAGS.embed == 'glove':
        self.word_embed_file = config.word_embed_glove

    self.word_dim = config.word_dim
    self.char_dim = config.char_dim
    self.ner_dim = config.ner_dim
    self.pos_dim = config.pos_dim
    self.max_sent_len = config.max_sent_len
    self.max_word_len = config.max_word_len
    self.num_class = config.num_class
    self.threshold = FLAGS.threshold

    self.we_file = config.we_file
    self.w2i_file = config.w2i_file
    self.c2i_file = config.c2i_file
    self.n2i_file = config.n2i_file
    self.p2i_file = config.p2i_file
    self.rf2i_file = config.rf_file

    self.train_predict_file = None
    self.dev_predict_file = None
    self.test_predict_file = None

    # The char embedding is always initialized from scratch.
    if init:
        self.word_vocab, self.char_vocab, self.ner_vocab, self.pos_vocab = \
            self.build_vocab()
        self.embed = data_utils.load_word_embedding(
            self.word_vocab, self.word_embed_file, self.word_dim)
        data_utils.save_params(self.word_vocab, self.w2i_file)
        data_utils.save_params(self.char_vocab, self.c2i_file)
        data_utils.save_params(self.ner_vocab, self.n2i_file)
        data_utils.save_params(self.pos_vocab, self.p2i_file)
        data_utils.save_params(self.embed, self.we_file)
    else:
        self.embed = data_utils.load_params(self.we_file)
        self.word_vocab = data_utils.load_params(self.w2i_file)
        self.char_vocab = data_utils.load_params(self.c2i_file)
        self.ner_vocab = data_utils.load_params(self.n2i_file)
        self.pos_vocab = data_utils.load_params(self.p2i_file)

    self.embed = self.embed.astype(np.float32)
    self.rf_vocab = data_utils.load_key_value_dict_from_file(self.rf2i_file)

    self.char_embed = np.array(
        np.random.uniform(-0.25, 0.25, (len(self.char_vocab), self.char_dim)),
        dtype=np.float32)
    self.ner_embed = np.array(
        np.random.uniform(-0.25, 0.25, (len(self.ner_vocab), self.ner_dim)),
        dtype=np.float32)
    self.pos_embed = np.array(
        np.random.uniform(-0.25, 0.25, (len(self.pos_vocab), self.pos_dim)),
        dtype=np.float32)

    print("vocab size: %d" % len(self.word_vocab), "we shape: ", self.embed.shape)

    # examples = read_data(self.train_file)
    # np.random.shuffle(examples)
    # examples_train = examples[:int(0.9 * len(examples))]
    # print(examples_train[0][0])
    # examples_dev = examples[int(0.9 * len(examples)):]
    # print(examples_dev[0][0])
    examples_train = read_data(self.train_file)
    examples_dev = read_data(self.dev_file)
    data_utils.cout_distribution(examples_dev)
    examples_test = read_data(self.test_file)

    self.train_data = Dataset(examples_train, 'none', self.word_vocab,
                              self.char_vocab, self.ner_vocab, self.pos_vocab,
                              self.rf_vocab, self.max_sent_len,
                              self.max_word_len, self.num_class)
    self.dev_data = Dataset(examples_dev, 'dev', self.word_vocab,
                            self.char_vocab, self.ner_vocab, self.pos_vocab,
                            self.rf_vocab, self.max_sent_len,
                            self.max_word_len, self.num_class)
    if self.test_file:
        self.test_data = Dataset(examples_test, 'none', self.word_vocab,
                                 self.char_vocab, self.ner_vocab, self.pos_vocab,
                                 self.rf_vocab, self.max_sent_len,
                                 self.max_word_len, self.num_class)
def __init__(self, rng, input, n_in, n_hidden, n_out, randomInit=False,
             loadparams=False, paramsfilename=paramsfilename):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie
    """
    loadedparams = [None] * 4
    if loadparams:
        print("Loading params from " + paramsfilename + "...")
        loadedparams = load_params(paramsfilename)

    # Since we are dealing with a one-hidden-layer MLP, this will translate
    # into a HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function.
    self.hiddenLayer = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        W=loadedparams[0],
        b=loadedparams[1],
        activation=activation_mlp
    )

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer.
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out,
        randomInit=randomInit,
        W=loadedparams[2],
        b=loadedparams[3],
    )
    # end-snippet-2 start-snippet-3
    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small.
    self.L1 = (
        abs(self.hiddenLayer.W).sum()
        + abs(self.logRegressionLayer.W).sum()
    )

    # Square of the L2 norm; one regularization option is to enforce
    # the squared L2 norm to be small.
    self.L2_sqr = (
        (self.hiddenLayer.W ** 2).sum()
        + (self.logRegressionLayer.W ** 2).sum()
    )

    # The negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer.
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood
    )
    # The same holds for the function computing the number of errors.
    self.errors = self.logRegressionLayer.errors

    # The parameters of the model are the parameters of the two layers
    # it is made out of.
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
    # end-snippet-3

    # Keep track of the model input.
    self.input = input
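# --- Illustration (names below are hypothetical, not from the sources above) ---
# The MLP constructor above pre-fills loadedparams with [None] * 4 so that each
# layer falls back to random initialization when no saved weights are passed.
# The same load-or-init pattern in plain NumPy:
import numpy as np

def make_weight(shape, W=None, seed=1234):
    """Return W if provided, otherwise a small random array of the given shape."""
    if W is None:
        rng = np.random.RandomState(seed)
        W = rng.uniform(-0.1, 0.1, size=shape).astype(np.float32)
    return W

loadedparams = [None] * 4                      # as when loadparams is False
W_hidden = make_weight((784, 500), W=loadedparams[0])
b_hidden = make_weight((500,), W=loadedparams[1])
print(W_hidden.shape, b_hidden.shape)          # (784, 500) (500,)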
def evaluate_lenet5(learning_rate=0.1, n_epochs=n_epochs_convmlp,
                    dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500,
                    thislogfilename=logfilename, loadparams=loadparams,
                    paramsfilename=paramsfilename, randomInit=False,
                    testrun=testrun, add_blurs=add_blurs, blur=blur,
                    rot_angles=rotation_angles, annotation=''):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    loadedparams = [None] * 8
    if loadparams:
        print("Loading params from " + paramsfilename + "...")
        loadedparams = load_params(paramsfilename)

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, add_the_blurs=add_blurs, blur=blur,
                         angles=rot_angles)
    if len(rot_angles) > 0:
        annotation += '_angles_'
        for ang in rot_angles:
            annotation += str(ang) + '_'

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
        W=loadedparams[6],
        b=loadedparams[7]
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
        W=loadedparams[4],
        b=loadedparams[5]
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=activation_convmlp,
        W=loadedparams[2],
        b=loadedparams[3]
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10,
                                W=loadedparams[0], b=loadedparams[1])

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    # CCC Commenting out patience for simplicity and transparency's sake
    # patience = 10000            # look at this many examples regardless
    # patience_increase = 2       # wait this much longer when a new best is found
    # improvement_threshold = 0.995  # a relative improvement of this much is
    #                                # considered significant

    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = n_train_batches  # min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    if loadparams:
        epoch = epoch_from_filename(paramsfilename)
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    # CCC if this_validation_loss < best_validation_loss * \
                    #        improvement_threshold:
                    #     patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # CCC if patience <= iter:
            #     done_looping = True
            #     break

        if epoch in saveepochs_convmlp:
            # test it on the test set
            epoch_test_losses = [test_model(i) for i in range(n_test_batches)]
            epoch_test_score = numpy.mean(epoch_test_losses)
            print(('epoch %i, test error of best model %f %%') %
                  (epoch, epoch_test_score * 100.))
            save_model(params, epoch, best_validation_loss, epoch_test_score,
                       '../data/models/best_model_convolutional_mlp_',
                       randomInit, add_blurs, testrun, thislogfilename,
                       endrun=(n_epochs == epoch), annotation=annotation)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
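# --- Hedged sketch: epoch_from_filename is called above but never defined in
# these snippets. A plausible (assumed) implementation recovers the epoch number
# from a saved-model filename so training can resume from the right count; the
# real naming scheme produced by save_model may differ.
import re

def epoch_from_filename(filename):
    match = re.search(r"epoch[_-]?(\d+)", filename)
    return int(match.group(1)) if match else 0

print(epoch_from_filename("best_model_convolutional_mlp_epoch_40.pkl"))  # 40
print(epoch_from_filename("no_epoch_marker.pkl"))                        # 0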