def inputs(tfrecords_file):
    ''' Create inputs (center crop + per-image whitening, no augmentation). '''
    print tfrecords_file
    filename_queue = tf.train.string_input_producer([tfrecords_file])
    # note: an epoch count specified here becomes a (trainable) variable, so be careful
    read_input = load.read(filename_queue)
    reshaped_image = tf.cast(read_input.image, tf.float32)
    height = CROP_SIZE
    width = CROP_SIZE

    # crop (or pad) to the target size
    resized_image = tf.image.resize_image_with_crop_or_pad(
        reshaped_image, height, width)

    # per-image whitening (zero mean, unit variance)
    float_image = tf.image.per_image_whitening(resized_image)

    min_fraction_of_examples_in_queue = 0.4
    #min_fraction_of_examples_in_queue = 1
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('filling queue with %d train images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples)
def distorted_inputs(tfrecords_file):
    ''' Create inputs with real-time augmentation. '''
    print tfrecords_file
    filename_queue = tf.train.string_input_producer([tfrecords_file])
    # note: an epoch count specified here becomes a (trainable) variable, so be careful
    read_input = load.read(filename_queue)
    reshaped_image = tf.cast(read_input.image, tf.float32)
    height = CROP_SIZE
    width = CROP_SIZE

    # random crop; the op's location and argument shape differ across
    # TF 0.x versions, hence the version check
    if tf.__version__[2] == '7':
        distorted_image = tf.random_crop(reshaped_image,
                                         [height, width, IMAGE_DEPTH])
    else:
        distorted_image = tf.image.random_crop(reshaped_image, [height, width])

    # random flip: enable this for objects with left-right symmetry
    #distorted_image = tf.image.random_flip_left_right(distorted_image)

    # random brightness / contrast
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2, upper=1.8)

    # whitening
    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    #min_fraction_of_examples_in_queue = 1
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print('filling queue with %d train images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples)
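# Usage sketch (an assumption, not part of the original code): both input
# functions above build their batches on top of TF 0.x queue runners, so a
# caller has to start them before evaluating the tensors. This assumes
# _generate_image_and_label_batch returns an (images, labels) pair, as in the
# CIFAR-10-style pipeline this code follows; 'train.tfrecords' is a
# placeholder path.
def _read_batches_sketch():
    images, labels = distorted_inputs('train.tfrecords')
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in xrange(10):
                image_batch, label_batch = sess.run([images, labels])
                print image_batch.shape, label_batch.shape
        finally:
            coord.request_stop()
            coord.join(threads)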
def CharSCNN(learning_rate=0.0001, n_epochs=10, nkerns=[20, 50],
             batch_size=500):
    """ A simple CharSCNN implementation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    d_wrd = 10    # word embedding dimension
    k_wrd = 5     # word-level convolution window
    d_char = 5    # character embedding dimension
    k_char = 3    # character-level convolution window
    cl_char = 10  # character-level convolution units
    cl_wrd = 50   # word-level convolution units

    rng = numpy.random.RandomState(23455)

    print "Loading data from file ..."
    #(num_sent, v_char, v_wrd, max_word_len, max_sen_len, set_char, set_wrd, set_y) = pickle.load(open("data_mlp.pkl", "rb"))
    (num_sent, v_char, v_wrd, max_word_len, max_sen_len,
     set_char, set_wrd, set_y) = load.read("tweets_clean.txt")

    print "Processing data to numpy arrays ..."
    set_char = theano.shared(numpy.array(set_char, dtype=theano.config.floatX),
                             borrow=True)
    set_wrd = theano.shared(numpy.array(set_wrd, dtype=theano.config.floatX),
                            borrow=True)
    set_y = theano.shared(numpy.array(set_y), borrow=True)
    print "Data loaded"

    # 80% / 10% / 10% split; each "batch" is a single sentence here
    n_train_batches = 8 * num_sent / 10
    n_valid_batches = num_sent / 10
    n_test_batches = num_sent / 10

    train_x_wrd, train_x_char, train_y = (set_wrd[:n_train_batches],
                                          set_char[:n_train_batches],
                                          set_y[:n_train_batches])
    val_x_wrd, val_x_char, val_y = (
        set_wrd[n_train_batches:n_train_batches + n_valid_batches],
        set_char[n_train_batches:n_train_batches + n_valid_batches],
        set_y[n_train_batches:n_train_batches + n_valid_batches])
    test_x_wrd, test_x_char, test_y = (set_wrd[-n_test_batches:],
                                       set_char[-n_test_batches:],
                                       set_y[-n_test_batches:])

    #theano.config.compute_test_value = 'warn'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x_wrd = T.matrix('x_wrd')     # word-level input of one sentence
    x_char = T.tensor3('x_char')  # character-level input of one sentence
    y = T.lvector('y')
    # x_char.tag.test_value = numpy.random.rand(max_sen_len, max_word_len, v_char)
    # x_wrd.tag.test_value = numpy.random.rand(max_sen_len, v_wrd)
    # y.tag.test_value = numpy.array([1])

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # character-level embedding: a hidden layer applied to every
    # (word, character) position
    layer0_input = x_char.reshape((max_sen_len, 1, max_word_len, v_char))
    layer0 = HiddenLayer(
        rng,
        input=layer0_input,
        n_in=v_char,
        n_out=d_char,
        isb=0
    )

    # character-level convolution + max-over-time pooling: each word is
    # reduced to a single cl_char-dimensional feature vector
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(max_sen_len, 1, max_word_len, d_char),
        filter_shape=(cl_char, 1, k_char, d_char),
        poolsize=(max_word_len - k_char + 1, 1)
    )

    # word-level embedding
    layer2_input = x_wrd.reshape((max_sen_len, v_wrd))
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=v_wrd,
        n_out=d_wrd,
        isb=0
    )

    # word-level convolution + max-over-time pooling over the concatenation
    # of word embeddings and character-level features
    layer3_input = T.concatenate(
        [layer1.output.reshape((max_sen_len, cl_char)), layer2.output],
        axis=1).reshape((1, 1, max_sen_len, cl_char + d_wrd))
    layer3 = LeNetConvPoolLayer(
        rng,
        input=layer3_input,
        image_shape=(1, 1, max_sen_len, cl_char + d_wrd),
        filter_shape=(cl_wrd, 1, k_wrd, cl_char + d_wrd),
        poolsize=(max_sen_len - k_wrd + 1, 1)
    )

    # fully-connected tanh layer on the sentence-level feature vector,
    # which has shape (1, cl_wrd)
    layer4_input = layer3.output.reshape((1, cl_wrd))
    layer4 = HiddenLayer(
        rng,
        input=layer4_input,
        n_in=cl_wrd,
        n_out=50,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer5 = LogisticRegression(input=layer4.output, n_in=50, n_out=2)

    # the cost we minimize during training is the NLL of the model
    #theano.printing.Print('this is a very important value')(x_chr)
    cost = layer5.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x_wrd: test_x_wrd[index],
            x_char: test_x_char[index],
            y: test_y[index:index + 1]
        },
        mode="FAST_RUN"
    )

    validate_model = theano.function(
        [index],
        #layer5.negative_log_likelihood(y),
        layer5.errors(y),
        givens={
            x_wrd: val_x_wrd[index],
            x_char: val_x_char[index],
            y: val_y[index:index + 1]
        },
        mode="FAST_RUN"
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer5.params + layer4.params + layer3.params +
              layer2.params + layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each one, so we build the updates list by
    # looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x_wrd: train_x_wrd[index],
            x_char: train_x_char[index],
            y: train_y[index:index + 1]
        },
        mode="FAST_RUN"
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000          # look at this many examples regardless
    patience_increase = 2     # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                              # go through this many minibatches before
                              # checking the network on the validation set;
                              # in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            # ((theano.printing.Print(x)))
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
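# Usage sketch (an assumption, not part of the original script): CharSCNN()
# both builds and trains the model, so a typical entry point just calls it,
# optionally overriding the defaults.
if __name__ == '__main__':
    CharSCNN(learning_rate=0.0001, n_epochs=10)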
def __init__(self, rng, batchsize=100, activation=tanh):

    import load
    (num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
        = load.read("tweets_clean.txt")

    dim_word = 100
    cl_word = 300
    k_wrd = 5
    vocab_size = word_cnt
    n_hidden = 300

    data_train, \
    data_test, \
    target_train, \
    target_test \
        = train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

    x_train = theano.shared(np.asarray(data_train, dtype='int16'),
                            borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int32'),
                            borrow=True)
    x_test = theano.shared(np.asarray(data_test, dtype='int16'),
                           borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int32'),
                           borrow=True)

    self.n_train_batches = x_train.get_value(borrow=True).shape[0] / batchsize
    self.n_test_batches = x_test.get_value(borrow=True).shape[0] / batchsize

    # symbol definition
    index = T.iscalar()
    x = T.wmatrix('x')
    y = T.ivector('y')
    train = T.iscalar('train')

    layer_embed_input = x  # .reshape((batchsize, max_sen_len))

    layer_embed = EmbedIDLayer(
        rng,
        layer_embed_input,
        n_input=vocab_size,
        n_output=dim_word,
    )

    layer1_input = layer_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))

    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word),
        activation=activation
    )

    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_sen_len - k_wrd + 1, 1)
    )

    layer3_input = layer2.output.reshape((batchsize, cl_word))

    layer3 = FullyConnectedLayer(
        rng,
        dropout(rng, layer3_input, train),
        n_input=cl_word,
        n_output=n_hidden,
        activation=activation
    )

    layer4 = FullyConnectedLayer(
        rng,
        dropout(rng, layer3.output, train),
        n_input=n_hidden,
        n_output=2,
        activation=None
    )

    result = Result(layer4.output, y)
    # loss = result.negative_log_likelihood()
    loss = result.cross_entropy()
    accuracy = result.accuracy()
    params = layer4.params + layer3.params + layer1.params + layer_embed.params
    # updates = AdaDelta(params=params).updates(loss)
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x: x_train[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )

    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x: x_test[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
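# Usage sketch (an assumption, not part of the original code): only __init__
# is shown above, so the enclosing class is taken here as an opaque `model`
# object exposing n_train_batches, n_test_batches, train_model and test_model.
def run_epochs_sketch(model, n_epochs=10):
    for epoch in xrange(n_epochs):
        train_acc = np.mean([model.train_model(i)[1]
                             for i in xrange(model.n_train_batches)])
        print 'epoch %d: train accuracy %.4f' % (epoch, train_acc)
    test_acc = np.mean([model.test_model(i)[1]
                        for i in xrange(model.n_test_batches)])
    print 'test accuracy %.4f' % test_acc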