Example #1
def inputs(tfrecords_file):
    '''
    create inputs
    '''
    print(tfrecords_file)
    filename_queue = tf.train.string_input_producer(
        [tfrecords_file])  # note: the epoch count specified here becomes a trainable variable
    read_input = load.read(filename_queue)
    reshaped_image = tf.cast(read_input.image, tf.float32)

    height = CROP_SIZE
    width = CROP_SIZE

    resized_image = tf.image.resize_image_with_crop_or_pad(
        reshaped_image, height, width)  # signature is (image, target_height, target_width)
    # per_image_whitening was renamed per_image_standardization in TF >= 0.12
    float_image = tf.image.per_image_whitening(resized_image)

    min_fraction_of_examples_in_queue = 0.4
    #min_fraction_of_examples_in_queue = 1
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print(
        'Filling queue with %d train images before starting to train. '
        'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples)
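
A minimal consumption sketch (an assumption, not part of the snippet): queue-based input pipelines from this pre-tf.data era have to be driven by queue runners, roughly like this:

images, labels = inputs('train.tfrecords')  # hypothetical file name
with tf.Session() as sess:
    # on TF releases this old, the initializer is
    # tf.initialize_all_variables() rather than tf.global_variables_initializer()
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        image_batch, label_batch = sess.run([images, labels])
    finally:
        coord.request_stop()
        coord.join(threads)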
Example #2
def distorted_inputs(tfrecords_file):
    '''
    create inputs with real-time augmentation.
    '''
    print(tfrecords_file)
    filename_queue = tf.train.string_input_producer(
        [tfrecords_file])  # note: the epoch count specified here becomes a trainable variable
    read_input = load.read(filename_queue)
    reshaped_image = tf.cast(read_input.image, tf.float32)

    height = CROP_SIZE
    width = CROP_SIZE

    # crop (the random-crop API moved between early TF releases; testing a
    # single character of the version string is brittle, so match 0.7.x explicitly)
    if tf.__version__.startswith('0.7'):
        distorted_image = tf.random_crop(reshaped_image,
                                         [height, width, IMAGE_DEPTH])
    else:
        distorted_image = tf.image.random_crop(reshaped_image, [height, width])

    # flip: enable this for objects with left-right symmetry
    #distorted_image = tf.image.random_flip_left_right(distorted_image)

    # optionally add random brightness and contrast jitter
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image,
                                               lower=0.2,
                                               upper=1.8)

    # whitening
    float_image = tf.image.per_image_whitening(distorted_image)

    min_fraction_of_examples_in_queue = 0.4
    #min_fraction_of_examples_in_queue = 1
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    print(
        'Filling queue with %d train images before starting to train. '
        'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples)
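
In a typical setup (an assumption; the caller is not shown in the snippet), the distorted pipeline feeds training while the undistorted one from Example #1 feeds evaluation:

# hypothetical caller; is_training and the file names are assumptions
if is_training:
    images, labels = distorted_inputs('train.tfrecords')
else:
    images, labels = inputs('eval.tfrecords')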
Example #3
def CharSCNN(learning_rate=0.0001, n_epochs=10, nkerns=[20, 50], batch_size=500):
    """ A simple CharSCNN implementation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (currently unused; the
                   layer widths below are fixed by cl_char and cl_wrd)

    :type batch_size: int
    :param batch_size: currently unused; the model processes one sentence
                       at a time
    """
    d_wrd = 10
    k_wrd = 5
    d_char = 5
    k_char = 3
    cl_char = 10
    cl_wrd = 50

    rng = numpy.random.RandomState(23455)

    print "Loading data from file ..."
    #(num_sent, v_char, v_wrd, max_word_len, max_sen_len, set_char, set_wrd, set_y) = pickle.load(open("data_mlp.pkl","rb"))
    (num_sent, v_char, v_wrd, max_word_len, max_sen_len, set_char, set_wrd, set_y) = load.read("tweets_clean.txt")
    print "Processing data to numpy arrays ..."
    set_char = theano.shared(numpy.array(set_char,dtype=theano.config.floatX),borrow=True)
    set_wrd = theano.shared(numpy.array(set_wrd,dtype=theano.config.floatX),borrow=True)
    set_y = theano.shared(numpy.array(set_y),borrow=True)
    print "Data loaded"
    n_train_batches = 8*num_sent/10
    n_valid_batches = num_sent/10
    n_test_batches = num_sent/10
    
    train_x_wrd, train_x_char, train_y = set_wrd[:n_train_batches], set_char[:n_train_batches], set_y[:n_train_batches]
    val_x_wrd, val_x_char, val_y = set_wrd[n_train_batches:n_train_batches+n_valid_batches], set_char[n_train_batches:n_train_batches+n_valid_batches], set_y[n_train_batches:n_train_batches+n_valid_batches]
    test_x_wrd, test_x_char, test_y = set_wrd[-n_test_batches:], set_char[-n_test_batches:], set_y[-n_test_batches:]

    #theano.config.compute_test_value = 'warn'
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x_wrd = T.matrix('x_wrd')    # one row of word-level features per word
    x_char = T.tensor3('x_char')  # one character-feature matrix per word
    y = T.lvector('y')

    # x_char.tag.test_value = numpy.random.rand(max_sen_len,max_word_len,v_char)
    # x_wrd.tag.test_value = numpy.random.rand(max_sen_len,v_wrd)
    # y.tag.test_value = numpy.array([1])

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the character input to a 4D tensor compatible with our
    # LeNetConvPoolLayer: one "image" per word, of shape (max_word_len, v_char)
    layer0_input = x_char.reshape((max_sen_len, 1, max_word_len, v_char))
    
    layer0 = HiddenLayer(
        rng,
        input=layer0_input,
        n_in=v_char,
        n_out=d_char,
        isb=0
    )
    # Construct the character-level convolutional pooling layer: convolve
    # k_char-sized windows over each word and max-pool over all positions,
    # yielding one cl_char-dimensional feature vector per word
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(max_sen_len, 1, max_word_len, d_char),
        filter_shape=(cl_char, 1, k_char, d_char),
        poolsize=(max_word_len - k_char + 1, 1)
    )

    layer2_input = x_wrd.reshape((max_sen_len, v_wrd))
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=v_wrd,
        n_out=d_wrd,
        isb=0
    )
    # Concatenate the char-level and word-level features of each word, then
    # treat the sentence as a single 1 x max_sen_len "image" for the
    # word-level convolution
    layer3_input = T.concatenate(
        [layer1.output.reshape((max_sen_len, cl_char)), layer2.output],
        axis=1).reshape((1, 1, max_sen_len, cl_char + d_wrd))

    layer3 = LeNetConvPoolLayer(
        rng,
        input=layer3_input,
        image_shape=(1, 1, max_sen_len, cl_char + d_wrd),
        filter_shape=(cl_wrd, 1, k_wrd, cl_char + d_wrd),
        poolsize=(max_sen_len - k_wrd + 1, 1)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices;
    # here that is a single row holding the cl_wrd-dimensional sentence
    # representation produced by the word-level pooling
    layer4_input = layer3.output.reshape((1, cl_wrd))

    # construct a fully-connected tanh layer
    layer4 = HiddenLayer(
        rng,
        input=layer4_input,
        n_in=cl_wrd,
        n_out=50,
        activation=T.tanh
    )

    # classify the values of the fully-connected tanh layer
    layer5 = LogisticRegression(input=layer4.output, n_in=50, n_out=2)

    # the cost we minimize during training is the NLL of the model
    #theano.printing.Print('this is a very important value')(x_chr)
    cost = layer5.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x_wrd: test_x_wrd[index],
            x_char: test_x_char[index],
            y: test_y[index:index+1]
        },
        mode="FAST_RUN"
    )

    validate_model = theano.function(
        [index],
        #layer5.negative_log_likelihood(y),
        layer5.errors(y),
        givens={
            x_wrd: val_x_wrd[index],
            x_char: val_x_char[index],
            y: val_y[index:index+1]
        },
        mode="FAST_RUN"
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer5.params + layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x_wrd: train_x_wrd[index],
            x_char: train_x_char[index],
            y: train_y[index:index+1]
        },
        mode="FAST_RUN"
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.time()  # time.clock() was removed in Python 3.8

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter =', iter)
            # ((theano.printing.Print(x)))
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.time()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)
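
A minimal entry-point sketch (an assumption; the snippet does not show how the function is invoked):

if __name__ == '__main__':
    CharSCNN(learning_rate=0.0001, n_epochs=10)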
Example #4
	def __init__(
		self,
		rng,
		batchsize=100,
		activation=tanh
	):
		
		import load
		(num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
		= load.read("tweets_clean.txt")


		dim_word = 100
		cl_word = 300
		k_wrd = 5
		vocab_size = word_cnt
		n_hidden = 300

		data_train,\
		data_test,\
		target_train,\
		target_test\
		= train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

		x_train = theano.shared(np.asarray(data_train, dtype='int16'), borrow=True)
		y_train = theano.shared(np.asarray(target_train, dtype='int32'), borrow=True)
		x_test = theano.shared(np.asarray(data_test, dtype='int16'), borrow=True)
		y_test = theano.shared(np.asarray(target_test, dtype='int32'), borrow=True)

		self.n_train_batches = x_train.get_value(borrow=True).shape[0] // batchsize
		self.n_test_batches = x_test.get_value(borrow=True).shape[0] // batchsize


		
		"""symbol definition"""
		index = T.iscalar()
		x = T.wmatrix('x')
		y = T.ivector('y')
		train = T.iscalar('train')


		layer_embed_input = x  # .reshape((batchsize, max_sen_len))

		layer_embed = EmbedIDLayer(
			rng,
			layer_embed_input,
			n_input=vocab_size,
			n_output=dim_word,
		)

		layer1_input = layer_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))

		layer1 = ConvolutionalLayer(
			rng,
			layer1_input,
			filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
			image_shape=(batchsize, 1, max_sen_len, dim_word),
			activation=activation
		)

		layer2 = MaxPoolingLayer(
			layer1.output,
			poolsize=(max_sen_len-k_wrd+1, 1)
		)

		layer3_input = layer2.output.reshape((batchsize, cl_word))

		layer3 = FullyConnectedLayer(
			rng,
			dropout(rng, layer3_input, train),
			n_input=cl_word,
			n_output=n_hidden,
			activation=activation
		)

		layer4 = FullyConnectedLayer(
			rng,
			dropout(rng, layer3.output, train),
			n_input=n_hidden,
			n_output=2,
			activation=None
		)

		result = Result(layer4.output, y)
		# loss = result.negative_log_likelihood()
		loss = result.cross_entropy()
		accuracy = result.accuracy()
		params = layer4.params + layer3.params + layer1.params + layer_embed.params
		# updates = AdaDelta(params=params).updates(loss)
		updates = RMSprop(learning_rate=0.001, params=params).updates(loss)
		

		self.train_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			updates=updates,
			givens={
				x: x_train[index*batchsize: (index+1)*batchsize],
				y: y_train[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](1)
			}
		)

		self.test_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			givens={
				x: x_test[index*batchsize: (index+1)*batchsize],
				y: y_test[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](0)
			}
		)
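
A minimal training-loop sketch (the enclosing class is not named in the snippet, so SentimentCNN below is a placeholder):

import numpy as np

model = SentimentCNN(rng=np.random.RandomState(1234))  # hypothetical class name
for epoch in range(10):
    losses = []
    for i in range(model.n_train_batches):
        loss, acc = model.train_model(i)
        losses.append(loss)
    print('epoch %i: mean training loss %f' % (epoch, np.mean(losses)))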
Example #5
    def __init__(self, rng, batchsize=100, activation=tanh):

        import load
        (num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
        = load.read("tweets_clean.txt")

        dim_word = 100
        cl_word = 300
        k_wrd = 5
        vocab_size = word_cnt
        n_hidden = 300

        data_train,\
        data_test,\
        target_train,\
        target_test\
        = train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

        x_train = theano.shared(np.asarray(data_train, dtype='int16'),
                                borrow=True)
        y_train = theano.shared(np.asarray(target_train, dtype='int32'),
                                borrow=True)
        x_test = theano.shared(np.asarray(data_test, dtype='int16'),
                               borrow=True)
        y_test = theano.shared(np.asarray(target_test, dtype='int32'),
                               borrow=True)

        self.n_train_batches = x_train.get_value(
            borrow=True).shape[0] // batchsize
        self.n_test_batches = x_test.get_value(
            borrow=True).shape[0] // batchsize
        """symbol definition"""
        index = T.iscalar()
        x = T.wmatrix('x')
        y = T.ivector('y')
        train = T.iscalar('train')

        layer_embed_input = x  #.reshape((batchsize, max_sen_len))

        layer_embed = EmbedIDLayer(
            rng,
            layer_embed_input,
            n_input=vocab_size,
            n_output=dim_word,
        )

        layer1_input = layer_embed.output.reshape(
            (batchsize, 1, max_sen_len, dim_word))

        layer1 = ConvolutionalLayer(
            rng,
            layer1_input,
            filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
            image_shape=(batchsize, 1, max_sen_len, dim_word),
            activation=activation)

        layer2 = MaxPoolingLayer(layer1.output,
                                 poolsize=(max_sen_len - k_wrd + 1, 1))

        layer3_input = layer2.output.reshape((batchsize, cl_word))

        layer3 = FullyConnectedLayer(rng,
                                     dropout(rng, layer3_input, train),
                                     n_input=cl_word,
                                     n_output=n_hidden,
                                     activation=activation)

        layer4 = FullyConnectedLayer(rng,
                                     dropout(rng, layer3.output, train),
                                     n_input=n_hidden,
                                     n_output=2,
                                     activation=None)

        result = Result(layer4.output, y)
        # loss = result.negative_log_likelihood()
        loss = result.cross_entropy()
        accuracy = result.accuracy()
        params = layer4.params + layer3.params + layer1.params + layer_embed.params
        # updates = AdaDelta(params=params).updates(loss)
        updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

        self.train_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                x: x_train[index * batchsize:(index + 1) * batchsize],
                y: y_train[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](1)
            })

        self.test_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                x: x_test[index * batchsize:(index + 1) * batchsize],
                y: y_test[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](0)
            })
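
A matching evaluation sketch (again assuming the placeholder class name SentimentCNN):

import numpy as np

model = SentimentCNN(rng=np.random.RandomState(1234))  # hypothetical class name
test_accs = [model.test_model(i)[1] for i in range(model.n_test_batches)]
print('test accuracy: %f' % np.mean(test_accs))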