Example #1
import numpy as np
import theano
import theano.tensor as T

# project-local modules assumed importable by both examples on this page:
# hotel_review_data_utils, hotel_review_enc_dec_rnn, sign_lang,
# variable_length_sequence_lstm, and the save_model/load_model helpers
def run_hotel_review():
	print('loading data...')
	X = hotel_review_data_utils.load_data_train()
	# reverse_X is assumed to take the training data and reverse each
	# sequence (the source-reversal trick from sequence-to-sequence training)
	X_reverse = hotel_review_utils.reverse_X(X)
	X, masks = sign_lang.pad_data_to_max_sample_length(X)
	X_reverse, _ = sign_lang.pad_data_to_max_sample_length(X_reverse)

	X = X.astype(theano.config.floatX)
	X_reverse = X_reverse.astype(theano.config.floatX)
	masks = masks.astype(theano.config.floatX)

	X = np.swapaxes(X, 0, 1)
	X_reverse = np.swapaxes(X_reverse, 0, 1)
	masks = np.swapaxes(masks, 0, 1)

	X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
	masks = theano.shared(np.asarray(masks, dtype=theano.config.floatX), borrow=True)
	X_reverse = theano.shared(np.asarray(X_reverse, dtype=theano.config.floatX), borrow=True)

	index = T.lscalar()
	x = T.tensor3('x')
	target = T.tensor3('target')
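	# optional debugging probes (currently unused)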
	print_x = theano.printing.Print('\nx')(x)
	print_target = theano.printing.Print('target')(target)
	mask = T.tensor3('mask')

	print('building model...')

	# these paths are needed by the save_model calls below
	encoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/enc.save'
	decoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/dec.save'

	# optionally resume training from previously saved models:
	# encoder = load_model(encoder_filepath)
	# decoder = load_model(decoder_filepath)

	# the number of words in the dictionary, including the marker for end-of-document
	n_classes = 25000
	n_hidden = 1000
	encoder = variable_length_sequence_lstm.LSTM(n_vis=n_classes, n_hid=n_hidden, layer_name='enc', return_indices=[-1])
	decoder = hotel_review_enc_dec_rnn.DecoderLSTM(n_hid=n_hidden, n_classes=n_classes, layer_name='dec')

	rnn = hotel_review_enc_dec_rnn.EncoderDecoderRNN(encoder, decoder)

	cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.1)

	batch_size = 10

	print('building trainer...')
	trainer = theano.function(
		[index],
		[cost],
		updates=updates,
		givens={
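			# input is the reversed sequence; target is the original order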
			x: X_reverse[:, index * batch_size: (index + 1) * batch_size],
			target: X[:, index * batch_size: (index + 1) * batch_size],
			mask: masks[:, index * batch_size: (index + 1) * batch_size]
		},
		mode='FAST_RUN'
	)

	print('training model...')
	n_examples = X.shape.eval()[1]
	n_batches = int(n_examples / float(batch_size))
	n_epochs = 100
	lowest_cost = -1
	for epoch in range(n_epochs):
		costs = []
		for sample_idx in range(n_batches):
			costs.append(trainer(sample_idx)[0])
		avg_cost = np.mean(costs)
		print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

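		# checkpoint whenever the average cost improves by at least 1%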
		if lowest_cost == -1 or avg_cost < lowest_cost * 0.99:
			lowest_cost = avg_cost
			save_model(encoder, encoder_filepath)
			save_model(decoder, decoder_filepath)

	print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))

	layers = [rnn.encoder, rnn.decoder]
	for layer in layers:
		for param in layer.params:
			print('{}: {}'.format(param.name, param.get_value()))
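
The save_model and load_model helpers called above are not shown on this page. A minimal pickle-based sketch that matches their call sites might look like the following; the project's actual serialization may well differ.

# Hypothetical save_model/load_model helpers, assumed pickle-based.
import pickle

def save_model(layer, filepath):
    # persist a layer object, including its shared-variable parameters
    with open(filepath, 'wb') as f:
        pickle.dump(layer, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_model(filepath):
    # restore a previously saved layer object
    with open(filepath, 'rb') as f:
        return pickle.load(f)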
Example #2
def main_theano_sign_lang_var_len_adadelta():
    """
    :description: Trains a model on the sign language data, handling
        variable-length sequences via masks and processing mini-batches.
    """
    print('loading data...')
    n_input_at_each_timestep = 10
    # number of sign classes; the dataset has 98 labels when counted from 1,
    # and the original author was unsure whether 97 or 98 is correct here
    n_classes = 97

    X, y = sign_lang.load_data_from_aggregate_file()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)
    X = X.astype(theano.config.floatX)
    masks = masks.astype(theano.config.floatX)
    X = np.swapaxes(X, 0, 1)
    masks = np.swapaxes(masks, 0, 1)

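    # 80/20 train/test split along the example (batch) axis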
    split_idx = int(.8 * X.shape[1])

    X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=theano.config.floatX),
                          borrow=True)
    y = theano.shared(y, borrow=True)

    trainset_masks = masks[:, :split_idx, :]
    testset_masks = masks[:, split_idx:, :]

    trainset_X, trainset_y = X[:, :split_idx, :], y[:split_idx]
    testset_X, testset_y = X[:, split_idx:, :], y[split_idx:]

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.lvector('target')
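    # optional debugging probes (currently unused)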
    print_x = theano.printing.Print('\nx')(x)
    print_target = theano.printing.Print('target')(target)
    mask = T.tensor3('mask')

    print('building model...')

    lstm_1_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    lstm_2_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_2.save'
    lstm_3_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_3.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'

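    # resume from previously saved layers; to train from scratch, build fresh
    # layers with the commented-out constructors below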
    lstm_1 = load_model(lstm_1_filepath)
    # lstm_2 = load_model(lstm_2_filepath)
    # lstm_3 = load_model(lstm_3_filepath)
    softmax = load_model(softmax_filepath)

    #lstm_1 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    #lstm_2 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=None, dropout_prob=0.3)
    #lstm_3 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_3', return_indices=[-1], dropout_prob=0.3)
    #softmax = variable_length_sequence_lstm.Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)

    # layers = [lstm_1, lstm_2, lstm_3, softmax]
    layers = [lstm_1, softmax]

    cost_expr = variable_length_sequence_lstm.Softmax.negative_log_likelihood
    rnn = variable_length_sequence_lstm.MLP(layers,
                                            cost=cost_expr,
                                            return_indices=[-1])

    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.005)

    batch_size = 10

    print('building trainer...')
    trainer = theano.function(
        [index], [cost],
        updates=updates,
        givens={
            x: trainset_X[:, index * batch_size:(index + 1) * batch_size],
            target: trainset_y[index * batch_size:(index + 1) * batch_size],
            mask: trainset_masks[:,
                                 index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[:, index * batch_size:(index + 1) * batch_size],
            target: testset_y[index * batch_size:(index + 1) * batch_size],
            mask: testset_masks[:, index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    print('training model...')
    n_train_examples = trainset_X.shape.eval()[1]
    n_test_examples = testset_X.shape.eval()[1]

    n_epochs = 1000
    lowest_cost = -1
    run_validation = False  # set whenever a new best cost is reached
    n_train_batches = int(n_train_examples / float(batch_size))
    n_validation_batches = int(n_test_examples / float(batch_size))
    for epoch in range(n_epochs):
        costs = []
        #random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=100)

        for sample_idx in range(n_train_batches):
            # for sample_idx in random_indices:
            costs.append(trainer(sample_idx)[0])
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

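        # checkpoint whenever the average cost improves by at least 1%,
        # and schedule a validation pass over the test split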
        if lowest_cost == -1 or avg_cost < lowest_cost * 0.99:
            lowest_cost = avg_cost
            run_validation = True
            save_model(lstm_1, lstm_1_filepath)
            # save_model(lstm_2, lstm_2_filepath)
            # save_model(lstm_3, lstm_3_filepath)
            save_model(softmax, softmax_filepath)

        if run_validation:
            print('\nvalidation')
            # errors() returns the misclassification rate per batch
            error_rates = []
            for sample_idx in range(n_validation_batches):
                error_rates.append(validate_model(sample_idx)[1])
            accuracy = (1 - np.mean(error_rates)) * 100
            print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))
            run_validation = False

    # print('finished training, final stats:\nfinal cost: {0}\naccuracy: {1}%'.format(np.mean(costs), accuracy))
    print('finished training, final stats:\nfinal cost: {0}'.format(
        np.mean(costs)))

    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
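
Both examples lean on sign_lang.pad_data_to_max_sample_length to zero-pad variable-length sequences and to build the binary masks the LSTM layers consume. Its implementation is not shown on this page; a sketch of the assumed behavior is below.

# Hypothetical sketch of pad_data_to_max_sample_length. Input: a list of
# (timesteps_i, features) arrays. Output: zero-padded data of shape
# (examples, max_timesteps, features) plus a matching 0/1 mask.
import numpy as np

def pad_data_to_max_sample_length(samples):
    max_len = max(len(s) for s in samples)
    n_features = samples[0].shape[1]
    X = np.zeros((len(samples), max_len, n_features))
    masks = np.zeros((len(samples), max_len, n_features))
    for i, sample in enumerate(samples):
        X[i, :len(sample)] = sample
        masks[i, :len(sample)] = 1.0  # 1 over real timesteps, 0 over padding
    return X, masks

After np.swapaxes(..., 0, 1), the arrays are laid out as (timesteps, examples, features), the axis order that theano.scan-based recurrent layers iterate over.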