Example #1
0
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels, image_size,
               num_labels, basic_hps, stride_ps, drop=False, lrd=False, get_grad=False, norm_list=None):
    """Train a small convolutional network and propose adjusted hyper-parameters.

    The graph consists of one input convolution, ``layer_sum - 1`` further
    convolutions, one fully connected ReLU layer and a linear output layer.
    Every fifth step the accumulated minibatch loss is recorded; from step 600
    onwards the recorded losses are passed to ``fit_loss`` and training stops
    early once it reports ``ret == 1`` (unless ``get_grad`` is set).

    Args:
        train_dataset: 4-D training array, sliced along axis 0 into minibatches.
        train_labels: 2-D training labels, sliced in step with ``train_dataset``.
        valid_dataset: validation inputs, embedded in the graph as a constant.
        valid_labels: validation labels passed to ``accuracy``.
        test_dataset: test inputs, embedded in the graph as a constant.
        test_labels: test labels passed to ``accuracy``.
        image_size: height and width of the (square) input placeholder.
        num_labels: number of output classes.
        basic_hps: mapping providing ``batch_size``, ``patch_size``, ``depth``,
            ``num_hidden`` and ``layer_sum``.
        stride_ps: list of conv2d stride specs, one per convolution layer.
            NOTE: entries are overwritten in place with ``[1, 1, 1, 1]`` when
            ``large_data_size`` reports a small input or filter.
        drop: apply dropout to the training forward pass.
        lrd: use exponentially decayed learning rate instead of a fixed 0.05.
        get_grad: call ``better_hyper`` on every fit step and never stop early.
        norm_list: five divisors used to normalise
            ``[batch_size, depth, num_hidden, layer_cnt, patch_size]``.

    Returns:
        ``(end_train, hypers)`` where ``end_train`` tells whether training was
        stopped early and ``hypers`` is the (possibly adjusted) hyper-parameter
        list. When ``norm_list`` is None, a list of ones with one entry per key
        of ``basic_hps`` is returned instead, right after the first step.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))

        mid_layer_cnt = layer_cnt - 1
        # Filled lazily by the first model() call because the filter sizes
        # depend on the static shape of the preceding activation.
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth])) for _ in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        final_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        final_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, training=False):
            # Dropout must only perturb the training pass, never evaluation.
            use_dropout = drop and training
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0], use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if use_dropout:
                hidden = tf.nn.dropout(hidden, 0.5)
            for i in range(mid_layer_cnt):
                if i >= len(layer_weights):
                    # Create each layer's filter exactly once; later model()
                    # calls (validation/test) reuse the same variables.
                    hid_shape = hidden.get_shape()
                    # Floor division keeps the shape integral on Python 3;
                    # clamp to 1 so deep stacks never get an empty filter.
                    filter_w = max(1, patch_size // (i + 1))
                    filter_h = max(1, patch_size // (i + 1))
                    # avoid filter shape larger than input shape
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(shape=[filter_w, filter_h, depth, depth],
                                                                   stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1], use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if use_dropout:
                    hidden = tf.nn.dropout(hidden, 0.7)

            # Flatten everything but the batch axis for the dense layers.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if not output_weights:
                output_weights.append(tf.Variable(tf.truncated_normal([shape_mul, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu(tf.matmul(reshape, output_weights[0]) + output_biases)
            if use_dropout:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, final_weights) + final_biases

        # Training computation.
        logits = model(tf_train_dataset, training=True)
        # Named arguments are required by TensorFlow >= 1.0.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0, trainable=False)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 3001
    start_fit = 600
    init_loss = []

    def normalized_hps():
        # Hyper-parameters scaled by norm_list, in the order expected by
        # fit_loss / better_hyper. Only called once norm_list is known to be set.
        return [batch_size / norm_list[0], depth / norm_list[1], num_hidden / norm_list[2],
                layer_cnt / norm_list[3], patch_size / norm_list[4]]

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if norm_list is None:
                    # Without normalisation factors nothing can be fitted.
                    return [1 for _ in range(len(basic_hps))]
                if step >= start_fit:
                    # The first fit call is flagged with 1, all later ones with 0.
                    res = fit_loss(1 if step == start_fit else 0, normalized_hps(), loss_collect)
                    if get_grad:
                        better_hyper(normalized_hps(), loss_collect)
                    # Slide the loss window forward by one sample.
                    loss_collect.pop(0)
                    ret = res['ret']
                    if ret == 1 and not get_grad:
                        print('ret is end train when step is {step}'.format(step=step))
                        init_loss.append(loss_collect)
                        end_train = True

                        # NOTE(review): this progress report only runs on the
                        # early-stop step; it may have been meant to sit one
                        # or two levels further out - confirm before moving.
                        if step % 50 == 0:
                            print('Minibatch loss at step %d: %f' % (step, l))
                            print('Validation accuracy: %.1f%%' % accuracy(
                                valid_prediction.eval(), valid_labels))

        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        if end_train:
            hypers = better_hyper(normalized_hps(), init_loss[0])
            print(hypers)
            # Undo the normalisation before rounding to usable integers.
            hypers = [hyper * norm_list[i] for i, hyper in enumerate(hypers)]
            print(norm_list)
            print(hypers)
            for i in range(len(hypers)):
                if hypers[i] <= 1.0:
                    hypers[i] = 1
                else:
                    hypers[i] = int(hypers[i])
        else:
            hypers = [batch_size, depth, num_hidden, layer_cnt, patch_size]
    return end_train, hypers
Example #2
0
def tf_deep_nn(regular=False, drop_out=False, lrd=False, layer_cnt=2):
    """Train a fully connected sigmoid network and print its accuracy.

    The network has one 32-unit input layer, ``layer_cnt - 2`` middle layers
    whose width halves each time (never below 2) and a linear output layer.
    Datasets, labels, ``feature_dim``, ``num_labels`` and ``accuracy`` are
    read from module scope.

    Args:
        regular: add an L2 penalty (beta = 1e-2) on all weight matrices.
        drop_out: apply dropout to the training path only.
        lrd: use SGD with exponential learning-rate decay instead of Adam.
        layer_cnt: total number of layers; values below 3 give no middle layers.

    Returns:
        None. Progress and the final test accuracy are printed.
    """
    batch_size = 128

    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, feature_dim))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        hidden_node_count = 32
        # start weight
        hidden_stddev = np.sqrt(2.0 / 100)
        weights1 = tf.Variable(tf.truncated_normal([feature_dim, hidden_node_count], stddev=hidden_stddev))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))
        # middle weight
        weights = []
        biases = []
        hidden_cur_cnt = hidden_node_count
        for i in range(layer_cnt - 2):
            # Halve the width per layer but keep at least two units.
            if hidden_cur_cnt > 2:
                hidden_next_cnt = int(hidden_cur_cnt / 2)
            else:
                hidden_next_cnt = 2
            hidden_stddev = np.sqrt(2.0 / hidden_cur_cnt / 10)
            weights.append(tf.Variable(tf.truncated_normal([hidden_cur_cnt, hidden_next_cnt], stddev=hidden_stddev)))
            biases.append(tf.Variable(tf.zeros([hidden_next_cnt])))
            hidden_cur_cnt = hidden_next_cnt
        # first wx + b
        y0 = tf.matmul(tf_train_dataset, weights1) + biases1
        # first sigmoid
        hidden = tf.nn.sigmoid(y0)
        # hidden_drop is the training path (with dropout); hidden is the
        # dropout-free path used for the minibatch prediction.
        hidden_drop = hidden
        # first DropOut
        keep_prob = 0.5
        if drop_out:
            hidden_drop = tf.nn.dropout(hidden, keep_prob)
        # first wx+b for valid
        valid_y0 = tf.matmul(tf_valid_dataset, weights1) + biases1
        valid_hidden = tf.nn.sigmoid(valid_y0)
        # first wx+b for test
        test_y0 = tf.matmul(tf_test_dataset, weights1) + biases1
        test_hidden = tf.nn.sigmoid(test_y0)

        # middle layer
        for i in range(layer_cnt - 2):
            y1 = tf.matmul(hidden_drop, weights[i]) + biases[i]
            hidden_drop = tf.nn.sigmoid(y1)
            if drop_out:
                # Keep more units in deeper layers. The running sum passes 1.0
                # for deep stacks (e.g. layer_cnt=7), which tf.nn.dropout
                # rejects, so cap it at 1.0 (i.e. no dropout).
                keep_prob = min(1.0, keep_prob + 0.5 * i / (layer_cnt + 1))
                hidden_drop = tf.nn.dropout(hidden_drop, keep_prob)

            y0 = tf.matmul(hidden, weights[i]) + biases[i]
            hidden = tf.nn.sigmoid(y0)

            valid_y0 = tf.matmul(valid_hidden, weights[i]) + biases[i]
            valid_hidden = tf.nn.sigmoid(valid_y0)

            test_y0 = tf.matmul(test_hidden, weights[i]) + biases[i]
            test_hidden = tf.nn.sigmoid(test_y0)

        # last weight
        weights2 = tf.Variable(tf.truncated_normal([hidden_cur_cnt, num_labels], stddev=hidden_stddev / 2))
        biases2 = tf.Variable(tf.zeros([num_labels]))
        # last wx + b
        logits = tf.matmul(hidden_drop, weights2) + biases2

        # predicts
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden, weights2) + biases2
        test_predict = tf.matmul(test_hidden, weights2) + biases2

        l2_loss = 0
        # enable regularization
        if regular:
            l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)
            for i in range(len(weights)):
                l2_loss += tf.nn.l2_loss(weights[i])

            beta = 1e-2
            l2_loss *= beta
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels)) + l2_loss

        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0, trainable=False)  # count the number of steps taken.
            starter_learning_rate = 0.4
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 500, 0.75, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdamOptimizer(0.5).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)

    num_steps = 8001

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Walk through the training set in consecutive minibatches,
            # wrapping around before the last incomplete batch.
            offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                    valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
Example #3
0
def conv_train(train_dataset,
               train_labels,
               valid_dataset,
               valid_labels,
               test_dataset,
               test_labels,
               image_size,
               num_labels,
               basic_hps,
               stride_ps,
               lrd=False,
               get_grad=False):
    """Train a conv net with shrinking channel depth and propose new hyper-parameters.

    The graph has one input convolution, ``layer_sum - 1`` further
    convolutions whose output channels are ``depth // (i + 2)``, a ReLU6 dense
    layer and two linear dense layers. Every fifth step the accumulated
    minibatch loss is recorded; from step 600 onwards the recorded losses are
    passed to ``fit_loss`` and training stops early once it reports
    ``ret == 1`` (unless ``get_grad`` is set).

    Args:
        train_dataset: 4-D training array, sliced along axis 0 into minibatches.
        train_labels: 2-D training labels, sliced in step with ``train_dataset``.
        valid_dataset: validation inputs, embedded in the graph as a constant.
        valid_labels: validation labels passed to ``accuracy``.
        test_dataset: test inputs, embedded in the graph as a constant.
        test_labels: test labels passed to ``accuracy``.
        image_size: height and width of the (square) input placeholder.
        num_labels: number of output classes.
        basic_hps: mapping providing ``batch_size``, ``patch_size``, ``depth``,
            ``num_hidden`` and ``layer_sum``.
        stride_ps: list of conv2d stride specs, one per convolution layer.
            NOTE: entries are overwritten in place with ``[1, 1, 1, 1]`` when
            ``large_data_size`` reports a small input or filter.
        lrd: use SGD with exponential learning-rate decay instead of Adagrad.
        get_grad: call ``better_hyper`` on every fit step and never stop early.

    Returns:
        ``(end_train, hypers)`` where ``end_train`` tells whether training was
        stopped early and ``hypers`` holds
        ``[batch_size, depth, num_hidden, layer_cnt, patch_size]``, either
        unchanged or as adjusted by ``better_hyper`` and rounded to ints >= 1.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Floor division: this value is used as a tensor dimension and must stay
    # an int on Python 3 as well.
    second_hidden_num = first_hidden_num // 2 + 1

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size,
                                                 image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth],
                                stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Filled by the model(init=True) call because the filter sizes depend
        # on the static shape of the preceding activation.
        layer_weights = list()
        # Bias i matches the output channel count of middle layer i.
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num],
                                stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(
            tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, init=False):
            # init=True marks the training pass: it creates the lazily sized
            # weights and enables dropout. Evaluation passes reuse the weights.
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data,
                                input_weights,
                                stride_ps[0],
                                use_cudnn_on_gpu=True,
                                padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    hid_shape = hidden.get_shape()
                    # Floor division keeps the shape integral on Python 3;
                    # clamp to 1 so deep stacks never get an empty filter.
                    filter_w = max(1, patch_size // (i + 1))
                    filter_h = max(1, patch_size // (i + 1))
                    # avoid filter shape larger than input shape
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    # Input channels equal the previous layer's output channels.
                    layer_weight = tf.Variable(
                        tf.truncated_normal(shape=[
                            filter_w, filter_h, depth // (i + 1),
                            depth // (i + 2)
                        ],
                                            stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(
                        layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden,
                                    layer_weights[i],
                                    stride_ps[i + 1],
                                    use_cudnn_on_gpu=True,
                                    padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)

            # Flatten everything but the batch axis for the dense layers.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_size = shape_mul
                output_weights.append(
                    tf.Variable(
                        tf.truncated_normal([output_size, num_hidden],
                                            stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(
                tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        # Named arguments are required by TensorFlow >= 1.0.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=tf_train_labels))
        # Optimizer.
        starter_learning_rate = 0.1
        if lrd:
            # count the number of steps taken.
            cur_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       cur_step,
                                                       10000,
                                                       0.96,
                                                       staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(
                starter_learning_rate).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 3001
    start_fit = 600
    init_loss = []
    # Hyper-parameters in the order expected by fit_loss / better_hyper.
    hps = [batch_size, depth, num_hidden, layer_cnt, patch_size]

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if step >= start_fit:
                    # The first fit call is flagged with 1, later ones with 0.
                    # A fresh list is passed each time, as in the original.
                    res = fit_loss(1 if step == start_fit else 0, list(hps),
                                   loss_collect)
                    if get_grad:
                        better_hyper(list(hps), loss_collect)
                    # Slide the loss window forward by one sample.
                    loss_collect.pop(0)
                    ret = res['ret']
                    if ret == 1 and not get_grad:
                        print('ret is end train when step is {step}'.format(
                            step=step))
                        init_loss.append(loss_collect)
                        end_train = True

                        # NOTE(review): this progress report only runs on the
                        # early-stop step; it may have been meant to sit one
                        # or two levels further out - confirm before moving.
                        if step % 50 == 0:
                            print('Minibatch loss at step %d: %f' % (step, l))
                            print('Validation accuracy: %.1f%%' % accuracy(
                                valid_prediction.eval(), valid_labels))

        print('Test accuracy: %.1f%%' %
              accuracy(test_prediction.eval(), test_labels))
        if end_train:
            hypers = better_hyper(list(hps), init_loss[0])
            print(hypers)
            # Round the suggestions to usable integers, never below 1.
            for i in range(len(hypers)):
                if hypers[i] <= 1.0:
                    hypers[i] = 1
                else:
                    hypers[i] = int(hypers[i])
        else:
            hypers = [batch_size, depth, num_hidden, layer_cnt, patch_size]
    return end_train, hypers
Example #4
0
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels,
               test_dataset, test_labels, image_size, num_labels, basic_hps,
               stride_ps):
    """Train a conv net for a fixed 1001 steps and propose new hyper-parameters.

    The graph has one input convolution, ``layer_sum - 1`` further
    convolutions whose output channels are ``depth // (i + 2)``, a ReLU6 dense
    layer and two linear dense layers, optimised with Adagrad. Every fifth
    step the accumulated minibatch loss is recorded, and the full record is
    handed to ``better_trend_hyper`` after training.

    Args:
        train_dataset: 4-D training array, sliced along axis 0 into minibatches.
        train_labels: 2-D training labels, sliced in step with ``train_dataset``.
        valid_dataset: validation inputs, embedded in the graph as a constant.
        valid_labels: validation labels passed to ``accuracy``.
        test_dataset: test inputs, embedded in the graph as a constant.
        test_labels: test labels passed to ``accuracy``.
        image_size: height and width of the (square) input placeholder.
        num_labels: number of output classes.
        basic_hps: mapping providing ``batch_size``, ``patch_size``, ``depth``,
            ``num_hidden``, ``layer_sum`` and ``start_learning_rate``.
        stride_ps: list of conv2d stride specs, one per convolution layer.
            NOTE: entries are overwritten in place with ``[1, 1, 1, 1]`` when
            ``large_data_size`` reports a small input or filter.

    Returns:
        The sequence returned by ``better_trend_hyper`` with every entry
        replaced by an int >= 1.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    starter_learning_rate = basic_hps['start_learning_rate']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Floor division: this value is used as a tensor dimension and must stay
    # an int on Python 3 as well.
    second_hidden_num = first_hidden_num // 2 + 1

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size,
                                                 image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth],
                                stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Filled by the model(init=True) call because the filter sizes depend
        # on the static shape of the preceding activation.
        layer_weights = list()
        # Bias i matches the output channel count of middle layer i.
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num],
                                stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(
            tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, init=False):
            # init=True marks the training pass: it creates the lazily sized
            # weights and enables dropout. Evaluation passes reuse the weights.
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data,
                                input_weights,
                                stride_ps[0],
                                use_cudnn_on_gpu=True,
                                padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    hid_shape = hidden.get_shape()
                    # Floor division keeps the shape integral on Python 3;
                    # clamp to 1 so deep stacks never get an empty filter.
                    filter_w = max(1, patch_size // (i + 1))
                    filter_h = max(1, patch_size // (i + 1))
                    # Never let the filter exceed the activation it slides over.
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    # Input channels equal the previous layer's output channels.
                    layer_weight = tf.Variable(
                        tf.truncated_normal(shape=[
                            filter_w, filter_h, depth // (i + 1),
                            depth // (i + 2)
                        ],
                                            stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(
                        layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden,
                                    layer_weights[i],
                                    stride_ps[i + 1],
                                    use_cudnn_on_gpu=True,
                                    padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)

            # Flatten everything but the batch axis for the dense layers.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_size = shape_mul
                output_weights.append(
                    tf.Variable(
                        tf.truncated_normal([output_size, num_hidden],
                                            stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(
                tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        # Named arguments are required by TensorFlow >= 1.0.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=tf_train_labels))
        optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(
            loss)

        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 1001

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')
        mean_loss = 0
        for step in range(num_steps):
            # Walk through the training set in consecutive minibatches,
            # wrapping around before the last incomplete batch.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if step % 50 == 0:
                    print('Minibatch loss at step %d: %f' % (step, l))
                    print('Validation accuracy: %.1f%%' %
                          accuracy(valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' %
              accuracy(test_prediction.eval(), test_labels))
        hypers = better_trend_hyper(
            [batch_size, depth, num_hidden, layer_cnt, patch_size],
            loss_collect)
        print(hypers)
        # Round the suggestions to usable integers, never below 1.
        for i in range(len(hypers)):
            if hypers[i] <= 1.0:
                hypers[i] = 1
            else:
                hypers[i] = int(hypers[i])
    return hypers
def conv_train(train_dataset,
               train_labels,
               valid_dataset,
               valid_labels,
               test_dataset,
               test_labels,
               image_size,
               num_labels,
               basic_hps,
               stride_ps,
               drop=False,
               lrd=False):
    """Train a variable-depth convolutional classifier and report accuracy.

    The graph is an input convolution, ``layer_sum - 1`` further convolutions
    whose output channel count grows by ``depth`` per layer, and three fully
    connected layers. Variables are restored from the ``'conv_mnist'``
    checkpoint path when it exists and saved back to it after training.

    Args:
        train_dataset: Training images, indexed as ``[sample, :, :, :]``.
        train_labels: Training labels, indexed as ``[sample, :]``.
        valid_dataset: Validation images, embedded in the graph as a constant.
        valid_labels: Labels passed to ``accuracy`` with the validation output.
        test_dataset: Test images, embedded in the graph as a constant.
        test_labels: Labels passed to ``accuracy`` with the test output.
        image_size: Height and width used for the training placeholder.
        num_labels: Width of the label placeholder and of the final layer.
        basic_hps: Mapping providing ``'batch_size'``, ``'patch_size'``,
            ``'depth'``, ``'num_hidden'`` and ``'layer_sum'``.
        stride_ps: List of ``conv2d`` stride lists, one per convolution.
            NOTE: modified in place -- an entry is replaced by
            ``[1, 1, 1, 1]`` whenever ``large_data_size`` rejects the layer's
            input or filter.
        drop: When true, dropout is added to the training graph only.
        lrd: When true, train with gradient descent and a staircase
            exponentially decayed learning rate; otherwise use Adagrad.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    first_hidden_num = basic_hps['num_hidden']
    # Floor division: the value is a tensor dimension and has to stay an
    # integer under true-division semantics as well.
    second_hidden_num = first_hidden_num // 2 + 1
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size,
                                                 image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        # The third filter dimension must equal the previous layer's depth.
        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth],
                                stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))

        mid_layer_cnt = layer_cnt - 1
        # Filled lazily by the first model() call, once input shapes are known.
        layer_weights = list()
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth * (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[first_hidden_num]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num],
                                stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(
            tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, model_drop=True, init=True):
            # init=True creates the shape-dependent variables; calls with
            # init=False reuse them so all predictions share one weight set.
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data,
                                input_weights,
                                stride_ps[0],
                                use_cudnn_on_gpu=True,
                                padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu6(conv + input_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                print(hidden)
                if init:
                    hid_shape = hidden.get_shape()
                    # Filters shrink with depth; keep them integral and at
                    # least 1x1 (plain division gave floats or zero here).
                    filter_w = max(1, patch_size // (i + 1))
                    filter_h = max(1, patch_size // (i + 1))
                    # Avoid a filter larger than the feature map it scans.
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(
                        tf.truncated_normal(shape=[
                            filter_w, filter_h, depth * (i + 1),
                            depth * (i + 2)
                        ],
                                            stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(
                        layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden,
                                    layer_weights[i],
                                    stride_ps[i + 1],
                                    use_cudnn_on_gpu=True,
                                    padding='SAME')
                if not large_data_size(conv):
                    print('not large')
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu6(conv + layer_biases[i])

            # Flatten everything except the batch axis.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_weights.append(
                    tf.Variable(
                        tf.truncated_normal([shape_mul, first_hidden_num],
                                            stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(
                tf.matmul(reshape, output_weights[0]) + output_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       cur_step,
                                                       600,
                                                       0.1,
                                                       staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(0.06).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(
            model(tf_valid_dataset, model_drop=False, init=False))
        test_prediction = tf.nn.softmax(
            model(tf_test_dataset, model_drop=False, init=False))
        saver = tf.train.Saver()
    num_steps = 1750 * 3

    save_path = 'conv_mnist'
    save_flag = True
    with tf.Session(graph=graph) as session:
        if os.path.exists(save_path) and save_flag:
            # Restore variables from disk.
            saver.restore(session, save_path)
        else:
            tf.global_variables_initializer().run()
            print('Initialized')
        # Running sum and count, so the reported mean is right even for the
        # single-sample window at step 0.
        loss_sum = 0.0
        loss_cnt = 0
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, _ = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            loss_sum += l
            loss_cnt += 1
            if step % 10 == 0:
                mean_loss = loss_sum / loss_cnt
                if step % 200 == 0:
                    print('Minibatch loss at step %d: %f' % (step, mean_loss))
                    print('Validation accuracy: %.1f%%' %
                          accuracy(valid_prediction.eval(), valid_labels))
                loss_sum = 0.0
                loss_cnt = 0
        if save_flag:
            saver.save(session, save_path)
        print('Test accuracy: %.1f%%' %
              accuracy(test_prediction.eval(), test_labels))
Example #6
0
def conv_train():
    """Train and evaluate a fixed two-convolution network.

    Reads the module-level ``image_size``, ``num_labels``, the train / valid /
    test datasets and labels, and the ``accuracy`` helper. Prints minibatch,
    validation and test accuracy; returns nothing.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1
    # Each stride-2 'SAME' convolution yields ceil(size / 2) outputs per axis,
    # so apply that twice to get the side length of the final feature map.
    reduced_size = (image_size + 1) // 2
    reduced_size = (reduced_size + 1) // 2

    graph = tf.Graph()

    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size,
                                                 image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        layer1_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth],
                                stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, depth, depth],
                                stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        layer3_weights = tf.Variable(
            tf.truncated_normal(
                [reduced_size * reduced_size * depth, num_hidden],
                stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(
            tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data):
            conv = tf.nn.conv2d(data,
                                layer1_weights, [1, 2, 2, 1],
                                padding='SAME')
            hidden = tf.nn.relu(conv + layer1_biases)
            conv = tf.nn.conv2d(hidden,
                                layer2_weights, [1, 2, 2, 1],
                                padding='SAME')
            hidden = tf.nn.relu(conv + layer2_biases)
            # Flatten everything except the batch axis for the dense layers.
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden,
                                 [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(
                tf.matmul(reshape, layer3_weights) + layer3_biases)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.
        logits = model(tf_train_dataset)
        # Keyword arguments: newer TensorFlow releases reject the positional
        # form of this call.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=tf_train_labels))

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 1001

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            # Walk through the training set, wrapping before the last batch.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' %
                      accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' %
                      accuracy(valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' %
              accuracy(test_prediction.eval(), test_labels))
Example #7
0
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels, image_size,
               num_labels, basic_hps, stride_ps):
    """Train a variable-depth conv net whose channel count shrinks per layer.

    The graph is an input convolution, ``layer_sum - 1`` further convolutions
    (layer ``i`` maps ``depth // (i + 1)`` channels to ``depth // (i + 2)``)
    and three fully connected layers, trained with Adagrad. Validation
    accuracy is printed every 100 steps.

    Args:
        train_dataset: Training images, indexed as ``[sample, :, :, :]``.
        train_labels: Training labels, indexed as ``[sample, :]``.
        valid_dataset: Validation images, embedded in the graph as a constant.
        valid_labels: Labels passed to ``accuracy`` with the validation output.
        test_dataset: Unused by this variant.
        test_labels: Unused by this variant.
        image_size: Height and width used for the training placeholder.
        num_labels: Width of the label placeholder and of the final layer.
        basic_hps: Mapping providing ``'batch_size'``, ``'patch_size'``,
            ``'depth'``, ``'num_hidden'``, ``'layer_sum'`` and
            ``'starter_learning_rate'``.
        stride_ps: List of ``conv2d`` stride lists, one per convolution.
            NOTE: modified in place -- an entry is replaced by
            ``[1, 1, 1, 1]`` whenever ``large_data_size`` rejects the layer's
            input or filter.

    Returns:
        None. The windowed mean losses are gathered in a local list only.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    starter_learning_rate = basic_hps['starter_learning_rate']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Floor division keeps tensor dimensions integral under true division.
    second_hidden_num = first_hidden_num // 2 + 1

    def shrunk(size, divisor):
        # Integer share of `size`, never below 1 so shapes stay valid.
        return max(1, size // divisor)

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)

        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Filled lazily by the first model() call, once input shapes are known.
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[shrunk(depth, i + 2)])) for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, init=False):
            # init=True marks the training pass: it creates the shape-dependent
            # variables and enables dropout. Other passes reuse the variables.
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0], use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    hid_shape = hidden.get_shape()
                    filter_w = shrunk(patch_size, i + 1)
                    filter_h = shrunk(patch_size, i + 1)
                    # Avoid a filter larger than the feature map it scans.
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, shrunk(depth, i + 1), shrunk(depth, i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1], use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)

            # Flatten everything except the batch axis.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_weights.append(tf.Variable(tf.truncated_normal([shape_mul, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)

        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    num_steps = 1001

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        # Running sum and count give a true mean for every window, including
        # the single-sample window at step 0.
        loss_sum = 0.0
        loss_cnt = 0
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, _ = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            loss_sum += l
            loss_cnt += 1
            if step % 10 == 0:
                mean_loss = loss_sum / loss_cnt
                loss_sum = 0.0
                loss_cnt = 0
                if step % 100 == 0:
                    # Record the mean before anything resets it; the previous
                    # ordering stored 0 every time.
                    loss_collect.append(mean_loss)
                    print('Minibatch loss at step %d: %f' % (step, l))
                    print('Validation accuracy: %.1f%%' % accuracy(
                        valid_prediction.eval(), valid_labels))
Example #8
0
def better_conv_train(drop=False, lrd=False):
    """Train a two-convolution network with max pooling and optional extras.

    Reads the module-level ``image_size``, ``num_labels``, the train / valid /
    test datasets and labels, and the ``maxpool2d`` and ``accuracy`` helpers.
    Prints progress, the final test accuracy and the per-step loss list.

    Args:
        drop: When true, apply dropout (keep probabilities 0.5, 0.7, 0.8) in
            the training graph. Evaluation graphs never use dropout.
        lrd: When true, use a staircase exponentially decayed learning rate
            starting at 0.1; otherwise a constant rate of 0.05.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1

    graph = tf.Graph()

    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Convolution variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))

        def conv_features(data, training):
            # Convolution stack; returns the activations flattened per sample.
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer1_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.5)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer2_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.7)
            shape = hidden.get_shape().as_list()
            return tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])

        train_features = conv_features(tf_train_dataset, training=True)
        # Size the first dense layer from the real flattened width instead of
        # a constant that only fits one image size.
        flat_size = train_features.get_shape().as_list()[1]

        # Fully connected variables.
        layer3_weights = tf.Variable(tf.truncated_normal(
            [flat_size, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        def dense_head(features, training):
            # Hidden layer plus output logits.
            hidden = tf.nn.relu(tf.matmul(features, layer3_weights) + layer3_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        def model(data):
            # Inference-only forward pass: dropout is always disabled.
            return dense_head(conv_features(data, training=False), training=False)

        # Training computation.
        logits = dense_head(train_features, training=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

        # Optimizer.
        if lrd:
            # Step counter; excluded from the trainable variables.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 5001
    losses = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            # Walk through the training set, wrapping before the last batch.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            losses.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        print(losses)
Example #9
0
def tf_deep_nn(regular=False, drop_out=False, lrd=False, layer_cnt=2):
    """Train a fully connected sigmoid network of configurable depth.

    Reads the module-level ``feature_dim``, ``num_labels``, the train / valid /
    test datasets and labels, and the ``accuracy`` helper. The first hidden
    layer has 32 units; each of the ``layer_cnt - 2`` middle layers halves the
    width, never going below 2. Prints progress and the final test accuracy.

    Args:
        regular: When true, add an L2 penalty (factor 1e-2) over all weight
            matrices to the loss.
        drop_out: When true, apply dropout in the training graph. The keep
            probability starts at 0.5 and grows with layer index, capped at 1.
        lrd: When true, use gradient descent with a staircase exponentially
            decayed learning rate; otherwise use Adam.
        layer_cnt: Total layer count; values of 2 or less give no middle
            layers.
    """
    batch_size = 128

    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, feature_dim))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        hidden_node_count = 32
        # First layer weights.
        hidden_stddev = np.sqrt(2.0 / 100)
        weights1 = tf.Variable(
            tf.truncated_normal([feature_dim, hidden_node_count],
                                stddev=hidden_stddev))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))

        # Middle layer weights, plus the dropout keep probability per layer.
        weights = []
        biases = []
        keep_probs = []
        keep_prob = 0.5
        hidden_cur_cnt = hidden_node_count
        for i in range(layer_cnt - 2):
            if hidden_cur_cnt > 2:
                hidden_next_cnt = int(hidden_cur_cnt / 2)
            else:
                hidden_next_cnt = 2
            hidden_stddev = np.sqrt(2.0 / hidden_cur_cnt / 10)
            weights.append(
                tf.Variable(
                    tf.truncated_normal([hidden_cur_cnt, hidden_next_cnt],
                                        stddev=hidden_stddev)))
            biases.append(tf.Variable(tf.zeros([hidden_next_cnt])))
            hidden_cur_cnt = hidden_next_cnt
            # The cumulative increase passes 1.0 for deep networks, which
            # tf.nn.dropout rejects, so cap it.
            keep_prob = min(1.0, keep_prob + 0.5 * i / (layer_cnt + 1))
            keep_probs.append(keep_prob)

        # Output layer weights; stddev follows the last hidden layer's value.
        weights2 = tf.Variable(
            tf.truncated_normal([hidden_cur_cnt, num_labels],
                                stddev=hidden_stddev / 2))
        biases2 = tf.Variable(tf.zeros([num_labels]))

        def forward(data, training):
            # Shared forward pass; dropout only when training with drop_out.
            use_dropout = drop_out and training
            hidden = tf.nn.sigmoid(tf.matmul(data, weights1) + biases1)
            if use_dropout:
                hidden = tf.nn.dropout(hidden, 0.5)
            for layer_w, layer_b, layer_keep in zip(weights, biases,
                                                    keep_probs):
                hidden = tf.nn.sigmoid(tf.matmul(hidden, layer_w) + layer_b)
                if use_dropout:
                    hidden = tf.nn.dropout(hidden, layer_keep)
            return tf.matmul(hidden, weights2) + biases2

        # Training logits (with dropout) and dropout-free prediction logits.
        logits = forward(tf_train_dataset, training=True)
        logits_predict = forward(tf_train_dataset, training=False)
        valid_predict = forward(tf_valid_dataset, training=False)
        test_predict = forward(tf_test_dataset, training=False)

        l2_loss = 0
        # Optional L2 regularization over the weight matrices (not biases).
        if regular:
            l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)
            for layer_w in weights:
                l2_loss += tf.nn.l2_loss(layer_w)

            beta = 1e-2
            l2_loss *= beta
        # Keyword arguments: newer TensorFlow releases reject the positional
        # form of this call.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf_train_labels)) + l2_loss

        # Optimizer.
        if lrd:
            cur_step = tf.Variable(
                0, trainable=False)  # count the number of steps taken.
            starter_learning_rate = 0.4
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       cur_step,
                                                       500,
                                                       0.75,
                                                       staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdamOptimizer(0.5).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)

    num_steps = 8001

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Walk through the training set, wrapping before the last batch.
            offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" %
                      accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" %
                      accuracy(valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" %
              accuracy(test_prediction.eval(), test_labels))
Example #10
0
def better_conv_train(drop=False, lrd=False):
    """Train a small two-convolution network (2x2 patches, depth 12).

    Reads the module-level ``image_size``, ``num_labels``, the train / valid /
    test datasets and labels, and the ``maxpool2d`` and ``accuracy`` helpers.
    Prints progress, the final test accuracy and the per-step loss list.

    Args:
        drop: When true, apply dropout (keep probabilities 0.5, 0.7, 0.8) in
            the training graph. Evaluation graphs never use dropout.
        lrd: When true, use a staircase exponentially decayed learning rate
            starting at 0.1; otherwise a constant rate of 0.05.
    """
    batch_size = 12
    patch_size = 2
    depth = 12
    num_hidden = 64
    num_channels = 1

    graph = tf.Graph()

    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Convolution variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))

        def conv_features(data, training):
            # Convolution stack; returns the activations flattened per sample.
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer1_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.5)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer2_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.7)
            shape = hidden.get_shape().as_list()
            return tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])

        train_features = conv_features(tf_train_dataset, training=True)
        # Size the first dense layer from the real flattened width instead of
        # a constant that only fits one image size.
        flat_size = train_features.get_shape().as_list()[1]

        # Fully connected variables.
        layer3_weights = tf.Variable(tf.truncated_normal(
            [flat_size, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        def dense_head(features, training):
            # Hidden layer plus output logits.
            hidden = tf.nn.relu(tf.matmul(features, layer3_weights) + layer3_biases)
            if drop and training:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        def model(data):
            # Inference-only forward pass: dropout is always disabled.
            return dense_head(conv_features(data, training=False), training=False)

        # Training computation.
        logits = dense_head(train_features, training=True)
        # Keyword arguments: newer TensorFlow releases reject the positional
        # form of this call.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

        # Optimizer.
        if lrd:
            # Step counter; excluded from the trainable variables.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 5001
    losses = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            # Walk through the training set, wrapping before the last batch.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            losses.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        print(losses)
Example #11
0
def conv_train():
    """Build, train and evaluate a small two-convolution classifier.

    The graph consists of two stride-2 convolutions with 5x5 patches
    (``SAME`` padding, ReLU), one fully connected ReLU layer and a linear
    output layer.  It is trained for 1001 minibatch steps with plain gradient
    descent (learning rate 0.05) on softmax cross-entropy.

    The function takes no arguments; ``image_size``, ``num_labels``,
    ``train_dataset``, ``train_labels``, ``valid_dataset``, ``valid_labels``,
    ``test_dataset``, ``test_labels`` and ``accuracy`` are read from the
    enclosing module.  The validation and test sets are embedded in the graph
    as constants.

    Minibatch loss/accuracy and validation accuracy are printed every 50
    steps and the test accuracy once at the end.  Nothing is returned.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1
    num_steps = 1001

    # A stride-2 SAME convolution produces ceil(n / 2) rows/columns, so two of
    # them produce ceil(n / 4).  The former expression
    # ``image_size // 4 * image_size // 4 * depth`` only gave this value when
    # image_size was a multiple of 4 (it parses left to right).
    conv_out_size = -(-image_size // 4)
    flat_size = conv_out_size * conv_out_size * depth

    graph = tf.Graph()

    with graph.as_default():
        # Input data: minibatches are fed at run time, the evaluation sets
        # are baked into the graph.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        layer3_weights = tf.Variable(tf.truncated_normal(
            [flat_size, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data):
            """Return the unnormalised class scores (logits) for ``data``."""
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer1_biases)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer2_biases)
            # Flatten every example to a vector for the dense layers.
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.  Keyword arguments are required by
        # TensorFlow >= 1.0 and also accepted by earlier releases.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            # Walk through the training set in order, wrapping around before
            # a slice would run past the end.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
Example #12
0
def tf_better_nn(offset_range=-1, regular=False, drop_out=False, lrd=False):
    """Train and evaluate a one-hidden-layer (1024 ReLU units) classifier.

    ``image_size``, ``num_labels``, the train/valid/test datasets and labels,
    and ``accuracy`` are read from the enclosing module.  Training inputs are
    fed as ``(128, image_size * image_size)`` minibatches; the validation and
    test sets are embedded in the graph as constants.

    Args:
        offset_range: Modulus applied to ``step * batch_size`` when choosing
            where each minibatch starts.  ``-1`` (the default) selects
            ``train_labels.shape[0] - batch_size``.
        regular: If true, add ``0.002 * L2`` of all weights and biases to the
            loss.
        drop_out: If true, apply dropout (keep probability 0.5) to the hidden
            activations on the path used for the training loss only.
        lrd: If true, train with an exponentially decaying learning rate
            (start 0.1, factor 0.96 every 10000 steps, staircase); otherwise
            use a constant rate of 0.5.

    Prints minibatch loss/accuracy and validation accuracy every 500 steps
    and the test accuracy after 30001 steps.  Returns nothing.
    """
    batch_size = 128
    hidden_node_count = 1024
    beta = 0.002
    num_steps = 30001

    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size,
                                                 image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        weights1 = tf.Variable(
            tf.truncated_normal([image_size * image_size, hidden_node_count]))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))

        weights2 = tf.Variable(
            tf.truncated_normal([hidden_node_count, num_labels]))
        biases2 = tf.Variable(tf.zeros([num_labels]))

        def hidden_layer(inputs):
            """ReLU activations of the shared hidden layer."""
            return tf.nn.relu(tf.matmul(inputs, weights1) + biases1)

        def output_layer(activations):
            """Logits of the shared output layer."""
            return tf.matmul(activations, weights2) + biases2

        hidden = hidden_layer(tf_train_dataset)
        valid_hidden1 = hidden_layer(tf_valid_dataset)
        test_hidden1 = hidden_layer(tf_test_dataset)

        # Dropout is only inserted on the path that feeds the training loss;
        # every prediction below uses the undropped activations.
        keep_prob = tf.placeholder(tf.float32)
        h_fc = tf.nn.dropout(hidden, keep_prob) if drop_out else hidden

        logits = output_layer(h_fc)
        logits_predict = output_layer(hidden)
        valid_predict = output_layer(valid_hidden1)
        test_predict = output_layer(test_hidden1)

        # Loss.  The L2 term is only built when it is actually used.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf_train_labels))
        if regular:
            l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(
                biases1) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2)
            loss = loss + beta * l2_loss

        # Optimizer.  Exactly one minimize() graph is built; previously a
        # constant-rate one was always created and then discarded when lrd
        # was set.
        if lrd:
            # Step counter driven by minimize(); it is not a model parameter.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       cur_step,
                                                       10000,
                                                       0.96,
                                                       staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)

    # Resolve the default once instead of re-checking it on every step.
    if offset_range == -1:
        offset_range = train_labels.shape[0] - batch_size

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            offset = (step * batch_size) % offset_range
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # The keep probability is fed unconditionally; it is simply
            # unused by the graph when drop_out is false.
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels,
                keep_prob: 0.5
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 500 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" %
                      accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" %
                      accuracy(valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" %
              accuracy(test_prediction.eval(), test_labels))
Example #13
0
def tf_better_nn(offset_range=-1, regular=False, drop_out=False, lrd=False):
    """Fit a single-hidden-layer network and report its accuracy.

    The network is ``input -> 1024 ReLU units -> num_labels logits``, trained
    on minibatches of 128 flattened images for 30001 steps.  The datasets,
    labels, ``image_size``, ``num_labels`` and ``accuracy`` come from the
    enclosing module.

    Args:
        offset_range: Minibatch start offsets are
            ``(step * batch_size) % offset_range``.  The default ``-1`` is
            replaced by ``train_labels.shape[0] - batch_size``.
        regular: Enable L2 regularisation of both layers' weights and biases
            (coefficient 0.002).
        drop_out: Enable dropout with keep probability 0.5 on the hidden
            layer for the training loss; predictions never use dropout.
        lrd: Enable learning-rate decay (0.1 initially, multiplied by 0.96
            every 10000 steps); without it the rate is a constant 0.5.

    Progress is printed every 500 steps, the test accuracy at the end.
    Returns nothing.
    """
    batch_size = 128
    hidden_node_count = 1024
    beta = 0.002
    num_steps = 30001

    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        weights1 = tf.Variable(
            tf.truncated_normal([image_size * image_size, hidden_node_count]))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))

        weights2 = tf.Variable(
            tf.truncated_normal([hidden_node_count, num_labels]))
        biases2 = tf.Variable(tf.zeros([num_labels]))

        # Hidden activations for each of the three data sets (shared weights).
        hidden = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
        valid_hidden1 = tf.nn.relu(tf.matmul(tf_valid_dataset, weights1) + biases1)
        test_hidden1 = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)

        # Optional dropout, used for the training loss only.
        keep_prob = tf.placeholder(tf.float32)
        if drop_out:
            h_fc = tf.nn.dropout(hidden, keep_prob)
        else:
            h_fc = hidden

        # Output layer: `logits` drives the loss, the *_predict tensors are
        # computed without dropout.
        logits = tf.matmul(h_fc, weights2) + biases2
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden1, weights2) + biases2
        test_predict = tf.matmul(test_hidden1, weights2) + biases2

        # Loss; the regulariser ops are created only on request.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        if regular:
            l2_terms = [tf.nn.l2_loss(var) for var in (weights1, biases1, weights2, biases2)]
            loss = loss + beta * tf.add_n(l2_terms)

        # Optimizer.  Build only the variant that will be run rather than
        # always building the constant-rate one first.
        if lrd:
            # The step counter is bookkeeping, not a trainable parameter.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)

    # The default does not depend on the step, so resolve it before looping.
    if offset_range == -1:
        offset_range = train_labels.shape[0] - batch_size

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            offset = (step * batch_size) % offset_range
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Map each placeholder to the numpy array (or scalar) fed to it.
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels, keep_prob: 0.5}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 500 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                    valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
Example #14
0
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels, image_size,
               num_labels, basic_hps, stride_ps, drop=False, lrd=False):
    """Train a variable-depth convolutional classifier with checkpointing.

    The network is an input convolution followed by ``layer_sum - 1`` further
    convolutions whose output depth grows by ``depth`` per layer, then three
    dense layers (``num_hidden`` -> ``num_hidden // 2 + 1`` -> ``num_labels``).

    Args:
        train_dataset, train_labels: Training images and labels, sliced as
            ``[offset:offset + batch_size]`` to form minibatches.
        valid_dataset, valid_labels: Validation data; the images are embedded
            in the graph as a constant.
        test_dataset, test_labels: Test data, handled like the validation set.
        image_size: Height and width of the (single-channel) input images.
        num_labels: Number of output classes.
        basic_hps: Dict with integer entries ``batch_size``, ``patch_size``,
            ``depth``, ``num_hidden`` and ``layer_sum``.
        stride_ps: Indexable collection of conv strides, one per conv layer.
            NOTE: entries are overwritten in place with ``[1, 1, 1, 1]``
            whenever ``large_data_size`` reports a small tensor, so the
            caller's object is modified.
        drop: If true, apply dropout on the training path.
        lrd: If true, use gradient descent with a decaying learning rate
            (0.1, multiplied by 0.1 every 600 steps); otherwise Adagrad with
            rate 0.06.

    Prints the mean minibatch loss and validation accuracy every 200 steps
    and the test accuracy at the end.  Variables are restored from / saved to
    the ``conv_mnist`` checkpoint path.  Returns nothing.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    first_hidden_num = basic_hps['num_hidden']
    # Floor division: this value is used as a tensor dimension and must stay
    # an int on Python 3 as well (``/`` would yield a float there).
    second_hidden_num = first_hidden_num // 2 + 1
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        # the third parameter must be same as the last layer depth
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))

        mid_layer_cnt = layer_cnt - 1
        # Filled lazily by the first model() call, once the input shape of
        # each layer is known.
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth * (i + 2)])) for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[first_hidden_num]))
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, model_drop=True, init=True):
            """Return logits for ``data``.

            ``init=True`` creates the shape-dependent weights and must be used
            for the first call only; later calls reuse them with
            ``init=False``.  ``model_drop=False`` disables dropout regardless
            of ``drop``.
            """
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0], use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu6(conv + input_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                print(hidden)
                if init:
                    # avoid filter shape larger than input shape
                    hid_shape = hidden.get_shape()
                    # Integer sizes that shrink with depth; never let them
                    # reach zero, which is not a usable filter size.
                    filter_w = max(1, patch_size // (i + 1))
                    filter_h = max(1, patch_size // (i + 1))
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth * (i + 1), depth * (i + 2)], stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1], use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    print('not large')
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu6(conv + layer_biases[i])

            # Flatten everything but the batch dimension.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(tf.truncated_normal([output_size, first_hidden_num], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 600, 0.1, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(0.06).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset, model_drop=False, init=False))
        test_prediction = tf.nn.softmax(model(tf_test_dataset, model_drop=False, init=False))
        saver = tf.train.Saver()
    # on step 1750, run over 55000 train images
    num_steps = 1750 * 3

    save_path = 'conv_mnist'
    save_flag = True
    with tf.Session(graph=graph) as session:
        # NOTE(review): depending on the checkpoint format the Saver writes,
        # the files on disk may be named '<save_path>.index' etc. rather than
        # exactly save_path, in which case this check never succeeds --
        # confirm against the TensorFlow version in use.
        if os.path.exists(save_path) and save_flag:
            # Restore variables from disk.
            saver.restore(session, save_path)
        else:
            tf.global_variables_initializer().run()
            print('Initialized')
        # Running sum and count of the losses seen since the last reset, so
        # the reported mean is correct even for the one-sample window at
        # step 0 (which used to be divided by 10).
        window_loss = 0.0
        window_len = 0
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            window_loss += l
            window_len += 1
            if step % 10 == 0:
                if step % 200 == 0:
                    mean_loss = window_loss / window_len
                    print('Minibatch loss at step %d: %f' % (step, mean_loss))
                    print('Validation accuracy: %.1f%%' % accuracy(
                        valid_prediction.eval(), valid_labels))
                window_loss = 0.0
                window_len = 0
        if save_flag:
            saver.save(session, save_path)
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
Example #15
0
def conv_train(basic_hps, stride_ps, layer_cnt=3, drop=False, lrd=False):
    """Train a stacked convolutional classifier with loss-based early stopping.

    The network is an input convolution plus ``layer_cnt - 1`` further
    convolutions (each followed by ``maxpool2d`` and ReLU), one dense ReLU
    layer and a linear output layer.  ``image_size``, ``num_labels``, the
    datasets and labels, ``accuracy``, ``maxpool2d``, ``size_by_conv`` and
    ``fit_loss`` are read from the enclosing module.

    Args:
        basic_hps: Dict with entries ``batch_size``, ``patch_size``,
            ``depth``, ``num_hidden`` and ``num_channels``.
        stride_ps: Indexable collection of conv strides; entry ``0`` is used
            by the input convolution and entry ``i + 1`` by hidden
            convolution ``i``.
        layer_cnt: Total number of convolutional layers.
        drop: If true, apply dropout while training.  Dropout is never
            applied to the validation or test predictions.
        lrd: If true, use an exponentially decaying learning rate (start
            0.1, factor 0.96 every 10000 steps); otherwise a constant 0.05.

    Runs at most 3001 steps.  Every 100 steps the collected losses are passed
    to ``fit_loss``; from step 200 onwards a result with ``ret == 1`` stops
    training.  Progress is printed every 50 steps, then the test accuracy and
    finally every collected loss value.  Returns nothing.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = basic_hps['num_channels']
    loss_collect = list()

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))

        mid_layer_cnt = layer_cnt - 1
        layer_weights = [tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1)) for _ in range(mid_layer_cnt)]
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth])) for _ in range(mid_layer_cnt)]

        # presumably the flattened per-example size after all conv/pool
        # layers; it is used as such below -- verify against size_by_conv.
        output_size = size_by_conv(stride_ps, [batch_size, image_size, image_size, num_channels], layer_cnt)
        output_weights = tf.Variable(tf.truncated_normal([output_size, num_hidden], stddev=0.1))
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        final_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        final_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, use_dropout=False):
            """Return logits for ``data``; dropout only when ``use_dropout``."""
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0], use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if use_dropout:
                hidden = tf.nn.dropout(hidden, 0.5)
            for i in range(mid_layer_cnt):
                print(i)
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1], use_cudnn_on_gpu=True, padding='SAME')
                conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if use_dropout:
                    hidden = tf.nn.dropout(hidden, 0.7)

            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], output_size])

            hidden = tf.nn.relu(tf.matmul(reshape, output_weights) + output_biases)
            if use_dropout:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, final_weights) + final_biases

        # Training computation.  Keyword arguments are required by
        # TensorFlow >= 1.0 and also accepted by earlier releases.
        logits = model(tf_train_dataset, use_dropout=drop)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        # Evaluation graphs are built without dropout: randomly dropping
        # units at evaluation time makes the reported accuracy noisy and
        # lower than what the trained weights actually achieve.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 3001
    fit_frep = 100

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False

        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            loss_collect.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
            if step == fit_frep:
                # First check: all losses collected so far.
                # NOTE(review): unlike the branch below, ret == 1 is only
                # reported here and does not stop training -- confirm that
                # this is intended.
                res = fit_loss([batch_size, depth, num_hidden], loss_collect)
                ret = res['ret']
                if ret == 1:
                    print('ret is end train when step is {step}'.format(step=step))

            elif step % fit_frep == 0 and step != 0:
                # Later checks: slide a window over the losses recorded
                # between two and one check intervals ago.
                for i in range(fit_frep):
                    res = fit_loss(
                        [batch_size, depth, num_hidden],
                        loss_collect[i + step - fit_frep * 2 + 1: i + step - fit_frep + 2])
                    ret = res['ret']
                    if i == 0:
                        print(res)
                    if ret == 1:
                        print('ret is end train when step is {step}'.format(step=step))
                        end_train = True
                        break
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

    # Dump the per-step training losses.
    for step_loss in loss_collect:
        print(step_loss)
Example #16
0
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels, image_size,
               num_labels, basic_hps, stride_ps, lrd=False,
               loss_log_path='/home/cwh/coding/python/NN/line.txt'):
    """Build a convolutional classifier, train it, and stop early on a loss-curve fit.

    The network is: one input convolution, ``layer_sum - 1`` further
    convolutions whose channel count shrinks as ``depth // (i + 1)`` ->
    ``depth // (i + 2)``, a ReLU6 fully connected layer of ``num_hidden``
    units, then two linear layers (``num_hidden`` -> ``num_hidden // 2 + 1``
    -> ``num_labels``).

    Training runs for at most 3001 minibatch steps. Every 5th step the mean
    of the losses seen since the previous bucket is appended to a sliding
    loss window. From step 600 on, each bucket is handed to ``fit_more``;
    when its result has ``ret == 1`` the function asks ``predict_future``
    for a number of additional steps, runs them, writes the resulting losses
    to ``loss_log_path`` and stops training.

    Args:
        train_dataset: Training images; sliced as a 4-D array and fed to a
            ``(batch_size, image_size, image_size, 1)`` float32 placeholder.
        train_labels: Training labels; sliced as a 2-D array and fed to a
            ``(batch_size, num_labels)`` float32 placeholder.
        valid_dataset: Validation images, embedded in the graph as a constant
            (presumably the same layout as ``train_dataset`` - confirm).
        valid_labels: Labels passed to ``accuracy`` with the validation
            predictions.
        test_dataset: Test images, embedded in the graph as a constant.
        test_labels: Labels passed to ``accuracy`` with the test predictions.
        image_size: Height and width of the (square) input images.
        num_labels: Number of output classes.
        basic_hps: Mapping with the keys ``'batch_size'``, ``'patch_size'``,
            ``'depth'``, ``'num_hidden'`` and ``'layer_sum'``.
        stride_ps: Indexable collection of conv strides; entry 0 is used by
            the input convolution and entry ``i + 1`` by hidden convolution
            ``i``. NOTE: entries are overwritten IN PLACE with
            ``[1, 1, 1, 1]`` whenever ``large_data_size`` reports the data or
            the filter as not large, so the caller's object is modified.
        lrd: When true, train with gradient descent and a staircase
            exponential learning-rate decay (factor 0.96 every 10000 steps);
            otherwise train with Adagrad at a fixed rate of 0.1.
        loss_log_path: File that receives the post-stop loss values via
            ``file_helper.write``. Defaults to the path that used to be
            hard-coded.

    Returns:
        None. Progress and the final test accuracy are printed.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()
    first_hidden_num = num_hidden
    # Floor division: tensor shapes must be integers. Plain `/` produces a
    # float on Python 3 and is identical to `//` for ints on Python 2.
    second_hidden_num = first_hidden_num // 2 + 1

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables whose shapes are known up front.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Hidden conv filters and the first dense weight matrix depend on the
        # activation shapes, so they are created lazily by model(init=True).
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)])) for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, init=False):
            """Return logits for `data`.

            With ``init=True`` the lazily-shaped variables are created and
            dropout is applied; this call must therefore come first. Later
            calls (``init=False``) reuse those variables without dropout.
            """
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0], use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth // (i + 1), depth // (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1], use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)

            # Flatten everything except the batch dimension.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s

            if init:
                output_weights.append(tf.Variable(tf.truncated_normal([shape_mul, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])

            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        starter_learning_rate = 0.1
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            learning_rate = tf.train.exponential_decay(starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)

        # Predictions for the validation and test data (training-set
        # predictions were never consumed, so they are no longer built).
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    num_steps = 3001
    start_fit = 600   # first step at which the loss-curve fit is consulted
    loss_window = 5   # a mean loss is recorded every `loss_window` steps

    def make_feed(step_index):
        """Return the feed dict for the minibatch that belongs to `step_index`."""
        offset = (step_index * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        return {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

    def hps_key():
        """Return a fresh hyper-parameter list for the fit helpers."""
        return [batch_size, depth, num_hidden, layer_cnt, patch_size]

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        loss_sum = 0
        loss_count = 0
        fit_started = False
        for step in range(num_steps):
            _, l = session.run([optimizer, loss], feed_dict=make_feed(step))
            loss_sum += l
            loss_count += 1
            if step % loss_window != 0:
                continue

            # Divide by the number of samples actually accumulated: the very
            # first bucket (step 0) holds one sample, not `loss_window`.
            loss_collect.append(loss_sum / float(loss_count))
            loss_sum = 0
            loss_count = 0
            if step < start_fit:
                continue

            # The first fit call is flagged with 1, every later one with 0
            # (presumably "initialise" vs. "continue" - confirm in fit_more).
            res = fit_more(0 if fit_started else 1, hps_key(), loss_collect)
            fit_started = True
            del loss_collect[0]  # slide the window forward by one bucket

            stop_training = res['ret'] == 1
            if stop_training:
                print('ret is end train when step is {step}'.format(step=step))
                more_index = predict_future(hps_key(), loss_collect)
                print('more index: %d' % more_index)
                # NOTE(review): these extra steps push raw per-step losses
                # into a window that otherwise holds 5-step means - confirm
                # that this mix is intended.
                for i in range(more_index):
                    _, l = session.run([optimizer, loss], feed_dict=make_feed(step + i + 1))
                    loss_collect.append(l)
                    file_helper.write(loss_log_path, str(loss_collect[20]))
                    del loss_collect[0]
                # Named `tail_loss` so the TF `loss` tensor is not shadowed.
                for tail_loss in loss_collect[21:]:
                    file_helper.write(loss_log_path, str(tail_loss))
                file_helper.write(loss_log_path, '===')

            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))

            if stop_training:
                break

        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))