def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps,
               drop=False, lrd=False, get_grad=False, norm_list=None):
    """Build and train a configurable CNN; fit the loss curve to decide when to stop.

    Args:
        train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels:
            numpy image arrays of shape (n, image_size, image_size, 1) and one-hot labels.
        image_size: edge length of the square input images.
        num_labels: number of output classes.
        basic_hps: hyper-parameter dict with keys 'batch_size', 'patch_size',
            'depth', 'num_hidden', 'layer_sum'.
        stride_ps: list of conv stride specs ([1, sh, sw, 1]); entries are
            overwritten in place with [1, 1, 1, 1] when a layer input is too small.
        drop: enable dropout after each layer.
        lrd: use exponentially decayed learning rate instead of a fixed one.
        get_grad: also call better_hyper() on every fitting step.
        norm_list: normalization constants for the five hyper-parameters. When
            None the function returns early with a list of ones — NOTE(review):
            that early return has a different type than the normal
            (end_train, hypers) pair; kept for backward compatibility.

    Returns:
        (end_train, hypers): whether early stopping triggered, and the (possibly
        improved) hyper-parameter list [batch_size, depth, num_hidden, layer_cnt,
        patch_size]. A plain list of ones when norm_list is None.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()

    def _normed_hypers():
        # Hyper-parameters scaled by their normalization constants. Real
        # division is intended here: norm_list entries are scale factors.
        return [batch_size / norm_list[0], depth / norm_list[1],
                num_hidden / norm_list[2], layer_cnt / norm_list[3],
                patch_size / norm_list[4]]

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth]))
                        for _ in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        final_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
        final_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        # Flipped to True after the first model() build so the valid/test graphs
        # reuse the training weights instead of creating throwaway Variables
        # (the original never set it, leaking one unused Variable set per call).
        weight_set_done = False

        # Model.
        def model(data):
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            for i in range(mid_layer_cnt):
                if not weight_set_done:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth, depth], stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    # Input too small for the configured stride: fall back to 1.
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if drop:
                    hidden = tf.nn.dropout(hidden, 0.7)
            # Flatten the feature map and run the fully connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if not weight_set_done:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu(tf.matmul(reshape, output_weights[0]) + output_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, final_weights) + final_biases

        # Training computation.
        logits = model(tf_train_dataset)
        # All weights exist now; later model() calls must reuse them.
        weight_set_done = True
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            # Global step counter; trainable=False keeps it out of the
            # optimizer's variable list.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    num_steps = 3001
    start_fit = 600
    init_loss = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if norm_list is None:
                    # Legacy escape hatch: no normalization constants means no
                    # curve fitting; return neutral multipliers immediately.
                    return [1 for _ in range(len(basic_hps))]
                if step >= start_fit:
                    # First fitting step initializes the fitter (flag 1),
                    # subsequent steps update it (flag 0).
                    if step == start_fit:
                        res = fit_loss(1, _normed_hypers(), loss_collect)
                    else:
                        res = fit_loss(0, _normed_hypers(), loss_collect)
                    if get_grad:
                        better_hyper(_normed_hypers(), loss_collect)
                    # Slide the loss window forward by one entry.
                    loss_collect.remove(loss_collect[0])
                    ret = res['ret']
                    if ret == 1 and not get_grad:
                        print('ret is end train when step is {step}'.format(step=step))
                        init_loss.append(loss_collect)
                        end_train = True
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    if end_train:
        # Ask the fitter for improved hypers, then de-normalize and round them.
        hypers = better_hyper(_normed_hypers(), init_loss[0])
        print(hypers)
        hypers = [hyper * norm_list[i] for i, hyper in enumerate(hypers)]
        print(norm_list)
        print(hypers)
        for i in range(len(hypers)):
            if hypers[i] <= 1.0:
                hypers[i] = 1
            else:
                hypers[i] = int(hypers[i])
    else:
        hypers = [batch_size, depth, num_hidden, layer_cnt, patch_size]
    return end_train, hypers
def tf_deep_nn(regular=False, drop_out=False, lrd=False, layer_cnt=2):
    """Train a fully-connected sigmoid network of `layer_cnt` layers.

    Reads the module-level globals `feature_dim`, `num_labels`, `train_dataset`,
    `train_labels`, `valid_dataset`, `valid_labels`, `test_dataset`,
    `test_labels` and the `accuracy` helper.

    Args:
        regular: add L2 regularization (beta = 1e-2) over all weight matrices.
        drop_out: apply dropout on the training path; keep_prob grows with depth.
        lrd: use exponentially decayed learning rate with plain SGD; otherwise Adam.
        layer_cnt: total layer count; layer_cnt - 2 hidden layers are created,
            each half the width of the previous (minimum 2 units).
    """
    batch_size = 128
    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, feature_dim))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        hidden_node_count = 32
        # First-layer weights: He-style init scaled for the input layer.
        hidden_stddev = np.sqrt(2.0 / 100)
        weights1 = tf.Variable(
            tf.truncated_normal([feature_dim, hidden_node_count], stddev=hidden_stddev))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))
        # Middle-layer weights: each layer halves the node count (floor, min 2).
        weights = []
        biases = []
        hidden_cur_cnt = hidden_node_count
        for i in range(layer_cnt - 2):
            if hidden_cur_cnt > 2:
                hidden_next_cnt = hidden_cur_cnt // 2
            else:
                hidden_next_cnt = 2
            hidden_stddev = np.sqrt(2.0 / hidden_cur_cnt / 10)
            weights.append(tf.Variable(
                tf.truncated_normal([hidden_cur_cnt, hidden_next_cnt], stddev=hidden_stddev)))
            biases.append(tf.Variable(tf.zeros([hidden_next_cnt])))
            hidden_cur_cnt = hidden_next_cnt
        # First wx + b, then sigmoid, for train / valid / test paths.
        y0 = tf.matmul(tf_train_dataset, weights1) + biases1
        hidden = tf.nn.sigmoid(y0)
        hidden_drop = hidden
        keep_prob = 0.5
        if drop_out:
            hidden_drop = tf.nn.dropout(hidden, keep_prob)
        valid_y0 = tf.matmul(tf_valid_dataset, weights1) + biases1
        valid_hidden = tf.nn.sigmoid(valid_y0)
        test_y0 = tf.matmul(tf_test_dataset, weights1) + biases1
        test_hidden = tf.nn.sigmoid(test_y0)
        # Middle layers. `hidden_drop` is the training path (with dropout);
        # `hidden` is the clean path used for prediction.
        for i in range(layer_cnt - 2):
            y1 = tf.matmul(hidden_drop, weights[i]) + biases[i]
            hidden_drop = tf.nn.sigmoid(y1)
            if drop_out:
                # Deeper layers keep more units. The increment is cumulative,
                # so clamp it: tf.nn.dropout rejects keep_prob > 1, which the
                # original hit for large layer_cnt.
                keep_prob += 0.5 * i / (layer_cnt + 1)
                keep_prob = min(keep_prob, 1.0)
                hidden_drop = tf.nn.dropout(hidden_drop, keep_prob)
            y0 = tf.matmul(hidden, weights[i]) + biases[i]
            hidden = tf.nn.sigmoid(y0)
            valid_y0 = tf.matmul(valid_hidden, weights[i]) + biases[i]
            valid_hidden = tf.nn.sigmoid(valid_y0)
            test_y0 = tf.matmul(test_hidden, weights[i]) + biases[i]
            test_hidden = tf.nn.sigmoid(test_y0)
        # Output layer.
        weights2 = tf.Variable(
            tf.truncated_normal([hidden_cur_cnt, num_labels], stddev=hidden_stddev / 2))
        biases2 = tf.Variable(tf.zeros([num_labels]))
        logits = tf.matmul(hidden_drop, weights2) + biases2
        # Prediction logits bypass dropout.
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden, weights2) + biases2
        test_predict = tf.matmul(test_hidden, weights2) + biases2
        l2_loss = 0
        if regular:
            # Regularize weights only, not biases.
            l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)
            for i in range(len(weights)):
                l2_loss += tf.nn.l2_loss(weights[i])
            beta = 1e-2
            l2_loss *= beta
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_train_labels)) + l2_loss
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0, trainable=False)  # count the number of steps taken.
            starter_learning_rate = 0.4
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 500, 0.75, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            # NOTE(review): 0.5 is unusually large for Adam — confirm intended.
            optimizer = tf.train.AdamOptimizer(0.5).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)

    num_steps = 8001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                    valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps, lrd=False, get_grad=False):
    """Train a shrinking-depth CNN with a two-layer FC head; loss-curve fitting
    decides early stopping and proposes better hyper-parameters.

    Args:
        train_dataset ... test_labels: numpy image arrays (n, image_size,
            image_size, 1) and one-hot labels.
        image_size, num_labels: input edge length and class count.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum'.
        stride_ps: per-layer conv stride specs, mutated in place when a layer
            input is too small.
        lrd: use exponential learning-rate decay with SGD instead of Adagrad.
        get_grad: also call better_hyper() on each fitting step.

    Returns:
        (end_train, hypers): early-stop flag and hyper-parameter list
        [batch_size, depth, num_hidden, layer_cnt, patch_size].
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Integer division: this feeds tensor shapes, which must be ints
    # (plain '/' produces a float on Python 3).
    second_hidden_num = first_hidden_num // 2 + 1
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = list()
        # Channel count shrinks as depth // (i + 2) through the mid layers.
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model. init=True builds the mid-layer/output weight Variables (first,
        # training call) and enables dropout; init=False reuses them.
        def model(data, init=False):
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth // (i + 1), depth // (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    # Input too small for the configured stride: fall back to 1.
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)
            # Flatten and run the fully connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        starter_learning_rate = 0.1
        if lrd:
            # trainable=False keeps the step counter out of the optimizer.
            cur_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    num_steps = 3001
    start_fit = 600
    init_loss = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if step >= start_fit:
                    # First fitting step initializes the fitter (flag 1),
                    # subsequent steps update it (flag 0).
                    if step == start_fit:
                        res = fit_loss(
                            1, [batch_size, depth, num_hidden, layer_cnt, patch_size],
                            loss_collect)
                    else:
                        res = fit_loss(
                            0, [batch_size, depth, num_hidden, layer_cnt, patch_size],
                            loss_collect)
                    if get_grad:
                        better_hyper(
                            [batch_size, depth, num_hidden, layer_cnt, patch_size],
                            loss_collect)
                    # Slide the loss window forward by one entry.
                    loss_collect.remove(loss_collect[0])
                    ret = res['ret']
                    if ret == 1 and not get_grad:
                        print('ret is end train when step is {step}'.format(step=step))
                        init_loss.append(loss_collect)
                        end_train = True
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    if end_train:
        # Ask the fitter for improved hypers, then round them to usable ints.
        hypers = better_hyper(
            [batch_size, depth, num_hidden, layer_cnt, patch_size], init_loss[0])
        print(hypers)
        for i in range(len(hypers)):
            if hypers[i] <= 1.0:
                hypers[i] = 1
            else:
                hypers[i] = int(hypers[i])
    else:
        hypers = [batch_size, depth, num_hidden, layer_cnt, patch_size]
    return end_train, hypers
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps):
    """Train a shrinking-depth CNN for a fixed 1001 steps, then ask
    better_trend_hyper() for improved hyper-parameters from the loss trend.

    Args:
        train_dataset ... test_labels: numpy image arrays (n, image_size,
            image_size, 1) and one-hot labels.
        image_size, num_labels: input edge length and class count.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum', 'start_learning_rate'.
        stride_ps: per-layer conv stride specs, mutated in place when a layer
            input is too small.

    Returns:
        The rounded hyper-parameter list [batch_size, depth, num_hidden,
        layer_cnt, patch_size] suggested by better_trend_hyper().
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    starter_learning_rate = basic_hps['start_learning_rate']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Integer division: this feeds tensor shapes, which must be ints
    # (plain '/' produces a float on Python 3).
    second_hidden_num = first_hidden_num // 2 + 1
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = list()
        # Channel count shrinks as depth // (i + 2) through the mid layers.
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model. init=True builds the mid-layer/output weight Variables (first,
        # training call) and enables dropout; init=False reuses them.
        def model(data, init=False):
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth // (i + 1), depth // (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    # Input too small for the configured stride: fall back to 1.
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)
            # Flatten and run the fully connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    num_steps = 1001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        mean_loss = 0
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                # Collect the 5-step average loss for the trend analysis below.
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Validation accuracy: %.1f%%' %
                      accuracy(valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    # Derive improved hyper-parameters from the full loss history and round
    # them to usable integers (minimum 1).
    hypers = better_trend_hyper(
        [batch_size, depth, num_hidden, layer_cnt, patch_size], loss_collect)
    print(hypers)
    for i in range(len(hypers)):
        if hypers[i] <= 1.0:
            hypers[i] = 1
        else:
            hypers[i] = int(hypers[i])
    return hypers
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps, drop=False, lrd=False):
    """Train a growing-depth CNN with checkpointing via tf.train.Saver.

    Args:
        train_dataset ... test_labels: numpy image arrays (n, image_size,
            image_size, 1) and one-hot labels.
        image_size, num_labels: input edge length and class count.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum'.
        stride_ps: per-layer conv stride specs, mutated in place when a layer
            input is too small.
        drop: enable dropout on the training path.
        lrd: use exponential learning-rate decay with SGD instead of Adagrad.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    first_hidden_num = basic_hps['num_hidden']
    # Integer division: this feeds tensor shapes, which must be ints
    # (plain '/' produces a float on Python 3).
    second_hidden_num = first_hidden_num // 2 + 1
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        # The third filter dimension must match the previous layer's depth.
        input_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = list()
        # Channel count grows as depth * (i + 2) through the mid layers.
        layer_biases = [
            tf.Variable(tf.constant(1.0, shape=[depth * (i + 2)]))
            for i in range(mid_layer_cnt)
        ]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[first_hidden_num]))
        first_nn_weights = tf.Variable(
            tf.truncated_normal([first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(
            tf.truncated_normal([second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model. init=True builds the mid-layer/output weight Variables (first,
        # training call); model_drop gates dropout so eval paths skip it.
        def model(data, model_drop=True, init=True):
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu6(conv + input_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                print(hidden)
                if init:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth * (i + 1), depth * (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    # Input too small for the configured stride: fall back to 1.
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    print('not large')
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu6(conv + layer_biases[i])
            # Flatten and run the fully connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, first_hidden_num], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            # trainable=False keeps the step counter out of the optimizer.
            cur_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 600, 0.1, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(0.06).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(
            model(tf_valid_dataset, model_drop=False, init=False))
        test_prediction = tf.nn.softmax(
            model(tf_test_dataset, model_drop=False, init=False))
        saver = tf.train.Saver()

    # On step 1750 the loop has run over the 55000 train images once.
    num_steps = 1750 * 3
    save_path = 'conv_mnist'
    save_flag = True
    with tf.Session(graph=graph) as session:
        # NOTE(review): newer Saver versions write 'conv_mnist.index' etc., so
        # os.path.exists(save_path) may never be True — verify against the TF
        # version in use.
        if os.path.exists(save_path) and save_flag:
            # Restore variables from disk.
            saver.restore(session, save_path)
        else:
            tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 10 == 0:
                mean_loss /= 10.0
                if step % 200 == 0:
                    print('Minibatch loss at step %d: %f' % (step, mean_loss))
                    print('Validation accuracy: %.1f%%' %
                          accuracy(valid_prediction.eval(), valid_labels))
                mean_loss = 0
        if save_flag:
            saver.save(session, save_path)
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
def conv_train():
    """Train a fixed two-conv-layer CNN baseline with hard-coded hypers.

    Reads the module-level globals `image_size`, `num_labels`, `train_dataset`,
    `train_labels`, `valid_dataset`, `valid_labels`, `test_dataset`,
    `test_labels` and the `accuracy` helper.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        layer1_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(
            tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        # Two stride-2 convs shrink each spatial dim by 4, hence image_size // 4.
        layer3_weights = tf.Variable(
            tf.truncated_normal(
                [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(
            tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model: conv/relu -> conv/relu -> FC/relu -> linear logits.
        def model(data):
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer1_biases)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer2_biases)
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    num_steps = 1001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' %
                      accuracy(valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps):
    """Train a shrinking-depth CNN for 1001 steps (validation only, no test set
    evaluation) and collect a running-average loss every 100 steps.

    Args:
        train_dataset ... test_labels: numpy image arrays (n, image_size,
            image_size, 1) and one-hot labels. test_dataset/test_labels are
            accepted for interface parity but unused here.
        image_size, num_labels: input edge length and class count.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum', 'starter_learning_rate'.
        stride_ps: per-layer conv stride specs, mutated in place when a layer
            input is too small.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    starter_learning_rate = basic_hps['starter_learning_rate']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # Integer division: this feeds tensor shapes, which must be ints
    # (plain '/' produces a float on Python 3).
    second_hidden_num = first_hidden_num // 2 + 1
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = list()
        # Channel count shrinks as depth // (i + 2) through the mid layers.
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
                        for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model. init=True builds the mid-layer/output weight Variables (first,
        # training call) and enables dropout; init=False reuses them.
        def model(data, init=False):
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    # Avoid a filter shape larger than the input shape.
                    hid_shape = hidden.get_shape()
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth // (i + 1), depth // (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    # Input too small for the configured stride: fall back to 1.
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)
            # Flatten and run the fully connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

    num_steps = 1001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            # end_train is never set in this variant; the check is kept for
            # interface parity with the other conv_train versions.
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 10 == 0:
                # Average over the 10-step window (the original divided by 5.0
                # and zeroed mean_loss before collecting it, so loss_collect
                # only ever received zeros).
                mean_loss /= 10.0
                if step % 100 == 0:
                    loss_collect.append(mean_loss)
                    print('Minibatch loss at step %d: %f' % (step, l))
                    print('Validation accuracy: %.1f%%' % accuracy(
                        valid_prediction.eval(), valid_labels))
                mean_loss = 0
def better_conv_train(drop=False, lrd=False):
    """Build and train a fixed 2-conv + 2-FC network on the module-level dataset.

    Args:
        drop: if True, apply dropout (keep probs 0.5 / 0.7 / 0.8) after each layer.
        lrd: if True, use exponentially decayed learning rate instead of a fixed 0.05.

    NOTE(review): reads image_size, num_labels, train/valid/test datasets and
    labels, plus the helpers maxpool2d() and accuracy(), from enclosing scope.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1  # assumes grayscale input — TODO confirm against dataset
    graph = tf.Graph()
    with graph.as_default():
        # Input data: minibatch placeholders for training, constants for eval.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        # NOTE(review): 64 is a hard-coded flattened conv-output size; it must
        # equal the product of the last hidden layer's non-batch dims — verify
        # against image_size and the stride/pool schedule.
        layer3_weights = tf.Variable(tf.truncated_normal(
            [64, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model: conv(s=2)+pool+relu, twice, then flatten and two FC layers.
        def model(data):
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer1_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer2_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.7)
            # Flatten to (batch, features) for the fully-connected layers.
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 5001
    losses = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            # Slide a minibatch window over the (pre-shuffled) training data.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            losses.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        print(losses)
def tf_deep_nn(regular=False, drop_out=False, lrd=False, layer_cnt=2):
    """Build and train a deep fully-connected sigmoid network.

    Hidden widths halve at each middle layer (min 2). Keeps three parallel
    towers — train (with dropout), train-eval (no dropout), valid/test — that
    share the same weights.

    Args:
        regular: if True, add beta-scaled L2 regularization over all weights.
        drop_out: if True, apply dropout on the training tower.
        lrd: if True, use exponential learning-rate decay with SGD; otherwise Adam.
        layer_cnt: total layer count; layer_cnt - 2 middle layers are created.

    NOTE(review): reads feature_dim, num_labels, train/valid/test datasets and
    labels, plus accuracy(), from enclosing scope.
    """
    batch_size = 128
    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, feature_dim))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        hidden_node_count = 32
        # start weight — He-style stddev for the input layer.
        hidden_stddev = np.sqrt(2.0 / 100)
        weights1 = tf.Variable(
            tf.truncated_normal([feature_dim, hidden_node_count], stddev=hidden_stddev))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))
        # middle weights: width halves per layer, floor of 2.
        weights = []
        biases = []
        hidden_cur_cnt = hidden_node_count
        for i in range(layer_cnt - 2):
            if hidden_cur_cnt > 2:
                hidden_next_cnt = int(hidden_cur_cnt / 2)
            else:
                hidden_next_cnt = 2
            hidden_stddev = np.sqrt(2.0 / hidden_cur_cnt / 10)
            weights.append(
                tf.Variable(
                    tf.truncated_normal([hidden_cur_cnt, hidden_next_cnt],
                                        stddev=hidden_stddev)))
            biases.append(tf.Variable(tf.zeros([hidden_next_cnt])))
            hidden_cur_cnt = hidden_next_cnt
        # first wx + b, then sigmoid.
        y0 = tf.matmul(tf_train_dataset, weights1) + biases1
        hidden = tf.nn.sigmoid(y0)
        hidden_drop = hidden
        # first DropOut (training tower only).
        keep_prob = 0.5
        if drop_out:
            hidden_drop = tf.nn.dropout(hidden, keep_prob)
        # first wx+b for valid
        valid_y0 = tf.matmul(tf_valid_dataset, weights1) + biases1
        valid_hidden = tf.nn.sigmoid(valid_y0)
        # first wx+b for test
        test_y0 = tf.matmul(tf_test_dataset, weights1) + biases1
        test_hidden = tf.nn.sigmoid(test_y0)
        # middle layers: advance all towers in lockstep with shared weights.
        for i in range(layer_cnt - 2):
            y1 = tf.matmul(hidden_drop, weights[i]) + biases[i]
            hidden_drop = tf.nn.sigmoid(y1)
            if drop_out:
                # keep probability rises with depth (drop less near the output).
                keep_prob += 0.5 * i / (layer_cnt + 1)
                hidden_drop = tf.nn.dropout(hidden_drop, keep_prob)
            y0 = tf.matmul(hidden, weights[i]) + biases[i]
            hidden = tf.nn.sigmoid(y0)
            valid_y0 = tf.matmul(valid_hidden, weights[i]) + biases[i]
            valid_hidden = tf.nn.sigmoid(valid_y0)
            test_y0 = tf.matmul(test_hidden, weights[i]) + biases[i]
            test_hidden = tf.nn.sigmoid(test_y0)
        # last weight
        weights2 = tf.Variable(
            tf.truncated_normal([hidden_cur_cnt, num_labels], stddev=hidden_stddev / 2))
        biases2 = tf.Variable(tf.zeros([num_labels]))
        # last wx + b
        logits = tf.matmul(hidden_drop, weights2) + biases2
        # predicts (no dropout).
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden, weights2) + biases2
        test_predict = tf.matmul(test_hidden, weights2) + biases2
        l2_loss = 0
        # enable regularization
        if regular:
            l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)
            for i in range(len(weights)):
                l2_loss += tf.nn.l2_loss(weights[i])
            beta = 1e-2
            l2_loss *= beta
        # FIX: softmax_cross_entropy_with_logits requires keyword arguments
        # (positional form is removed in current TF); also matches the other
        # training functions in this file.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf_train_labels)) + l2_loss
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0, trainable=False)  # count the number of steps taken.
            starter_learning_rate = 0.4
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 500, 0.75, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            # NOTE(review): 0.5 is an unusually large learning rate for Adam —
            # confirm this is intentional.
            optimizer = tf.train.AdamOptimizer(0.5).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)
    num_steps = 8001
    with tf.Session(graph=graph) as session:
        # FIX: initialize_all_variables() is deprecated/removed; use the
        # replacement already used elsewhere in this file.
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" %
                      accuracy(valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
def better_conv_train(drop=False, lrd=False):
    """Build and train a small 2-conv + 2-FC network (batch 12, patch 2, depth 12).

    Args:
        drop: if True, apply dropout (keep probs 0.5 / 0.7 / 0.8) after each layer.
        lrd: if True, use exponentially decayed learning rate instead of a fixed 0.05.

    NOTE(review): reads image_size, num_labels, the datasets/labels and the
    helpers maxpool2d() and accuracy() from enclosing scope.
    """
    batch_size = 12
    patch_size = 2
    depth = 12
    num_hidden = 64
    num_channels = 1  # assumes grayscale input — TODO confirm against dataset
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        # NOTE(review): 48 is a hard-coded flattened conv-output size; it must
        # equal the product of the last hidden layer's non-batch dims.
        layer3_weights = tf.Variable(tf.truncated_normal(
            [48, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data):
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer1_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + layer2_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.7)
            # Flatten to (batch, features) for the fully-connected layers.
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.
        logits = model(tf_train_dataset)
        # FIX: keyword arguments — the positional form of
        # softmax_cross_entropy_with_logits is removed in current TF, and the
        # other functions in this file already use the keyword form.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 5001
    losses = []
    with tf.Session(graph=graph) as session:
        # FIX: initialize_all_variables() is deprecated/removed.
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            losses.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        print(losses)
def conv_train():
    """Build and train the baseline 2-conv + 2-FC network (no pooling, no dropout).

    NOTE(review): reads image_size, num_labels, the datasets/labels and
    accuracy() from enclosing scope.
    """
    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64
    num_channels = 1  # assumes grayscale input — TODO confirm against dataset
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        layer1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([depth]))
        layer2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
        layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        # Two stride-2 convs shrink each spatial dim by 4x, hence image_size // 4.
        layer3_weights = tf.Variable(tf.truncated_normal(
            [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
        layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer4_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data):
            conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer1_biases)
            conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer2_biases)
            # Flatten to (batch, features) for the fully-connected layers.
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            return tf.matmul(hidden, layer4_weights) + layer4_biases

        # Training computation.
        logits = model(tf_train_dataset)
        # FIX: keyword arguments — the positional form of
        # softmax_cross_entropy_with_logits is removed in current TF.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 1001
    with tf.Session(graph=graph) as session:
        # FIX: initialize_all_variables() is deprecated/removed.
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
def tf_better_nn(offset_range=-1, regular=False, drop_out=False, lrd=False):
    """Build and train a 1-hidden-layer ReLU network with optional L2/dropout/LR decay.

    Args:
        offset_range: restrict minibatch sampling to the first offset_range rows;
            -1 means use the full training set.
        regular: if True, add beta-scaled L2 regularization.
        drop_out: if True, apply dropout on the training tower.
        lrd: if True, use exponential learning-rate decay; otherwise fixed 0.5.

    NOTE(review): reads image_size, num_labels, the datasets/labels and
    accuracy() from enclosing scope.
    """
    batch_size = 128
    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        hidden_node_count = 1024
        # Variables.
        weights1 = tf.Variable(
            tf.truncated_normal([image_size * image_size, hidden_node_count]))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))
        weights2 = tf.Variable(
            tf.truncated_normal([hidden_node_count, num_labels]))
        biases2 = tf.Variable(tf.zeros([num_labels]))
        # Training computation. right most
        ys = tf.matmul(tf_train_dataset, weights1) + biases1
        hidden = tf.nn.relu(ys)
        h_fc = hidden
        valid_y0 = tf.matmul(tf_valid_dataset, weights1) + biases1
        valid_hidden1 = tf.nn.relu(valid_y0)
        test_y0 = tf.matmul(tf_test_dataset, weights1) + biases1
        test_hidden1 = tf.nn.relu(test_y0)
        # enable DropOut
        keep_prob = tf.placeholder(tf.float32)
        if drop_out:
            hidden_drop = tf.nn.dropout(hidden, keep_prob)
            h_fc = hidden_drop
        # left most
        logits = tf.matmul(h_fc, weights2) + biases2
        # only drop out when train
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden1, weights2) + biases2
        test_predict = tf.matmul(test_hidden1, weights2) + biases2
        # loss
        l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(
            biases1) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2)
        # enable regularization
        if not regular:
            l2_loss = 0
        beta = 0.002
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf_train_labels)) + beta * l2_loss
        # Optimizer.
        # FIX: build exactly one minimize op. The original built a
        # GradientDescentOptimizer(0.5).minimize(loss) unconditionally and then
        # overwrote it under lrd, leaving a dead minimize op (and its update
        # subgraph) in the graph.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)
    num_steps = 30001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if offset_range == -1:
                offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels,
                keep_prob: 0.5
            }
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 500 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" %
                      accuracy(valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
def tf_better_nn(offset_range=-1, regular=False, drop_out=False, lrd=False):
    """Build and train a 1-hidden-layer ReLU network with optional L2/dropout/LR decay.

    Args:
        offset_range: restrict minibatch sampling to the first offset_range rows;
            -1 means use the full training set.
        regular: if True, add beta-scaled L2 regularization.
        drop_out: if True, apply dropout on the training tower.
        lrd: if True, use exponential learning-rate decay; otherwise fixed 0.5.

    NOTE(review): duplicate of an earlier tf_better_nn definition in this file;
    the later definition shadows the earlier one at import time. Also reads
    image_size, num_labels, the datasets/labels and accuracy() from enclosing scope.
    """
    batch_size = 128
    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        hidden_node_count = 1024
        # Variables.
        weights1 = tf.Variable(
            tf.truncated_normal([image_size * image_size, hidden_node_count]))
        biases1 = tf.Variable(tf.zeros([hidden_node_count]))
        weights2 = tf.Variable(
            tf.truncated_normal([hidden_node_count, num_labels]))
        biases2 = tf.Variable(tf.zeros([num_labels]))
        # Training computation. right most
        ys = tf.matmul(tf_train_dataset, weights1) + biases1
        hidden = tf.nn.relu(ys)
        h_fc = hidden
        valid_y0 = tf.matmul(tf_valid_dataset, weights1) + biases1
        valid_hidden1 = tf.nn.relu(valid_y0)
        test_y0 = tf.matmul(tf_test_dataset, weights1) + biases1
        test_hidden1 = tf.nn.relu(test_y0)
        # enable DropOut
        keep_prob = tf.placeholder(tf.float32)
        if drop_out:
            hidden_drop = tf.nn.dropout(hidden, keep_prob)
            h_fc = hidden_drop
        # left most
        logits = tf.matmul(h_fc, weights2) + biases2
        # only drop out when train
        logits_predict = tf.matmul(hidden, weights2) + biases2
        valid_predict = tf.matmul(valid_hidden1, weights2) + biases2
        test_predict = tf.matmul(test_hidden1, weights2) + biases2
        # loss
        l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(biases1) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2)
        # enable regularization
        if not regular:
            l2_loss = 0
        beta = 0.002
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=tf_train_labels)) + beta * l2_loss
        # Optimizer.
        # NOTE(review): this minimize op is built even when lrd is True, in which
        # case it is immediately replaced below — dead graph ops; consider if/else.
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_predict)
        valid_prediction = tf.nn.softmax(valid_predict)
        test_prediction = tf.nn.softmax(test_predict)
    num_steps = 30001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if offset_range == -1:
                offset_range = train_labels.shape[0] - batch_size
            offset = (step * batch_size) % offset_range
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset: batch_data,
                         tf_train_labels: batch_labels,
                         keep_prob: 0.5}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if step % 500 == 0:
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                    valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels,
               test_dataset, test_labels, image_size, num_labels, basic_hps,
               stride_ps, drop=False, lrd=False):
    """Build and train a parameterized deep CNN with widening channels and checkpointing.

    Conv depths widen per layer (depth, depth*2, depth*3, ...); filter sizes and
    strides shrink adaptively when the feature map becomes too small (via
    large_data_size / maxpool2d helpers from enclosing scope).

    Args:
        train_dataset, train_labels, valid_dataset, valid_labels, test_dataset,
            test_labels: numpy arrays of images (N, H, W, 1) and one-hot labels.
        image_size: input spatial size (square images).
        num_labels: number of classes.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum'.
        stride_ps: list of 4-element conv stride specs, one per conv layer;
            mutated in place when a layer's input is too small.
        drop: if True, apply dropout on the training tower.
        lrd: if True, use exponential learning-rate decay with SGD; otherwise Adagrad.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    first_hidden_num = basic_hps['num_hidden']
    # FIX: floor division — under Python 3, "/" yields a float, which is not a
    # valid tensor shape dimension.
    second_hidden_num = first_hidden_num // 2 + 1
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        # the third parameter must be same as the last layer depth
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Mid-layer conv weights are created lazily in model() on the first
        # (init=True) call, once the feature-map size is known.
        layer_weights = list()
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth * (i + 2)]))
                        for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[first_hidden_num]))
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, model_drop=True, init=True):
            """Build the forward graph; init=True creates the lazy variables."""
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu6(conv + input_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                print(hidden)
                if init:
                    # avoid filter shape larger than input shape
                    hid_shape = hidden.get_shape()
                    # FIX: floor division — filter dims must be ints, not floats.
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth * (i + 1), depth * (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    print('not large')
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu6(conv + layer_biases[i])
            # Flatten conv output for the fully-connected head.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(tf.truncated_normal(
                    [output_size, first_hidden_num], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if drop and model_drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 600, 0.1, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(0.06).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset, model_drop=False, init=False))
        test_prediction = tf.nn.softmax(model(tf_test_dataset, model_drop=False, init=False))
        saver = tf.train.Saver()
    # on step 1750, run over 55000 train images
    num_steps = 1750 * 3
    save_path = 'conv_mnist'
    save_flag = True
    with tf.Session(graph=graph) as session:
        # NOTE(review): os.path.exists on the bare checkpoint prefix may not
        # match the files Saver actually writes — confirm restore path.
        if os.path.exists(save_path) and save_flag:
            # Restore variables from disk.
            saver.restore(session, save_path)
        else:
            tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            # NOTE(review): mean_loss is divided every 10 steps but only reset
            # every 200, so the printed value is a decayed mix, not a true mean.
            if step % 10 == 0:
                mean_loss /= 10.0
            if step % 200 == 0:
                print('Minibatch loss at step %d: %f' % (step, mean_loss))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
                mean_loss = 0
        if save_flag:
            saver.save(session, save_path)
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
def conv_train(basic_hps, stride_ps, layer_cnt=3, drop=False, lrd=False):
    """Build and train a constant-depth CNN with early stopping via loss-curve fitting.

    Args:
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'num_channels'.
        stride_ps: list of 4-element conv stride specs, one per conv layer.
        layer_cnt: total conv layer count (1 input layer + layer_cnt - 1 middle).
        drop: if True, apply dropout (keep probs 0.5 / 0.7 / 0.8).
        lrd: if True, use exponential learning-rate decay instead of fixed 0.05.

    NOTE(review): reads image_size, num_labels, the datasets/labels, and the
    helpers maxpool2d(), size_by_conv(), fit_loss(), accuracy() from enclosing scope.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = basic_hps['num_channels']
    loss_collect = list()
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # Variables.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        layer_weights = [tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1))
            for _ in range(mid_layer_cnt)]
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth]))
                        for _ in range(mid_layer_cnt)]
        # Flattened conv-output size is precomputed from the stride schedule.
        output_size = size_by_conv(
            stride_ps, [batch_size, image_size, image_size, num_channels], layer_cnt)
        output_weights = tf.Variable(tf.truncated_normal(
            [output_size, num_hidden], stddev=0.1))
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        final_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1))
        final_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data):
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.5)
            for i in range(mid_layer_cnt):
                print(i)
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if drop:
                    hidden = tf.nn.dropout(hidden, 0.7)
            shape = hidden.get_shape().as_list()
            reshape = tf.reshape(hidden, [shape[0], output_size])
            hidden = tf.nn.relu(tf.matmul(reshape, output_weights) + output_biases)
            if drop:
                hidden = tf.nn.dropout(hidden, 0.8)
            return tf.matmul(hidden, final_weights) + final_biases

        # Training computation.
        logits = model(tf_train_dataset)
        # FIX: keyword arguments — the positional form of
        # softmax_cross_entropy_with_logits is removed in current TF.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 3001
    fit_frep = 100  # how often (in steps) to re-fit the loss curve
    with tf.Session(graph=graph) as session:
        # FIX: initialize_all_variables() is deprecated/removed.
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            loss_collect.append(l)
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
            if step == fit_frep:
                # First fit: report only, don't stop this early.
                res = fit_loss([batch_size, depth, num_hidden], loss_collect)
                ret = res['ret']
                if ret == 1:
                    print('ret is end train when step is {step}'.format(step=step))
            elif step % fit_frep == 0 and step != 0:
                # Re-fit over sliding windows of the recent loss history; a
                # flagged fit triggers early stopping.
                for i in range(fit_frep):
                    res = fit_loss(
                        [batch_size, depth, num_hidden],
                        loss_collect[i + step - fit_frep * 2 + 1: i + step - fit_frep + 2])
                    ret = res['ret']
                    if i == 0:
                        print(res)
                    if ret == 1:
                        print('ret is end train when step is {step}'.format(step=step))
                        end_train = True
                        break
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        for loss in loss_collect:
            print(loss)
def conv_train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
               image_size, num_labels, basic_hps, stride_ps, lrd=False):
    """Build and train a multi-layer CNN with two fully-connected layers on top.

    The network is: input conv -> (layer_sum - 1) mid conv layers -> flatten ->
    relu6 hidden layer -> two FC layers producing logits.  Training runs for up
    to 3001 steps; every 5 steps the mean minibatch loss is recorded, and from
    step 600 onward `fit_more` / `predict_future` are consulted to decide when
    to stop early.  Loss traces are appended to a fixed output file.

    Args:
        train_dataset / train_labels: training images (N, H, W, 1) and one-hot labels.
        valid_dataset / valid_labels: validation split, evaluated every 50 steps.
        test_dataset / test_labels: test split, evaluated once at the end.
        image_size: height/width of the (square) input images.
        num_labels: number of output classes.
        basic_hps: dict with 'batch_size', 'patch_size', 'depth', 'num_hidden',
            'layer_sum' hyper-parameters.
        stride_ps: list of conv stride specs, one per conv layer; entries are
            overwritten in place with [1, 1, 1, 1] when an input is too small.
        lrd: if True use SGD with exponential learning-rate decay, otherwise Adagrad.

    Returns:
        None.  Progress is printed and loss values are written to line.txt.
    """
    batch_size = basic_hps['batch_size']
    patch_size = basic_hps['patch_size']
    depth = basic_hps['depth']
    num_hidden = basic_hps['num_hidden']
    num_channels = 1
    layer_cnt = basic_hps['layer_sum']
    loss_collect = list()
    first_hidden_num = basic_hps['num_hidden']
    # BUG FIX: use floor division — plain '/' yields a float under Python 3,
    # which is invalid as a tensor shape dimension.
    second_hidden_num = first_hidden_num // 2 + 1
    graph = tf.Graph()
    with graph.as_default():
        # Input data: placeholders for the minibatch, constants for eval splits.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        # First conv layer parameters.
        input_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1))
        input_biases = tf.Variable(tf.zeros([depth]))
        mid_layer_cnt = layer_cnt - 1
        # Mid conv layer weights are created lazily inside model() on the first
        # (init=True) call, once the running feature-map shape is known.
        layer_weights = list()
        # Channel count halves-ish at each mid layer: depth // (i + 2).
        # BUG FIX: floor division for integer shapes (was depth / (i + 2)).
        layer_biases = [tf.Variable(tf.constant(1.0, shape=[depth // (i + 2)]))
                        for i in range(mid_layer_cnt)]
        output_weights = list()
        output_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        # Two fully-connected layers on top of the conv stack.
        first_nn_weights = tf.Variable(tf.truncated_normal(
            [first_hidden_num, second_hidden_num], stddev=0.1))
        second_nn_weights = tf.Variable(tf.truncated_normal(
            [second_hidden_num, num_labels], stddev=0.1))
        first_nn_biases = tf.Variable(tf.constant(1.0, shape=[second_hidden_num]))
        second_nn_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

        # Model.
        def model(data, init=False):
            """Forward pass.  init=True builds the lazily-created weights and
            applies dropout (training graph); init=False reuses them (eval)."""
            # Fall back to stride 1 when the input (or filter) is too small.
            if not large_data_size(data) or not large_data_size(input_weights):
                stride_ps[0] = [1, 1, 1, 1]
            conv = tf.nn.conv2d(data, input_weights, stride_ps[0],
                                use_cudnn_on_gpu=True, padding='SAME')
            conv = maxpool2d(conv)
            hidden = tf.nn.relu(conv + input_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.8)
            for i in range(mid_layer_cnt):
                if init:
                    # Avoid a filter shape larger than the current input shape.
                    hid_shape = hidden.get_shape()
                    # BUG FIX: floor division — filter sizes must be ints.
                    filter_w = patch_size // (i + 1)
                    filter_h = patch_size // (i + 1)
                    if filter_w > hid_shape[1]:
                        filter_w = int(hid_shape[1])
                    if filter_h > hid_shape[2]:
                        filter_h = int(hid_shape[2])
                    # BUG FIX: floor division in the channel dimensions.
                    layer_weight = tf.Variable(tf.truncated_normal(
                        shape=[filter_w, filter_h, depth // (i + 1), depth // (i + 2)],
                        stddev=0.1))
                    layer_weights.append(layer_weight)
                if not large_data_size(hidden) or not large_data_size(layer_weights[i]):
                    stride_ps[i + 1] = [1, 1, 1, 1]
                conv = tf.nn.conv2d(hidden, layer_weights[i], stride_ps[i + 1],
                                    use_cudnn_on_gpu=True, padding='SAME')
                if not large_data_size(conv):
                    # Feature map already tiny: pool with a no-op 1x1 window.
                    conv = maxpool2d(conv, 1, 1)
                else:
                    conv = maxpool2d(conv)
                hidden = tf.nn.relu(conv + layer_biases[i])
                if init:
                    hidden = tf.nn.dropout(hidden, 0.8)
            # Flatten: multiply all non-batch dimensions together.
            shapes = hidden.get_shape().as_list()
            shape_mul = 1
            for s in shapes[1:]:
                shape_mul *= s
            if init:
                output_size = shape_mul
                output_weights.append(tf.Variable(
                    tf.truncated_normal([output_size, num_hidden], stddev=0.1)))
            reshape = tf.reshape(hidden, [shapes[0], shape_mul])
            hidden = tf.nn.relu6(tf.matmul(reshape, output_weights[0]) + output_biases)
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, first_nn_weights) + first_nn_biases
            if init:
                hidden = tf.nn.dropout(hidden, 0.5)
            hidden = tf.matmul(hidden, second_nn_weights) + second_nn_biases
            return hidden

        # Training computation.
        logits = model(tf_train_dataset, init=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
        # Optimizer.
        starter_learning_rate = 0.1
        if lrd:
            cur_step = tf.Variable(0)  # count the number of steps taken.
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, cur_step, 10000, 0.96, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                loss, global_step=cur_step)
        else:
            optimizer = tf.train.AdagradOptimizer(starter_learning_rate).minimize(loss)
        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
    num_steps = 3001
    start_fit = 600
    init_loss = []
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        end_train = False
        mean_loss = 0
        for step in range(num_steps):
            if end_train:
                break
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            mean_loss += l
            if step % 5 == 0:
                # Record the mean loss over the last 5 steps.
                mean_loss /= 5.0
                loss_collect.append(mean_loss)
                mean_loss = 0
                if step >= start_fit:
                    if step == start_fit:
                        # First fit: pass the init flag so the fitter resets.
                        res = fit_more(1, [batch_size, depth, num_hidden, layer_cnt, patch_size],
                                       loss_collect)
                    else:
                        res = fit_more(0, [batch_size, depth, num_hidden, layer_cnt, patch_size],
                                       loss_collect)
                        # Slide the loss window forward (positional removal).
                        loss_collect.pop(0)
                    ret = res['ret']
                    if ret == 1:
                        # The fitter says the loss curve has converged: run the
                        # predicted number of extra steps, dump the trace, stop.
                        print('ret is end train when step is {step}'.format(step=step))
                        init_loss.append(loss_collect)
                        more_index = predict_future(
                            [batch_size, depth, num_hidden, layer_cnt, patch_size], init_loss[0])
                        print('more index: %d' % more_index)
                        for i in range(more_index):
                            offset = ((step + i + 1) * batch_size) % (train_labels.shape[0] - batch_size)
                            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
                            batch_labels = train_labels[offset:(offset + batch_size), :]
                            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
                            _, l, predictions = session.run(
                                [optimizer, loss, train_prediction], feed_dict=feed_dict)
                            loss_collect.append(l)
                        file_helper.write('/home/cwh/coding/python/NN/line.txt', str(loss_collect[20]))
                        loss_collect.pop(0)
                        # BUG FIX: renamed loop variable — the original `for loss in ...`
                        # shadowed the `loss` graph tensor used by session.run above.
                        for extra_loss in loss_collect[21:]:
                            file_helper.write('/home/cwh/coding/python/NN/line.txt', str(extra_loss))
                        end_train = True
                        file_helper.write('/home/cwh/coding/python/NN/line.txt', '===')
            if step % 50 == 0:
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))