def merge_bn_params():
    """Collect the Conv2D weights and biases of the restored model.

    Builds the inference graph with ``image_norm=True`` and ``has_bn=True``,
    restores ``log/model_dump/model.ckpt`` and evaluates every entry of the
    ``'nodes'`` collection.  For each ``Conv2D`` node the evaluated ``W`` and
    ``b`` are stored back into the matching ``'cfg_nodes'`` entry; the node
    named ``cfg.first_conv_name`` is additionally passed through
    ``fix_input``.

    Returns:
        list: ``[W, b, W, b, ...]`` for the Conv2D nodes, in collection order.
    """
    checkpoint = 'log/model_dump/model.ckpt'
    collected = []
    graph = tf.Graph()
    with graph.as_default():
        # Random array used as the graph input while building the network.
        dummy_images = np.random.rand(1, 224, 224, 3)
        inference(dummy_images, False, image_norm=True, has_bn=True)
        node_tensors = tf.get_collection('nodes')
        cfg_nodes = tf.get_collection('cfg_nodes')
        saver = tf.train.Saver(tf.get_collection('params'))
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, checkpoint)
            node_values = sess.run(node_tensors)
            for idx in tqdm(range(len(node_values))):
                entry = cfg_nodes[idx]
                name = entry['name']
                values = node_values[idx]
                if entry['type'] != 'Conv2D':
                    continue
                entry['W'], entry['b'] = values['W'], values['b']
                if cfg.first_conv_name == name:
                    entry['W'], entry['b'] = fix_input(entry['W'], entry['b'])
                collected.append(entry['W'])
                collected.append(entry['b'])
    return collected
def init():
    """Copy the weights of ``MobileNet(alpha=1.0)`` into the local graph.

    The arrays returned by ``get_weights()`` are assigned one-to-one to the
    variables of the ``'params'`` collection:

    * 4-D arrays belonging to a variable whose name contains ``'depthwise'``
      have their last two axes swapped;
    * 2-D arrays get two leading singleton axes.

    The resulting variables are saved to ``log/model_dump/model.ckpt``
    (no meta graph, no checkpoint state file).
    """
    source_weights = MobileNet(alpha=1.0).get_weights()
    graph = tf.Graph()
    with graph.as_default():
        inference(np.random.rand(1, 224, 224, 3), False)
        target_vars = tf.get_collection('params')
        assert len(target_vars) == len(source_weights)
        saver = tf.train.Saver(target_vars)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            for idx, var in enumerate(target_vars):
                value = source_weights[idx]
                if 'depthwise' in var.name and len(value.shape) == 4:
                    value = np.transpose(value, (0, 1, 3, 2))
                if len(value.shape) == 2:
                    # (a, b) -> (1, 1, a, b)
                    value = np.expand_dims(np.expand_dims(value, 0), 0)
                source_weights[idx] = value
                print(var.name, var.shape, value.shape)
                sess.run(tf.assign(var, value))
            saver.save(sess,
                       'log/model_dump/model.ckpt',
                       write_meta_graph=False,
                       write_state=False)
def bn_ema(model_checkpoint_path='log/model_dump/model_fix_input.ckpt',
           qweight=True,
           qactivation=True):
    """Run the graph's ``UPDATE_OPS`` 100 times and save the result.

    Args:
        model_checkpoint_path: Checkpoint restored before the update passes.
        qweight: Forwarded to ``inference``.
        qactivation: Forwarded to ``inference``.

    The scale object is unpickled from ``log/scale`` and forwarded to
    ``inference``; the updated variables are written to
    ``log/model_dump/model_fix_input_bn_ema.ckpt``.
    """
    output_path = 'log/model_dump/model_fix_input_bn_ema.ckpt'
    num_passes = 100

    with open('log/scale', 'rb') as scale_file:
        scale = pickle.load(scale_file)

    graph = tf.Graph()
    with graph.as_default():
        images, _labels = dataset.make_train_dataset().get_next()
        inference(images,
                  True,
                  has_bn=True,
                  image_norm=False,
                  qweight=qweight,
                  qactivation=qactivation,
                  scale=scale)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        saver = tf.train.Saver(tf.get_collection('params'))
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, model_checkpoint_path)
            for _ in tqdm(range(num_passes)):
                sess.run(update_ops)
            saver.save(sess, output_path)
def train(is_ft=False):
    """Train the model on the ``tfData/part1..9.tfrecords`` shards.

    The network output is added to its input, clipped to ``[0, 255]`` and
    fitted to the label with a mean-squared-error loss using Adam (1e-4).
    A checkpoint is written under ``../model`` every 500 steps and on the
    final step.

    Args:
        is_ft: When true, restore the latest checkpoint found in ``./model``
            before training.

    Raises:
        ValueError: If ``is_ft`` is set but ``./model`` holds no checkpoint.
        AssertionError: If the loss becomes NaN.
    """
    with tf.Graph().as_default():
        with tf.variable_scope("model"):
            root_path = "tfData/part"
            train_queue = [
                root_path + str(part_index) + '.tfrecords'
                for part_index in range(1, 10)
            ]
            images, label = decode_from_tfrecords(train_queue, batch_size,
                                                  image_height, image_width)
            images = tf.py_func(cv_resize,
                                [images, image_height, image_width],
                                tf.float32)
            images = tf.reshape(images,
                                [batch_size, image_height, image_width, 1])
            logits = inference(images) + images
            logits = tf.clip_by_value(logits, 0, 255)
            loss = tf.losses.mean_squared_error(logits, label)
            # The regularization sum was computed but never used, and
            # tf.add_n raises on an empty list, so it is no longer built.
            total_loss = loss
            opt = tf.train.AdamOptimizer(1e-4)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            # create_train_op attaches the UPDATE_OPS collection itself when
            # update_ops is left at its default; the former manual
            # with_dependencies() wrapper ran after this call and its result
            # was never used.
            train_op = slim.learning.create_train_op(total_loss,
                                                     opt,
                                                     global_step=global_step)
            saver = tf.train.Saver(tf.global_variables())
            init = tf.global_variables_initializer()
            sess = tf.Session(config=tf.ConfigProto(
                log_device_placement=False))
            coord = tf.train.Coordinator()
            threads = []
            try:
                sess.run(init)
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                if is_ft:
                    # NOTE(review): restore reads './model' while checkpoints
                    # are written to '../model' below - confirm intended.
                    model_file = tf.train.latest_checkpoint('./model')
                    if model_file is None:
                        raise ValueError(
                            'No checkpoint found in ./model to fine-tune from')
                    saver.restore(sess, model_file)
                tf.logging.set_verbosity(tf.logging.INFO)
                loss_cnt = 0.0
                for step in range(max_iters):
                    _, loss_value, l = sess.run([train_op, loss, logits])
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    if step % 10 == 0:
                        print(l[0])
                        loss_cnt += loss_value
                    if step % 100 == 0:
                        # Ten losses are accumulated per 100 steps.
                        format_str = ('%s: step %d, loss = %.2f')
                        print(format_str %
                              (datetime.now(), step, loss_cnt / 10.0))
                        loss_cnt = 0.0
                    if step % 500 == 0 or (step + 1) == max_iters:
                        checkpoint_path = os.path.join('../model',
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
            finally:
                # Stop the input threads and release the session even when
                # training aborts.
                coord.request_stop()
                coord.join(threads)
                sess.close()
def test(model_dir, image_dir):
    """Run the network on a single image and store the output as a PNG.

    Args:
        model_dir: Directory holding the checkpoint state to restore.
        image_dir: Path of the input image file.  The result is written next
            to it as ``<name without extension>_output.png``.

    Raises:
        IOError: If the input image cannot be read.
    """
    import os

    image = cv2.imread(image_dir)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise IOError('Can not read image: ' + str(image_dir))
    image = cv2.resize(image, (160, 144)).astype('float32')
    image = np.asarray([image])  # add the batch axis

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = net.inference(image)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    with tf.Session() as sess:
        # Find previous model and restore it
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restoring model...")
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("Model restored")
            except ValueError:
                print("Can not restore model")

        output = sess.run([logits])[0][0]
        # splitext handles any extension; the old index('.png') lookup raised
        # for every path that did not contain '.png'.
        dir_out = os.path.splitext(image_dir)[0] + '_output.png'
        print('Store output at ' + dir_out)
        cv2.imwrite(dir_out, output)
def evaluate(model_checkpoint_path='log/model_dump/model.ckpt',
             has_bn=True,
             qweight=False,
             qactivation=False,
             image_norm=False):
    """Compute the top-1 accuracy (in percent) on the validation dataset.

    Args:
        model_checkpoint_path: Checkpoint restored into the ``'params'``
            collection.
        has_bn: Forwarded to ``net.inference``.
        qweight: Forwarded to ``net.inference``; when set, the scale object is
            unpickled from ``log/scale``.
        qactivation: Same handling as ``qweight``.
        image_norm: Forwarded to ``net.inference``.

    Returns:
        The sum over all batches of ``100 * (number of top-1 hits)`` divided
        by ``cfg.val_num``.
    """
    scale = None
    if qweight or qactivation:
        with open('log/scale', 'rb') as f:
            scale = pickle.load(f)

    graph = tf.Graph()
    with graph.as_default():
        iterator = dataset.make_val_dataset()
        images, labels = iterator.get_next()
        val_logits = net.inference(images,
                                   False,
                                   has_bn=has_bn,
                                   image_norm=image_norm,
                                   qweight=qweight,
                                   qactivation=qactivation,
                                   scale=scale)
        hits = tf.cast(tf.nn.in_top_k(val_logits, labels, 1),
                       dtype=tf.float32)
        val_acc = 100 * tf.reduce_sum(hits)
        var_list = tf.get_collection('params')
        saver = tf.train.Saver(var_list)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, model_checkpoint_path)
            eval_acc = 0
            # Force true division: under Python 2 an int/int quotient is
            # floored before ceil() and the last partial batch is skipped.
            num_epoch = int(
                math.ceil(float(cfg.val_num) / cfg.eval_batch_size))
            for _ in tqdm(range(num_epoch)):
                eval_acc += sess.run(val_acc)
    return eval_acc / cfg.val_num
def find_quantize_scale(model_checkpoint_path):
    """Compute weight and output scales for every node and pickle them.

    For each evaluated node a fresh entry is created in the scale dictionary.
    Conv2D nodes get a weight scale (``cfg.w_bit`` bits, or 8 bits for the
    node named ``cfg.first_conv_name``) and have their BN statistics or bias
    copied into the node config; every node gets an output scale with
    ``cfg.a_bit`` bits.

    Args:
        model_checkpoint_path: Checkpoint restored before evaluation.

    Writes:
        ``log/scale`` (scale dictionary; an existing file is loaded first and
        its entries for the visited nodes are replaced) and
        ``log/cfg_nodes.pkl`` (node configs).
    """
    scale_path = 'log/scale'
    graph = tf.Graph()
    with graph.as_default():
        calib_images = prepare_calibrate_imgs()
        inference(calib_images, False, has_bn=True, image_norm=False)
        node_tensors = tf.get_collection('nodes')
        cfg_nodes = find_connect(node_tensors, tf.get_collection('cfg_nodes'))
        saver = tf.train.Saver(tf.get_collection('params'))
        scale_dict = OrderedDict()
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, model_checkpoint_path)
            if os.path.exists(scale_path):
                with open(scale_path, 'rb') as scale_file:
                    scale_dict = pickle.load(scale_file)
            node_values = sess.run(node_tensors)
            for idx in tqdm(range(len(node_values))):
                entry = cfg_nodes[idx]
                name = entry['name']
                values = node_values[idx]
                node_scales = scale_dict[name] = {}

                if entry['type'] == 'Conv2D':
                    weight = values['W']
                    entry['W'] = weight
                    w_bits = 8 if entry['name'] == cfg.first_conv_name \
                        else cfg.w_bit
                    w_scale = find_weight_scale(weight, num_bit=w_bits)
                    node_scales['W'] = w_scale
                    node_scales['w_bit'] = w_bits
                    entry['scale_W'] = w_scale
                    if entry['bn']:
                        for key in ('mean', 'var', 'gamma', 'beta'):
                            entry[key] = values[key]
                    else:
                        entry['b'] = values['b']

                out_scale = find_feature_map_scale(values['output'],
                                                   num_bit=cfg.a_bit)
                node_scales['output'] = out_scale
                node_scales['a_bit'] = cfg.a_bit
                entry['scale_output'] = out_scale

    with open(scale_path, 'wb') as scale_file:
        pickle.dump(scale_dict, scale_file)
    with open('log/cfg_nodes.pkl', 'wb') as cfg_file:
        pickle.dump(cfg_nodes, cfg_file)
def init_merge_bn():
    """Write the values from ``merge_bn_params()`` into a BN-free graph.

    Builds the inference graph with ``has_bn=False`` and ``image_norm=False``,
    assigns the i-th collected array to the i-th variable of the ``'params'``
    collection and saves the variables to
    ``log/model_dump/model_merge_bn.ckpt`` (no meta graph, no state file).
    """
    merged_values = merge_bn_params()
    graph = tf.Graph()
    with graph.as_default():
        inference(np.random.rand(1, 224, 224, 3),
                  False,
                  image_norm=False,
                  has_bn=False)
        target_vars = tf.get_collection('params')
        saver = tf.train.Saver(target_vars)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            for idx in tqdm(range(len(target_vars))):
                assign_op = tf.assign(target_vars[idx], merged_values[idx])
                sess.run(assign_op)
            saver.save(sess,
                       'log/model_dump/model_merge_bn.ckpt',
                       write_meta_graph=False,
                       write_state=False)
def fix_input_params():
    """Apply ``fix_input`` to the first convolution and save a new checkpoint.

    The first ``Conv2D`` node whose name equals ``cfg.first_conv_name`` is
    looked up in the ``'nodes'`` / ``'cfg_nodes'`` collections.  After
    restoring ``log/model_dump/model.ckpt`` its ``W`` is fixed together with
    ``mean`` (when the node config has ``bn`` set) or with ``b`` (otherwise),
    the new values are assigned back and everything is saved to
    ``log/model_dump/model_fix_input.ckpt``.
    """
    graph = tf.Graph()
    with graph.as_default():
        inference(np.random.rand(1, 224, 224, 3),
                  False,
                  image_norm=False,
                  has_bn=True)
        node_tensors = tf.get_collection('nodes')
        cfg_nodes = tf.get_collection('cfg_nodes')

        first_conv = None
        first_cfg = None
        for idx, tensors in enumerate(node_tensors):
            candidate = cfg_nodes[idx]
            name = candidate['name']
            if candidate['type'] == 'Conv2D' and cfg.first_conv_name == name:
                first_conv, first_cfg = tensors, candidate
                break

        saver = tf.train.Saver(tf.get_collection('params'))
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, 'log/model_dump/model.ckpt')
            values = sess.run(first_conv)
            # With BN the shift lives in 'mean', without BN in the bias 'b'.
            on_bn = True if first_cfg['bn'] else False
            shift_key = 'mean' if on_bn else 'b'
            new_w, new_shift = fix_input(values['W'],
                                         values[shift_key],
                                         fix_on_bn=on_bn)
            sess.run(tf.assign(first_conv['W'], new_w))
            sess.run(tf.assign(first_conv[shift_key], new_shift))
            saver.save(sess, 'log/model_dump/model_fix_input.ckpt')
def main(argv=None):
    """Evaluate the restored recurrent network on the loaded sequences.

    Restores ``NETPATH``, dumps the graph to ``data/train.pb``, walks the
    sequence axis of the data step by step while carrying the ``(c, h)``
    state forward, and prints the total error count and rate.

    Args:
        argv: Unused.
    """
    print('Loading......')
    start_time = time.time()
    begin_time = start_time
    data, label = loadDataLabelSequence(DATADIR, BATCH_SIZE)
    batch_len = label.shape[0]
    epoch_size = label.shape[1]
    train_size = batch_len * epoch_size * FRAME_COUNT
    print('Loaded %d datas.' % train_size)
    elapsed_time = time.time() - start_time
    print('Loading datas with label elapsed %.1f s' % elapsed_time)

    print('Building net......')
    start_time = time.time()
    x = tf.placeholder(tf.float32,
                       shape=[BATCH_SIZE, FRAME_COUNT, 2],
                       name='data')
    keep_prob = tf.placeholder(tf.float32, name='prob')
    train_prediction, initial_state, final_state = inference(
        x, keep_prob, BATCH_SIZE)
    prediction = tf.nn.softmax(train_prediction)
    elapsed_time = time.time() - start_time
    print('Building net elapsed %.1f s' % elapsed_time)

    print('Begin testing..., train dataset size:{0}'.format(train_size))
    start_time = time.time()
    saver = tf.train.Saver()
    elapsed_time = time.time() - start_time
    print('loading net elapsed %.1f s' % elapsed_time)

    start_time = time.time()
    ls = []
    with tf.Session() as sess:
        saver.restore(sess, NETPATH)
        state = sess.run(initial_state)
        tf.train.write_graph(sess.graph_def, '.', 'data/train.pb', False)
        for step in range(epoch_size):
            batch_data = np.reshape(data[:, step, :, :],
                                    [BATCH_SIZE, FRAME_COUNT, 2])
            feed_dict = {x: batch_data, keep_prob: 1.0}
            # Feed the state produced by the previous step.
            for i, (c, h) in enumerate(initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h
            _, p, state = sess.run(
                [train_prediction, prediction, final_state],
                feed_dict=feed_dict)
            label_prediction = np.argmax(p, axis=1)
            ls.append(label_prediction)
            if step % EVAL_FREQUENCY == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d, %.1f ms.' %
                      (step, 1000 * elapsed_time / EVAL_FREQUENCY))
                # Single-argument print keeps the output identical under
                # Python 2 and Python 3.
                print('True label:  %s' % (label[:, step, :], ))
                print('Prediction:  %s' % (label_prediction, ))
                sys.stdout.flush()

    # np.int was only an alias of the builtin int (removed in NumPy 1.24).
    ls = np.asarray(ls, int)
    error_count = train_size - np.sum(
        ls == np.reshape(label, [epoch_size, FRAME_COUNT]))
    error_rate = 100.0 * error_count / train_size
    print('Total size: %d, Test error count: %d, error rate: %f%%' %
          (train_size, error_count, error_rate))
    elapsed_time = time.time() - begin_time
    print('Total time: %.1f s' % elapsed_time)
def main(argv=None):
    """Train the network, validating on the first 1/20 of the loaded data.

    The first ``len(label) // 20`` samples form the validation set.  Every
    ``VALID_GAP`` steps the summaries and losses are evaluated; during the
    last 10% of training the network is saved to ``NETPATH`` whenever the
    validation loss improves.

    Args:
        argv: Unused.
    """
    with tf.Graph().as_default():
        print('Start.')
        start_time = time.time()
        begin_time = start_time

        print('Loading images.')
        data, label = loadDataLabel(DATADIR, shuffle=True)
        validation_size = len(label) // 20
        validation_data = data[:validation_size, ...]
        validation_labels = label[:validation_size, ...]
        data = data[validation_size:, ...]
        label = label[validation_size:, ...]
        train_size = len(label)
        validation_size = len(validation_labels)
        print('Loaded %d images.' % (train_size + validation_size))
        print('Train size: %d' % train_size)
        print('Valid size: %d' % validation_size)
        elapsed_time = time.time() - start_time
        print('Loading images with label elapsed %.1f s' % elapsed_time)

        print('Building net......')
        start_time = time.time()
        x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 9], name='data')
        y = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 3])
        keep_prob = tf.placeholder(tf.float32, name='prob')
        x_valid = tf.placeholder(tf.float32, shape=[validation_size, 9])
        y_valid = tf.placeholder(tf.float32, shape=[validation_size, 3])

        # Train model.
        train_prediction = inference(x, keep_prob)
        train_prediction_valid = inference(x_valid, keep_prob, reuse=True)

        batch = tf.Variable(0, dtype=tf.float32)
        learning_rate = tf.train.exponential_decay(
            0.1,  # Base learning rate.
            batch * BATCH_SIZE,  # Current index into the dataset.
            train_size * 100,  # Decay step.
            0.95,  # Decay rate.
            staircase=True)
        tf.summary.scalar('learn', learning_rate)

        loss = total_loss(train_prediction, y)
        loss_valid = total_loss(train_prediction_valid, y_valid)
        loss_ce = cross_entropy_loss(train_prediction, y)
        loss_ce_valid = cross_entropy_loss(train_prediction_valid, y_valid)
        loss_l2 = l2_loss()
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('loss_valid', loss_valid)
        trainer = train(loss, learning_rate, batch)
        elapsed_time = time.time() - start_time
        print('Building net elapsed %.1f s' % elapsed_time)

        start_time = time.time()
        best_validation_loss = 100000.0
        saver = tf.train.Saver()
        with tf.Session() as sess:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter('graph/train', sess.graph)
            try:
                # Initialize the whole net.
                tf.global_variables_initializer().run()
                print('Initialized!')
                num_steps = int(NUM_EPOCHS * train_size) // BATCH_SIZE
                for step in range(num_steps):
                    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                    batch_data = data[offset:offset + BATCH_SIZE, ...]
                    batch_labels = label[offset:offset + BATCH_SIZE, ...]

                    # Train net.
                    feed_dict = {
                        x: batch_data,
                        y: batch_labels,
                        keep_prob: KEEP_PROB
                    }
                    sess.run(trainer, feed_dict=feed_dict)

                    # Valid net.
                    if step % VALID_GAP == 0:
                        feed_dict = {
                            x: batch_data,
                            y: batch_labels,
                            x_valid: validation_data,
                            y_valid: validation_labels,
                            keep_prob: 1.0
                        }
                        (summary, l, lr, l_valid, l_ce, l_ce_valid,
                         l_l2) = sess.run([
                             merged, loss, learning_rate, loss_valid,
                             loss_ce, loss_ce_valid, loss_l2
                         ],
                                          feed_dict=feed_dict)
                        train_writer.add_summary(summary, step)
                        # Only start saving in the last 10% of training.
                        # Logical 'and' replaces the former bitwise '&'.
                        in_final_phase = (step * BATCH_SIZE >
                                          NUM_EPOCHS * train_size * 0.9)
                        if in_final_phase and \
                                l_valid < best_validation_loss:
                            best_validation_loss = l_valid
                            saver.save(sess, NETPATH)
                            print('Saving net at step %d' % step)
                            print('Learning rate: %f' % lr)
                            print('Train Data total loss:%f' % l)
                            print('Valid Data total loss:%f\n' % l_valid)
                            sys.stdout.flush()

                    if step % EVAL_FREQUENCY == 0:
                        # NOTE(review): the values printed here come from the
                        # most recent VALID_GAP evaluation, not necessarily
                        # from this step.
                        elapsed_time = time.time() - start_time
                        start_time = time.time()
                        # float() keeps the epoch fraction under Python 2.
                        print('Step %d (epoch %.2f), %.3f ms pre step' %
                              (step, float(step) * BATCH_SIZE / train_size,
                               1000 * elapsed_time / EVAL_FREQUENCY))
                        print('Learning rate: %f' % lr)
                        print('L2 loss:%f' % l_l2)
                        print('Train Data cross entropy loss:%f' % l_ce)
                        print('Train Data total loss:%f' % l)
                        print('Valid Data cross entropy loss:%f' % l_ce_valid)
                        print('Valid Data total loss:%f\n' % l_valid)
                        sys.stdout.flush()
            finally:
                # Flush and close the event file even if training aborts.
                train_writer.close()

        elapsed_time = time.time() - begin_time
        print('Total time: %.1f s' % elapsed_time)
def main(argv=None):
    """Train the recurrent network and keep the lowest-loss checkpoint.

    Every step feeds one batch together with the recurrent ``(c, h)`` state
    obtained from ``initial_state`` after initialization, writes the merged
    summaries to ``graph/train`` and saves the net to ``NETPATH`` whenever
    the batch loss is lower than the best loss seen so far.

    Args:
        argv: Unused.
    """
    with tf.Graph().as_default():
        print('Start.')
        start_time = time.time()
        begin_time = start_time

        print('Loading data.')
        data, label = loadDataLabel(DATADIR, shuffle=True, various=True)
        train_size = len(label)
        print('Loaded %d datas.' % train_size)
        elapsed_time = time.time() - start_time
        print('Loading images with label elapsed %.1f s' % elapsed_time)

        print('Building net......')
        start_time = time.time()

        def get_input_x(x, offset=0, length=BATCH_SIZE):
            """Slice ``length`` samples and shape them for the data input."""
            a = x[offset:(offset + length), ...]
            return np.reshape(a, [length, FRAME_COUNT, 2])

        def get_input_y(y, offset=0, length=BATCH_SIZE):
            """Slice ``length`` labels and shape them for the label input."""
            b = y[offset:(offset + length), ...]
            return np.reshape(b, [length, FRAME_COUNT])

        x = tf.placeholder(tf.float32,
                           shape=[BATCH_SIZE, FRAME_COUNT, 2],
                           name='data')
        y = tf.placeholder(tf.int32, shape=[BATCH_SIZE, FRAME_COUNT])
        keep_prob = tf.placeholder(tf.float32, name='prob')

        # Train model.
        train_prediction, initial_state, final_state = inference(
            x, keep_prob, BATCH_SIZE)

        batch = tf.Variable(0, dtype=tf.float32, trainable=False)
        learning_rate = tf.train.exponential_decay(
            0.01,  # Base learning rate.
            batch * BATCH_SIZE,  # Current index into the dataset.
            train_size * 80,  # Decay step.
            0.95,  # Decay rate.
            staircase=True)
        tf.summary.scalar('learn', learning_rate)

        loss = total_loss(train_prediction, y, BATCH_SIZE)
        tf.summary.scalar('loss', loss)
        trainer = myTrain(loss, learning_rate, batch)
        elapsed_time = time.time() - start_time
        print('Building net elapsed %.1f s' % elapsed_time)

        print('Begin training..., train dataset size:{0}'.format(train_size))
        start_time = time.time()
        best_validation_loss = 100000.0
        saver = tf.train.Saver()
        with tf.Session() as sess:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter('graph/train', sess.graph)
            try:
                # Initialize the whole net.
                tf.global_variables_initializer().run()
                state = sess.run(initial_state)
                print('Initialized!')
                # range() works on both Python 2 and 3 (xrange is 2-only).
                num_steps = int(NUM_EPOCHS * train_size) // BATCH_SIZE
                for step in range(num_steps):
                    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                    batch_data = get_input_x(offset=offset, x=data)
                    batch_labels = get_input_y(offset=offset, y=label)

                    # Train RNN net.
                    feed_dict = {
                        x: batch_data,
                        y: batch_labels,
                        keep_prob: KEEP_PROB
                    }
                    for i, (c, h) in enumerate(initial_state):
                        feed_dict[c] = state[i].c
                        feed_dict[h] = state[i].h
                    summary, _, l, lr, predictions = sess.run(
                        [merged, trainer, loss, learning_rate,
                         train_prediction],
                        feed_dict=feed_dict)
                    train_writer.add_summary(summary, step)

                    if l < best_validation_loss:
                        print('Saving net.')
                        print('Net loss:%.3f, learning rate: %.6f' % (l, lr))
                        best_validation_loss = l
                        saver.save(sess, NETPATH)

                    if step % EVAL_FREQUENCY == 0:
                        elapsed_time = time.time() - start_time
                        start_time = time.time()
                        print('Step %d (epoch %.2f), %.1f ms' %
                              (step,
                               np.float32(step) * BATCH_SIZE / train_size,
                               1000 * elapsed_time / EVAL_FREQUENCY))
                        print('Net loss:%.3f, learning rate: %.6f' % (l, lr))
                        sys.stdout.flush()
            finally:
                # Flush and close the event file even if training aborts.
                train_writer.close()

        elapsed_time = time.time() - begin_time
        print('Total time: %.1f s' % elapsed_time)
def main(args):
    """Build the queue-based input pipeline and run the training epochs.

    Args:
        args: Parsed options.  The attributes read here are ``gpu_idx``,
            ``train_data_dir``, ``val_data_dir``, ``logs_dir``,
            ``models_dir``, ``batch_size``, ``epoch_size``, ``image_size``,
            ``dropout_rate``, ``weight_decay``, ``learning_rate``,
            ``gpu_memory_fraction`` and ``max_nrof_epochs``.

    Each epoch calls ``train``, saves the variables and meta graph, then calls
    ``validate``.  Logs and models go into a time-stamped sub-directory of
    ``args.logs_dir`` / ``args.models_dir``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_idx

    if args.val_data_dir:
        train_list, train_label, _, _ = \
            preprocessing_data.get_img_path_and_lab(
                args.train_data_dir, split=False, shuffle=True)
        val_list, val_label, _, _ = preprocessing_data.get_img_path_and_lab(
            args.val_data_dir, split=False, shuffle=True)
    else:
        train_list, train_label, val_list, val_label = \
            preprocessing_data.get_img_path_and_lab(args.train_data_dir)

    subdir = time.strftime('%Y%m%d-%H%M%S', time.localtime())
    log_dir = os.path.join(os.path.expanduser(args.logs_dir), subdir)
    if os.path.exists(log_dir):
        os.rmdir(log_dir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_dir), subdir)
    if os.path.exists(model_dir):
        os.rmdir(model_dir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)

    # Build the graph.
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        assert len(train_list) > 0, 'The training set should not be empty.'

        # Create a queue that produces indices into the image_list and
        # label_list
        labels = tf.convert_to_tensor(train_label, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True)
        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=200000,
                                              dtypes=[tf.string, tf.int32],
                                              shapes=[(1, ), (1, )])
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                image = tf.image.resize_image_with_crop_or_pad(
                    image, args.image_size, args.image_size)
                # Standardize each image.
                image = tf.image.per_image_standardization(image)
                images.append(image)
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print("Building training graph.")
        logits, end_points = net.inference(image_batch,
                                           num_classes=2,
                                           dropout_rate=args.dropout_rate,
                                           is_training=True,
                                           weight_decay=args.weight_decay,
                                           scope="My_Net")

        # Loss
        cross_entropy_mean = net.loss(logits, label_batch)
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = cross_entropy_mean + tf.add_n(regularization_loss)

        # Prediction (kept for its named op 'prob' in the graph)
        prob = tf.nn.softmax(logits=logits, name='prob')

        # Accuracy
        accuracy_op = net.accuracy(logits, label_batch, 2)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        train_op = optimizer.minimize(loss=total_loss,
                                      global_step=global_step)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)

        # Build the summary operation based on the TF collection of
        # Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        try:
            with sess.as_default():
                print("Running training.")
                for epoch in range(args.max_nrof_epochs + 1):
                    if coord.should_stop():
                        break
                    train(args, sess, epoch, train_list, train_label,
                          index_dequeue_op, enqueue_op,
                          image_paths_placeholder, labels_placeholder,
                          phase_train_placeholder, batch_size_placeholder,
                          global_step, total_loss, train_op, summary_op,
                          summary_writer, accuracy_op)

                    # Save the model at the end of every epoch.
                    save_variables_and_metagraph(sess, saver, summary_writer,
                                                 model_dir, subdir,
                                                 global_step)

                    validate(args, sess, epoch, val_list, val_label,
                             enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, total_loss, accuracy_op)
        finally:
            # Stop the queue-runner threads and release the session and the
            # event file; previously none of them were ever released.
            coord.request_stop()
            coord.join(threads)
            summary_writer.close()
            sess.close()
def main(argv=None):
    """Classify hand frames live and drive the gesture state machine.

    Frames are fetched from ``libhandpose.so``; each frame is classified and
    the majority label of every three consecutive frames advances the state
    machine.  The loop ends when ESC (key code 27) is pressed.

    Args:
        argv: Unused.
    """
    print('Loading......')
    begin_time = time.time()

    print('Building net......')
    x = tf.placeholder(tf.float32, shape=[1, IMAGE_HEIGH, IMAGE_WIDTH, 1])
    keep_prob = tf.placeholder(tf.float32)
    train_prediction = inference(x, keep_prob)
    prediction = tf.nn.softmax(train_prediction)

    def eval_in_batches(data, sess):
        """Return (raw output, softmax output) for one frame."""
        feed_dict = {
            x: np.reshape(data, [1, IMAGE_HEIGH, IMAGE_WIDTH, 1]),
            keep_prob: 1.0
        }
        tp, p = sess.run([train_prediction, prediction], feed_dict=feed_dict)
        return tp, p

    saver = tf.train.Saver()

    # Buffers filled by the native library.
    hand_array = c_ubyte * FRAME_SIZE
    hand_data = hand_array(0)
    rows_array = c_int * 1
    cols_array = c_int * 1
    rows = rows_array(0)
    cols = cols_array(0)
    clib = cdll.LoadLibrary("./libhandpose.so")
    clib.init()

    # Transition table: machine[state][window_label] -> next state.
    # NOTE(review): 'b1' jumps to 'b3' where 'a1' jumps to 'a2', so 'b2' is
    # never entered - confirm this is intended.
    state = 's'
    machine = {
        's': ['s', 'a1', 's', 'c1', 'b1', 's'],
        'a1': ['a4', 'a1', 's', 's', 's', 'a2'],
        'a2': ['a4', 's', 's', 's', 's', 'a3'],
        'a3': ['a4', 's', 's', 's', 's', 's'],
        'a4': ['a4', 'a7', 's', 's', 's', 'a5'],
        'a5': ['s', 'a7', 's', 's', 's', 'a6'],
        'a6': ['s', 'a7', 's', 's', 's', 's'],
        'a7': ['s', 'a1', 's', 's', 's', 's'],
        'b1': ['b4', 's', 's', 's', 'b1', 'b3'],
        'b2': ['b4', 's', 's', 's', 's', 'b3'],
        'b3': ['b4', 's', 's', 's', 's', 's'],
        'b4': ['b4', 's', 's', 's', 'b7', 'b5'],
        'b5': ['s', 's', 's', 's', 'b7', 'b6'],
        'b6': ['s', 's', 's', 's', 'b7', 's'],
        'b7': ['s', 's', 's', 's', 'b1', 's'],
        'c1': ['s', 's', 'c4', 'c1', 's', 'c2'],
        'c2': ['s', 's', 'c4', 's', 's', 'c3'],
        'c3': ['s', 's', 'c4', 's', 's', 's'],
        'c4': ['c3', 'c3', 'c4', 'c3', 'c3', 'c3'],
    }
    # Messages for the states that correspond to a recognized gesture.
    console_text = {
        'a1': 'Choose.',
        'a7': 'Click!!!!!!!!!!!!!!',
        'b7': 'Return.',
        'c4': 'Drag.',
    }
    overlay_text = {'a1': 'Move', 'a7': 'Click', 'b7': 'Return', 'c4': 'Drag'}

    def draw_text(frame, text, origin):
        cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (255, 0, 255), 2)

    # Sliding window of the last three labels; 5 is the "no gesture" label.
    # np.int was only an alias of the builtin int (removed in NumPy 1.24).
    label_array = np.zeros((1, 3), int) + 5
    i = 0
    try:
        with tf.Session() as sess:
            saver.restore(sess, NETPATH)
            while (cv2.waitKey(10) & 0xFF) != 27:
                time_temp = time.time()
                if clib.get_hand_color(hand_data, cols, rows) < 0:
                    label_present = 5
                    hand = None
                else:
                    hand = np.reshape(hand_data, FRAME_SIZE)
                    hand = np.asarray(hand[:cols[0] * rows[0] * 3])
                    hand = np.reshape(hand, (rows[0], cols[0], 3))
                    hand = np.asarray(hand)
                    data = getDataFromPic(hand)
                    batch_data = np.reshape(
                        data, [1, IMAGE_HEIGH, IMAGE_WIDTH, 1])
                    tp, p = eval_in_batches(batch_data, sess)
                    label_prediction = np.argmax(p)
                    if p[0][label_prediction] >= MIN_PREDICTION:
                        label_present = label_prediction
                        draw_text(hand,
                                  "detect: {}".format(label_prediction),
                                  (10, 40))
                    else:
                        label_present = 5
                        draw_text(hand, "detect: error", (10, 40))
                    sys.stdout.flush()

                label_array[0][i] = label_present
                if i == 2:
                    # Majority vote over the window; no majority -> 5.
                    label_counter = Counter(
                        label_array.flatten().tolist()).most_common(1)
                    if label_counter[0][1] > 1:
                        window_label = label_counter[0][0]
                    else:
                        window_label = 5
                    state = machine[state][window_label]
                    if state in console_text:
                        print(console_text[state])

                # Drawing needs a frame; skip it when none was captured.
                if hand is not None:
                    draw_text(hand, overlay_text.get(state, "None"),
                              (10, 60))
                    frame_time = time.time() - time_temp
                    fps = 1 / frame_time
                    draw_text(hand, "fps: {}".format(fps), (10, 20))
                    cv2.imshow("hand", hand)
                i = (i + 1) % 3
    finally:
        # Release the native library even if the loop aborts.
        clib.release()

    elapsed_time = time.time() - begin_time
    print('Total time: %.1f s' % elapsed_time)
from net import inference
from datagen import SequenceData
import numpy as np
import math
from keras.callbacks import ModelCheckpoint

# Training script: fits the model returned by inference() on the samples
# referenced by x_index.npy / y_index.npy and keeps the weights with the
# lowest training loss.

model_savepath = "weights.best.hdf5"
epoch = 20
batch_size = 4

x_index = np.load('x_index.npy')  # load data index
y_index = np.load('y_index.npy')  # load label index

sequence_data = SequenceData(x_index, y_index, batch_size)
# Number of batches per epoch, rounded up.  float() forces true division and
# int() yields an integer step count on both Python 2 and 3.
steps = int(math.ceil(len(x_index) / float(batch_size)))

myModel = inference()
# Overwrite the saved weights only when the monitored loss improves.
model_checkpoint = ModelCheckpoint(model_savepath,
                                   monitor='loss',
                                   verbose=1,
                                   save_best_only=True)
myModel.fit_generator(sequence_data,
                      steps_per_epoch=steps,
                      epochs=epoch,
                      verbose=1,
                      callbacks=[model_checkpoint],
                      use_multiprocessing=False,
                      workers=1)
def train():
    """Train on CIFAR-10 until the global step reaches params.TRAIN_STEPS.

    Training proceeds in rounds of ``params.LOG_FREQUENCY`` steps.  After each
    round the error rate on the full train set and the full test set is
    computed, printed, appended to ``log.csv`` in ``params.TRAIN_DIR`` and a
    checkpoint is saved there.
    """
    with tf.device('/cpu:0'), tf.name_scope('input'):
        global_step = tf.train.get_or_create_global_step()

        print("Loading CIFAR-10 Data")
        cifar10 = data.Cifar10()

        image_shape = (None, params.IMAGE_SIZE, params.IMAGE_SIZE,
                       params.CHANNELS)
        images_placeholder = tf.placeholder(cifar10.train_images.dtype,
                                            image_shape,
                                            name='images_placeholder')
        labels_placeholder = tf.placeholder(cifar10.train_labels.dtype,
                                            (None, ),
                                            name='labels_placeholder')
        train_data_dict = {
            images_placeholder: cifar10.train_images,
            labels_placeholder: cifar10.train_labels
        }
        test_data_dict = {
            images_placeholder: cifar10.test_images,
            labels_placeholder: cifar10.test_labels
        }

        # Randomized, shuffled, endlessly repeating training stream.
        training_dataset = (
            tf.data.Dataset.from_tensor_slices(
                (images_placeholder, labels_placeholder))
            .prefetch(params.SHUFFLE_BUFFER)
            .map(data.randomization_function,
                 num_parallel_calls=params.NUM_THREADS)
            .shuffle(buffer_size=params.SHUFFLE_BUFFER)
            .batch(params.BATCH_SIZE)
            .repeat()
            .prefetch(params.TRAIN_OUTPUT_BUFFER))

        # Single-pass stream used to measure error rates.
        validation_dataset = (
            tf.data.Dataset.from_tensor_slices(
                (images_placeholder, labels_placeholder))
            .map(data.standardization_function,
                 num_parallel_calls=params.NUM_THREADS)
            .batch(params.BATCH_SIZE)
            .prefetch(params.VALIDATION_OUTPUT_BUFFER))

        # One iterator that can be pointed at either dataset.
        iterator = tf.contrib.data.Iterator.from_structure(
            training_dataset.output_types, training_dataset.output_shapes)
        batch_images, batch_labels = iterator.get_next()
        training_init_op = iterator.make_initializer(training_dataset)
        validation_init_op = iterator.make_initializer(validation_dataset)

        training_placeholder = tf.placeholder_with_default(
            False, (), name='training_placeholder')

    print("Building TensorFlow Graph")

    # Logits, loss and the per-batch count of correct top-1 predictions.
    logits = net.inference(batch_images, training=training_placeholder)
    total_loss = losses.total_loss(logits, batch_labels)
    with tf.name_scope('accuracy'):
        correct = tf.nn.in_top_k(logits, batch_labels, 1)
        number_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

    # Op that trains the model with one batch and updates the parameters.
    train_op = create_train_op(total_loss, global_step)

    init = tf.global_variables_initializer()

    print("Starting TensorFlow Session")

    saver = tf.train.Saver()
    tf_file_writer = tf.summary.FileWriter(params.TRAIN_DIR,
                                           tf.get_default_graph())
    merged_summary = tf.summary.merge_all()
    csv_file_dir = os.path.join(params.TRAIN_DIR, 'log.csv')

    def error_rate(data_dict, num_examples):
        """Return the error rate over one full pass of ``data_dict``."""
        validation_init_op.run(feed_dict=data_dict)
        num_batches = math.ceil(num_examples / params.BATCH_SIZE)
        hits = sum(number_correct.eval() for _ in range(num_batches))
        return 1.0 - hits / num_examples

    with tf.Session() as sess, \
            open(csv_file_dir, 'w', newline='') as log_file:
        log_writer = csv.writer(log_file)
        log_writer.writerow(
            ["Step", "Train Error", "Test Error", "Step Time"])

        print("Initializing Global Variables")
        init.run()

        print("Training in Progress")
        while global_step.eval() < params.TRAIN_STEPS:
            # Run a number of training steps set by params.LOG_FREQUENCY
            training_init_op.run(feed_dict=train_data_dict)
            round_start = time.perf_counter()
            for _ in range(params.LOG_FREQUENCY):
                _, batch_loss, summary_str = sess.run(
                    [train_op, total_loss, merged_summary],
                    feed_dict={training_placeholder: True})
            round_end = time.perf_counter()
            average_time_per_step = ((round_end - round_start) /
                                     params.LOG_FREQUENCY)

            current_step = global_step.eval()

            # Write a summary of the last training batch for TensorBoard
            tf_file_writer.add_summary(summary_str, current_step)

            # Error rates on the full train set and the full test set.
            train_error_rate = error_rate(train_data_dict,
                                          params.NUM_TRAIN_EXAMPLES)
            test_error_rate = error_rate(test_data_dict,
                                         params.NUM_TEST_EXAMPLES)

            print("Step:", current_step)
            print(" Train Set Error Rate:", train_error_rate)
            print(" Test Set Error Rate:", test_error_rate)
            print(" Average Training Time per Step:", average_time_per_step)
            log_writer.writerow([
                current_step, train_error_rate, test_error_rate,
                average_time_per_step
            ])

            saver.save(sess, os.path.join(params.TRAIN_DIR, "model.ckpt"))

    tf_file_writer.close()
def train(model_checkpoint_path='log/model_dump/model_fix_input_bn_ema.ckpt',
          has_bn=True,
          qweight=False,
          qactivation=False,
          image_norm=False):
    """Train the network, checkpointing and validating after every epoch.

    Args:
        model_checkpoint_path: Checkpoint used to initialise the variables in
            the 'params' collection. ``None`` starts from freshly initialised
            variables and skips the initial validation run.
        has_bn: Forwarded to ``net.inference``.
        qweight: Forwarded to ``net.inference``; when set, quantization scales
            are loaded from 'log/scale'.
        qactivation: Forwarded to ``net.inference``; when set, quantization
            scales are loaded from 'log/scale'.
        image_norm: Forwarded to ``net.inference``.

    Side effects:
        Writes per-epoch checkpoints and 'best_model.ckpt' under
        ``cfg.model_path``, TensorBoard summaries under ``cfg.log_path``, a
        text log to 'log/work_log.txt' and, when ``cfg.timeline_log`` is set,
        'timeline.json'.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = tf.placeholder(dtype=tf.float32)

        with tf.device('/cpu:0'):
            train_iter = dataset.make_train_dataset()
            image_batch, label_batch = train_iter.get_next()
        print('images:', image_batch.shape, image_batch.dtype)

        # Build inference Graph.
        scale = None
        if qweight or qactivation:
            # NOTE(review): pickle is only safe here because 'log/scale' is a
            # locally produced file; never point this at untrusted data.
            with open('log/scale', 'rb') as f:
                scale = pickle.load(f)
        logits = net.inference(image_batch,
                               phase_train=True,
                               has_bn=has_bn,
                               image_norm=image_norm,
                               qactivation=qactivation,
                               qweight=qweight,
                               scale=scale)

        # Build the portion of the Graph calculating the losses.
        total_loss, softmax_loss, acc = layer.loss(logits, label_batch)
        tf.summary.scalar('total_loss', total_loss)
        tf.summary.scalar('softmax_loss', softmax_loss)
        tf.summary.scalar('acc', acc)

        train_op = layer.train(total_loss, learning_rate, global_step)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(cfg.log_path,
                                               tf.get_default_graph())

        # pre_saver only covers the 'params' collection (used for the initial
        # restore and the best-model dump); saver covers every variable.
        pre_saver = tf.train.Saver(tf.get_collection('params'))
        saver = tf.train.Saver(max_to_keep=5000)

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=tf.GPUOptions(allow_growth=True)))
        try:
            sess.run(init)

            # Without a starting checkpoint there is no baseline accuracy, so
            # the first validated epoch always becomes the best model.
            best_val_acc = float('-inf')
            if model_checkpoint_path is not None:
                pre_saver.restore(sess, model_checkpoint_path)
                print('init model from {}'.format(model_checkpoint_path))
                # Validation always runs with BN and full quantization,
                # independent of the flags this function was called with.
                best_val_acc = evaluate(model_checkpoint_path,
                                        has_bn=True,
                                        qweight=True,
                                        qactivation=True,
                                        image_norm=False)

            if cfg.timeline_log:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
            else:
                options = None
                run_metadata = None

            start_time = time.time()
            train_num_batch = cfg.train_num // cfg.batch_size
            with open('log/work_log.txt', 'w') as train_log:
                for epoch in range(cfg.max_epoch):
                    for step in range(0, train_num_batch):
                        lr = get_learning_rate(epoch, step, train_num_batch)
                        feed_dict = {learning_rate: lr}
                        if step % cfg.log_step == 0:
                            _, _total_loss, _softmax_loss, _acc, _summary = \
                                sess.run([train_op, total_loss, softmax_loss,
                                          acc, summary_op],
                                         feed_dict=feed_dict,
                                         options=options,
                                         run_metadata=run_metadata)
                            duration = float(time.time() - start_time) / cfg.log_step
                            examples_per_sec = cfg.batch_size / duration
                            log_line = "%s: Epoch=%d/%d, Step=%d/%d, lr=%.7f, total_loss=%.3f, softmax_loss=%.3f, " \
                                       "acc=%.2f%%(%.1f examples/sec; %.3f sec/batch)" \
                                       % (datetime.now().strftime('%m-%d %H:%M:%S'),
                                          epoch, cfg.max_epoch, step,
                                          train_num_batch, lr, _total_loss,
                                          _softmax_loss, _acc,
                                          examples_per_sec, duration)
                            train_log.write(log_line + '\n')
                            print(log_line)
                            # Use a step index that keeps increasing across
                            # epochs; the per-epoch `step` would make every
                            # epoch overwrite the same summary range.
                            summary_writer.add_summary(
                                _summary,
                                global_step=epoch * train_num_batch + step)
                            start_time = time.time()
                            if cfg.timeline_log:
                                tl = timeline.Timeline(run_metadata.step_stats)
                                ctf = tl.generate_chrome_trace_format()
                                with open('timeline.json', 'w') as wd:
                                    wd.write(ctf)
                        else:
                            sess.run(train_op, feed_dict=feed_dict)

                    saver.save(sess,
                               '{}/model.ckpt'.format(cfg.model_path),
                               global_step=epoch)
                    _val_acc = evaluate(
                        '{}/model.ckpt-{}'.format(cfg.model_path, epoch),
                        has_bn=True,
                        qweight=True,
                        qactivation=True,
                        image_norm=False)
                    if _val_acc > best_val_acc:
                        pre_saver.save(sess,
                                       '{}/best_model.ckpt'.format(cfg.model_path),
                                       write_meta_graph=False,
                                       write_state=False,
                                       global_step=None)
                        best_val_acc = _val_acc
                    val_log = 'epoch=%d, val_acc=%.3f%%, best_val_acc=%.3f%%' \
                              % (epoch, _val_acc, best_val_acc)
                    train_log.write(val_log + '\n')
                    print(val_log)
        finally:
            # Release the event file and the session even if training aborts.
            summary_writer.close()
            sess.close()
def main(argv=None):
    """Evaluate the restored net sample by sample and print the error rate.

    Loads the data with ``loadDataLabel``, restores the checkpoint at
    ``NETPATH``, dumps the graph definition to 'data/train.pb' and classifies
    one sample per ``sess.run`` call.

    Args:
        argv: Unused.
    """
    # Single-argument print(...) calls behave the same under Python 2 and 3.
    print('Loading......')
    start_time = time.time()
    begin_time = start_time
    data, label = loadDataLabel(DATADIR, shuffle=True, various=True)
    train_size = len(label)
    print('Loaded %d images.' % train_size)
    elapsed_time = time.time() - start_time
    print('Loading images with label elapsed %.1f s' % elapsed_time)

    print('Building net......')
    start_time = time.time()
    x = tf.placeholder(tf.float32, shape=[1, FRAME_COUNT * 2], name='data')
    keep_prob = tf.placeholder(tf.float32, name='prob')
    train_prediction = inference(x, keep_prob)
    prediction = tf.nn.softmax(train_prediction)

    def eval_in_batches(data, sess):
        """Run one sample through the net; return (logits, softmax)."""
        feed_dict = {
            x: np.reshape(data, [1, FRAME_COUNT * 2]),
            keep_prob: 1.0,  # keep_prob is fed 1.0 for evaluation
        }
        tp, p = sess.run([train_prediction, prediction], feed_dict=feed_dict)
        return tp, p

    elapsed_time = time.time() - start_time
    print('Building net elapsed %.1f s' % elapsed_time)

    print('Begin testing..., train dataset size:{0}'.format(train_size))
    start_time = time.time()
    saver = tf.train.Saver()
    elapsed_time = time.time() - start_time
    print('loading net elapsed %.1f s' % elapsed_time)

    start_time = time.time()
    ls = []
    with tf.Session() as sess:
        saver.restore(sess, NETPATH)
        # saver.save(sess, 'pb_saver/net.ckpt')
        tf.train.write_graph(sess.graph_def, '.', 'data/train.pb', False)
        for i in range(train_size):
            batch_data = np.reshape(data[i, ...], [1, FRAME_COUNT * 2])
            tp, p = eval_in_batches(batch_data, sess)
            label_prediction = np.argmax(p)
            ls.append(label_prediction)
            if i % EVAL_FREQUENCY == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d, %.1f ms.' %
                      (i, 1000 * elapsed_time / EVAL_FREQUENCY))
                print('True label: %d' % label[i])
                print('Prediction: %d' % label_prediction)
                sys.stdout.flush()

    # np.int was only an alias of the builtin int and is removed in newer
    # NumPy releases; int is an exact replacement.
    ls = np.asarray(ls, int)
    error_count = train_size - np.sum(ls == label)
    # Guard against an empty dataset instead of dividing by zero.
    error_rate = 100.0 * error_count / train_size if train_size else 0.0
    print('Total size: %d, Test error count: %d, error rate: %f%%' %
          (train_size, error_count, error_rate))
    elapsed_time = time.time() - begin_time
    print('Total time: %.1f s' % elapsed_time)
def find_quantize_scale(model_checkpoint_path):
    """Compute quantization scales for every node and pickle them.

    Restores the 'params' collection from ``model_checkpoint_path``, evaluates
    the 'nodes' collection on the calibration images, and records a weight
    scale for each 'Conv2D' node plus an output scale for each node.

    Args:
        model_checkpoint_path: Checkpoint to restore before calibration.

    Side effects:
        Writes 'log/scale' (name -> scales) and 'log/cfg_nodes.pkl' (the
        annotated node configs). An existing 'log/scale' is loaded first, so
        entries for names not visited in this run are kept.
    """
    graph = tf.Graph()
    with graph.as_default():
        images = prepare_calibrate_imgs()
        _ = inference(images, False, has_bn=False, image_norm=False)

        nodes = tf.get_collection('nodes')
        cfg_nodes = find_connect(nodes, tf.get_collection('cfg_nodes'))
        saver = tf.train.Saver(tf.get_collection('params'))

        scale_dict = OrderedDict()
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, model_checkpoint_path)
            if os.path.exists('log/scale'):
                with open('log/scale', 'rb') as f:
                    scale_dict = pickle.load(f)
            nodes = sess.run(nodes)

        for idx in tqdm(range(len(nodes))):
            node_cfg = cfg_nodes[idx]
            node_val = nodes[idx]
            name = node_cfg['name']
            # Start from an empty entry so stale values for this node are
            # dropped.
            entry = {}
            scale_dict[name] = entry

            if node_cfg['type'] == 'Conv2D':
                kernel = node_val['W']
                node_cfg['W'] = kernel
                print(name, 'weights', kernel.max(), kernel.min())
                w_scale = find_weight_scale(kernel)
                print(name, 'weights', w_scale.shape)
                entry['W'] = w_scale
                node_cfg['scale_W'] = w_scale
                node_cfg['b'] = node_val['b']

            # Input-scale calibration is disabled; only weight and output
            # scales are recorded.
            feature_map = node_val['output']
            print(name, 'outputs', feature_map.max(), feature_map.min())
            out_scale = find_feature_map_scale(feature_map)
            entry['output'] = out_scale
            node_cfg['scale_output'] = out_scale

        with open('log/scale', 'wb') as f:
            pickle.dump(scale_dict, f)
        with open('log/cfg_nodes.pkl', 'wb') as f:
            pickle.dump(cfg_nodes, f)
# Script fragment: prepares the MNIST arrays and builds the training graph.
# X_train, X_test, y_train, image_size, batch_size, num_pattrens, scale,
# learning_rate and n_epochs are expected to be defined earlier in the script.

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# Min-max rescale the training images to [0, 1].
# NOTE(review): X_test is only cast here, not rescaled - confirm it is
# normalized elsewhere before being compared against the training data.
X_train = (X_train-X_train.min())/(X_train.max()-X_train.min())
y_train = np_utils.to_categorical(y_train ,num_classes=10)
noise = 0
# MNIST via input_data with one-hot labels and no validation split.
Mnist = input_data.read_data_sets('MNIST_data/', one_hot = True, validation_size = 0)
# Placeholders for the one-hot labels and the 28x28x1 input images.
y = tf.placeholder(tf.float32,[None, 10])
x_image = tf.placeholder(tf.float32,[None, 28,28,1])
# applying a random rotation and a random shift to the training images
datagen = ImageDataGenerator(rotation_range=15, width_shift_range=0.4)
datagen.fit(X_train)
outputs, parameters = inference(x_image,noise,image_size,batch_size,num_pattrens,scale)
prediction = tf.nn.softmax(outputs)
# Softmax cross-entropy on the raw outputs, averaged over the batch.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=outputs, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss_op)
# Fraction of samples whose arg-max output matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(outputs,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
saver = tf.train.Saver()
init = tf.global_variables_initializer()
model_path = './modelParameters/dcnet_mnist.ckpt'
# One slot per epoch for the recorded loss / accuracy values.
loss_summary = np.zeros(n_epochs)
accu_summary = np.zeros(n_epochs)
import os from train_run_image_tfrecords import train_iterator, train_next_element, batch_size import scipy.misc MODEL_SAVE_PATH = './logs' train_image_path = "./train_images" NUM_EXAMPLES = len(next(os.walk(train_image_path))[2]) print("num_examples=", NUM_EXAMPLES) os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" features_batch = train_next_element steps = int(NUM_EXAMPLES / batch_size) logits = net.inference(input_tensor=features_batch, train=False, regularizer=None) saver = tf.train.Saver() num = 0 batch = 0 with tf.Session() as sess: sess.run(train_iterator.initializer) print("Reading checkpoints...") ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH) print("ckpt", ckpt) if ckpt and ckpt.model_checkpoint_path:
def train(model_dir, image_paths):
    """Train the network on (gray, original) image batches.

    Resumes from the latest checkpoint under ``<model_dir>/model`` when one
    exists, then trains until the global step reaches ``cfg.MAX_STEPS``,
    saving a checkpoint every 100 steps and at the final step.

    Args:
        model_dir: Base directory; checkpoints live in its 'model'
            sub-directory.
        image_paths: Passed to ``image.read_images`` to draw each batch.

    Raises:
        AssertionError: If the loss becomes NaN.
    """
    global_step = tf.train.get_or_create_global_step()

    # Create gray images and original images placeholders
    gray_images = tf.placeholder(tf.float32,
                                 shape=(cfg.BATCH_SIZE, 144, 160, 3))
    original_images = tf.placeholder(tf.float32,
                                     shape=(cfg.BATCH_SIZE, 144, 160, 3))

    # Build a Graph that computes the logits predictions from the inference
    # model.
    logits = net.inference(gray_images)

    # Calculate loss
    loss = net.loss(original_images, logits)

    # Optimization
    train_op = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(
        loss, global_step=global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph
    with tf.Session() as sess:
        sess.run(init)

        ckpt_dir = os.path.join(model_dir, 'model')
        # Saver.save() refuses to write into a missing directory, so make
        # sure it exists before the first checkpoint is due.
        if not os.path.isdir(ckpt_dir):
            os.makedirs(ckpt_dir)

        # Find previous model and restore it
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restoring model...")
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("Model restored")
            except ValueError:
                # Best effort: keep training from the fresh initialisation.
                print("Can not restore model")

        step = tf.train.global_step(sess, global_step)
        while step < cfg.MAX_STEPS:
            start_time = time.time()
            original, gray = image.read_images(image_paths, cfg.BATCH_SIZE)
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={
                                         gray_images: gray,
                                         original_images: original
                                     })
            duration = time.time() - start_time

            # Raised explicitly so the check survives `python -O`; the
            # exception type matches the original assert.
            if np.isnan(loss_value):
                raise AssertionError('Model diverged with loss = NaN')

            step += 1
            examples_per_sec = cfg.BATCH_SIZE / duration
            sec_per_batch = duration
            # The closing parenthesis was missing from the original message.
            format_str = '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
            print(format_str % (datetime.now(), step, loss_value,
                                examples_per_sec, sec_per_batch))

            # Save the model checkpoint periodically.
            if step % 100 == 0 or step == cfg.MAX_STEPS:
                print("Saving model")
                saver.save(sess,
                           os.path.join(ckpt_dir, 'model.ckpt'),
                           global_step=global_step)
def main(args):
    """Train the two-class network and save it whenever validation improves.

    Args:
        args: Parsed options. Reads ``gpu_idx``, ``train_data_dir``,
            ``val_data_dir``, ``logs_dir``, ``models_dir``, ``dropout_rate``,
            ``weight_decay``, ``learning_rate``, ``gpu_memory_fraction``,
            ``epoch_size``, ``max_nrof_epochs`` and ``val_step``.

    Side effects:
        Creates timestamped log and model directories, writes the graph to a
        summary ``FileWriter`` and saves the model through
        ``save_variables_and_metagraph``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_idx

    if args.val_data_dir:
        train_list, train_label, _, _ = preprocessing_data.get_img_path_and_lab(
            args.train_data_dir, split=False, shuffle=True)
        val_list, val_label, _, _ = preprocessing_data.get_img_path_and_lab(
            args.val_data_dir, split=False, shuffle=True)
    else:
        train_list, train_label, val_list, val_label = \
            preprocessing_data.get_img_path_and_lab(args.train_data_dir)

    subdir = time.strftime('%Y%m%d-%H%M%S', time.localtime())
    log_dir = os.path.join(os.path.expanduser(args.logs_dir), subdir)
    if os.path.exists(log_dir):
        os.rmdir(log_dir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_dir), subdir)
    if os.path.exists(model_dir):
        os.rmdir(model_dir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)

    train_dataset = DataLoader(train_list, train_label, [160, 160],
                               num_classes=2)
    val_dataset = DataLoader(val_list, val_label, [160, 160], num_classes=2)

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        input_placeholder = tf.placeholder(tf.float32, [None, 160, 160, 3],
                                           name="input")
        label_placeholder = tf.placeholder(tf.int64, [None, ], name="label")

        logits, end_points = net.inference(input_placeholder,
                                           num_classes=2,
                                           dropout_rate=args.dropout_rate,
                                           is_training=True,
                                           weight_decay=args.weight_decay,
                                           scope="My_Net")

        # Loss
        cross_entropy_mean = net.loss(logits, label_placeholder)
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        # tf.add_n rejects an empty list, so only add the regularization
        # term when the collection is non-empty.
        if regularization_loss:
            total_loss = cross_entropy_mean + tf.add_n(regularization_loss)
        else:
            total_loss = cross_entropy_mean

        # Prediction (named op kept in the graph that gets saved).
        prob = tf.nn.softmax(logits=logits, name='prob')

        # Accuracy
        accuracy_op = net.accuracy(logits, label_placeholder, 2)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        train_op = optimizer.minimize(loss=total_loss, global_step=global_step)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        train_data, train_init = train_dataset.data_batch(augment=True,
                                                          shuffle=True,
                                                          batch_size=32,
                                                          repeat_times=1000,
                                                          num_threads=4,
                                                          buffer=5000)
        val_data, val_init = val_dataset.data_batch(augment=True,
                                                    shuffle=False,
                                                    batch_size=32,
                                                    repeat_times=1,
                                                    num_threads=4,
                                                    buffer=5000)

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        coord = tf.train.Coordinator()
        threads = []
        summary_writer = None
        try:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            sess.run(train_init)
            sess.run(val_init)

            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            max_accuracy = 0
            with sess.as_default():
                print("Running training.")
                num_step = args.epoch_size * args.max_nrof_epochs
                for step in range(1, num_step + 1):
                    train_element = sess.run(train_data)
                    sess.run([train_op],
                             feed_dict={input_placeholder: train_element[0],
                                        label_placeholder: train_element[1]})

                    if (step % args.val_step) == 0 or step == 1:
                        # Calculate Validation loss and accuracy
                        try:
                            val_element = sess.run(val_data)
                        except tf.errors.OutOfRangeError:
                            # The validation data is built with
                            # repeat_times=1; restart it once exhausted.
                            # NOTE(review): assumes val_init re-initializes
                            # the validation iterator - confirm in DataLoader.
                            sess.run(val_init)
                            val_element = sess.run(val_data)
                        loss, acc = sess.run(
                            [total_loss, accuracy_op],
                            feed_dict={input_placeholder: val_element[0],
                                       label_placeholder: val_element[1]})

                        if acc > max_accuracy:
                            # Remember the best accuracy so that only real
                            # improvements trigger a save.
                            max_accuracy = acc
                            save_variables_and_metagraph(
                                sess, saver, summary_writer, model_dir,
                                subdir, global_step)

                        print("Step " + str(step) + ", Validation Loss= " +
                              "{:.4f}".format(loss) +
                              ", Validation Accuracy= " +
                              "{:.3f}".format(acc))
        finally:
            # Stop background threads and release the writer and the session.
            coord.request_stop()
            coord.join(threads)
            if summary_writer is not None:
                summary_writer.close()
            sess.close()
def main(argv=None):
    """Evaluate the restored regression net and report the mean distance.

    Restores the checkpoint at ``NETPATH``, dumps the graph definition to
    'data/train.pb', runs every sample through the net and prints the mean
    Euclidean distance between the first three predicted components and the
    label. When ``WRITE_RESULT`` is set, each prediction is also written to
    'result.txt'.

    Args:
        argv: Unused.
    """
    f = open('result.txt', 'w') if WRITE_RESULT else None
    try:
        print('Loading......')
        start_time = time.time()
        begin_time = start_time
        data, label = loadDataLabel(DATADIR)
        train_size = len(label)
        print('Loaded %d images.' % train_size)
        elapsed_time = time.time() - start_time
        print('Loading images with label elapsed %.1f s' % elapsed_time)

        print('Building net......')
        start_time = time.time()
        x = tf.placeholder(tf.float32, shape=[1, 9], name='data')
        keep_prob = tf.placeholder(tf.float32, name='prob')
        train_prediction = inference(x, keep_prob)
        elapsed_time = time.time() - start_time
        print('Building net elapsed %.1f s' % elapsed_time)

        print('Begin testing..., train dataset size:{0}'.format(train_size))
        start_time = time.time()
        saver = tf.train.Saver()
        elapsed_time = time.time() - start_time
        print('loading net elapsed %.1f s' % elapsed_time)

        start_time = time.time()
        distances = []
        with tf.Session() as sess:
            saver.restore(sess, NETPATH)
            # saver.save(sess, 'pb_saver/net.ckpt')
            tf.train.write_graph(sess.graph_def, '.', 'data/train.pb', False)
            for i in range(train_size):
                feed_dict = {
                    x: np.reshape(data[i, ...], [1, 9]),
                    keep_prob: 1.0,  # keep_prob is fed 1.0 for evaluation
                }
                tp = sess.run(train_prediction, feed_dict=feed_dict)
                if f is not None:
                    f.write(
                        str(tp[0][0]) + ' ' + str(tp[0][1]) + ' ' +
                        str(tp[0][2]) + '\n')
                # Euclidean distance over the first three components.
                distance = np.square(np.reshape(tp, (-1)) - label[i, ...])
                distance_ave = np.sqrt(distance[0] + distance[1] + distance[2])
                distances.append(distance_ave)
                if i % EVAL_FREQUENCY == 0:
                    elapsed_time = time.time() - start_time
                    start_time = time.time()
                    print('Step %d, %.1f ms.' %
                          (i, 1000 * elapsed_time / EVAL_FREQUENCY))
                    print('Prediction: ', tp)
                    print('True: ', label[i, ...])
                    print('Distances: ', distance_ave)
                    sys.stdout.flush()

        distances = np.asarray(distances, dtype=np.float32)
        error = np.mean(distances)
        print('Total Average Distance: ', error)
        elapsed_time = time.time() - begin_time
        print('Total time: %.1f s' % elapsed_time)
    finally:
        # Close the result file even when evaluation fails part-way.
        if f is not None:
            f.close()
# Python 2 training script: pulls batches from input_data.Input and runs the
# training op built by the net module for 2000 iterations.
import net
import tensorflow as tf
import input_data
import numpy as np

# NOTE: `input` shadows the Python builtin of the same name.
input=input_data.Input()
with tf.Session() as sess:
    for i in xrange(2000):
        images,labels=input.get_data(20)
        # NOTE(review): the model, loss and training ops are re-created on
        # every iteration, and all variables are re-initialized right before
        # each training step - confirm this is intended, since it grows the
        # graph and resets the variables each time.
        logits = net.inference(images)
        loss=net.loss(logits,labels)
        train_op=net.training(loss,0.0005)
        tf.initialize_all_variables().run()
        sess.run(train_op)
        if i%10==0:
            print "iteration: %d"%(i)
            # `loss` is rebound from the tensor to its evaluated value here;
            # it is rebuilt at the top of the next iteration.
            out,loss=sess.run([logits,loss])
            print out
            print loss
            print np.argmax(out,1)
            print labels
            # NOTE(review): the original indentation of this final print
            # could not be recovered; verify which block it belongs to.
            print i
def main(argv=None):
    """Evaluate the recurrent net frame by frame and print the error rate.

    For every sequence the recurrent state is reset, then each frame is fed
    individually and the returned final state is carried into the next
    frame. Predictions are compared with the labels over all frames.

    Args:
        argv: Unused.
    """
    # Single-argument print(...) calls behave the same under Python 2 and 3.
    print('Loading......')
    start_time = time.time()
    begin_time = start_time
    data, label = loadDataLabelRealtime(DATADIR, shuffle=True, various=True)
    train_size = len(label)
    print('Loaded %d datas.' % train_size)
    elapsed_time = time.time() - start_time
    print('Loading datas with label elapsed %.1f s' % elapsed_time)

    print('Building net......')
    start_time = time.time()
    x = tf.placeholder(tf.float32,
                       shape=[BATCH_SIZE, FRAME_COUNT, 2],
                       name='data')
    keep_prob = tf.placeholder(tf.float32, name='prob')
    train_prediction, initial_state, final_state = inference(
        x, keep_prob, BATCH_SIZE)
    prediction = tf.nn.softmax(train_prediction)

    def eval_in_batches(_data, sess, state):
        """Feed one step; return (logits, softmax, new recurrent state)."""
        feed_dict = {
            x: np.reshape(_data, [BATCH_SIZE, FRAME_COUNT, 2]),
            keep_prob: 1.0
        }
        # Feed the previous state into every (c, h) pair of the initial state.
        for i, (c, h) in enumerate(initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        tp, p, _final_state = sess.run(
            [train_prediction, prediction, final_state], feed_dict=feed_dict)
        return tp, p, _final_state

    elapsed_time = time.time() - start_time
    print('Building net elapsed %.1f s' % elapsed_time)

    print('Begin testing..., train dataset size:{0}'.format(train_size))
    start_time = time.time()
    saver = tf.train.Saver()
    elapsed_time = time.time() - start_time
    print('loading net elapsed %.1f s' % elapsed_time)

    start_time = time.time()
    ls = []
    with tf.Session() as sess:
        saver.restore(sess, NETPATH)
        tf.train.write_graph(sess.graph_def, '.', 'data/train.pb', False)
        for i in range(train_size):
            # Every sequence starts from a fresh recurrent state.
            state = sess.run(initial_state)
            batch_data = np.reshape(data[i], [BATCH_SIZE, -1, 2])
            frame_length = batch_data.shape[1]
            ls_sub = []
            for j in range(frame_length):
                data_mini = batch_data[0, j]
                tp, p, state = eval_in_batches(data_mini, sess, state)
                label_prediction = np.argmax(p)
                ls_sub.append(label_prediction)
            ls.append(ls_sub)
            if i % EVAL_FREQUENCY == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d, %.1f ms.' %
                      (i, 1000 * elapsed_time / EVAL_FREQUENCY))
                print('True label: ')
                print(label[i])
                print('Prediction: ')
                print(ls_sub)
                sys.stdout.flush()

    # Accumulate frame counts and errors over all sequences. The counters
    # are named so they no longer shadow the builtin sum().
    total_count = 0
    error = 0
    for i in range(len(ls)):
        # np.int was only an alias of the builtin int and is removed in newer
        # NumPy releases; int is an exact replacement.
        ls_sub = np.asarray(ls[i], int)
        label_sub = np.asarray(label[i], int)
        sum_count = len(ls_sub)
        error_count = sum_count - np.sum(ls_sub == label_sub)
        total_count += sum_count
        error += error_count
    # Guard against an empty dataset instead of dividing by zero.
    error_rate = 100.0 * error / total_count if total_count else 0.0
    print('Total size: %d, Test error count: %d, error rate: %f%%' %
          (total_count, error, error_rate))
    elapsed_time = time.time() - begin_time
    print('Total time: %.1f s' % elapsed_time)
# Script fragment: builds the training logits and the regularized MSE loss.
# train_next_element / test_next_element are expected to be defined earlier
# in the script.

# L2 regularization strength handed to the network builder.
REGULARAZTION_RATE = 0.001
regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
global_step = tf.Variable(0, name="global_step", trainable=False)

# Each element unpacks as (label, feature).
with tf.name_scope("Train_Input_Data"):
    train_label, train_feature = train_next_element
with tf.name_scope("Test_Input_Data"):
    test_label, test_feature = test_next_element

# with tf.variable_scope("Logits") as scope:
train_logits = net.inference(input_tensor=train_feature, train=True, regularizer=regularizer)
#
# scope.reuse_variables()
#
# test_logits = net.inference(
#     input_tensor=test_feature,
#     train=False,
#     regularizer=None)

# The bare string below is the author's note, kept verbatim. English: "The
# most critical point is here: when loading the model, train and test must
# not be written together in this place, otherwise it fails!"
"最关键的地方是这里,加载模型的时候,这里不可以写成train和test一起的形式,会出错!!!"

with tf.variable_scope("Train_Loss"):
    # Mean squared error plus everything collected under 'losses'.
    MSE = tf.reduce_mean(tf.square(train_logits - train_label))
    train_loss = MSE + tf.add_n(tf.get_collection('losses'))
    tf.summary.scalar("Train Loss", train_loss)
# with tf.variable_scope("Test_Loss"):
depth=load.CLASS_NUM, on_value=1, off_value=0, name='Y') print(Y.get_shape()) S = tf.placeholder(dtype=tf.float64, shape=[None, net.STATE_LEN], name='S') P = tf.placeholder(dtype=tf.float64, name='P') global_step = tf.Variable(0, trainable=False) STARTER_LEARNING_RATE = 0.001 DECAY_STEPS = 100 DECAY_RATE = 0.99 MOVING_AVERAGE_DECAY = 0.99 prediction = net.inference(X, S, P) print('input_name:' + X.name) print('keepprob_name:' + P.name) print('output_name:' + prediction.name) learning_rate = tf.train.exponential_decay(STARTER_LEARNING_RATE, global_step, DECAY_STEPS, DECAY_RATE, staircase=False) total_loss = tf.losses.softmax_cross_entropy(Y, prediction) optimizer = tf.train.GradientDescentOptimizer(learning_rate) opt_op = optimizer.minimize(total_loss) correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
def main(argv=None):
    """Train the recurrent net on sequence data and save checkpoints.

    The recurrent state is carried from one step to the next and reset at the
    start of every epoch. During the last tenth of the epochs the net is
    saved to ``NETPATH`` whenever the training loss reaches a new minimum;
    the final net is always saved to ``NETPATH_FINAL``. Summaries go to
    'graph/train'.

    Args:
        argv: Unused.
    """
    with tf.Graph().as_default():
        # Single-argument print(...) calls behave the same under Python 2
        # and 3.
        print('Start.')
        start_time = time.time()
        begin_time = start_time

        print('Loading data.')
        data, label = loadDataLabelSequence(DATADIR, BATCH_SIZE)
        batch_len = label.shape[0]
        epoch_size = label.shape[1]
        train_size = batch_len * epoch_size * FRAME_COUNT
        print('Loaded %d * %d * %d datas.' %
              (batch_len, epoch_size, FRAME_COUNT))
        elapsed_time = time.time() - start_time
        print('Loading images with label elapsed %.1f s' % elapsed_time)

        print('Building net......')
        start_time = time.time()
        x = tf.placeholder(tf.float32,
                           shape=[BATCH_SIZE, FRAME_COUNT, 2],
                           name='data')
        y = tf.placeholder(tf.int32, shape=[BATCH_SIZE, FRAME_COUNT])
        keep_prob = tf.placeholder(tf.float32, name='prob')

        # Train model.
        train_prediction, initial_state, final_state = inference(
            x, keep_prob, BATCH_SIZE)

        batch = tf.Variable(0, dtype=tf.float32, trainable=False)
        learning_rate = tf.train.exponential_decay(
            0.1,  # Base learning rate.
            batch * BATCH_SIZE * FRAME_COUNT,  # Current index into the dataset.
            train_size * 100,  # Decay step.
            0.95,  # Decay rate.
            staircase=True)
        tf.summary.scalar('learn', learning_rate)

        loss = total_loss(train_prediction, y, BATCH_SIZE)
        tf.summary.scalar('loss', loss)

        trainer = myTrain(loss, learning_rate, batch)
        elapsed_time = time.time() - start_time
        print('Building net elapsed %.1f s' % elapsed_time)

        print('Begin training..., train dataset size:{0}'.format(train_size))
        start_time = time.time()
        # Despite its name, this tracks the lowest *training* loss seen.
        best_validation_loss = 100000.0
        saver = tf.train.Saver()
        with tf.Session() as sess:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter('graph/train', sess.graph)
            try:
                # Initialize the whole net.
                tf.global_variables_initializer().run()
                state = sess.run(initial_state)
                print('Initialized!')
                for step in range(NUM_EPOCHS * epoch_size):
                    offset = step % epoch_size
                    if offset == 0:
                        # New epoch: restart from the initial state.
                        state = sess.run(initial_state)
                    batch_data = np.reshape(data[:, offset, :, :],
                                            [BATCH_SIZE, FRAME_COUNT, 2])
                    batch_labels = np.reshape(label[:, offset, :],
                                              [BATCH_SIZE, FRAME_COUNT])

                    # Train RNN net.
                    feed_dict = {
                        x: batch_data,
                        y: batch_labels,
                        keep_prob: KEEP_PROB
                    }
                    # Carry the previous final state into this step.
                    for i, (c, h) in enumerate(initial_state):
                        feed_dict[c] = state[i].c
                        feed_dict[h] = state[i].h
                    summary, _, l, lr, predictions, state = sess.run(
                        [
                            merged, trainer, loss, learning_rate,
                            train_prediction, final_state
                        ],
                        feed_dict=feed_dict)
                    train_writer.add_summary(summary, step)

                    # Logical `and` replaces the original bitwise `&`.
                    if (step // epoch_size > NUM_EPOCHS * 0.9) and \
                            (l < best_validation_loss):
                        print('Previous Saving net.')
                        print('Net loss:%.3f, learning rate: %.6f' % (l, lr))
                        best_validation_loss = l
                        saver.save(sess, NETPATH)

                    if step % EVAL_FREQUENCY == 0:
                        elapsed_time = time.time() - start_time
                        start_time = time.time()
                        print('Step %d (epoch %.2f), %.1f ms' %
                              (step, np.float32(step) / epoch_size,
                               1000 * elapsed_time / EVAL_FREQUENCY))
                        print('Net loss:%.3f, learning rate: %.6f' % (l, lr))
                        sys.stdout.flush()

                print('Saving final net.')
                saver.save(sess, NETPATH_FINAL)
            finally:
                # Close the event file even if training aborts.
                train_writer.close()

        elapsed_time = time.time() - begin_time
        print('Total time: %.1f s' % elapsed_time)
# Prediction script: loads the saved weights into the model returned by
# inference() and writes one normalized PNG per (row, column) input tile.
from net import inference
import os
import numpy as np
from scipy import misc

testimg_path = "D:/LeeX/deep-learning-microscopy/20190410perspective_image_new_9x11/npydata/"
predimg_savepath = "D:/LeeX/deep-learning-microscopy/20190410perspective_image_new_9x11/predict_img/"
test_path = 'D:/LeeX/deep-learning-microscopy/test/input'
model_path = "weights.best.hdf5"

# Half the number of entries in test_path; not used further in this script.
count_test = int(len(os.listdir(test_path)) / 2)

model = inference()
print('loading weights...')
model.load_weights(model_path)
print('overloaded data succeeded!')

print('predicting...')
# Tiles are addressed 1-based: rows 1..9, columns 1..11.
for i in range(9):
    for j in range(11):
        test_img = np.load(
            (testimg_path + 'input_data_(%d,%d).npy') % (i + 1, j + 1))
        # Reshape into a single 64x64 one-channel sample for the model.
        test_img = np.reshape(test_img, [1, 64, 64, 1])
        predimg = model.predict(test_img)
        predimg = np.squeeze(predimg)
        # Min-max normalize the prediction to [0, 1].
        # NOTE(review): a constant prediction makes the denominator zero.
        predimg = (predimg - np.min(predimg)) / (np.max(predimg) - np.min(predimg))
        #predimg = predimg.astype('float32')/255
        # NOTE(review): scipy.misc.imsave is not available in recent SciPy
        # releases - confirm the pinned SciPy version.
        misc.imsave((predimg_savepath + '(%d,%d).png') % (i + 1, j + 1), predimg)
        print(('predicted img(%d,%d) has saved!') % (i + 1, j + 1))
def main(argv=None):
    """Evaluate the recurrent net per sequence and print the error rate.

    Each sequence is fed in one ``sess.run`` call, always starting from the
    initial recurrent state. The column at index ``FRAME_COUNT - 1`` of the
    predictions is compared against the same column of the labels, so there
    is one scored prediction per sequence.

    Args:
        argv: Unused.
    """
    # Single-argument print(...) calls behave the same under Python 2 and 3.
    print('Loading......')
    start_time = time.time()
    begin_time = start_time
    data, label = loadDataLabel(DATADIR, shuffle=True, various=True)
    train_size = len(label)
    print('Loaded %d datas.' % train_size)
    elapsed_time = time.time() - start_time
    print('Loading datas with label elapsed %.1f s' % elapsed_time)

    print('Building net......')
    start_time = time.time()
    x = tf.placeholder(tf.float32,
                       shape=[BATCH_SIZE, FRAME_COUNT, 2],
                       name='data')
    keep_prob = tf.placeholder(tf.float32, name='prob')
    train_prediction, initial_state, final_state = inference(
        x, keep_prob, BATCH_SIZE)
    prediction = tf.nn.softmax(train_prediction)

    def eval_in_batches(_data, sess, state, _initial_state=initial_state):
        """Feed one sequence with the given state; return (logits, softmax)."""
        feed_dict = {
            x: np.reshape(_data, [BATCH_SIZE, FRAME_COUNT, 2]),
            keep_prob: 1.0
        }
        for i, (c, h) in enumerate(_initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        tp, p = sess.run([train_prediction, prediction], feed_dict=feed_dict)
        return tp, p

    elapsed_time = time.time() - start_time
    print('Building net elapsed %.1f s' % elapsed_time)

    print('Begin testing..., train dataset size:{0}'.format(train_size))
    start_time = time.time()
    saver = tf.train.Saver()
    elapsed_time = time.time() - start_time
    print('loading net elapsed %.1f s' % elapsed_time)

    start_time = time.time()
    ls = []
    with tf.Session() as sess:
        saver.restore(sess, NETPATH)
        # The initial state is evaluated once and reused for every sequence.
        state = sess.run(initial_state)
        tf.train.write_graph(sess.graph_def, '.', 'data/train.pb', False)
        for i in range(train_size):
            batch_data = np.reshape(data[i, ...],
                                    [BATCH_SIZE, FRAME_COUNT, 2])
            tp, p = eval_in_batches(batch_data, sess, state)
            label_prediction = np.argmax(p, axis=1)
            ls.append(label_prediction)
            if i % EVAL_FREQUENCY == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d, %.1f ms.' %
                      (i, 1000 * elapsed_time / EVAL_FREQUENCY))
                # Two spaces reproduce the output of the original
                # two-argument Python 2 print statement.
                print('True label:  %s' % (label[i],))
                print('Prediction:  %s' % (label_prediction,))
                sys.stdout.flush()

    # np.int was only an alias of the builtin int and is removed in newer
    # NumPy releases; int is an exact replacement.
    ls = np.asarray(ls, int)
    error_count = train_size - np.sum(
        ls.T[FRAME_COUNT - 1:FRAME_COUNT].T ==
        label.T[FRAME_COUNT - 1:FRAME_COUNT].T)
    # Exactly train_size predictions are scored, so that is the denominator;
    # the original hard-coded `train_size * 4` understated the error rate.
    error_rate = 100.0 * error_count / train_size if train_size else 0.0
    print('Total size: %d, Test error count: %d, error rate: %f%%' %
          (train_size, error_count, error_rate))
    elapsed_time = time.time() - begin_time
    print('Total time: %.1f s' % elapsed_time)