def tower_loss(images, score_maps, geo_maps, training_masks, reuse_variables=None):
    """Build one training tower: forward pass, losses and summaries.

    Args:
        images: Input image batch passed to ``model.model``.
        score_maps: Ground-truth score maps passed to ``model.loss``.
        geo_maps: Ground-truth geometry maps passed to ``model.loss``.
        training_masks: Masks passed to ``model.loss``.
        reuse_variables: Forwarded as ``reuse`` to ``tf.variable_scope``.

    Returns:
        ``(total_loss, model_loss, f_score, f_geometry)`` where
        ``total_loss`` is ``model_loss`` plus every loss found in the
        ``REGULARIZATION_LOSSES`` collection.
    """
    # Build inference graph.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        # Model definition. Per the original author's note: f_score is a
        # foreground-probability map with the same size as the input image,
        # and f_geometry holds 5 maps (4 top/bottom/left/right values and
        # 1 rotation angle).
        f_score, f_geometry = model.model(images, is_training=True)

    # Signature hint kept from the original:
    # loss(y_true_cls, y_pred_cls, y_true_geo, y_pred_geo, training_mask)
    model_loss = model.loss(score_maps, f_score, geo_maps, f_geometry,
                            training_masks)
    total_loss = tf.add_n(
        [model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    tf.summary.image('input', images)
    tf.summary.image('score_map', score_maps)
    tf.summary.image('score_map_pred', f_score * 255)
    tf.summary.image('geo_map_0', geo_maps[:, :, :, 0:1])
    tf.summary.image('geo_map_#0_pred', f_geometry[:, :, :, 0:1])
    # Fixed: this summary used to re-plot channel 0, duplicating the one above.
    tf.summary.image('geo_map_#1_pred', f_geometry[:, :, :, 1:2])
    tf.summary.image('training_masks', training_masks)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss, f_score, f_geometry
def tower_loss(images, gt_score_maps, gt_threshold_map, gt_score_mask, gt_thresh_mask, reuse_variables):
    """Build one training tower and return its losses and predictions.

    Args:
        images: Input image batch passed to ``model.model``.
        gt_score_maps: Ground-truth score maps.
        gt_threshold_map: Ground-truth threshold map.
        gt_score_mask: Mask paired with the score maps in ``compute_loss``.
        gt_thresh_mask: Mask paired with the threshold map in ``compute_loss``.
        reuse_variables: Forwarded as ``reuse`` to ``tf.variable_scope``;
            summaries are only registered when this is ``None``.

    Returns:
        ``(total_loss, model_loss, binarize_map, threshold_map, thresh_binary)``.
    """
    current_scope = tf.get_variable_scope()
    with tf.variable_scope(current_scope, reuse=reuse_variables):
        predictions = model.model(images, is_training=True)
    binarize_map, threshold_map, thresh_binary = predictions

    model_loss = compute_loss(
        binarize_map, threshold_map, thresh_binary,
        gt_score_maps, gt_threshold_map, gt_score_mask, gt_thresh_mask)
    regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n([model_loss] + regularizers)

    # Register the summaries once; other towers pass a non-None reuse flag.
    if reuse_variables is None:
        image_summaries = [
            ('gt/input_imgs', images),
            ('gt/score_map', gt_score_maps),
            ('gt/threshold_map', gt_threshold_map * 255),
            ('gt/score_mask', gt_score_mask),
            ('gt/thresh_mask', gt_thresh_mask),
            ('pred/binarize_map', binarize_map),
            ('pred/threshold_map', threshold_map * 255),
            ('pred/thresh_binary', thresh_binary),
        ]
        for tag, tensor in image_summaries:
            tf.summary.image(tag, tensor)
        for tag, value in (('model_loss', model_loss),
                           ('total_loss', total_loss)):
            tf.summary.scalar(tag, value)

    return total_loss, model_loss, binarize_map, threshold_map, thresh_binary
def predict(im):
    """Detect text boxes in a single image.

    The network is built in a private ``tf.Graph`` so the function can be
    called more than once per process; building into the shared default graph
    made the second call fail when ``global_step`` was created again.

    Args:
        im: Image array; ``im.shape`` must unpack to ``(h, w, channels)``.

    Returns:
        ``(im, boxes)``. ``boxes`` is whatever ``detect`` returned, reshaped
        to ``(-1, 4, 2)``, rescaled by the resize ratios and clipped to the
        image size, or ``None`` when ``detect`` returned ``None``.

    Raises:
        ValueError: If no checkpoint is found in ``FLAGS.checkpoint_path``.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    with tf.Graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                raise ValueError('No checkpoint found in {}'.format(
                    FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            start_time = time.time()
            im_resized, (ratio_h, ratio_w) = resize_image(im)
            h, w, _ = im_resized.shape

            timer = {'net': 0, 'pse': 0}
            start = time.time()
            seg_maps = sess.run(seg_maps_pred,
                                feed_dict={input_images: [im_resized]})
            timer['net'] = time.time() - start

            boxes, kernels, timer = detect(seg_maps=seg_maps, timer=timer,
                                           image_w=w, image_h=h)

            if boxes is not None:
                # Map boxes from the resized image back to the original and
                # keep them inside its bounds.
                boxes = boxes.reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                h, w, _ = im.shape
                boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

            duration = time.time() - start_time
            logger.info('[timing] {}'.format(duration))

    return im, boxes
def tower_loss(images, annotation, class_labels, reuse_variables=None):
    """Build one segmentation training tower.

    Args:
        images: Input image batch passed to ``model.model``.
        annotation: Ground-truth annotation passed to ``model.loss``.
        class_labels: Class labels passed to ``model.loss``.
        reuse_variables: Forwarded as ``reuse`` to ``tf.variable_scope``;
            scalar summaries are only registered when this is ``None``.

    Returns:
        ``(total_loss, model_loss, pred)`` where ``pred`` is the argmax of the
        logits over dimension 3.
    """
    outer_scope = tf.get_variable_scope()
    with tf.variable_scope(outer_scope, reuse=reuse_variables):
        logits = model.model(images, is_training=True)

    pred = tf.argmax(logits, dimension=3)
    model_loss = model.loss(annotation, logits, class_labels)
    weight_penalties = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n([model_loss] + weight_penalties)

    # Register the summaries once; other towers pass a non-None reuse flag.
    if reuse_variables is None:
        for tag, value in (('model_loss', model_loss),
                           ('total_loss', total_loss)):
            tf.summary.scalar(tag, value)

    return total_loss, model_loss, pred
def main(argv=None):
    """Segment every image from ``get_images()`` and save the visualisations.

    ``FLAGS.result_path`` is wiped and recreated, the latest checkpoint in
    ``FLAGS.checkpoint_path`` is restored, and one colourised prediction per
    input image is written into the result directory.

    Args:
        argv: Unused.
    """
    import os
    # Start from an empty result directory.
    if os.path.exists(FLAGS.result_path):
        shutil.rmtree(FLAGS.result_path)
    os.makedirs(FLAGS.result_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        logits = model.model(input_images, is_training=False)
        pred = tf.argmax(logits, dimension=3)

        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel axis before inference.
                im = cv2.imread(im_fn)[:, :, ::-1]
                im_resized, (ratio_h, ratio_w) = resize_image(im, size=32)

                start = time.time()
                pred_re = sess.run([pred],
                                   feed_dict={input_images: [im_resized]})
                pred_re = np.array(np.squeeze(pred_re))
                img = visualize_segmentation_adaptive(pred_re, pascal_voc_lut)
                _diff_time = time.time() - start

                cv2.imwrite(
                    os.path.join(FLAGS.result_path, os.path.basename(im_fn)),
                    img)
                # Fixed: .format() used to be applied to print()'s return
                # value, which raises AttributeError on Python 3.
                print('{}: cost {:.0f}ms'.format(im_fn, _diff_time * 1000))
def tower_loss(images, score_maps, geo_maps, training_masks, reuse_variables=None):
    """Build one training tower and return its total and model losses.

    Args:
        images: Input image batch passed to ``model.model``.
        score_maps: Ground-truth score maps.
        geo_maps: Ground-truth geometry maps.
        training_masks: Masks passed to ``model.loss``.
        reuse_variables: Forwarded as ``reuse`` to ``tf.variable_scope``;
            summaries are only registered when this is ``None``.

    Returns:
        ``(total_loss, model_loss)``.
    """
    # Build inference graph.
    enclosing_scope = tf.get_variable_scope()
    with tf.variable_scope(enclosing_scope, reuse=reuse_variables):
        f_score, f_geometry = model.model(images, is_training=True)

    model_loss = model.loss(score_maps, f_score, geo_maps, f_geometry,
                            training_masks)
    regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n([model_loss] + regularizers)

    # Softmax over the score logits and over the first geometry channel pair.
    # The original carried the same softmax for pairs 2:4 .. 14:16 (and their
    # summaries) as commented-out code; only the first pair is active.
    cls_score = tf.nn.softmax(f_score)
    geo_score_1 = tf.nn.softmax(f_geometry[:, :, :, 0:2])

    # Register the summaries once; other towers pass a non-None reuse flag.
    if reuse_variables is None:
        image_summaries = [
            ('input', images),
            ('score_map', score_maps),
            ('score_map_pred', cls_score[:, :, :, 1:2] * 255),
            ('geo_map_0', geo_maps[:, :, :, 0:1]),
            ('geo_map_1_pred', geo_score_1[:, :, :, 1:2] * 255),
        ]
        for tag, tensor in image_summaries:
            tf.summary.image(tag, tensor, max_outputs=1)
        for tag, value in (('model_loss', model_loss),
                           ('total_loss', total_loss)):
            tf.summary.scalar(tag, value)

    return total_loss, model_loss
def tower_loss(images, seg_maps_gt, training_masks, reuse_variables=None):
    """Build one training tower for the segmentation-map model.

    Args:
        images: Input image batch passed to ``model.model``.
        seg_maps_gt: Ground-truth segmentation maps.
        training_masks: Masks passed to ``model.loss``.
        reuse_variables: Forwarded as ``reuse`` to ``tf.variable_scope``;
            summaries are only registered when this is ``None``.

    Returns:
        ``(total_loss, model_loss)``.
    """
    # Build inference graph.
    root_scope = tf.get_variable_scope()
    with tf.variable_scope(root_scope, reuse=reuse_variables):
        seg_maps_pred = model.model(images, is_training=True)

    model_loss = model.loss(seg_maps_gt, seg_maps_pred, training_masks)
    penalties = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n([model_loss] + penalties)

    # Register the summaries once; other towers pass a non-None reuse flag.
    if reuse_variables is None:
        image_summaries = [
            ('input', images),
            ('seg_map_0_gt', seg_maps_gt[:, :, :, 0:1] * 255),
            ('seg_map_0_pred', seg_maps_pred[:, :, :, 0:1] * 255),
            ('training_masks', training_masks),
        ]
        for tag, tensor in image_summaries:
            tf.summary.image(tag, tensor)
        for tag, value in (('model_loss', model_loss),
                           ('total_loss', total_loss)):
            tf.summary.scalar(tag, value)

    return total_loss, model_loss
def __init__(self, ckpt_path, gpuid='0'):
    """Build the inference graph, open a session and restore the weights.

    Args:
        ckpt_path: Checkpoint path handed to ``Saver.restore``.
        gpuid: Value written to ``CUDA_VISIBLE_DEVICES``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuid
    tf.reset_default_graph()

    image_ph = tf.placeholder(tf.float32,
                              shape=[None, None, None, 3],
                              name='input_images')
    self._input_images = image_ph
    step = tf.get_variable('global_step', [],
                           initializer=tf.constant_initializer(0),
                           trainable=False)

    (self._binarize_map,
     self._threshold_map,
     self._thresh_binary) = model.model(image_ph, is_training=False)

    # Restore the exponential-moving-average shadow values of the weights.
    ema = tf.train.ExponentialMovingAverage(0.997, step)
    restorer = tf.train.Saver(ema.variables_to_restore())

    # Cap this process at half of the GPU memory.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5),
        allow_soft_placement=True)
    self.sess = tf.Session(config=session_config)
    restorer.restore(self.sess, ckpt_path)

    self.decoder = SegDetectorRepresenter()
    print('restore model from:', ckpt_path)
def main(argv=None):
    """Evaluate the segmentation model on a TFRecord file.

    Reads image/annotation pairs from ``FLAGS.test_data_path``, runs the model
    on each image resized to a multiple of 32, accumulates streaming accuracy
    and mean IoU, and writes each prediction as ``<i>.png`` into
    ``FLAGS.result_path``.

    Args:
        argv: Unused.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not os.path.exists(FLAGS.result_path):
        os.makedirs(FLAGS.result_path)

    filename_queue = tf.train.string_input_producer([FLAGS.test_data_path],
                                                    num_epochs=1)
    image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(
        filename_queue)
    image_batch_tensor = tf.expand_dims(image, axis=0)
    annotation_batch_tensor = tf.expand_dims(annotation, axis=0)

    # Round the spatial size to the nearest multiple of 32 for the network.
    input_image_shape = tf.shape(image_batch_tensor)
    image_height_width = input_image_shape[1:3]
    image_height_width_float = tf.to_float(image_height_width)
    image_height_width_multiple = tf.to_int32(
        tf.round(image_height_width_float / 32) * 32)
    image_batch_tensor = tf.image.resize_images(image_batch_tensor,
                                                image_height_width_multiple)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    logits = model.model(FLAGS.model_type, image_batch_tensor,
                         is_training=False)

    # Resize prediction and annotation back to the original image size.
    pred = tf.argmax(logits, dimension=3)
    pred = tf.expand_dims(pred, 3)
    pred = tf.image.resize_bilinear(images=pred, size=image_height_width)
    annotation_batch_tensor = tf.image.resize_bilinear(
        images=annotation_batch_tensor, size=image_height_width)
    annotation_batch_tensor = tf.div(annotation_batch_tensor, 255)

    pred = tf.reshape(pred, [-1, ])
    gt = tf.reshape(annotation_batch_tensor, [-1, ])

    acc, acc_update_op = tf.contrib.metrics.streaming_accuracy(pred, gt)
    miou, miou_update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=FLAGS.num_classes)

    with tf.get_default_graph().as_default():
        global_vars_init_op = tf.global_variables_initializer()
        local_vars_init_op = tf.local_variables_initializer()
        init = tf.group(local_vars_init_op, global_vars_init_op)

        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False,
                                gpu_options=gpu_options)
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init)
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for i in range(150):
                    start = time.time()
                    image_np, annotation_np, pred_np, tmp_acc, tmp_miou = sess.run(
                        [image, annotation, pred, acc_update_op,
                         miou_update_op])
                    _diff_time = time.time() - start
                    # Fixed: .format() used to be applied to print()'s return
                    # value, which raises AttributeError on Python 3.
                    print('{}: cost {:.0f}ms'.format(i, _diff_time * 1000))

                    prediction = np.reshape(
                        pred_np,
                        (annotation_np.shape[0],
                         annotation_np.shape[1])).squeeze() * 255
                    cv2.imwrite(
                        os.path.join(FLAGS.result_path, str(i) + '.png'),
                        prediction)
                print('Test Finished !')
            finally:
                # Always stop the queue-runner threads, even if a step raised.
                coord.request_stop()
                coord.join(threads)
def main(argv=None):
    """Detect text boxes for every image, then convert them to 4-coordinate labels.

    Stage 1 restores the latest checkpoint from ``FLAGS.checkpoint_path``,
    runs the network plus ``detect`` on each image from ``get_images()`` and
    writes one ``<stem>.txt`` of 8-coordinate quadrilaterals (and optionally
    the annotated image) into ``FLAGS.output_dir``.

    Stage 2 reads those text files back and appends one ``xmin ymin xmax ymax``
    line per box to ``<APP_ROOT>/output_label/<stem>.txt``.

    NOTE(review): the source was flattened, so the nesting of stage 2 is
    reconstructed; it is assumed to run inside ``main`` after the session
    closes. ``test_data_path``, ``output_dir``, ``APP_ROOT``, ``cv`` and
    ``shutil`` are not defined in this block - confirm they exist at module
    level.

    Args:
        argv: Unused.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST: an already existing output directory is fine.
        if e.errno != 17:
            raise
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)
        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel axis before inference.
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # (Optional TF timeline tracing was disabled here.)
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))
                if boxes is not None:
                    # Map boxes back to the original image and clip to bounds.
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)
                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))
                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))
                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            # Skip boxes with an edge shorter than 5 pixels.
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                                    box[3] - box[0]) < 5:
                                continue
                            num += 1
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
    # ==========================================================================
    # Converting to 4-co-ordinates txt
    path = test_data_path + '/'  # input images
    gt_path = output_dir + '/'  # 8 co-ordinates txt
    out_path = APP_ROOT + '/output_label'  # 4 co-ordinates txt
    # Start from an empty output_label directory.
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    else:
        shutil.rmtree(out_path)
        os.mkdir(out_path)
    files = os.listdir(path)
    files.sort()
    for file in files:
        _, basename = os.path.split(file)
        # Only image files are converted.
        if basename.lower().split('.')[-1] not in ['jpg', 'png', 'jpeg']:
            continue
        stem, ext = os.path.splitext(basename)
        gt_file = os.path.join(gt_path + stem + '.txt')
        img_path = os.path.join(path, file)
        print('Reading image ' + os.path.splitext(file)[0])
        img = cv.imread(img_path)
        img_size = img.shape
        im_size_min = np.min(img_size[0:2])
        im_size_max = np.max(img_size[0:2])
        # NOTE(review): stage 1 writes no txt when detect() returned None, so
        # this open() presumably fails for such images - confirm.
        with open(gt_file, 'r') as f:
            lines = f.readlines()
        for line in lines:
            splitted_line = line.strip().lower().split(',')
            # Four (x, y) corners of the quadrilateral, truncated to ints.
            pt_x = np.zeros((4, 1))
            pt_y = np.zeros((4, 1))
            pt_x[0, 0] = int(float(splitted_line[0]))
            pt_y[0, 0] = int(float(splitted_line[1]))
            pt_x[1, 0] = int(float(splitted_line[2]))
            pt_y[1, 0] = int(float(splitted_line[3]))
            pt_x[2, 0] = int(float(splitted_line[4]))
            pt_y[2, 0] = int(float(splitted_line[5]))
            pt_x[3, 0] = int(float(splitted_line[6]))
            pt_y[3, 0] = int(float(splitted_line[7]))
            # Sort the corners by x, then order each x-pair by y.
            ind_x = np.argsort(pt_x, axis=0)
            pt_x = pt_x[ind_x]
            pt_y = pt_y[ind_x]
            if pt_y[0] < pt_y[1]:
                pt1 = (pt_x[0], pt_y[0])
                pt3 = (pt_x[1], pt_y[1])
            else:
                pt1 = (pt_x[1], pt_y[1])
                pt3 = (pt_x[0], pt_y[0])
            if pt_y[2] < pt_y[3]:
                pt2 = (pt_x[2], pt_y[2])
                pt4 = (pt_x[3], pt_y[3])
            else:
                pt2 = (pt_x[3], pt_y[3])
                pt4 = (pt_x[2], pt_y[2])
            xmin = int(min(pt1[0], pt2[0]))
            ymin = int(min(pt1[1], pt2[1]))
            xmax = int(max(pt2[0], pt4[0]))
            ymax = int(max(pt3[1], pt4[1]))
            # Clamp the rectangle to the image bounds.
            if xmin < 0:
                xmin = 0
            if xmax > img_size[1] - 1:
                xmax = img_size[1] - 1
            if ymin < 0:
                ymin = 0
            if ymax > img_size[0] - 1:
                ymax = img_size[0] - 1
            # Append "xmin ymin xmax ymax" for this box.
            with open(os.path.join(out_path, stem) + '.txt', 'a') as f:
                f.writelines(str(int(xmin)))
                f.writelines(" ")
                f.writelines(str(int(ymin)))
                f.writelines(" ")
                f.writelines(str(int(xmax)))
                f.writelines(" ")
                f.writelines(str(int(ymax)))
                f.writelines("\n")
def main(argv=None):
    """Detect text boxes in each image, then recognise the text inside them.

    For every image from ``get_images()`` the detection graph is rebuilt and
    restored, boxes are written to ``<FLAGS.output_dir>/<stem>.txt``, the
    graph is reset, a recognition model is built and loaded, and each box is
    cropped, recognised and appended to the same text file. The recognised
    text is also drawn onto a copy of the image, which is saved unless
    ``FLAGS.no_write_images`` is set.

    Args:
        argv: Unused.
    """
    import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    t0 = time.time()
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST: an already existing output directory is fine.
        if e.errno != 17:
            raise

    im_fn_list = get_images()
    for im_fn in im_fn_list:
        points_list = []

        # ---- Detection -----------------------------------------------------
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            input_images = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3],
                                          name='input_images')
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)
            seg_maps_pred = model.model(input_images, is_training=False)
            # Restore the moving-average shadow values of the weights.
            variable_averages = tf.train.ExponentialMovingAverage(
                0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())

            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=True)) as sess:
                ckpt_state = tf.train.get_checkpoint_state(
                    FLAGS.checkpoint_path)
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))
                logger.info('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                # cv2 loads BGR; reverse the channel axis for inference and
                # keep a BGR copy to draw the recognised text on.
                im = cv2.imread(im_fn)[:, :, ::-1]
                draw_img = im[:, :, ::-1].copy()
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape

                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    # Map boxes back to the original image and clip to bounds.
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))
                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors: skip boxes with an
                            # edge shorter than 5 pixels
                            if (np.linalg.norm(box[0] - box[1]) < 5
                                    or np.linalg.norm(box[3] - box[0]) < 5):
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))

                            yDim, xDim = im[:, :, ::-1].shape[:2]
                            if box[0, 0] > box[2, 0]:
                                # box point 1 is at the bottom-right, clockwise
                                pt1 = (max(1, box[2, 0]), max(1, box[2, 1]))
                                pt2 = (box[3, 0], box[3, 1])
                                pt3 = (min(box[0, 0], xDim - 2),
                                       min(yDim - 2, box[0, 1]))
                                pt4 = (box[1, 0], box[1, 1])
                            else:
                                # box point 1 is at the bottom-left, clockwise
                                pt1 = (max(1, box[1, 0]), max(1, box[2, 1]))
                                pt2 = (box[2, 0], box[2, 1])
                                pt3 = (min(box[3, 0], xDim - 2),
                                       min(yDim - 2, box[3, 1]))
                                pt4 = (box[0, 0], box[0, 1])
                            points = [pt1, pt2, pt3, pt4]
                            points_list.append(points)

                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)

        # ---- Recognition ---------------------------------------------------
        tf.reset_default_graph()
        keras.backend.clear_session()
        rec_input = Input(shape=(32, None, 1), name='the_input')
        y_pred = dense_cnn(rec_input, nclass)
        recognition_model = Model(input=rec_input, outputs=y_pred)
        model_path = './recognition/...'
        # Fixed: the existence check used to run after load_weights(), so a
        # missing file raised before the check could report it.
        if not os.path.exists(model_path):
            print('model do not exist')
            break
        print('loading models')
        recognition_model.load_weights(model_path)

        j = 0
        txt_path = os.path.join(FLAGS.output_dir,
                                im_fn.split('/')[-1].split('.')[0])
        with open('{}.txt'.format(txt_path), 'a', encoding='utf-8') as outf:
            for points in points_list:
                j += 1
                pt1 = points[0]
                pt2 = points[1]
                pt3 = points[2]
                pt4 = points[3]
                # Rotate the crop by the angle of the pt1 -> pt2 edge.
                degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))
                text_img = dumpRotateImage(im[:, :, ::-1], degree, pt1, pt2,
                                           pt3, pt4)
                text_img = cv2.cvtColor(text_img, cv2.COLOR_BGR2GRAY)
                text_h, text_w = text_img.shape[:2]
                # Skip crops at least twice as tall as they are wide.
                if text_h // text_w > 1:
                    continue

                # Resize to the 32-pixel height the recogniser expects,
                # keeping the aspect ratio.
                dst_h = 32
                dst_w = text_w * dst_h // text_h
                text_img = cv2.resize(text_img, (dst_w, dst_h))
                X = text_img.reshape([1, 32, -1, 1])
                y_pred = recognition_model.predict(X)
                y_pred = y_pred[:, :, :]
                out = _decode(y_pred)

                # Draw the recognised text with PIL, then convert back to BGR.
                img_PIL = Image.fromarray(
                    cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB))
                font = ImageFont.truetype('./utils/simsun.ttc', 12)
                fillColor = (255, 0, 0)
                draw = ImageDraw.Draw(img_PIL)
                if out is None:
                    out = ''
                draw.text(pt4, out, font=font, fill=fillColor)
                draw_img = cv2.cvtColor(np.asarray(img_PIL),
                                        cv2.COLOR_RGB2BGR)
                outf.write('{}. \t{}\n'.format(j, out))

        if not FLAGS.no_write_images:
            img_path = os.path.join(FLAGS.output_dir, os.path.basename(im_fn))
            cv2.imwrite(img_path, draw_img)

    print('total time = ', time.time() - t0)
def main(argv=None):
    """Detect text boxes for every image and either draw or crop them.

    Restores weights from ``FLAGS.checkpoint_path`` (used directly as the
    checkpoint path), runs the network plus ``detect`` on each image from
    ``get_images()`` and writes one ``<stem>.txt`` of quadrilaterals into
    ``FLAGS.output_dir``. With ``FLAGS.is_cropping`` set, each box is cropped
    into ``<output_dir>/crop``; otherwise it is outlined on the image.

    Args:
        argv: Unused.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST: an already existing output directory is fine.
        if e.errno != 17:
            raise
    if not os.path.isdir(os.path.join(FLAGS.output_dir, "crop")):
        os.makedirs(os.path.join(FLAGS.output_dir, "crop"))
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)
        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # The latest-checkpoint lookup was disabled; the flag is passed
            # to restore() as-is.
            saver.restore(sess, FLAGS.checkpoint_path)
            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel axis before inference.
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # (Optional TF timeline tracing was disabled here.)
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))
                if boxes is not None:
                    # Map boxes back to the original image and clip to bounds.
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)
                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))
                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))
                    with open(res_file, 'w') as f:
                        num = 0
                        # NOTE(review): xrange exists only on Python 2 unless
                        # the module imports it (e.g. from six.moves) - confirm.
                        for i in xrange(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            # Skip boxes with an edge shorter than 5 pixels.
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                                    box[3] - box[0]) < 5:
                                continue
                            num += 1
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            if not FLAGS.is_cropping:
                                cv2.polylines(
                                    im[:, :, ::-1],
                                    [box.astype(np.int32).reshape((-1, 1, 2))],
                                    True,
                                    color=(255, 255, 0),
                                    thickness=2)
                            else:
                                # presumably lt/rt/lb/rb mean left-top,
                                # right-top, left-bottom, right-bottom; the
                                # index mapping assumes detect()'s point
                                # order - TODO confirm
                                lt_x = box[2, 0]
                                lt_y = box[2, 1]
                                rt_x = box[3, 0]
                                rt_y = box[3, 1]
                                lb_x = box[1, 0]
                                lb_y = box[1, 1]
                                rb_x = box[0, 0]
                                rb_y = box[0, 1]
                                # Snap the corners outward to an axis-aligned
                                # rectangle. Later checks read values set by
                                # earlier ones, so the order matters.
                                if lt_x > lb_x:
                                    lt_x = lb_x
                                if lt_y > rt_y:
                                    lt_y = rt_y
                                if rt_x < rb_x:
                                    rt_x = rb_x
                                if rt_y > lt_y:
                                    rt_y = lt_y
                                if lb_x > lt_x:
                                    lb_x = lt_x
                                if lb_y < rb_y:
                                    lb_y = rb_y
                                if rb_x < rt_x:
                                    rb_x = rt_x
                                if rb_y < lb_y:
                                    rb_y = lb_y
                                # (An optional 3-pixel padding was disabled.)
                                crop_img = im[int(lt_y):int(lb_y),
                                              int(lt_x):int(rt_x)]
                                cv2.imwrite(
                                    os.path.join(FLAGS.output_dir, "crop",
                                                 ("%d_" % i) +
                                                 os.path.basename(im_fn)),
                                    crop_img[:, :, ::-1])
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
def main(argv=None):
    """Run pixel-based text detection on every image and save boxes and overlays.

    For each image from ``get_images()`` the network's class score and
    pairwise-softmaxed geometry output go through ``pixel_detect``. Contours
    of the resulting map are turned into minimum-area rectangles, drawn on a
    resized copy of the image, and written as 8-coordinate lines to
    ``<FLAGS.output_dir>/res_<stem>.txt``.

    Args:
        argv: Unused.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST: an already existing output directory is fine.
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        f_score, f_geometry = model.model(input_images, is_training=False)

        # Keep channel 1 of the 2-way class softmax.
        cls_score = tf.nn.softmax(f_score)[:, :, :, 1:2]
        # Softmax over consecutive channel pairs of the geometry output, then
        # restore the original 4-D shape.
        pixel_score = tf.reshape(f_geometry, [-1, 2])
        pixel_score = tf.nn.softmax(pixel_score)
        pixel_shape = tf.shape(f_geometry)
        pixel_score = tf.reshape(
            pixel_score,
            [pixel_shape[0], pixel_shape[1], pixel_shape[2], pixel_shape[3]])

        # Restore the exponential-moving-average shadow values of the weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        timer = {'net': 0}

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel axis before inference.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                # Fixed: Python-2-only print statements replaced by a form
                # that prints the same text on Python 2 and 3.
                print('{} {}'.format(ratio_h, ratio_w))

                score, geometry = sess.run(
                    [cls_score, pixel_score],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start_time
                print('net time:' + str(timer['net'] * 1000) + 'ms')

                cv2.imwrite('./score_map.jpg',
                            np.array(score[0, :, :, 0] * 255, dtype=np.uint8))
                score_map_res = pixel_detect(score_map=score,
                                             geo_map=geometry)
                cv2.imwrite('./img.jpg', score_map_res * 255)

                # findContours returns 3 values on OpenCV 3 and 2 on OpenCV
                # 2/4; the contour list is always second from the end.
                contours = cv2.findContours(score_map_res, cv2.RETR_TREE,
                                            cv2.CHAIN_APPROX_SIMPLE)[-2]

                im_ori = cv2.imread(im_fn)
                im_ori_resize = cv2.resize(
                    im_ori, (int(im_ori.shape[1] * ratio_w),
                             int(im_ori.shape[0] * ratio_h)))

                boxes = []
                for contour in contours:
                    np_contours = np.array(np.reshape(contour, [-1, 2]),
                                           dtype=np.float32)
                    rectangle = cv2.minAreaRect(np_contours)
                    box = np.int0(cv2.boxPoints(rectangle))
                    # Scale box coordinates by 4 before drawing on the
                    # resized image (presumably the score map is at 1/4
                    # resolution - confirm against the model).
                    box[:, 0] = box[:, 0] * 4
                    box[:, 1] = box[:, 1] * 4
                    cv2.drawContours(im_ori_resize, [box], -1, (0, 255, 0), 3)
                    # Then map to original-image coordinates for the txt file.
                    box[:, 0] = box[:, 0] / ratio_w
                    box[:, 1] = box[:, 1] / ratio_h
                    boxes.append(box)

                img_path = os.path.join(FLAGS.output_dir,
                                        os.path.basename(im_fn))
                cv2.imwrite(img_path, im_ori_resize)

                # save to file (boxes is always a list, so the file is
                # written even when it is empty)
                res_file = os.path.join(
                    FLAGS.output_dir, 'res_{}.txt'.format(
                        os.path.basename(im_fn).split('.')[0]))
                with open(res_file, 'w') as f:
                    for box in boxes:
                        box = order_points(box)
                        f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                            box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                            box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
def train(self):
    """Train the model, logging, checkpointing and validating periodically.

    Builds the graph from ``self._data_layer()``, ``model``,
    ``self._loss_functions`` and ``self._optimizer``, resumes from the latest
    checkpoint in ``self.checkpoint_path`` if there is one, and runs until
    ``self.num_iter``. Every ``self.log_iter`` steps the mean training loss
    is logged, every ``self.save_iter`` steps a checkpoint is saved, and
    every ``self.val_iter`` steps the validation loss is computed and logged.
    The process exits if the loss becomes non-finite.
    """
    # iteration number
    global_step = tf.Variable(1, dtype=tf.int32, trainable=False,
                              name='iter_number')

    # training graph
    iterator = self._data_layer()
    image_orig, image_noisy = iterator.get_next()
    training = tf.placeholder(tf.bool, name='is_training')
    logits = model(image_noisy, training=training)
    loss = self._loss_functions(logits, image_orig)
    optimizer = self._optimizer(loss, global_step)

    # summary placeholders
    streaming_loss_p = tf.placeholder(tf.float32)
    validation_loss_p = tf.placeholder(tf.float32)
    summ_op_train = tf.summary.scalar('streaming_loss', streaming_loss_p)
    summ_op_test = tf.summary.scalar('validation_loss', validation_loss_p)

    # don't allocate entire gpu memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(iterator.initializer)
        writer = tf.summary.FileWriter(self.checkpoint_path, sess.graph)
        saver = tf.train.Saver(max_to_keep=None)  # keep all checkpoints
        ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)

        # resume training if a checkpoint exists
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loaded parameters from {}'.format(
                ckpt.model_checkpoint_path))
        initial_step = global_step.eval()

        # train the model
        streaming_loss = 0
        for i in range(initial_step, self.num_iter + 1):
            _, loss_batch = sess.run([optimizer, loss],
                                     feed_dict={training: True})
            if not np.isfinite(loss_batch):
                print('loss diverged, stopping')
                exit()

            # log summary
            streaming_loss += loss_batch
            if i % self.log_iter == self.log_iter - 1:
                streaming_loss /= self.log_iter
                print(i + 1, streaming_loss)
                summary_train = sess.run(
                    summ_op_train,
                    feed_dict={streaming_loss_p: streaming_loss})
                writer.add_summary(summary_train, global_step=i)
                streaming_loss = 0

            # save model
            if i % self.save_iter == self.save_iter - 1:
                saver.save(sess,
                           os.path.join(self.checkpoint_path, 'checkpoint'),
                           global_step=global_step)
                print("Model saved!")

            # run validation
            if i % self.val_iter == self.val_iter - 1:
                print("Running validation.")
                self.data_generator.set_mode(is_training=False)
                sess.run(iterator.initializer)
                num_val_batches = (self.data_generator.num_val //
                                   self.batch_size)
                validation_loss = 0
                for _ in range(num_val_batches):
                    loss_batch = sess.run(loss, feed_dict={training: False})
                    validation_loss += loss_batch
                # Fixed: average over the batch count. The old code divided
                # by the last loop index, which is off by one and fails with
                # zero or one validation batches.
                if num_val_batches:
                    validation_loss /= num_val_batches
                print("Validation loss: {}".format(validation_loss))
                summary_test = sess.run(
                    summ_op_test,
                    feed_dict={validation_loss_p: validation_loss})
                writer.add_summary(summary_test, global_step=i)
                # Switch the data pipeline back to training mode.
                self.data_generator.set_mode(is_training=True)
                sess.run(iterator.initializer)

        writer.close()
def init_model():
    """Create the image placeholder and wire it through ``model.model``.

    Returns:
        A ``(input_image, classes)`` pair: the float32 placeholder declared
        with shape ``[None, None, None, 3]`` and the second of the two values
        returned by ``model.model`` for that placeholder.
    """
    image_ph = tf.placeholder(
        tf.float32,
        shape=[None, None, None, 3],
        name='input_image',
    )
    # Only the second output is handed back; the first one is discarded.
    _unused, class_output = model.model(image_ph)
    return image_ph, class_output
def main(argv=None):
    """Run segmentation inference and save prediction/ground-truth images.

    Recreates ``FLAGS.result_path`` from scratch, restores the model from
    ``FLAGS.checkpoint_path``, then for every (image, annotation) pair listed
    in ``data/arroyo_seg.pkl`` writes the colourised prediction next to the
    colourised annotation into the result directory.
    """
    import os

    # Start from an empty result directory.
    out_dir = FLAGS.result_path
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_images')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        logits = model.model(input_images, is_training=False)
        pred = tf.argmax(logits, dimension=3)

        # The moving-average object is created but the saver below restores
        # the plain global variables rather than their averaged shadows.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(tf.global_variables())

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            model_path = FLAGS.checkpoint_path
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only use a list file from a trusted source.
            pkl_file = 'data/arroyo_seg.pkl'
            with open(pkl_file, 'rb') as f:
                im_fn_list, anno_files = pickle.load(f)

            for im_fn, seg_fn in zip(im_fn_list, anno_files):
                im = np.array(Image.open(im_fn))
                seg = np.array(Image.open(seg_fn))
                # The resize ratios are returned but not needed here.
                im_resized, (_ratio_h, _ratio_w) = resize_image(im, size=32)

                start = time.time()
                fetched = sess.run([pred],
                                   feed_dict={input_images: [im_resized]})
                pred_re = np.array(np.squeeze(fetched))

                # Map label value 255 in the annotation to class 0.
                seg[seg == 255] = 0

                img = visualize_segmentation_adaptive(pred_re, pascal_voc_lut)
                img_seg = visualize_segmentation_adaptive(seg, pascal_voc_lut)
                # The measured time covers inference and both visualisations.
                _diff_time = time.time() - start

                out_file = os.path.join(out_dir, os.path.basename(im_fn))
                side_by_side = np.hstack((img, img_seg))
                cv2.imwrite(out_file, side_by_side)
                print('{}: cost {:.0f}ms'.format(im_fn, _diff_time * 1000))
def ckpt2pb(ckptpath):
    """Freeze the checkpoint at ``ckptpath`` into an inference graph ``db.pb``.

    Rebuilds the model in a fresh default graph, restores the moving-average
    variables from the checkpoint, rewrites the ops listed below so the
    variables can be turned into constants, and serialises the optimised
    graph to ``db.pb`` in the current working directory.

    Args:
        ckptpath: checkpoint path passed to ``tf.train.Saver.restore``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    tf.reset_default_graph()

    # Single output node kept in the frozen graph.
    output_node = 'feature_fusion/binarize_branch/Conv2d_transpose_1/Sigmoid'

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    # Called for its graph-building side effect; the output is looked up by
    # node name below, so the returned tensors are not needed.
    model.model(input_images, is_training=False)

    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    gpu_config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options,
                                allow_soft_placement=True)

    # The context manager releases the session once freezing is done.
    with tf.Session(config=gpu_config) as sess:
        saver.restore(sess, ckptpath)

        input_graph_def = sess.graph.as_graph_def()
        for node in input_graph_def.node:
            if node.op == 'RefSwitch':
                node.op = 'Switch'
                # Snapshot the inputs because entries are written back while
                # walking them.
                for index, input_name in enumerate(list(node.input)):
                    if 'moving_' in input_name:
                        node.input[index] = input_name + '/read'
            elif node.op == 'AssignSub':
                node.op = 'Sub'
                if 'use_locking' in node.attr:
                    del node.attr['use_locking']
            elif node.op == 'AssignAdd':
                node.op = 'Add'
                if 'use_locking' in node.attr:
                    del node.attr['use_locking']

        constant_graph = graph_util.convert_variables_to_constants(
            sess, input_graph_def, [output_node])

    output_graph_def = optimize_for_inference(
        input_graph_def=constant_graph,
        input_node_names=['input_images'],
        output_node_names=[output_node],
        placeholder_type_enum=[tf.float32.as_datatype_enum])

    with tf.gfile.FastGFile('db.pb', mode='wb') as f:
        f.write(output_graph_def.SerializeToString())
def main(argv=None):
    """Run text detection on every input image and write boxes and images.

    Restores the moving-average weights from the latest checkpoint under
    ``FLAGS.model_path``. For each image returned by ``get_images()`` it runs
    the network, decodes boxes with ``detect`` and writes one ``<name>.txt``
    of box corners into ``FLAGS.output_dir``. Unless
    ``FLAGS.no_write_images`` is set, it also writes the image with the kept
    boxes drawn on it.
    """
    import errno
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # An already existing output directory is fine; anything else is not.
        if e.errno != errno.EEXIST:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Score map and geometry map predicted by the network.
        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(0.997,
                                                              global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.model_path)
            # CheckpointState exposes the newest checkpoint as
            # ``model_checkpoint_path``.
            model_path = os.path.join(
                FLAGS.model_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel axis to get RGB.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                # Resize for the network and keep the ratios so boxes can be
                # mapped back to the original image.
                im_resized, ratio_h, ratio_w = data_util.resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score, geo_map=geometry,
                                      timer=timer)
                print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                    im_fn, timer['net']*1000, timer['restore']*1000,
                    timer['nms']*1000))

                if boxes is not None:
                    # Keep the 8 corner coordinates and undo the resize.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(
                            os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Skip boxes with a side shorter than 5 pixels.
                            if (np.linalg.norm(box[0] - box[1]) < 5
                                    or np.linalg.norm(box[3] - box[0]) < 5):
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True, color=(255, 255, 0), thickness=1)

                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
def main(argv=None):
    """Evaluate pixel accuracy and mean IoU over the TFRecord test set.

    Reads image/annotation pairs from ``FLAGS.test_data_path``, resizes each
    image to a multiple of 32 for the network, resizes the prediction back to
    the original size and accumulates streaming accuracy and mean IoU. Labels
    above ``FLAGS.num_classes - 1`` are given zero weight. Weights come from
    the latest checkpoint under ``FLAGS.checkpoint_path``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    filename_queue = tf.train.string_input_producer([FLAGS.test_data_path],
                                                    num_epochs=1)
    image, annotation = \
        read_tfrecord_and_decode_into_image_annotation_pair_tensors(
            filename_queue)

    image_batch_tensor = tf.expand_dims(image, axis=0)
    annotation_batch_tensor = tf.expand_dims(annotation, axis=0)

    # Round height and width to the nearest multiple of 32 for the network.
    input_image_shape = tf.shape(image_batch_tensor)
    image_height_width = input_image_shape[1:3]
    image_height_width_float = tf.to_float(image_height_width)
    image_height_width_multiple = tf.to_int32(
        tf.round(image_height_width_float / 32) * 32)
    image_batch_tensor = tf.image.resize_images(image_batch_tensor,
                                                image_height_width_multiple)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    logits = model.model(image_batch_tensor, is_training=False)
    pred = tf.argmax(logits, dimension=3)
    pred = tf.expand_dims(pred, 3)

    # Bring the prediction (and annotation) back to the original size.
    pred = tf.image.resize_nearest_neighbor(images=pred,
                                            size=image_height_width)
    annotation_batch_tensor = tf.image.resize_nearest_neighbor(
        images=annotation_batch_tensor, size=image_height_width)

    pred = tf.reshape(pred, [-1, ])
    gt = tf.reshape(annotation_batch_tensor, [-1, ])

    # Pixels whose label exceeds the last class index get weight 0 and their
    # label is replaced by 0 so the metric ops only see valid class ids.
    temp = tf.less_equal(gt, FLAGS.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

    acc, acc_update_op = tf.contrib.metrics.streaming_accuracy(
        pred, gt, weights=weights)
    miou, miou_update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=FLAGS.num_classes, weights=weights)

    with tf.get_default_graph().as_default():
        global_vars_init_op = tf.global_variables_initializer()
        local_vars_init_op = tf.local_variables_initializer()
        init = tf.group(local_vars_init_op, global_vars_init_op)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sess.run(init)

            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for i in range(1449):
                start = time.time()
                image_np, annotation_np, pred_np, tmp_acc, tmp_miou = sess.run(
                    [image, annotation, pred, acc_update_op, miou_update_op])
                _diff_time = time.time() - start
                # Format the string itself: print() returns None, so calling
                # .format on its result fails on Python 3.
                print('{}: cost {:.0f}ms'.format(i, _diff_time * 1000))

            acc_res = sess.run(acc)
            miou_res = sess.run(miou)

            print("Pascal VOC 2012 validation dataset pixel accuracy: " +
                  str(acc_res))
            print("Pascal VOC 2012 validation dataset Mean IoU: " +
                  str(miou_res))

            coord.request_stop()
            coord.join(threads)
def main(argv=None):
    """Train the classifier with Adam, periodic evaluation and early stopping.

    Builds the graph on the GPU selected by ``FLAGS.gpu`` ("0" or "1"),
    optionally restores the latest checkpoint from ``FLAGS.model`` or loads
    pretrained weights, then trains for up to ``FLAGS.max_steps`` steps.
    Every ``FLAGS.evaluate_steps`` steps the model is validated: a new best
    F1 saves a checkpoint, otherwise the early-stop counter is incremented.
    Every ``FLAGS.decay_steps`` steps the learning rate is multiplied by
    ``FLAGS.decay_rate``.
    """
    # Select the GPU
    if FLAGS.gpu not in ("0", "1"):
        logger.error("无法确定使用哪一个GPU,退出")
        exit()

    logger.info("使用GPU%s显卡进行训练",FLAGS.gpu)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger.info(
        "本次使用的参数:\nlearning_rate:%f\ndecay_steps:%f\nmax_steps:%d\nevaluate_steps:%d\nmodel:%s\nlambda1:%d\nlogs_path:%s\nrestore:%r\ndebug:%r\nsave_checkpoint_steps:%d",
        FLAGS.learning_rate,
        FLAGS.decay_steps,
        FLAGS.max_steps,
        FLAGS.evaluate_steps,
        FLAGS.model,
        FLAGS.lambda1,
        FLAGS.logs_path,
        FLAGS.restore,
        FLAGS.debug,
        FLAGS.save_checkpoint_steps)

    now = datetime.datetime.now()
    StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(os.path.join(FLAGS.logs_path, StyleTime))
    if not os.path.exists(FLAGS.model):
        os.makedirs(FLAGS.model)

    # Input image batch: [batch, height, width, 3 channels]
    ph_input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                    name='ph_input_image')
    ph_label = tf.placeholder(tf.int64, shape=[None], name='ph_label')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    # The learning rate lives in a non-trainable variable so it can be
    # decayed in place while training.
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    adam_opt = tf.train.AdamOptimizer(learning_rate)

    cls_prob, cls_preb = model.model(ph_input_image)
    cross_entropy = model.loss(cls_prob, ph_label)
    batch_norm_updates_op = tf.group(
        *tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # Compute the gradients, then apply them to the variables
    grads = adam_opt.compute_gradients(cross_entropy)
    apply_gradient_op = adam_opt.apply_gradients(grads,
                                                 global_step=global_step)

    # Variables holding the latest validation metrics
    v_recall = tf.Variable(0.001, trainable=False)
    v_precision = tf.Variable(0.001, trainable=False)
    v_accuracy = tf.Variable(0.001, trainable=False)
    v_f1 = tf.Variable(0.001, trainable=False)
    tf.summary.scalar("Recall",v_recall)
    tf.summary.scalar("Precision",v_precision)
    tf.summary.scalar("F1",v_f1)

    # Build the update ops once, fed through placeholders. Creating tf.assign
    # ops inside the training loop would add new nodes to the graph on every
    # evaluation and every decay step.
    ph_metrics = tf.placeholder(tf.float32, shape=[4], name='ph_metrics')
    update_metrics_op = tf.group(
        tf.assign(v_f1, ph_metrics[0]),
        tf.assign(v_recall, ph_metrics[1]),
        tf.assign(v_precision, ph_metrics[2]),
        tf.assign(v_accuracy, ph_metrics[3]))
    ph_new_learning_rate = tf.placeholder(tf.float32, shape=[],
                                          name='ph_new_learning_rate')
    update_learning_rate_op = tf.assign(learning_rate, ph_new_learning_rate)

    summary_op = tf.summary.merge_all()
    logger.info("summary定义完毕")

    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # train_op does nothing itself; running it forces the moving-average
    # update, the gradient step and the batch-norm updates to run.
    with tf.control_dependencies([variables_averages_op,
                                  apply_gradient_op,
                                  batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.logs_path,StyleTime), tf.get_default_graph())

    if FLAGS.pretrained_model_path is not None:
        logger.info('加载vgg模型:%s',FLAGS.pretrained_model_path)
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    # Early-stopping state
    best_f1 = 0
    early_stop_counter = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.model)
            logger.debug("最新的模型文件:%s",ckpt)
            # NOTE(review): the saver covers all global variables, so the
            # learning_rate variable is restored from the checkpoint too.
            saver.restore(sess, ckpt)
        else:
            logger.info("从头开始训练模型")
            sess.run(tf.global_variables_initializer())
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        logger.debug("开始加载训练数据")
        # get_batch is consumed with next() below, one batch per step
        data_generator = data_provider.get_batch(
            num_workers=FLAGS.num_readers,
            label_file=FLAGS.train_label,
            batch_num=FLAGS.train_batch)
        start = time.time()
        train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                         time.localtime(start))

        logger.debug("开始训练")
        for step in range(FLAGS.max_steps):
            image_list, label_list = next(data_generator)
            logger.debug("成功加载图片%d张,标签%d个:",len(image_list),len(label_list))

            image_list = data_util.prepare4vgg(image_list)
            logger.debug("开始第%d步训练,运行sess.run,数据shape:%r",step,image_list.shape)
            _, summary_str, _ = sess.run(
                [train_op, summary_op, cls_prob],
                feed_dict={ph_input_image: image_list,
                           ph_label: label_list})
            logger.info("结束第%d步训练,结束sess.run",step)
            summary_writer.add_summary(summary_str, global_step=step)

            if step != 0 and step % FLAGS.evaluate_steps == 0:
                logger.info("在第%d步,开始进行模型评估",step)
                accuracy_value, precision_value, recall_value, f1_value = \
                    validate(sess, cls_preb, ph_input_image, ph_label)

                if f1_value > best_f1:
                    logger.info("新F1值[%f]大于过去最好的F1值[%f],早停计数器重置",f1_value,best_f1)
                    best_f1 = f1_value
                    early_stop_counter = 0
                    # Save a checkpoint whenever the F1 score improves
                    filename = ('ctpn-{:s}-{:d}'.format(train_start_time,
                                                        step + 1) + '.ckpt')
                    filename = os.path.join(FLAGS.model, filename)
                    saver.save(sess, filename)
                    logger.info("在第%d步,保存了最好的模型文件:%s,F1:%f",step,filename,best_f1)
                else:
                    logger.info("新F1值[%f]小于过去最好的F1值[%f],早停计数器+1", f1_value, best_f1)
                    early_stop_counter += 1

                # Publish F1, recall, precision and accuracy to the variables
                sess.run(update_metrics_op,
                         feed_dict={ph_metrics: [f1_value,
                                                 recall_value,
                                                 precision_value,
                                                 accuracy_value]})
                logger.info("在第%d步,模型评估结束", step)

                if early_stop_counter > FLAGS.early_stop:
                    logger.warning("达到了早停计数次数:%d次,训练提前结束",early_stop_counter)
                    break

            if step != 0 and step % FLAGS.decay_steps == 0:
                # Read the current value once and reuse it for log and update
                current_lr = sess.run(learning_rate)
                decayed_lr = current_lr * FLAGS.decay_rate
                logger.info("学习率(learning rate)衰减:%f=>%f",current_lr,decayed_lr)
                sess.run(update_learning_rate_op,
                         feed_dict={ph_new_learning_rate: decayed_lr})