def _get_init_fn():
    """Returns a function run by the chief worker to warm-start the training."""
    if FLAGS.checkpoint_path is None:
        return None

    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path)
    return slim.assign_from_checkpoint_fn(FLAGS.checkpoint_path,
                                          variables_to_restore,
                                          ignore_missing_vars=False)
def _get_init_fn(): """Returns a function run by the chief worker to warm-start the training. Note that the init_fn is only run when initializing the model during the very first global step. Returns: An init function run by the supervisor. """ if FLAGS.checkpoint_path is None: return None exclusions = [] if FLAGS.checkpoint_exclude_scopes: exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')] # TODO(sguada) variables.filter_variables() variables_to_restore = [] for var in slim.get_model_variables(): #print var.op.name excluded = False for exclusion in exclusions: if var.op.name.startswith(exclusion): excluded = True break if not excluded: variables_to_restore.append(var) tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path) return slim.assign_from_checkpoint_fn(FLAGS.checkpoint_path,variables_to_restore,ignore_missing_vars=False)
def main():
    model = config.get('config', 'model')
    cachedir = utils.get_cachedir(config)
    with open(os.path.join(cachedir, 'names'), 'r') as f:
        names = [line.strip() for line in f]
    width = config.getint(model, 'width')
    height = config.getint(model, 'height')
    yolo = importlib.import_module('model.' + model)
    cell_width, cell_height = utils.calc_cell_width_height(config, width, height)
    tf.logging.info('(width, height)=(%d, %d), (cell_width, cell_height)=(%d, %d)' %
                    (width, height, cell_width, cell_height))
    with tf.Session() as sess:
        paths = [os.path.join(cachedir, profile + '.tfrecord')
                 for profile in args.profile]
        num_examples = sum(sum(1 for _ in tf.python_io.tf_record_iterator(path))
                           for path in paths)
        tf.logging.warn('num_examples=%d' % num_examples)
        image_rgb, labels = utils.data.load_image_labels(
            paths, len(names), width, height, cell_width, cell_height, config)
        image_std = tf.image.per_image_standardization(image_rgb)
        image_rgb = tf.cast(image_rgb, tf.uint8)
        ph_image = tf.placeholder(image_std.dtype,
                                  [1] + image_std.get_shape().as_list(),
                                  name='ph_image')
        global_step = tf.contrib.framework.get_or_create_global_step()
        builder = yolo.Builder(args, config)
        builder(ph_image)
        variables_to_restore = slim.get_variables_to_restore()
        ph_labels = [tf.placeholder(l.dtype, [1] + l.get_shape().as_list(),
                                    name='ph_' + l.op.name) for l in labels]
        with tf.name_scope('total_loss') as name:
            builder.create_objectives(ph_labels)
            total_loss = tf.losses.get_total_loss(name=name)
        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        _image_rgb, _image_std, _labels = sess.run([image_rgb, image_std, labels])
        coord.request_stop()
        coord.join(threads)
        feed_dict = dict([(ph, np.expand_dims(d, 0))
                          for ph, d in zip(ph_labels, _labels)])
        feed_dict[ph_image] = np.expand_dims(_image_std, 0)
        logdir = utils.get_logdir(config)
        assert os.path.exists(logdir)
        model_path = tf.train.latest_checkpoint(logdir)
        tf.logging.info('load ' + model_path)
        slim.assign_from_checkpoint_fn(model_path, variables_to_restore)(sess)
        tf.logging.info('global_step=%d' % sess.run(global_step))
        tf.logging.info('total_loss=%f' % sess.run(total_loss, feed_dict))
        _ = Drawer(sess, names, builder.model.cell_width,
                   builder.model.cell_height, _image_rgb, _labels,
                   builder.model, feed_dict)
        plt.show()
def main():
    model = config.get('config', 'model')
    yolo = importlib.import_module('model.' + model)
    width = config.getint(model, 'width')
    height = config.getint(model, 'height')
    preprocess = getattr(importlib.import_module('detect'), args.preprocess)
    with tf.Session() as sess:
        ph_image = tf.placeholder(tf.float32, [1, height, width, 3],
                                  name='ph_image')
        builder = yolo.Builder(args, config)
        builder(ph_image)
        global_step = tf.contrib.framework.get_or_create_global_step()
        model_path = tf.train.latest_checkpoint(utils.get_logdir(config))
        tf.logging.info('load ' + model_path)
        slim.assign_from_checkpoint_fn(model_path, tf.global_variables())(sess)
        tf.logging.info('global_step=%d' % sess.run(global_step))
        tensors = [builder.model.conf, builder.model.xy_min, builder.model.xy_max]
        tensors = [tf.check_numerics(t, t.op.name) for t in tensors]
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, image_bgr = cap.read()
                assert ret
                image_height, image_width, _ = image_bgr.shape
                scale = [image_width / builder.model.cell_width,
                         image_height / builder.model.cell_height]
                image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
                image_std = np.expand_dims(
                    preprocess(cv2.resize(image_rgb, (width, height))).astype(np.float32), 0)
                feed_dict = {ph_image: image_std}
                conf, xy_min, xy_max = sess.run(tensors, feed_dict)
                boxes = utils.postprocess.non_max_suppress(
                    conf[0], xy_min[0], xy_max[0],
                    args.threshold, args.threshold_iou)
                for _conf, _xy_min, _xy_max in boxes:
                    index = np.argmax(_conf)
                    if _conf[index] > args.threshold:
                        _xy_min = (_xy_min * scale).astype(np.int)
                        _xy_max = (_xy_max * scale).astype(np.int)
                        cv2.rectangle(image_bgr, tuple(_xy_min), tuple(_xy_max),
                                      (255, 0, 255), 3)
                        cv2.putText(image_bgr,
                                    builder.names[index] + ' (%.1f%%)' % (_conf[index] * 100),
                                    tuple(_xy_min), cv2.FONT_HERSHEY_SIMPLEX,
                                    0.6, (255, 255, 255), 2)
                cv2.imshow('detection', image_bgr)
                cv2.waitKey(1)
        finally:
            cv2.destroyAllWindows()
            cap.release()
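# Hedged sketch (assumption, not from the original source): the `preprocess`
# function above is looked up by name from detect.py, so its body is not shown
# here. A darknet-style normalization to [0, 1] is one plausible implementation:
import numpy as np

def darknet_preprocess(image_rgb):
    # Scale uint8 RGB pixels to [0, 1] floats.
    return image_rgb.astype(np.float32) / 255.0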
def _get_init_fn(): """Returns a function run by the chief worker to warm-start the training. Note that the init_fn is only run when initializing the model during the very first global step. Returns: An init function run by the supervisor. """ if FLAGS.checkpoint_path is None: return None # Warn the user if a checkpoint exists in the train_dir. Then we'll be # ignoring the checkpoint anyway. if tf.train.latest_checkpoint(FLAGS.train_dir): tf.logging.info( 'Ignoring --checkpoint_path because a checkpoint already exists in %s' % FLAGS.train_dir) return None exclusions = [] if FLAGS.checkpoint_exclude_scopes: exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')] # TODO(sguada) variables.filter_variables() variables_to_restore = [] for var in slim.get_model_variables(): excluded = False for exclusion in exclusions: if var.op.name.startswith(exclusion): excluded = True break if not excluded: variables_to_restore.append(var) if tf.gfile.IsDirectory(FLAGS.checkpoint_path): checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) else: checkpoint_path = FLAGS.checkpoint_path tf.logging.info('Fine-tuning from %s' % checkpoint_path) return slim.assign_from_checkpoint_fn( checkpoint_path, variables_to_restore, ignore_missing_vars=FLAGS.ignore_missing_vars)
def get_init_fn():
    """Returns a function run by the chief worker to warm-start the training."""
    checkpoint_exclude_scopes = ["InceptionV1/Logits"]
    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
    checkpoints_dir = "D:\\zero\\work\\models-master\\model\\"

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    return slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
        variables_to_restore)
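# Usage sketch (assumption): applying the init_fn returned by get_init_fn above
# inside a plain session. Variables in the excluded InceptionV1/Logits scope are
# left at their freshly initialized values.
init_fn = get_init_fn()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # overwrites the restored subset with checkpoint values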
def ing_models(extracted_img_list):
    # VARIABLES
    # CKPT_PATH = "cosmetic-300/cosmetic-300"
    CKPT_PATH = os.path.join(settings.BASE_DIR, 'cosmetic-300/cosmetic-300')
    MEAN_PIXEL = [123.68, 116.78, 103.94]
    NCLASS = 12
    inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    is_training = tf.placeholder(tf.bool)

    # MODEL PREPARATION
    with slim.arg_scope(resnet.resnet_arg_scope()):
        logit, model = resnet.resnet_v1_50(inputs, num_classes=NCLASS,
                                           is_training=is_training)
    init_fn = slim.assign_from_checkpoint_fn(CKPT_PATH,
                                             slim.get_variables_to_restore(),
                                             ignore_missing_vars=True)

    # CREATE SESSION
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    init_fn(sess)

    predict_list = []
    if extracted_img_list:
        for extracted_img in extracted_img_list:
            x = Pil_image.open(extracted_img.image.path)
            x = x.convert('RGB')
            x = x.resize((224, 224))
            x = np.array(x)
            x = x.astype(np.float32) - MEAN_PIXEL
            predict_images = []
            predict_images.append(x)
            predict_images = np.array(predict_images)
            predict = sess.run(model["predictions"],
                               feed_dict={inputs: predict_images,
                                          is_training: False})
            predict = np.argmax(predict, 1)
            predict_list.append(predict)

    category_list = []
    if predict_list:
        for predict in predict_list:
            category = Category.objects.get(id=(int(predict)))
            category_list.append(category)

    nickname_id_list = []
    if category_list:
        for category in category_list:
            nickname_queryset = Nickname.objects.filter(category=category)
            if nickname_queryset:
                nickname_id = nickname_queryset[random.randrange(
                    0, len(nickname_queryset))].id
                nickname_id_list.append(nickname_id)

    nickname_list = Nickname.objects.filter(id__in=nickname_id_list)
    if nickname_list:
        for extracted_img, assigned_nickname in zip(extracted_img_list,
                                                    nickname_list):
            extracted_img.nickname = assigned_nickname
            extracted_img.save()
    return nickname_list
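# Standalone sketch (assumption) of the per-image preprocessing used above:
# RGB conversion, resize to the 224x224 network input, and VGG-style mean
# subtraction, yielding a batch of one. 'example.jpg' is a placeholder path.
from PIL import Image as Pil_image
import numpy as np

MEAN_PIXEL = [123.68, 116.78, 103.94]
x = Pil_image.open('example.jpg').convert('RGB').resize((224, 224))
batch = np.expand_dims(np.array(x).astype(np.float32) - MEAN_PIXEL, 0)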
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
    assert(args.arch.multi_scale), 'removed support for old single scale code.'
    # Set up the model.
    tf.set_random_seed(args.solver.seed)
    task_params = args.navtask.task_params

    batch_norm_is_training_op = \
        tf.placeholder_with_default(batch_norm_is_training, shape=[],
                                    name='batch_norm_is_training_op')

    # Setup the inputs
    m.input_tensors = {}
    m.train_ops = {}
    m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
        _inputs(task_params)

    m.init_fn = None

    if task_params.input_type == 'vision':
        m.vision_ops = get_map_from_images(
            m.input_tensors['step']['imgs'], args.mapper_arch,
            task_params, args.solver.freeze_conv,
            args.solver.wt_decay, is_training, batch_norm_is_training_op,
            num_maps=len(task_params.map_crop_sizes))

        # Load variables from snapshot if needed.
        if args.solver.pretrained_path is not None:
            m.init_fn = slim.assign_from_checkpoint_fn(
                args.solver.pretrained_path, m.vision_ops.vars_to_restore)

        # Set up caching of vision features if needed.
        if args.solver.freeze_conv:
            m.train_ops['step_data_cache'] = [m.vision_ops.encoder_output]
        else:
            m.train_ops['step_data_cache'] = []

        # Set up blobs that are needed for the computation in rest of the graph.
        m.ego_map_ops = m.vision_ops.fss_logits
        m.coverage_ops = m.vision_ops.confs_probs

        # Zero pad these to make them same size as what the planner expects.
        for i in range(len(m.ego_map_ops)):
            if args.mapper_arch.pad_map_with_zeros_each[i] > 0:
                paddings = np.zeros((5, 2), dtype=np.int32)
                paddings[2:4, :] = args.mapper_arch.pad_map_with_zeros_each[i]
                paddings_op = tf.constant(paddings, dtype=tf.int32)
                m.ego_map_ops[i] = tf.pad(m.ego_map_ops[i], paddings=paddings_op)
                m.coverage_ops[i] = tf.pad(m.coverage_ops[i], paddings=paddings_op)

    elif task_params.input_type == 'analytical_counts':
        m.ego_map_ops = []
        m.coverage_ops = []
        for i in range(len(task_params.map_crop_sizes)):
            ego_map_op = m.input_tensors['step']['analytical_counts_{:d}'.format(i)]
            coverage_op = tf.cast(tf.greater_equal(
                tf.reduce_max(ego_map_op, reduction_indices=[4], keep_dims=True),
                1), tf.float32)
            coverage_op = tf.ones_like(ego_map_op) * coverage_op
            m.ego_map_ops.append(ego_map_op)
            m.coverage_ops.append(coverage_op)
        m.train_ops['step_data_cache'] = []

    num_steps = task_params.num_steps
    num_goals = task_params.num_goals

    map_crop_size_ops = []
    for map_crop_size in task_params.map_crop_sizes:
        map_crop_size_ops.append(tf.constant(map_crop_size, dtype=tf.int32,
                                             shape=(2,)))

    with tf.name_scope('check_size'):
        is_single_step = tf.equal(
            tf.unstack(tf.shape(m.ego_map_ops[0]), num=5)[1], 1)

    fr_ops = []; value_ops = []
    fr_intermediate_ops = []; value_intermediate_ops = []
    crop_value_ops = []; resize_crop_value_ops = []
    confs = []; occupancys = []

    previous_value_op = None
    updated_state = []; state_names = []

    for i in range(len(task_params.map_crop_sizes)):
        map_crop_size = task_params.map_crop_sizes[i]
        with tf.variable_scope('scale_{:d}'.format(i)):
            # Accumulate the map.
            fn = lambda ns: running_combine(
                m.ego_map_ops[i],
                m.coverage_ops[i],
                m.input_tensors['step']['incremental_locs'] * task_params.map_scales[i],
                m.input_tensors['step']['incremental_thetas'],
                m.input_tensors['step']['running_sum_num_{:d}'.format(i)],
                m.input_tensors['step']['running_sum_denom_{:d}'.format(i)],
                m.input_tensors['step']['running_max_denom_{:d}'.format(i)],
                map_crop_size, ns)

            running_sum_num, running_sum_denom, running_max_denom = \
                tf.cond(is_single_step, lambda: fn(1),
                        lambda: fn(num_steps * num_goals))
            updated_state += [running_sum_num, running_sum_denom,
                              running_max_denom]
            state_names += ['running_sum_num_{:d}'.format(i),
                            'running_sum_denom_{:d}'.format(i),
                            'running_max_denom_{:d}'.format(i)]

            # Concat the accumulated map and goal
            occupancy = running_sum_num / tf.maximum(running_sum_denom, 0.001)
            conf = running_max_denom
            # print occupancy.get_shape().as_list()

            # Concat occupancy, how much occupied and goal.
            with tf.name_scope('concat'):
                sh = [-1, map_crop_size, map_crop_size, task_params.map_channels]
                occupancy = tf.reshape(occupancy, shape=sh)
                conf = tf.reshape(conf, shape=sh)

                sh = [-1, map_crop_size, map_crop_size, task_params.goal_channels]
                goal = tf.reshape(
                    m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)],
                    shape=sh)
                to_concat = [occupancy, conf, goal]

                if previous_value_op is not None:
                    to_concat.append(previous_value_op)

                x = tf.concat(to_concat, 3)

            # Pass the map, previous rewards and the goal through a few
            # convolutional layers to get fR.
            fr_op, fr_intermediate_op = fr_v2(
                x, output_neurons=args.arch.fr_neurons,
                inside_neurons=args.arch.fr_inside_neurons,
                is_training=batch_norm_is_training_op, name='fr',
                wt_decay=args.solver.wt_decay, stride=args.arch.fr_stride)

            # Do Value Iteration on the fR
            if args.arch.vin_num_iters > 0:
                value_op, value_intermediate_op = value_iteration_network(
                    fr_op, num_iters=args.arch.vin_num_iters,
                    val_neurons=args.arch.vin_val_neurons,
                    action_neurons=args.arch.vin_action_neurons,
                    kernel_size=args.arch.vin_ks,
                    share_wts=args.arch.vin_share_wts,
                    name='vin', wt_decay=args.solver.wt_decay)
            else:
                value_op = fr_op
                value_intermediate_op = []

            # Crop out and upsample the previous value map.
            remove = args.arch.crop_remove_each
            if remove > 0:
                crop_value_op = value_op[:, remove:-remove, remove:-remove, :]
            else:
                crop_value_op = value_op
            crop_value_op = tf.reshape(
                crop_value_op, shape=[-1, args.arch.value_crop_size,
                                      args.arch.value_crop_size,
                                      args.arch.vin_val_neurons])
            if i < len(task_params.map_crop_sizes) - 1:
                # Reshape it to shape of the next scale.
                previous_value_op = tf.image.resize_bilinear(
                    crop_value_op, map_crop_size_ops[i + 1], align_corners=True)
                resize_crop_value_ops.append(previous_value_op)

            occupancys.append(occupancy)
            confs.append(conf)
            value_ops.append(value_op)
            crop_value_ops.append(crop_value_op)
            fr_ops.append(fr_op)
            fr_intermediate_ops.append(fr_intermediate_op)

    m.value_ops = value_ops
    m.value_intermediate_ops = value_intermediate_ops
    m.fr_ops = fr_ops
    m.fr_intermediate_ops = fr_intermediate_ops
    m.final_value_op = crop_value_op
    m.crop_value_ops = crop_value_ops
    m.resize_crop_value_ops = resize_crop_value_ops
    m.confs = confs
    m.occupancys = occupancys

    sh = [-1, args.arch.vin_val_neurons * ((args.arch.value_crop_size) ** 2)]
    m.value_features_op = tf.reshape(m.final_value_op, sh,
                                     name='reshape_value_op')

    # Determine what action to take.
    with tf.variable_scope('action_pred'):
        batch_norm_param = args.arch.pred_batch_norm_param
        if batch_norm_param is not None:
            batch_norm_param['is_training'] = batch_norm_is_training_op
        m.action_logits_op, _ = tf_utils.fc_network(
            m.value_features_op, neurons=args.arch.pred_neurons,
            wt_decay=args.solver.wt_decay, name='pred', offset=0,
            num_pred=task_params.num_actions,
            batch_norm_param=batch_norm_param)
        m.action_prob_op = tf.nn.softmax(m.action_logits_op)

    init_state = tf.constant(0., dtype=tf.float32, shape=[
        task_params.batch_size, 1, map_crop_size, map_crop_size,
        task_params.map_channels])

    m.train_ops['state_names'] = state_names
    m.train_ops['updated_state'] = updated_state
    m.train_ops['init_state'] = [init_state for _ in updated_state]
    m.train_ops['step'] = m.action_prob_op
    m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
                             m.input_tensors['common']['goal_loc']]
    m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op

    m.loss_ops = []; m.loss_ops_names = []

    if args.arch.readout_maps:
        with tf.name_scope('readout_maps'):
            all_occupancys = tf.concat(m.occupancys + m.confs, 3)
            readout_maps, probs = readout_general(
                all_occupancys, num_neurons=args.arch.rom_arch.num_neurons,
                strides=args.arch.rom_arch.strides,
                layers_per_block=args.arch.rom_arch.layers_per_block,
                kernel_size=args.arch.rom_arch.kernel_size,
                batch_norm_is_training_op=batch_norm_is_training_op,
                wt_decay=args.solver.wt_decay)

            gt_ego_maps = [m.input_tensors['step']['readout_maps_{:d}'.format(i)]
                           for i in range(len(task_params.readout_maps_crop_sizes))]
            m.readout_maps_gt = tf.concat(gt_ego_maps, 4)
            gt_shape = tf.shape(m.readout_maps_gt)
            m.readout_maps_logits = tf.reshape(readout_maps, gt_shape)
            m.readout_maps_probs = tf.reshape(probs, gt_shape)

            # Add a loss op
            m.readout_maps_loss_op = tf.losses.sigmoid_cross_entropy(
                tf.reshape(m.readout_maps_gt,
                           [-1, len(task_params.readout_maps_crop_sizes)]),
                tf.reshape(readout_maps,
                           [-1, len(task_params.readout_maps_crop_sizes)]),
                scope='loss')
            m.readout_maps_loss_op = 10. * m.readout_maps_loss_op

    ewma_decay = 0.99 if is_training else 0.0
    weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
                          name='weight')

    m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
        compute_losses_multi_or(m.action_logits_op,
                                m.input_tensors['train']['action'],
                                weights=weight,
                                num_actions=task_params.num_actions,
                                data_loss_wt=args.solver.data_loss_wt,
                                reg_loss_wt=args.solver.reg_loss_wt,
                                ewma_decay=ewma_decay)

    if args.arch.readout_maps:
        m.total_loss_op = m.total_loss_op + m.readout_maps_loss_op
        m.loss_ops += [m.readout_maps_loss_op]
        m.loss_ops_names += ['readout_maps_loss']

    m.loss_ops += [m.reg_loss_op, m.data_loss_op, m.total_loss_op]
    m.loss_ops_names += ['reg_loss', 'data_loss', 'total_loss']

    if args.solver.freeze_conv:
        vars_to_optimize = list(set(tf.trainable_variables()) -
                                set(m.vision_ops.vars_to_restore))
    else:
        vars_to_optimize = None

    m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
        m.sync_optimizer = tf_utils.setup_training(
            m.total_loss_op,
            args.solver.initial_learning_rate,
            args.solver.steps_per_decay,
            args.solver.learning_rate_decay,
            args.solver.momentum,
            args.solver.max_steps,
            args.solver.sync,
            args.solver.adjust_lr_sync,
            args.solver.num_workers,
            args.solver.task,
            vars_to_optimize=vars_to_optimize,
            clip_gradient_norm=args.solver.clip_gradient_norm,
            typ=args.solver.typ, momentum2=args.solver.momentum2,
            adam_eps=args.solver.adam_eps)

    if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
        m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
                                                             m.global_step_op)
    elif args.arch.sample_gt_prob_type == 'zero':
        m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
    elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
        step = int(args.arch.sample_gt_prob_type.split('_')[1])
        m.sample_gt_prob_op = tf_utils.step_gt_prob(
            step, m.input_tensors['step']['step_number'][0, 0, 0])

    m.sample_action_type = args.arch.action_sample_type
    m.sample_action_combine_type = args.arch.action_sample_combine_type

    m.summary_ops = {
        summary_mode: _add_summaries(m, args, summary_mode,
                                     args.summary.arop_full_summary_iters)}

    m.init_op = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())
    m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
                                write_version=tf.train.SaverDef.V2)
    return m
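# Hedged note on the scheduled-sampling probability above: following Bengio et
# al. (2015), an inverse sigmoid decay is commonly defined as
#     p(step) = k / (k + exp(step / k)),
# which starts near 1 and decays towards 0. The actual
# tf_utils.inverse_sigmoid_decay implementation may differ; this NumPy sketch
# is an assumption.
import numpy as np

def inverse_sigmoid_decay_np(k, step):
    return k / (k + np.exp(step / float(k)))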
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
    # Set up the model.
    tf.set_random_seed(args.solver.seed)
    task_params = args.navtask.task_params
    num_steps = task_params.num_steps
    num_goals = task_params.num_goals
    num_actions = task_params.num_actions
    num_actions_ = num_actions
    n_views = task_params.n_views

    batch_norm_is_training_op = \
        tf.placeholder_with_default(batch_norm_is_training, shape=[],
                                    name='batch_norm_is_training_op')

    # Setup the inputs
    m.input_tensors = {}
    lstm_states = []
    lstm_state_dims = []
    state_names = []
    updated_state_ops = []
    init_state_ops = []
    if args.arch.lstm_output:
        lstm_states += ['lstm_output']
        lstm_state_dims += [args.arch.lstm_output_dim + task_params.num_actions]
    if args.arch.lstm_ego:
        lstm_states += ['lstm_ego']
        lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
        lstm_states += ['lstm_img']
        lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
    elif args.arch.lstm_img:
        # An LSTM only on the image
        lstm_states += ['lstm_img']
        lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
    else:
        # No LSTMs involved here.
        pass

    m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train_bkp'] = \
        _inputs(task_params, lstm_states, lstm_state_dims)

    with tf.name_scope('check_size'):
        is_single_step = tf.equal(
            tf.unstack(tf.shape(m.input_tensors['step']['imgs']), num=6)[1], 1)

    images_reshaped = tf.reshape(
        m.input_tensors['step']['imgs'],
        shape=[-1, task_params.img_height, task_params.img_width,
               task_params.img_channels],
        name='re_image')

    rel_goal_loc_reshaped = tf.reshape(
        m.input_tensors['step']['rel_goal_loc'],
        shape=[-1, task_params.rel_goal_loc_dim],
        name='re_rel_goal_loc')

    x, vars_ = get_repr_from_image(
        images_reshaped, task_params.modalities, task_params.data_augment,
        args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay,
        is_training)

    # Reshape into nice things so that these can be accumulated over time steps
    # for faster backprop.
    sh_before = x.get_shape().as_list()
    m.encoder_output = tf.reshape(
        x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
    x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])

    # Add a layer to reduce dimensions for a fc layer.
    if args.arch.dim_reduce_neurons > 0:
        ks = 1
        neurons = args.arch.dim_reduce_neurons
        init_var = np.sqrt(2.0 / (ks ** 2) / neurons)
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.conv_feat = slim.conv2d(
            x, neurons, kernel_size=ks, stride=1,
            normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param,
            padding='SAME', scope='dim_reduce',
            weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
            weights_initializer=tf.random_normal_initializer(stddev=init_var))
        reshape_conv_feat = slim.flatten(m.conv_feat)
        sh = reshape_conv_feat.get_shape().as_list()
        m.reshape_conv_feat = tf.reshape(reshape_conv_feat,
                                         shape=[-1, sh[1] * n_views])

    # Restore these from a checkpoint.
    if args.solver.pretrained_path is not None:
        m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
                                                   vars_)
    else:
        m.init_fn = None

    # Hit the goal_location with a bunch of fully connected layers, to embed it
    # into some space.
    with tf.variable_scope('embed_goal'):
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.embed_goal, _ = tf_utils.fc_network(
            rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons,
            wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
            batch_norm_param=batch_norm_param,
            dropout_ratio=args.arch.fc_dropout, is_training=is_training)

    if args.arch.embed_goal_for_state:
        with tf.variable_scope('embed_goal_for_state'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            m.embed_goal_for_state, _ = tf_utils.fc_network(
                m.input_tensors['common']['rel_goal_loc_at_start'][:, 0, :],
                neurons=args.arch.goal_embed_neurons,
                wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout, is_training=is_training)

    # Hit the image features with a bunch of fully connected layers, to embed
    # them into some space.
    with tf.variable_scope('embed_img'):
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.embed_img, _ = tf_utils.fc_network(
            m.reshape_conv_feat, neurons=args.arch.img_embed_neurons,
            wt_decay=args.solver.wt_decay, name='img_embed', offset=0,
            batch_norm_param=batch_norm_param,
            dropout_ratio=args.arch.fc_dropout, is_training=is_training)

    # For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an
    # LSTM, combine with image features and accumulate those in an LSTM. Finally
    # combine what you get from the image LSTM with the goal to output an action.
    if args.arch.lstm_ego:
        ego_reshaped = preprocess_egomotion(
            m.input_tensors['step']['incremental_locs'],
            m.input_tensors['step']['incremental_thetas'])
        with tf.variable_scope('embed_ego'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            m.embed_ego, _ = tf_utils.fc_network(
                ego_reshaped, neurons=args.arch.ego_embed_neurons,
                wt_decay=args.solver.wt_decay, name='ego_embed', offset=0,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout, is_training=is_training)

        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step,
            args.arch.lstm_ego_dim, args.arch.lstm_ego_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_ego'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]

        # Combine the output with the vision features.
        m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
                                     m.embed_img, out_op,
                                     args.arch.img_embed_neurons[-1],
                                     args.arch.lstm_ego_out)

        # LSTM on these vision features.
        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step,
            args.arch.lstm_img_dim, args.arch.lstm_img_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_img'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]

        m.img_for_goal = out_op
        num_img_for_goal_neurons = args.arch.lstm_img_out

    elif args.arch.lstm_img:
        # LSTM on just the image features.
        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
            args.arch.lstm_img_dim, args.arch.lstm_img_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_img'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]
        m.img_for_goal = out_op
        num_img_for_goal_neurons = args.arch.lstm_img_out

    else:
        m.img_for_goal = m.embed_img
        num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]

    if args.arch.use_visit_count:
        m.embed_visit_count = visit_count_fc(
            m.input_tensors['step']['visit_count'],
            m.input_tensors['step']['last_visit'],
            args.arch.goal_embed_neurons,
            args.solver.wt_decay, args.arch.fc_dropout,
            is_training=is_training)
        m.embed_goal = m.embed_goal + m.embed_visit_count

    m.combined_f = combine_setup('img_goal', args.arch.combine_type,
                                 m.img_for_goal, m.embed_goal,
                                 num_img_for_goal_neurons,
                                 args.arch.goal_embed_neurons[-1])

    # LSTM on the combined representation.
    if args.arch.lstm_output:
        name = 'lstm_output'
        # A few fully connected layers here.
        with tf.variable_scope('action_pred'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            x, _ = tf_utils.fc_network(
                m.combined_f, neurons=args.arch.pred_neurons,
                wt_decay=args.solver.wt_decay, name='pred', offset=0,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout)

        if args.arch.lstm_output_init_state_from_goal:
            # Use the goal embedding to initialize the LSTM state.
            # UGLY CLUGGY HACK: if this is doing computation for a single time
            # step then this will not involve back prop, so we can use the state
            # input from the feed dict, otherwise we compute the state
            # representation from the goal and feed that in. Necessary for using
            # goal location to generate the state representation.
            m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, dim=1)
            state_op = tf.cond(is_single_step,
                               lambda: m.input_tensors['step'][name],
                               lambda: m.embed_goal_for_state)
            state_name, state_init_op, updated_state_op, out_op = lstm_setup(
                name, x, task_params.batch_size, is_single_step,
                args.arch.lstm_output_dim, num_actions_,
                num_steps * num_goals, state_op)
            init_state_ops += [m.embed_goal_for_state]
        else:
            state_op = m.input_tensors['step'][name]
            state_name, state_init_op, updated_state_op, out_op = lstm_setup(
                name, x, task_params.batch_size, is_single_step,
                args.arch.lstm_output_dim, num_actions_,
                num_steps * num_goals, state_op)
            init_state_ops += [state_init_op]

        state_names += [state_name]
        updated_state_ops += [updated_state_op]

        out_op = tf.reshape(out_op, shape=[-1, num_actions_])
        if num_actions_ > num_actions:
            m.action_logits_op = out_op[:, :num_actions]
            m.baseline_op = out_op[:, num_actions:]
        else:
            m.action_logits_op = out_op
            m.baseline_op = None
        m.action_prob_op = tf.nn.softmax(m.action_logits_op)

    else:
        # A few fully connected layers here.
        with tf.variable_scope('action_pred'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            out_op, _ = tf_utils.fc_network(
                m.combined_f, neurons=args.arch.pred_neurons,
                wt_decay=args.solver.wt_decay, name='pred', offset=0,
                num_pred=num_actions_,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout, is_training=is_training)
            if num_actions_ > num_actions:
                m.action_logits_op = out_op[:, :num_actions]
                m.baseline_op = out_op[:, num_actions:]
            else:
                m.action_logits_op = out_op
                m.baseline_op = None
            m.action_prob_op = tf.nn.softmax(m.action_logits_op)

    m.train_ops = {}
    m.train_ops['step'] = m.action_prob_op
    m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
                             m.input_tensors['common']['goal_loc'],
                             m.input_tensors['common']['rel_goal_loc_at_start']]
    m.train_ops['state_names'] = state_names
    m.train_ops['init_state'] = init_state_ops
    m.train_ops['updated_state'] = updated_state_ops
    m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op

    # Flat list of ops which cache the step data.
    m.train_ops['step_data_cache'] = [tf.no_op()]
    if args.solver.freeze_conv:
        m.train_ops['step_data_cache'] = [m.encoder_output]
    else:
        m.train_ops['step_data_cache'] = []

    ewma_decay = 0.99 if is_training else 0.0
    weight = tf.ones_like(m.input_tensors['train_bkp']['action'],
                          dtype=tf.float32, name='weight')

    m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
        compute_losses_multi_or(
            m.action_logits_op, m.input_tensors['train_bkp']['action'],
            weights=weight, num_actions=num_actions,
            data_loss_wt=args.solver.data_loss_wt,
            reg_loss_wt=args.solver.reg_loss_wt,
            ewma_decay=ewma_decay)

    if args.solver.freeze_conv:
        vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
    else:
        vars_to_optimize = None

    m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
        m.sync_optimizer = tf_utils.setup_training(
            m.total_loss_op,
            args.solver.initial_learning_rate,
            args.solver.steps_per_decay,
            args.solver.learning_rate_decay,
            args.solver.momentum,
            args.solver.max_steps,
            args.solver.sync,
            args.solver.adjust_lr_sync,
            args.solver.num_workers,
            args.solver.task,
            vars_to_optimize=vars_to_optimize,
            clip_gradient_norm=args.solver.clip_gradient_norm,
            typ=args.solver.typ, momentum2=args.solver.momentum2,
            adam_eps=args.solver.adam_eps)

    if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
        m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(
            args.arch.isd_k, m.global_step_op)
    elif args.arch.sample_gt_prob_type == 'zero':
        m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
    elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
        step = int(args.arch.sample_gt_prob_type.split('_')[1])
        m.sample_gt_prob_op = tf_utils.step_gt_prob(
            step, m.input_tensors['step']['step_number'][0, 0, 0])

    m.sample_action_type = args.arch.action_sample_type
    m.sample_action_combine_type = args.arch.action_sample_combine_type

    _add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)

    m.init_op = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())
    m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
                                write_version=tf.train.SaverDef.V2)
    return m
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    input_seg_maps = tf.placeholder(tf.float32, shape=[None, None, None, 6],
                                    name='input_score_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    # opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_seg_maps_split = tf.split(input_seg_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isegs = input_seg_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isegs, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()

    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # batch norm updates
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    gpu_options = tf.GPUOptions(allow_growth=True)
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            logger.info('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            logger.debug(ckpt)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={input_images: data[0],
                           input_seg_maps: data[2],
                           input_training_masks: data[3]})
            if np.isnan(tl):
                logger.error('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                logger.info(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, '
                    '{:.2f} seconds/step, {:.2f} examples/second'.format(
                        step, ml, tl, avg_time_per_step,
                        avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           os.path.join(FLAGS.checkpoint_path, 'model.ckpt'),
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={input_images: data[0],
                               input_seg_maps: data[2],
                               input_training_masks: data[3]})
                summary_writer.add_summary(summary_str, global_step=step)
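# Sketch (assumption, not in the original source): since training above
# maintains exponential moving averages of all trainable variables, evaluation
# code typically restores the shadow values rather than the raw weights.
variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
eval_saver = tf.train.Saver(variable_averages.variables_to_restore())
# eval_saver.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_path))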
def __init__(self, flags, is_training=True):
    self.is_training = is_training
    self.preprocessing_name = (flags.preprocessing_name or flags.model_name)

    network_fn = nets_factory.get_network_fn(flags.model_name,
                                             num_classes=config.num_label,
                                             weight_decay=flags.weight_decay,
                                             is_training=is_training)
    self.image_size = network_fn.default_image_size
    self.image_ph = tf.placeholder(tf.float32,
                                   shape=(None, self.image_size,
                                          self.image_size, config.channels))
    self.label_ph = tf.placeholder(tf.float32, shape=(None, config.num_label))
    self.logits, end_points = network_fn(self.image_ph)

    if not is_training:
        return

    # global_step = tf.train.create_global_step()
    global_step = tf.train.get_global_step()
    decay_steps = int(config.train_data_size / config.train_batch_size *
                      flags.num_epochs_per_decay)
    learning_rate = tf.train.exponential_decay(
        flags.init_learning_rate, global_step, decay_steps,
        flags.learning_rate_decay_factor, staircase=True,
        name='exponential_decay_learning_rate')

    tf.losses.sigmoid_cross_entropy(self.label_ph, self.logits)
    losses = tf.get_collection(tf.GraphKeys.LOSSES)
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    losses.extend(regularization_losses)
    loss = tf.add_n(losses, name='loss')
    total_loss = tf.losses.get_total_loss(name='total_loss')
    diff = tf.subtract(loss, total_loss)
    tf.summary.scalar('learning_rate', learning_rate)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('diff', diff)
    self.summary_op = tf.summary.merge_all()

    exclusions = [scope.strip()
                  for scope in flags.checkpoint_exclude_scopes.split(',')]
    variables_to_restore = []
    for variable in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if variable.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(variable)
        else:
            num_params = 1
            for dim in variable.shape:
                num_params *= dim.value
            print('randinit {}\t({} params)'.format(variable.name, num_params))

    scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]
    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)

    self.save_dict = {}
    for variable in variables_to_train:
        if not variable.name.startswith('vgg_16'):
            continue
        num_params = 1
        for dim in variable.shape:
            num_params *= dim.value
        print('trainable {}\t({} params)'.format(variable.name, num_params))
        self.save_dict[variable.name] = variable
    self.saver = tf.train.Saver(self.save_dict)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    self.train_op = optimizer.minimize(loss, var_list=variables_to_train,
                                       global_step=global_step)
    self.init_fn = slim.assign_from_checkpoint_fn(flags.checkpoint_path,
                                                  variables_to_restore)
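# Usage sketch (assumption): wiring the constructor above into a training
# session. `Model` is a hypothetical name for the enclosing class, and `images`
# / `labels` come from an unspecified data pipeline.
model = Model(flags, is_training=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.init_fn(sess)  # warm-start from flags.checkpoint_path
    sess.run(model.train_op, feed_dict={model.image_ph: images,
                                        model.label_ph: labels})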
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    gpus = list(range(len(FLAGS.gpu_list.split(','))))

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
                                      name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94, staircase=True)

    # Variables tracking recall, precision and F1.
    v_recall = tf.Variable(0.001, trainable=False)
    v_precision = tf.Variable(0.001, trainable=False)
    v_f1 = tf.Variable(0.001, trainable=False)
    tf.summary.scalar("Recall", v_recall)
    tf.summary.scalar("Precision", v_precision)
    tf.summary.scalar("F1", v_f1)

    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                # Model definition!!!
                total_loss, model_loss, f_score, f_geometry = tower_loss(
                    iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()

    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # batch norm updates
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = create_summary_writer()

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        # pretrained_model_path is actually the resnet50 pretrained model.
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
        logger.debug("Successfully loaded resnet pretrained model: %s",
                     FLAGS.pretrained_model_path)

    early_stop = EarlyStop(FLAGS.early_stop)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            logger.debug('Trying to resume half-finished training from [%s]',
                         FLAGS.model_path)
            # This is an earlier checkpoint, so training can continue from it.
            ckpt = tf.train.latest_checkpoint(FLAGS.model_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
            logger.debug("Training from scratch...")

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size,
                                         type="train")
        validate_data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size,
            type="validate")

        # Start training!
        for step in range(FLAGS.max_steps):
            # Fetch one batch of data.
            start = time.time()
            data = next(data_generator)
            logger.debug("[train] step %d, loaded a batch of (%d) images (%f s), ready to train...",
                         step, FLAGS.batch_size, (time.time() - start))

            # Train on them.
            run_start = time.time()
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={input_images: data[0],
                           input_score_maps: data[2],
                           input_geo_maps: data[3],
                           input_training_masks: data[4]})
            if np.isnan(tl):
                logger.debug('Loss diverged, stop training')
                break
            logger.debug("[train] finished the gradient step for this batch, took: %f",
                         time.time() - run_start)

            # if step % FLAGS.validate_steps == 0:
            #     logger.debug("Saving checkpoint: %s", FLAGS.model_path + 'model.ckpt')
            #     saver.save(sess, FLAGS.model_path + 'model.ckpt', global_step=global_step)

            # Validate every FLAGS.validate_steps steps (1000 by default).
            if step != 0 and step % FLAGS.validate_steps == 0:
                precision, recall, f1 = evaluator.validate(
                    sess, FLAGS.validate_batch_num, FLAGS.batch_size,
                    validate_data_generator, f_score, f_geometry, input_images)
                # Update the three scalar tensors.
                sess.run([tf.assign(v_f1, f1),
                          tf.assign(v_recall, recall),
                          tf.assign(v_precision, precision)])
                logger.debug("Validation done: at step %d, F1: %f, Recall: %f, Precision: %f",
                             step, f1, recall, precision)
                if is_need_early_stop(early_stop, f1, saver, sess, step):
                    break  # Uses the negative edit distance.

            if step != 0 and step % FLAGS.save_summary_steps == 0:
                logger.debug("Writing the summary file, step %d", step)
                summary_writer.add_summary(summary_str, global_step=step)
                avg_time_per_step = (time.time() - start) / FLAGS.save_summary_steps
                avg_examples_per_second = (FLAGS.save_summary_steps *
                                           FLAGS.batch_size * len(gpus)) / (time.time() - start)
                start = time.time()
                logger.debug(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, '
                    '{:.2f} seconds/step, {:.2f} examples/second'.format(
                        step, ml, tl, avg_time_per_step,
                        avg_examples_per_second))

            logger.debug("[train] step %d done, total time (including data loading): %f",
                         step, (time.time() - start))
def train(self):
    img_size = [self.image_height, self.image_width, self.image_depth]
    train_batch = tf.train.shuffle_batch(
        [read_tfrecord(self.train_file, img_size)],
        batch_size=self.train_batch_size,
        capacity=3000, num_threads=2,
        min_after_dequeue=1000)
    test_batch = tf.train.shuffle_batch(
        [read_tfrecord(self.test_file, img_size)],
        batch_size=self.test_batch_size,
        capacity=500, num_threads=2,
        min_after_dequeue=300)
    init = tf.global_variables_initializer()
    init_fn = slim.assign_from_checkpoint_fn(
        "resnet_v2_50.ckpt", slim.get_model_variables('resnet_v2'))
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        init_fn(sess)
        train_writer = tf.summary.FileWriter(self.log_dir + "/train", sess.graph)
        test_writer = tf.summary.FileWriter(self.log_dir + "/test", sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        inputs_test, outputs_gt_test = build_img_pair(sess.run(test_batch))
        for iter in range(self.max_iteration):
            inputs_train, outputs_gt_train = build_img_pair(sess.run(train_batch))
            # train with dynamic learning rate
            if iter <= 500:
                self.train_step.run({self.input_data: inputs_train,
                                     self.gt: outputs_gt_train,
                                     self.learning_rate: 1e-3,
                                     self.batch_size: self.train_batch_size})
            elif iter <= self.max_iteration - 1000:
                self.train_step.run({self.input_data: inputs_train,
                                     self.gt: outputs_gt_train,
                                     self.learning_rate: 0.5e-3,
                                     self.batch_size: self.train_batch_size})
            else:
                self.train_step.run({self.input_data: inputs_train,
                                     self.gt: outputs_gt_train,
                                     self.learning_rate: 1e-4,
                                     self.batch_size: self.train_batch_size})
            # print training loss and test loss
            if iter % 10 == 0:
                summary_train = sess.run(self.summary,
                                         {self.input_data: inputs_train,
                                          self.gt: outputs_gt_train,
                                          self.batch_size: self.train_batch_size})
                train_writer.add_summary(summary_train, iter)
                train_writer.flush()
                summary_test = sess.run(self.summary,
                                        {self.input_data: inputs_test,
                                         self.gt: outputs_gt_test,
                                         self.batch_size: self.test_batch_size})
                test_writer.add_summary(summary_test, iter)
                test_writer.flush()
            # record training loss and test loss
            if iter % 10 == 0:
                train_loss = self.cross_entropy.eval(
                    {self.input_data: inputs_train,
                     self.gt: outputs_gt_train,
                     self.batch_size: self.train_batch_size})
                test_loss = self.cross_entropy.eval(
                    {self.input_data: inputs_test,
                     self.gt: outputs_gt_test,
                     self.batch_size: self.test_batch_size})
                print("iter step %d training batch loss %f" % (iter, train_loss))
                print("iter step %d test loss %f\n" % (iter, test_loss))
            # record model
            if iter % 100 == 0:
                saver.save(sess, self.log_dir + "/model.ckpt", global_step=iter)
        coord.request_stop()
        coord.join(threads)
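# Small helper (assumption, not in the original) making the piecewise learning
# rate schedule above explicit; max_iteration mirrors self.max_iteration.
def lr_for_iter(iteration, max_iteration):
    if iteration <= 500:
        return 1e-3
    elif iteration <= max_iteration - 1000:
        return 0.5e-3
    return 1e-4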
def train(H, test_images):
    '''
    Setup computation graph, run 2 prefetch data threads, and then run the main loop
    '''
    if not os.path.exists(H['save_dir']):
        os.makedirs(H['save_dir'])
    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        channels = H.get('image_channels', 3)
        print('Image channels: %d' % channels)
        shapes = (
            [H['image_height'], H['image_width'], channels],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, smooth_op,
     global_step, learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(logdir=H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase,
                                            jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        elif H['slim_ckpt'] == '':
            sess.run(
                tf.variables_initializer(
                    [x for x in tf.global_variables()
                     if x.name.startswith(H['slim_basename'])
                     and H['solver']['opt'] not in x.name]))
        else:
            init_fn = slim.assign_from_checkpoint_fn(
                '%s/data/%s' % (os.path.dirname(os.path.realpath(__file__)),
                                H['slim_ckpt']),
                [x for x in tf.global_variables()
                 if x.name.startswith(H['slim_basename'])
                 and H['solver']['opt'] not in x.name])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 10000000)
        for i in range(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i / H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str,
                 _, _) = sess.run([loss['train'], accuracy['test'],
                                   summary_op, train_op, smooth_op],
                                  feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join(['Step: %d',
                                       'lr: %f',
                                       'Train Loss: %.2f',
                                       'Softmax Test Accuracy: %.1f%%',
                                       'Time/image (ms): %.1f'])
                print(print_str % (i, adjusted_lr, train_loss,
                                   test_accuracy * 100,
                                   dt * 1000 if i > 0 else 0))

            if (global_step.eval() % H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
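# Worked example (not in the original): the adjusted_lr schedule above halves
# the learning rate per learning_rate_step beyond a 2-step grace period. With
# learning_rate=1e-3 and learning_rate_step=10000:
#   i = 0     -> 1e-3 * 0.5**max(0, 0 - 2) = 1e-3
#   i = 30000 -> 1e-3 * 0.5**max(0, 3 - 2) = 5e-4
#   i = 50000 -> 1e-3 * 0.5**max(0, 5 - 2) = 1.25e-4
# (Under Python 3's true division the decay is smooth between these points.)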
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        lr = FLAGS.learning_rate
        opt = tf.train.RMSPropOptimizer(lr, decay=0.9, momentum=0.9, epsilon=1)

        # Get images and labels for train.
        with tf.name_scope('train_images'):
            images, labels, boxes, num_objects = input.distorted_inputs(
                FLAGS.batch_size)

        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [images, labels, boxes, num_objects], capacity=2 * FLAGS.num_gpus)

        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        image_batch, label_batch, box_batch, num_objects_batch = \
                            batch_queue.dequeue()
                        cls_loss, loc_loss = ssd.loss(image_batch, label_batch,
                                                      box_batch,
                                                      num_objects_batch)
                        loss = cls_loss + loc_loss
                        regularization_loss = tf.add_n(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        loss = loss + regularization_loss
                        tf.get_variable_scope().reuse_variables()
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                       scope)
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)
                        tower_losses.append(loss)

        grads = average_gradients(tower_grads)

        # validation
        val_images, val_labels, val_boxes, val_num_objects = input.inputs(1)
        with tf.device('/gpu:0'):
            with tf.name_scope('eval_images'):
                cls_pred, loc_pred = ssd.inference(val_images)

        summaries.extend(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           'train_images'))
        summaries.extend(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           'eval_images'))

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        with tf.control_dependencies(update_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)

        for var in tf.trainable_variables():
            print(var.name)
            summaries.append(tf.summary.histogram(var.op.name, var))

        saver = tf.train.Saver(max_to_keep=20)
        summary_op = tf.summary.merge(summaries)

        pretrained_ckpt_path = FLAGS.pretrained_ckpt_path
        if not tf.train.latest_checkpoint(FLAGS.ckpt_save_path):
            print('pretrained ckpt')
            exclude_layers = ['global_step']
            restore_variables = slim.get_variables_to_restore(
                exclude=exclude_layers)
            init_fn = slim.assign_from_checkpoint_fn(pretrained_ckpt_path,
                                                     restore_variables,
                                                     ignore_missing_vars=True)
        else:
            print('training ckpt')
            init_fn = None

        sv = tf.train.Supervisor(logdir=FLAGS.ckpt_save_path,
                                 summary_op=None,
                                 saver=saver,
                                 save_model_secs=0,
                                 init_fn=init_fn)

        config_ = tf.ConfigProto(allow_soft_placement=True)
        config_.gpu_options.per_process_gpu_memory_fraction = 0.4

        # sess = sv.managed_session(config=config_)
        with sv.managed_session(config=config_) as sess:
            # Start the queue runners.
            sv.start_queue_runners(sess=sess)

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                sess.run(train_op)
                loss_value, cls_loss_value, loc_loss_value = sess.run(
                    [loss, cls_loss, loc_loss])
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 100 == 0:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / FLAGS.num_gpus
                    format_str = ('%s: step %d, loss = %.2f '
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print(cls_loss_value, loc_loss_value)

                if step % 100 == 0:
                    summary_str = sess.run(summary_op)

                if step % (int(FLAGS.num_train / FLAGS.batch_size) * 4) == 0 and step != 0:
                    print('start validation')
                    entire_TF = []
                    entire_score = []
                    entire_numGT = []
                    for val_step in range(FLAGS.num_validation):
                        if val_step % 500 == 0:
                            print(val_step, ' / ', FLAGS.num_validation)
                        val_GT_boxes, val_GT_cls, val_loc_pred, val_cls_pred, num_objects = sess.run(
                            [val_boxes, val_labels, loc_pred, cls_pred,
                             val_num_objects])
                        TF_array, TF_score, num_GT = validation.one_image_validation(
                            val_GT_boxes, val_GT_cls, val_loc_pred,
                            val_cls_pred, num_objects)
                        if len(entire_TF) == 0:
                            entire_TF = TF_array
                            entire_score = TF_score
                            entire_numGT = num_GT
                        else:
                            for k_cls in range(FLAGS.num_classes - 1):
                                entire_TF[k_cls] = np.concatenate(
                                    [entire_TF[k_cls], TF_array[k_cls]], axis=0)
                                entire_score[k_cls] = np.concatenate(
                                    [entire_score[k_cls], TF_score[k_cls]],
                                    axis=0)
                                entire_numGT[k_cls] += num_GT[k_cls]
                    entire_AP_sum = validation.compute_AP(entire_score,
                                                          entire_TF,
                                                          entire_numGT)
                    mAP = np.sum(np.array(entire_AP_sum)) / np.sum(
                        np.array(entire_AP_sum) != 0)
                    print('class AP : ', entire_AP_sum)
                    print('mAP : ', mAP)
                    checkpoint_path = os.path.join(FLAGS.ckpt_save_path,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
def main(argv=None):
    # Training proceeds in these steps: 1. load the data, 2. define the network
    # model, 3. define the loss function, 4. define the optimizer, 5. define the
    # evaluation metric.

    # Load the preprocessed image data.
    processed_data = np.load(DATA_FILE)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    n_validation_example = len(validation_images)
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    n_testing_example = len(testing_images)
    testing_labels = processed_data[5]
    logger.info('%d training examples, %d validation examples, %d testing examples.' %
                (n_training_example, n_validation_example, n_testing_example))

    # Define the input data and labels.
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Define the inception-v3 model. Google's checkpoint only contains parameter
    # values, so the inception_v3 model structure has to be defined here. The
    # batch-normalization statistics in the pretrained inception_v3 differ from
    # the new data, which makes training results very poor, so a single model is
    # used directly for testing as well, without separate train and test models.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, _ = inception_v3.inception_v3(images, num_classes=N_CLASSES)

    # Get the variables that need training.
    trainable_variables = get_trainable_variables()
    # Define the cross-entropy loss; the parameter regularization loss was
    # already added when the model was defined.
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES), logits,
                                    weights=1.0)
    # Define the optimizer.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())

    # Compute accuracy to evaluate the model.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evalution_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Define the function that loads the pretrained model.
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Define the saver for the trained model.
    saver = tf.train.Saver()

    # Start training.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Load the model pretrained by Google.
        logger.info('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH_SIZE
        for i in range(STEPS):
            logger.info('Step %d-%d is training....' % (i, STEPS))
            try:
                sess.run(train_step,
                         feed_dict={images: training_images[start:end],
                                    labels: training_labels[start:end]})
            except Exception:
                logger.error('training fail', exc_info=True)

            # Log progress.
            if i % display_steps == 0 or i + 1 == STEPS:
                validation_acc = sess.run(
                    evalution_step,
                    feed_dict={images: validation_images,
                               labels: validation_labels})
                logger.info('Step %d-%d: validation acc = %.1f%%' %
                            (i, STEPS, validation_acc * 100.0))

            # Persist the model.
            if i % save_steps == 0 or i + 1 == STEPS:
                saver.save(sess, TRAIN_FILE_SAVE_PATH, global_step=i)

            # The data was already shuffled during preprocessing, so the
            # training data can simply be consumed in order here.
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH_SIZE
            if end > n_training_example:
                end = n_training_example

        # Finally, measure accuracy on the test set.
        test_acc = sess.run(evalution_step,
                            feed_dict={images: testing_images,
                                       labels: testing_labels})
        logger.info('Final test acc = %.1f%%' % (test_acc * 100.0))
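# Sketch (assumption, not in the original): the start/end bookkeeping above,
# extracted as a standalone wrap-around batch-window generator for clarity.
def batch_windows(n_examples, batch_size):
    start = 0
    while True:
        end = min(start + batch_size, n_examples)
        yield start, end
        # Wrap to the beginning once the last window touched the end.
        start = 0 if end == n_examples else end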
def main(margin, batch_size, output_size, learning_rate, whichGPU, is_finetuning, pretrained_net): def handler(signum, frame): print 'Saving checkpoint before closing' pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str) saver.save(sess, pretrained_net, global_step=step) print 'Checkpoint-', pretrained_net + '-' + str(step), ' saved!' sys.exit(0) signal.signal(signal.SIGINT, handler) ckpt_dir = './output/traffickcam/ckpts/finetuning' log_dir = './output/traffickcam/logs/finetuning' train_filename = './input/traffickcam/train.txt' mean_file = './input/traffickcam/meanIm.npy' img_size = [256, 256] crop_size = [224, 224] num_iters = 200000 summary_iters = 100 save_iters = 5000 featLayer = 'resnet_v2_50/logits' is_training = True margin = float(margin) batch_size = int(batch_size) output_size = int(output_size) learning_rate = float(learning_rate) whichGPU = str(whichGPU) if batch_size % 30 != 0: print 'Batch size must be divisible by 30!' sys.exit(0) num_pos_examples = batch_size / 30 # Create data "batcher" train_data = CombinatorialTripletSet(train_filename, mean_file, img_size, crop_size, batch_size, num_pos_examples, isTraining=is_training) numClasses = len(train_data.files) numIms = np.sum( [len(train_data.files[idx]) for idx in range(0, numClasses)]) datestr = datetime.now().strftime("%Y_%m_%d_%H%M") param_str = datestr + '_lr' + str(learning_rate).replace( '.', 'pt') + '_outputSz' + str(output_size) + '_margin' + str( margin).replace('.', 'pt') logfile_path = os.path.join(log_dir, param_str + '_train.txt') train_log_file = open(logfile_path, 'a') print '------------' print '' print 'Going to train with the following parameters:' print '# Classes: ', numClasses train_log_file.write('# Classes: ' + str(numClasses) + '\n') print '# Ims: ', numIms train_log_file.write('# Ims: ' + str(numIms) + '\n') print 'Margin: ', margin train_log_file.write('Margin: ' + str(margin) + '\n') print 'Output size: ', output_size train_log_file.write('Output size: ' + str(output_size) + '\n') print 'Learning rate: ', learning_rate train_log_file.write('Learning rate: ' + str(learning_rate) + '\n') print 'Logging to: ', logfile_path train_log_file.write('Param_str: ' + param_str + '\n') train_log_file.write('----------------\n') print '' print '------------' # Queuing op loads data into input tensor image_batch = tf.placeholder( tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3]) label_batch = tf.placeholder(tf.int32, shape=(batch_size)) # doctor image params percent_crop = .5 percent_rotate = .2 percent_filters = .4 percent_text = .1 # # richard's argument: since the data is randomly loaded, we don't need to change the indices that we perform operations on every time; i am on board with this, but had already implemented the random crops, so will leave that for now # # apply random rotations num_rotate = int(batch_size * percent_rotate) rotate_inds = np.random.choice(np.arange(0, batch_size), num_rotate, replace=False) rotate_vals = np.random.randint(-65, 65, num_rotate).astype('float32') / float(100) rotate_angles = np.zeros((batch_size)) rotate_angles[rotate_inds] = rotate_vals rotated_batch = tf.contrib.image.rotate(image_batch, rotate_angles, interpolation='BILINEAR') # do random crops num_to_crop = int(batch_size * percent_crop) num_to_not_crop = batch_size - num_to_crop shuffled_inds = tf.random_shuffle(np.arange(0, batch_size, dtype='int32')) # shuffled_inds = np.arange(0,batch_size,dtype='int32') # np.random.shuffle(shuffled_inds) crop_inds = tf.slice(shuffled_inds, [0], 
[num_to_crop]) uncropped_inds = tf.slice(shuffled_inds, [num_to_crop], [num_to_not_crop]) # crop_ratio = float(3)/float(5) # crop_yx = tf.random_uniform([num_to_crop,2], 0,1-crop_ratio, dtype=tf.float32, seed=0) # crop_sz = tf.add(crop_yx,np.tile([crop_ratio,crop_ratio],[num_to_crop, 1])) # crop_boxes = tf.concat([crop_yx,crop_sz],axis=1) # randomly select a crop between 3/5 of the image and the entire image crop_ratio = tf.random_uniform([num_to_crop, 1], float(3) / float(5), 1, dtype=tf.float32, seed=0) # randomly select a starting location between 0 and the max valid x position crop_yx = tf.random_uniform([1, 2], 0., 1. - crop_ratio, dtype=tf.float32, seed=0) crop_sz = tf.add(crop_yx, tf.concat([crop_ratio, crop_ratio], axis=1)) crop_boxes = tf.concat([crop_yx, crop_sz], axis=1) uncropped_boxes = np.tile([0, 0, 1, 1], [num_to_not_crop, 1]) all_inds = tf.concat([crop_inds, uncropped_inds], axis=0) all_boxes = tf.concat([crop_boxes, uncropped_boxes], axis=0) sorted_inds = tf.nn.top_k(-shuffled_inds, sorted=True, k=batch_size).indices cropped_batch = tf.gather( tf.image.crop_and_resize(rotated_batch, all_boxes, all_inds, crop_size), sorted_inds) # apply different filters flt_image = convert_image_dtype(cropped_batch, dtypes.float32) num_to_filter = int(batch_size * percent_filters) filter_inds = np.random.choice(np.arange(0, batch_size), num_to_filter, replace=False) filter_mask = np.zeros(batch_size) filter_mask[filter_inds] = 1 filter_mask = filter_mask.astype('float32') inv_filter_mask = np.ones(batch_size) inv_filter_mask[filter_inds] = 0 inv_filter_mask = inv_filter_mask.astype('float32') # hsv = gen_image_ops.rgb_to_hsv(flt_image) hue = array_ops.slice(hsv, [0, 0, 0, 0], [batch_size, -1, -1, 1]) saturation = array_ops.slice(hsv, [0, 0, 0, 1], [batch_size, -1, -1, 1]) value = array_ops.slice(hsv, [0, 0, 0, 2], [batch_size, -1, -1, 1]) # hue delta_vals = random_ops.random_uniform([batch_size], -.15, .15) hue_deltas = tf.multiply(filter_mask, delta_vals) hue_deltas2 = tf.expand_dims( tf.transpose( tf.tile(tf.reshape(hue_deltas, [1, 1, batch_size]), (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3) # hue = math_ops.mod(hue + (hue_deltas2 + 1.), 1.) 
hue_mod = tf.add(hue, hue_deltas2) hue = clip_ops.clip_by_value(hue_mod, 0.0, 1.0) # saturation saturation_factor = random_ops.random_uniform([batch_size], -.05, .05) saturation_factor2 = tf.multiply(filter_mask, saturation_factor) saturation_factor3 = tf.expand_dims( tf.transpose( tf.tile(tf.reshape(saturation_factor2, [1, 1, batch_size]), (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3) saturation_mod = tf.add(saturation, saturation_factor3) saturation = clip_ops.clip_by_value(saturation_mod, 0.0, 1.0) hsv_altered = array_ops.concat([hue, saturation, value], 3) rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered) # brightness brightness_factor = random_ops.random_uniform([batch_size], -.25, .25) brightness_factor2 = tf.multiply(filter_mask, brightness_factor) brightness_factor3 = tf.expand_dims( tf.transpose( tf.tile(tf.reshape(brightness_factor2, [1, 1, batch_size]), (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3) adjusted = math_ops.add(rgb_altered, math_ops.cast(brightness_factor3, dtypes.float32)) filtered_batch = clip_ops.clip_by_value(adjusted, 0.0, 255.0) # after we've doctored everything, we need to remember to subtract off the mean repMeanIm = np.tile(np.expand_dims(train_data.meanImage, 0), [batch_size, 1, 1, 1]) noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1], mean=0.0, stddev=0.0025, dtype=tf.float32) final_batch = tf.add(tf.subtract(filtered_batch, repMeanIm), noise) print("Preparing network...") with slim.arg_scope(resnet_v2.resnet_arg_scope()): _, layers = resnet_v2.resnet_v2_50(final_batch, num_classes=output_size, is_training=True) variables_to_restore = [] for var in slim.get_model_variables(): excluded = False if is_finetuning.lower() == 'true' and var.op.name.startswith( 'resnet_v2_50/logits') or 'momentum' in var.op.name.lower(): excluded = True if not excluded: variables_to_restore.append(var) feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer], 3)) expanded_a = tf.expand_dims(feat, 1) expanded_b = tf.expand_dims(feat, 0) D = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2) # if not train_data.isOverfitting: # D_max = tf.reduce_max(D) # D_mean, D_var = tf.nn.moments(D, axes=[0,1]) # lowest_nonzero_distance = tf.reduce_max(-D) # bottom_thresh = 1.2*lowest_nonzero_distance # top_thresh = (D_max + D_mean)/2.0 # bool_mask = tf.logical_and(D>=bottom_thresh,D<=top_thresh) # D = tf.multiply(D,tf.cast(bool_mask,tf.float32)) posIdx = np.floor(np.arange(0, batch_size) / num_pos_examples).astype('int') posIdx10 = num_pos_examples * posIdx posImInds = np.tile(posIdx10, (num_pos_examples, 1)).transpose() + np.tile( np.arange(0, num_pos_examples), (batch_size, 1)) anchorInds = np.tile(np.arange(0, batch_size), (num_pos_examples, 1)).transpose() posImInds_flat = posImInds.ravel() anchorInds_flat = anchorInds.ravel() posPairInds = zip(posImInds_flat, anchorInds_flat) posDists = tf.reshape(tf.gather_nd(D, posPairInds), (batch_size, num_pos_examples)) shiftPosDists = tf.reshape(posDists, (1, batch_size, num_pos_examples)) posDistsRep = tf.tile(shiftPosDists, (batch_size, 1, 1)) allDists = tf.tile(tf.expand_dims(D, 2), (1, 1, num_pos_examples)) ra, rb, rc = np.meshgrid(np.arange(0, batch_size), np.arange(0, batch_size), np.arange(0, num_pos_examples)) bad_negatives = np.floor((ra) / num_pos_examples) == np.floor( (rb) / num_pos_examples) bad_positives = np.mod(rb, num_pos_examples) == np.mod(rc, num_pos_examples) mask = ((1 - bad_negatives) * (1 - bad_positives)).astype('float32') # loss = tf.reduce_sum(tf.maximum(0.,tf.multiply(mask,margin 
+ posDistsRep - allDists)))/batch_size loss = tf.reduce_mean( tf.maximum(0., tf.multiply(mask, margin + posDistsRep - allDists))) # slightly counterintuitive to not define "init_op" first, but tf vars aren't known until added to graph update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): # train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss) optimizer = tf.train.AdamOptimizer(learning_rate) train_op = slim.learning.create_train_op(loss, optimizer) summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Create a saver for writing training checkpoints. saver = tf.train.Saver(max_to_keep=2000) # tf will consume any GPU it finds on the system. Following lines restrict it to specific gpus c = tf.ConfigProto() c.gpu_options.visible_device_list = whichGPU print("Starting session...") sess = tf.Session(config=c) sess.run(init_op) writer = tf.summary.FileWriter(log_dir, sess.graph) restore_fn = slim.assign_from_checkpoint_fn(pretrained_net, variables_to_restore) restore_fn(sess) print("Start training...") ctr = 0 for step in range(num_iters): start_time = time.time() batch, labels, ims = train_data.getBatch() _, loss_val = sess.run([train_op, loss], feed_dict={ image_batch: batch, label_batch: labels }) end_time = time.time() duration = end_time - start_time out_str = 'Step %d: loss = %.6f -- (%.3f sec)' % (step, loss_val, duration) # print(out_str) if step % summary_iters == 0: print(out_str) train_log_file.write(out_str + '\n') # Update the events file. # summary_str = sess.run(summary_op) # writer.add_summary(summary_str, step) # writer.flush() # # Save a checkpoint if (step + 1) % save_iters == 0: print('Saving checkpoint at iteration: %d' % (step)) pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str) saver.save(sess, pretrained_net, global_step=step) print 'checkpoint-', pretrained_net + '-' + str(step), ' saved!' if (step + 1) == num_iters: print('Saving final') pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str) saver.save(sess, pretrained_net, global_step=step) print 'final-', pretrained_net + '-' + str(step), ' saved!' sess.close() train_log_file.close()
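For readability, here is a tiny standalone sketch (illustrative sizes only: two classes with three images each) of the batch-all triplet mask constructed above; it zeroes out (anchor, negative, positive-slot) entries where the candidate negative shares a class with the anchor or where the positive slot refers back to the anchor's own position.

import numpy as np

batch_size, num_pos_examples = 6, 3  # assumed: 2 classes x 3 images each
ra, rb, rc = np.meshgrid(np.arange(0, batch_size), np.arange(0, batch_size),
                         np.arange(0, num_pos_examples))
bad_negatives = np.floor(ra / num_pos_examples) == np.floor(rb / num_pos_examples)
bad_positives = np.mod(rb, num_pos_examples) == np.mod(rc, num_pos_examples)
mask = ((1 - bad_negatives) * (1 - bad_positives)).astype('float32')
print(mask.shape)  # (6, 6, 3), matching the (margin + posDistsRep - allDists) tensor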
def main(_): if FLAGS.csv_file_path: if os.path.exists(FLAGS.csv_file_path) is False: csv_dir = FLAGS.csv_file_path.rsplit('/', 1)[0] if os.path.exists(csv_dir) is False: os.makedirs(csv_dir) if FLAGS.task_name == 'chalearn/age': with open(FLAGS.csv_file_path, 'w') as f: writer = csv.writer(f) writer.writerow([ 'Pruned rate', 'MAE', 'Acc', 'Epoch No.', 'Model size through inference (MB) (Shared part + task-specific part)', 'Shared part (MB)', 'Task specific part (MB)', 'Whole masks (MB)', 'Task specific masks (MB)', 'Task specific batch norm vars (MB)', 'Task specific biases (MB)' ]) else: with open(FLAGS.csv_file_path, 'w') as f: writer = csv.writer(f) writer.writerow([ 'Pruned rate', 'Acc', 'Epoch No.', 'Model size through inference (MB) (Shared part + task-specific part)', 'Shared part (MB)', 'Task specific part (MB)', 'Whole masks (MB)', 'Task specific masks (MB)', 'Task specific batch norm vars (MB)', 'Task specific biases (MB)' ]) args, unparsed = parse_arguments(sys.argv[1:]) FLAGS.filters_expand_ratio = math.sqrt(FLAGS.filters_expand_ratio) FLAGS.history_filters_expand_ratios = [ math.sqrt(float(ratio)) for ratio in FLAGS.history_filters_expand_ratios ] with tf.Graph().as_default(): with tf.Session() as sess: if 'emotion' in FLAGS.task_name or 'chalearn' in FLAGS.task_name: test_data_path = os.path.join(args.data_dir, 'val') else: test_data_path = os.path.join(args.data_dir, 'test') test_set = utils.get_dataset(test_data_path) # Get the paths for the corresponding images image_list, label_list = facenet.get_image_paths_and_labels( test_set) image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') nrof_preprocess_threads = 4 image_size = (args.image_size, args.image_size) eval_input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) eval_enqueue_op = eval_input_queue.enqueue_many( [ image_paths_placeholder, labels_placeholder, control_placeholder ], name='eval_enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( eval_input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) # Load the model if os.path.isdir(args.model): temp_record_file = os.path.join(args.model, 'temp_record.txt') checkpoint_file = os.path.join(args.model, 'checkpoint') if os.path.exists(temp_record_file) and os.path.exists( checkpoint_file): with open(temp_record_file) as json_file: data = json.load(json_file) max_acc = max(data, key=float) epoch_no = data[max_acc] ckpt_file = args.model + '/model-.ckpt-' + epoch_no with open(checkpoint_file) as f: context = f.read() original_epoch = re.search("(\d)+", context).group() context = context.replace(original_epoch, epoch_no) with open(checkpoint_file, 'w') as f: f.write(context) if os.path.exists(os.path.join(args.model, 'copied')) is False: os.makedirs(os.path.join(args.model, 'copied')) copyfile( temp_record_file, os.path.join(args.model, 'copied', 'temp_record.txt')) os.remove(temp_record_file) elif os.path.exists(checkpoint_file): ckpt = tf.train.get_checkpoint_state(args.model) ckpt_file = ckpt.model_checkpoint_path epoch_no = 
ckpt_file.rsplit('-', 1)[-1] else: print( 'No `temp_record.txt` or `checkpoint` in `{}`, you should pass args.model the file path, not the directory' .format(args.model)) sys.exit(1) else: ckpt_file = args.model epoch_no = ckpt_file.rsplit('-')[-1] prelogits, _ = network.inference( image_batch, 1.0, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=0.0) with tf.variable_scope('task_{}'.format(FLAGS.task_id)): if FLAGS.task_name == 'chalearn/age': logits = slim.fully_connected(prelogits, 100, activation_fn=None, scope='Logits', reuse=False) else: logits = slim.fully_connected(prelogits, len(test_set), activation_fn=None, scope='Logits', reuse=False) # Get output tensor if FLAGS.task_name == 'chalearn/age': softmax = tf.nn.softmax(logits=logits) labels_range = tf.range(1.0, 101.0) # [1.0, ..., 100.0] labels_matrix = tf.broadcast_to( labels_range, [args.test_batch_size, labels_range.shape[0]]) result_vector = tf.reduce_sum(softmax * labels_matrix, axis=1) MAE_error_vector = tf.abs(result_vector - tf.cast(label_batch, tf.float32)) MAE_avg_error = tf.reduce_mean(MAE_error_vector) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([MAE_avg_error] + regularization_losses) criterion = MAE_avg_error else: cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses) criterion = cross_entropy_mean init_fn = slim.assign_from_checkpoint_fn(ckpt_file, tf.global_variables()) init_fn(sess) pruned_ratio_relative_to_curr_task = 0.0 model_size = 0.0 if FLAGS.print_mem or FLAGS.print_mask_info: masks = tf.get_collection('masks') if FLAGS.print_mask_info: if masks: num_elems_in_each_task_op = {} num_elems_in_tasks_in_masks_op = { } # two dimentional dictionary num_elems_in_masks_op = [] num_remain_elems_in_masks_op = [] for task_id in range(1, FLAGS.task_id + 1): num_elems_in_each_task_op[task_id] = tf.constant( 0, dtype=tf.int32) num_elems_in_tasks_in_masks_op[task_id] = {} # Define graph for i, mask in enumerate(masks): num_elems_in_masks_op.append(tf.size(mask)) num_remain_elems_in_curr_mask = tf.size(mask) for task_id in range(1, FLAGS.task_id + 1): cnt = tf_count(mask, task_id) num_elems_in_tasks_in_masks_op[task_id][ i] = cnt num_elems_in_each_task_op[task_id] = tf.add( num_elems_in_each_task_op[task_id], cnt) num_remain_elems_in_curr_mask -= cnt num_remain_elems_in_masks_op.append( num_remain_elems_in_curr_mask) num_elems_in_network_op = tf.add_n( num_elems_in_masks_op) print('Calculate pruning status ...') # Doing operation num_elems_in_masks = sess.run(num_elems_in_masks_op) num_elems_in_each_task = sess.run( num_elems_in_each_task_op) num_elems_in_tasks_in_masks = sess.run( num_elems_in_tasks_in_masks_op) num_elems_in_network = sess.run( num_elems_in_network_op) num_remain_elems_in_masks = sess.run( num_remain_elems_in_masks_op) # Print out the result print('Showing pruning status ...') if FLAGS.verbose: for i, mask in enumerate(masks): 
print('Layer %s: ' % mask.op.name, end='') for task_id in range(1, FLAGS.task_id + 1): cnt = num_elems_in_tasks_in_masks[task_id][ i] print('task_%d -> %d/%d (%.2f%%), ' % (task_id, cnt, num_elems_in_masks[i], 100 * cnt / num_elems_in_masks[i]), end='') print('remain -> {:.2f}%'.format( 100 * num_remain_elems_in_masks[i] / num_elems_in_masks[i])) print('Num elems in network: {}'.format( num_elems_in_network)) num_elems_of_usued_weights = num_elems_in_network for task_id in range(1, FLAGS.task_id + 1): print('Num elems in task_{}: {}'.format( task_id, num_elems_in_each_task[task_id])) print('Ratio of task_{} to all: {}'.format( task_id, num_elems_in_each_task[task_id] / num_elems_in_network)) num_elems_of_usued_weights -= num_elems_in_each_task[ task_id] print('Num usued elems in all masks: {}'.format( num_elems_of_usued_weights)) pruned_ratio_relative_to_all_elems = num_elems_of_usued_weights / num_elems_in_network print('Ratio of usused_elem to all: {}'.format( pruned_ratio_relative_to_all_elems)) pruned_ratio_relative_to_curr_task = num_elems_of_usued_weights / ( num_elems_of_usued_weights + num_elems_in_each_task[FLAGS.task_id]) print('Pruning degree relative to task_{}: {:.3f}'. format(FLAGS.task_id, pruned_ratio_relative_to_curr_task)) if FLAGS.print_mem: # Analyze param start_time = time.time() (MB_of_model_through_inference, MB_of_shared_variables, MB_of_task_specific_variables, MB_of_whole_masks, MB_of_task_specific_masks, MB_of_task_specific_batch_norm_variables, MB_of_task_specific_biases ) = model_analyzer.analyze_vars_for_current_task( tf.model_variables(), sess=sess, task_id=FLAGS.task_id, verbose=False) duration = time.time() - start_time print('duration time: {}'.format(duration)) if FLAGS.eval_once: validate( args, sess, image_list, label_list, eval_enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, total_loss, regularization_losses, criterion, accuracy, args.use_fixed_image_standardization, FLAGS.csv_file_path, pruned_ratio_relative_to_curr_task, epoch_no, MB_of_model_through_inference, MB_of_shared_variables, MB_of_task_specific_variables, MB_of_whole_masks, MB_of_task_specific_masks, MB_of_task_specific_batch_norm_variables, MB_of_task_specific_biases) return
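tf_count above is a project helper that is not shown here. A plausible sketch, assuming its job is to count how many entries of a mask tensor equal a given task id (the name and semantics are inferred from how the result is used, not taken from the project's code):

def tf_count(tensor, value):
    # Count elements equal to `value` (assumed semantics of the project's helper).
    return tf.reduce_sum(tf.cast(tf.equal(tensor, value), tf.int32))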
def main(unused_argv): tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default() as graph: num_channels = FLAGS.input_channels num_channels = max(3, num_channels) # simulate num_channels channel input data = imageutils.resize(imageutils.read( 'deeplab/Oxford.street.london.arp.jpg'), (128, 192)).reshape(1, 128, 192, 3) inputs = tf.to_float(numpy.concatenate([data[...,0:1]]*num_channels, axis=3)) # Create the global step on the device storing the variables. global_step = tf.train.get_or_create_global_step() # Define the model and create clones. model_fn = _build_deeplab_inputs num_classes = FLAGS.num_classes model_args = (inputs, {common.OUTPUT_TYPE: num_classes}) model_fn(*model_args) # Soft placement allows placing on CPU ops without GPU implementation. session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False) ### Adapted from code by Paul Upchurch. ### model_vars = slim.get_model_variables() # debug #print("##2##") #print(model_vars) if FLAGS.model_variant == 'xception_65': input_kernel_name = 'xception_65/entry_flow/conv1_1/weights' elif FLAGS.model_variant == 'mobilenet_v2': input_kernel_name = 'MobilenetV2/Conv/weights' else: raise Exception("{} is not supported. Modify the code.".format(FLAGS.model_variant)) variables_to_restore = slim.get_variables_to_restore( exclude=['global_step', input_kernel_name]) #### Deeplab #### checkpoint_dir = FLAGS.source_checkpoint_dir checkpoint_name = FLAGS.source_checkpoint_name loader = slim.assign_from_checkpoint_fn( checkpoint_dir+'/' + checkpoint_name, variables_to_restore, ignore_missing_vars=False) ################ init_op = tf.global_variables_initializer() #print ('##3## init_op...') #print(init_op) saver = tf.train.Saver(tf.global_variables()) with tf.Session(config=session_config) as sess: sess.run(init_op) loader(sess) f = assign_conv2d_from_checkpoint_fn(input_kernel_name+':0', checkpoint_dir+'/' + checkpoint_name, input_kernel_name) f(sess) print('== Expanded kernel, first output feature ==') print(get_tensor_value(model_vars[0], sess).shape) print(get_tensor_value(model_vars[0], sess)[:, :, :, 0]) print('== Original kernel, first output feature ==') print(tf.contrib.framework.load_variable(checkpoint_dir+'/' + checkpoint_name, input_kernel_name).shape) print(tf.contrib.framework.load_variable(checkpoint_dir+'/' + checkpoint_name, input_kernel_name)[:, :, :, 0]) output_dir = FLAGS.output_checkpoint_dir if not os.path.exists(output_dir): os.makedirs(output_dir) print(' == == ==') print('Saving to {}'.format(output_dir)) saver.save(sess, os.path.join(output_dir, "model.ckpt"))
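assign_conv2d_from_checkpoint_fn above is project code that is not shown. As a rough sketch only, assuming its job is to fill the expanded N-channel input kernel from the original 3-channel checkpoint kernel, something like the following tiling-with-rescaling would do it; the helper name, the tiling strategy, and the rescaling factor are all assumptions, not the project's implementation.

import numpy as np
import tensorflow as tf

def expand_input_kernel_init_fn(var, checkpoint_path, ckpt_var_name):
    # Load the original [k, k, 3, out] kernel from the checkpoint.
    value = tf.contrib.framework.load_variable(checkpoint_path, ckpt_var_name)
    num_in = var.get_shape().as_list()[2]
    reps = int(np.ceil(num_in / float(value.shape[2])))
    expanded = np.tile(value, [1, 1, reps, 1])[:, :, :num_in, :].copy()
    # Rescale so the layer's responses keep roughly the original magnitude.
    expanded *= float(value.shape[2]) / float(num_in)
    assign_op = tf.assign(var, expanded)
    return lambda sess: sess.run(assign_op)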
logits, _ = mobilenet_v1.mobilenet_v1(processed_images,
                                      is_training=False,
                                      depth_multiplier=1.0,
                                      num_classes=1001,
                                      id_act_layer_input=layer_id,
                                      act_quant_delta_input=quant_delta,
                                      act_quant_levels_input=quant_level)
# probabilities = tf.nn.softmax(logits)
variables_to_restore = slim.get_variables_to_restore()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
init_fn = slim.assign_from_checkpoint_fn(checkpoint_file, variables_to_restore)
init_fn(sess)

######################## compute output error ############################
final_output_layer_quantized = [0] * n_images
for i in range(n_images):
    img_path = IMAGENET_VAL_PATH + 'ILSVRC2012_val_%08d.JPEG' % (i + 1)
    final_output_layer_quantized[i] = sess.run(
        logits, feed_dict={input_string: img_path})
output_error = 0.0
for i in range(n_images):
    output_error = output_error + np.mean(
        (final_output_layer_original[i] - final_output_layer_quantized[i])**2)
output_error = output_error / n_images
def main(argv=None): os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu now = datetime.datetime.now() StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S") os.makedirs(FLAGS.logs_path + StyleTime) if not os.path.exists(FLAGS.checkpoint_path): os.makedirs(FLAGS.checkpoint_path) input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image') input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox') input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False) tf.summary.scalar('learning_rate', learning_rate) opt = tf.train.AdamOptimizer(learning_rate) gpu_id = int(FLAGS.gpu) with tf.device('/gpu:%d' % gpu_id): with tf.name_scope('model_%d' % gpu_id) as scope: bbox_pred, cls_pred, cls_prob = model.model(input_image) total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss( bbox_pred, cls_pred, input_bbox, input_im_info) batch_norm_updates_op = tf.group( *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) grads = opt.compute_gradients(total_loss) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) summary_op = tf.summary.merge_all() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) with tf.control_dependencies( [variables_averages_op, apply_gradient_op, batch_norm_updates_op]): train_op = tf.no_op(name='train_op') saver = tf.train.Saver(tf.global_variables(), max_to_keep=100) summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime, tf.get_default_graph()) init = tf.global_variables_initializer() if FLAGS.pretrained_model_path is not None: variable_restore_op = slim.assign_from_checkpoint_fn( FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True) config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = 0.95 config.allow_soft_placement = True with tf.Session(config=config) as sess: if FLAGS.restore: ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path) restore_step = int(ckpt.split('.')[0].split('_')[-1]) print("continue training from previous checkpoint {}".format( restore_step)) saver.restore(sess, ckpt) else: sess.run(init) restore_step = 0 if FLAGS.pretrained_model_path is not None: variable_restore_op(sess) data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers) start = time.time() for step in range(restore_step, FLAGS.max_steps): data = next(data_generator) ml, tl, _, summary_str = sess.run( [model_loss, total_loss, train_op, summary_op], feed_dict={ input_image: data[0], input_bbox: data[1], input_im_info: data[2] }) summary_writer.add_summary(summary_str, global_step=step) if step != 0 and step % FLAGS.decay_steps == 0: sess.run( tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate)) if step % 10 == 0: avg_time_per_step = (time.time() - start) / 10 start = time.time() print( 'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}' .format(step, ml, tl, avg_time_per_step, learning_rate.eval())) if (step + 1) % FLAGS.save_checkpoint_steps == 0: filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt') filename = os.path.join(FLAGS.checkpoint_path, filename) saver.save(sess, filename) print('Write model to: {:s}'.format(filename))
if __name__ == '__main__':
    # Test the FPN class output.
    import os
    import numpy as np
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    test_input = tf.Variable(initial_value=tf.ones((5, 384, 384, 3)),
                             dtype=tf.float32)
    fpn_model = FPN('resnet_v1_101', test_input, is_training=True)
    # output = fpn_model.model()
    output = fpn_model.pre_seg_maps
    init_op = tf.global_variables_initializer()
    restore = slim.assign_from_checkpoint_fn(
        'libs\\nets\\resnet_v1_101\\resnet_v1_101.ckpt',
        slim.get_trainable_variables(),
        ignore_missing_vars=True)
    logits, share_net = fpn_model.get_logits_and_share_net()
    feature_maps = fpn_model.get_feature_maps()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        sess.run(init_op)
        restore(sess)
        # out = sess.run([output])
        # print(len(out[0]))
        logit_print = sess.run([logits])
        feature_maps_print = sess.run([feature_maps])
        print('***************logits*****************')
def main():
    # Load the preprocessed data.
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))
    # Define the Inception-v3 inputs: images holds the input pictures and labels
    # holds the label of each picture.
    images = tf.placeholder(tf.float32, [None, 299, 299, 3], name='input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')
    # Define the Inception-v3 model. Google only released the model's parameter
    # values, so the Inception-v3 structure has to be defined in this code. In
    # theory, training and testing should use different configurations (testing
    # should use is_training=False), but because the batch normalization
    # parameters in the pretrained Inception-v3 model differ from the new data,
    # the same model is used for testing as well.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, _ = inception_v3.inception_v3(images,
                                              num_classes=N_CLASSES,
                                              is_training=True)
    print(logits)
    trainable_variables = get_trainable_variables()
    # Define the loss function and the training step.
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)
    total_loss = tf.losses.get_total_loss()
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(total_loss)
    # Compute the accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
    # Define the function that loads the Inception-v3 model pretrained by Google.
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)
    # Define the Saver that saves the new model.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialize the variables that were not loaded from the checkpoint.
        init = tf.global_variables_initializer()
        sess.run(init)
        # Load the model pretrained by Google.
        print('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)
        start = 0
        end = BATCH
        for i in range(STEPS):
            _, loss = sess.run(
                [train_step, total_loss],
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })
            if i % 30 == 0 or i + 1 == STEPS:
                saver.save(sess, TRAIN_FILE, global_step=i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print(
                    'Step %d: Training loss is %.1f Validation accuracy = %.1f%%'
                    % (i, loss, validation_accuracy * 100.0))
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example
        # Finally, evaluate accuracy on the test data.
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
# Experiment initialization and running with tf.Session() as sess: sess.run(init) train_saver = tf.train.Saver() val_saver = tf.train.Saver() if continue_from_epoch != -1: #load checkpoint if needed checkpoint = "saved_models/{}_{}.ckpt".format(experiment_name, continue_from_epoch) variables_to_restore = [] for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): print(var) variables_to_restore.append(var) tf.logging.info('Fine-tuning from %s' % checkpoint) fine_tune = slim.assign_from_checkpoint_fn( checkpoint, variables_to_restore, ignore_missing_vars=True) fine_tune(sess) best_val_acc_mean = 0. best_val_epoch = 6 with tqdm.tqdm(total=epochs) as pbar_e: for e in range(0, epochs): total_train_c_loss_mean, total_train_c_loss_std, total_train_accuracy_mean, total_train_accuracy_std =\ experiment.run_training_epoch(total_train_batches=total_train_batches, sess=sess) print("Epoch {}: train_loss_mean: {}, train_loss_std: {}, train_accuracy_mean: {}, train_accuracy_std: {}" .format(e, total_train_c_loss_mean, total_train_c_loss_std, total_train_accuracy_mean, total_train_accuracy_std)) total_val_c_loss_mean, total_val_c_loss_std, total_val_accuracy_mean, total_val_accuracy_std = \ experiment.run_validation_epoch(total_val_batches=total_val_batches,
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet-Res50', pooling_type="MAX", weight_decay=1e-5, upscaling_method="bilinear", is_training=True, pretrained_dir="models"): """ Builds the PSPNet model. Arguments: inputs: The input tensor label_size: Size of the final label tensor. We need to know this for proper upscaling preset_model: Which model you want to use. Select which ResNet model to use for feature extraction num_classes: Number of classes pooling_type: Max or Average pooling Returns: PSPNet model """ inputs = mean_image_subtraction(inputs) if preset_model == 'PSPNet-Res50': with slim.arg_scope( resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_50( inputs, is_training=is_training, scope='resnet_v2_50') resnet_scope = 'resnet_v2_50' # PSPNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'), slim.get_model_variables('resnet_v2_50')) elif preset_model == 'PSPNet-Res101': with slim.arg_scope( resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_101( inputs, is_training=is_training, scope='resnet_v2_101') resnet_scope = 'resnet_v2_101' # PSPNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'), slim.get_model_variables('resnet_v2_101')) elif preset_model == 'PSPNet-Res152': with slim.arg_scope( resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_152( inputs, is_training=is_training, scope='resnet_v2_152') resnet_scope = 'resnet_v2_152' # PSPNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'), slim.get_model_variables('resnet_v2_152')) else: raise ValueError( "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model)) feature_map_shape = [int(x / 8.0) for x in label_size] print(feature_map_shape) psp = PyramidPoolingModule(end_points['pool3'], feature_map_shape=feature_map_shape, pooling_type=pooling_type) net = slim.conv2d(psp, 512, [3, 3], activation_fn=None) net = slim.batch_norm(net, fused=True) net = tf.nn.relu(net) if upscaling_method.lower() == "conv": net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 256) net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 128) net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 64) elif upscaling_method.lower() == "bilinear": net = Upsampling(net, label_size) net = slim.dropout(net, keep_prob=(0.9)) net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits') return net, init_fn
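A minimal usage sketch for build_pspnet (the placeholder shape, class count, and checkpoint directory below are illustrative assumptions): since the returned init_fn restores only the ResNet backbone, it is run after global initialization so the PSP module and decoder keep their fresh weights.

inputs = tf.placeholder(tf.float32, [None, 512, 512, 3], name='inputs')
net, init_fn = build_pspnet(inputs, label_size=[512, 512], num_classes=21,
                            preset_model='PSPNet-Res50', pretrained_dir='models')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # warm-start resnet_v2_50 from resnet_v2_50.ckpt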
image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
image = tf.convert_to_tensor(image_np)
processed_image = inception_preprocessing.preprocess_image(image,
                                                           image_size,
                                                           image_size,
                                                           is_training=False)
processed_images = tf.expand_dims(processed_image, 0)
# Create the model; use the default arg scope to configure the batch norm
# parameters.
with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
    logits, _ = inception.inception_resnet_v2(processed_images,
                                              num_classes=11,
                                              is_training=False)
probabilities = tf.nn.softmax(logits)
init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(
        "/home/jade/Models/Image_Classif/dfgoods_inception_resnet_v2_use_checkpoitns_2019-04-29",
        'model.ckpt-196478'),
    slim.get_model_variables('InceptionResnetV2'))
with tf.Session() as sess:
    init_fn(sess)
    np_image, probabilities = sess.run([image, probabilities])
    probabilities = probabilities[0, 0:]
    sorted_inds = [
        i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
    ]
    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
        print('Probability %0.2f%% => [%s]' %
def init_fn_part():
    # Load the network weights from the checkpoint, excluding the fc8 head.
    variables_to_restore = slim.get_variables_to_restore(exclude=["vgg_16/fc8"])
    return slim.assign_from_checkpoint_fn(checkpoint_path, variables_to_restore)
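A short usage sketch for init_fn_part (assuming checkpoint_path points at a vgg_16 checkpoint): the returned callable restores every variable except the excluded vgg_16/fc8 head, which keeps its fresh initialization for fine-tuning.

init_fn = init_fn_part()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)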
image_size = inception.inception_v4.default_image_size
with tf.Graph().as_default():
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urllib.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image,
                                                               image_size,
                                                               image_size,
                                                               is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)
    # Create the model; use the default arg scope to configure the batch norm
    # parameters. This must be the Inception-v4 arg scope to match the
    # inception_v4 network and checkpoint below.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
    probabilities = tf.nn.softmax(logits)
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))
    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
        ]
    plt.figure()
    plt.imshow(np_image.astype(np.uint8))
    plt.axis('off')
    plt.show()
    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
def main(argv=None): os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list if not tf.gfile.Exists(FLAGS.checkpoint_path): tf.gfile.MkDir(FLAGS.checkpoint_path) else: if not FLAGS.restore: tf.gfile.DeleteRecursively(FLAGS.checkpoint_path) tf.gfile.MkDir(FLAGS.checkpoint_path) input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps') if FLAGS.geometry == 'RBOX': input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps') else: input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps') input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks') input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.01, staircase=True) # add summary tf.summary.scalar('learning_rate', learning_rate) opt = tf.train.AdamOptimizer(learning_rate) # split input_images_split = tf.split(input_images, len(gpus)) input_score_maps_split = tf.split(input_score_maps, len(gpus)) input_geo_maps_split = tf.split(input_geo_maps, len(gpus)) input_training_masks_split = tf.split(input_training_masks, len(gpus)) input_labels_split = tf.split(input_labels, len(gpus)) tower_grads = [] reuse_variables = None for i, gpu_id in enumerate(gpus): with tf.device('/gpu:%d' % gpu_id): with tf.name_scope('model_%d' % gpu_id) as scope: iis = input_images_split[i] isms = input_score_maps_split[i] igms = input_geo_maps_split[i] itms = input_training_masks_split[i] il = input_labels_split[i] total_loss, model_loss, f_score, f_geometry, _ = tower_loss( iis, isms, igms, itms, il, reuse_variables) #f_score, f_geometry = i_am_testing(iis) batch_norm_updates_op = tf.group( *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) #print "below..." #batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name]) #print "above..." reuse_variables = True #print "below.." #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name] #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name] #print train_var #print "above..." 
train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion') grads = opt.compute_gradients(total_loss, var_list=train_var) tower_grads.append(grads) grads = average_gradients(tower_grads) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) summary_op = tf.summary.merge_all() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) #train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)] variables_averages_op = variable_averages.apply(tf.trainable_variables()) with tf.control_dependencies( [variables_averages_op, apply_gradient_op, batch_norm_updates_op]): train_op = tf.no_op(name='train_op') saver = tf.train.Saver(tf.global_variables()) summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph()) init = tf.global_variables_initializer() if FLAGS.pretrained_model_path is not None: variable_restore_op = slim.assign_from_checkpoint_fn( FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True) my_char_l = "5" my_char_U = "" data_size = 0 train_data_indices = [] list_of_img_pos = [] with open( 'Data/cropped_annotations_new/cropped_annotations' + my_char_l + '.txt', 'r') as f: annotation_file = f.readlines() #with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f: # annotation_file += f.readlines() idx = 0 for line in annotation_file: if len(line) > 1 and line[:13] == './cropped_img' and str( line[14:27]) in training_list: data_size += 1 train_data_indices.append(idx) list_of_img_pos.append(line[14:].split(".")[0] + ".tiff") idx += 1 list_of_img_all = os.listdir('Data/cropped_img') list_of_img_neg = np.array( list(set(list_of_img_all) - set(list_of_img_pos))) print "Char model: " + my_char_U + my_char_l print "Data size: " + str(data_size) epoche_size = data_size / (16 * 2) #print epoche_size print "This many steps per epoche: " + str(epoche_size) list_of_img_neg_char = os.listdir('Data/j') with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: if FLAGS.restore: ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path) model_path = os.path.join( FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) saver.restore(sess, model_path) else: sess.run(init) if FLAGS.pretrained_model_path is not None: variable_restore_op(sess) #print "below:" #tvars = tf.trainable_variables() #g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name] #print g_vars #print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50') #return print FLAGS.learning_rate print reg_constant for step in range(24 * epoche_size): ### Generate Dwata ### data = [], [], [], [], [] np.random.shuffle(train_data_indices) num_im = 0 actual_num_im = 0 while len(data[0]) < 32: prob = np.random.random(1)[0] if prob > 0.49: i = train_data_indices[num_im] im_fn = "Data/cropped_img/" + annotation_file[i][ 14:].split(".tiff", 1)[0] + ".tiff" im = cv2.imread(im_fn) if im is not None: r, c, _ = im.shape text_polys = [] text_tags = [] if int(annotation_file[i + 1]) > 0: for idx in range( i + 2, i + 2 + int(annotation_file[i + 1])): annotation_data = annotation_file[idx] annotation_data = annotation_data.split(" ") x, y = float(annotation_data[0]), float( annotation_data[1]) w, h = float(annotation_data[2]), float( annotation_data[3]) text_polys.append([ list([int(x), int(y - h)]), list([int(x + w), 
int(y - h)]), list([int(x + w), int(y)]), list([int(x), int(y)]) ]) text_tags.append(False) score_map, geo_map, training_mask = icdar.generate_rbox( (int(r), int(c)), np.array(text_polys), np.array(text_tags)) data[0].append(im[:, :, ::-1].astype(np.float32)) data[1].append(im_fn) data[2].append(score_map[::4, ::4, np.newaxis].astype( np.float32)) data[3].append(geo_map[::4, ::4, :].astype(np.float32)) data[4].append(training_mask[::4, ::4, np.newaxis].astype( np.float32)) actual_num_im += 1 num_im += 1 else: im_fn = np.random.choice(list_of_img_neg) im = cv2.imread("Data/cropped_img/" + im_fn) #if prob > 0.25: # im_fn = np.random.choice(list_of_img_neg_char) # im_mini = cv2.imread("Data/j/" + im_fn) # r0, c0, _ = im_mini.shape # im = np.zeros((512, 512, 3), dtype=np.uint8) # ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2 # im[ra:rb, ca:cb, :] = im_mini.copy() if im is not None: r, c, _ = im.shape score_map, geo_map, training_mask = icdar.generate_rbox( (int(r), int(c)), np.array([]), np.array([])) data[0].append(im[:, :, ::-1].astype(np.float32)) data[1].append(im_fn) data[2].append(score_map[::4, ::4, np.newaxis].astype( np.float32)) data[3].append(geo_map[::4, ::4, :].astype(np.float32)) data[4].append(training_mask[::4, ::4, np.newaxis].astype( np.float32)) ### Run model ### ml, tl, _ = sess.run( [model_loss, total_loss, train_op], feed_dict={ input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4] }) epoch = step / epoche_size batch_num = step % epoche_size if step % (epoche_size / 3) == 0: print "Epoch no.: " + str(epoch) + " batch no.: " + str( batch_num) + " loss: " + str(ml) print "Epoch no.: " + str(epoch) + " batch no.: " + str( batch_num) + " loss: " + str(tl) if step % (epoche_size / 2) == 0: #print "Epoche: " + str(step / (epoche_size/2)) saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step) _, tl, summary_str = sess.run( [train_op, total_loss, summary_op], feed_dict={ input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4] }) summary_writer.add_summary(summary_str, global_step=step) if False: count_right = 0 count_wrong = 0 count_posNotDetected = 0 im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1] w, h, _ = im0.shape slide_window = 300 crop_size = 512 crop_center = (256, 256) num_rows, num_cols = int(np.ceil(w / slide_window)), int( np.ceil(h / slide_window)) print num_cols for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]: im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1] boxes_one_rot = [] count = 0 while count < num_rows * num_cols: images, data2, data3, data4 = [], [], [], [] for k in range(16): i = (count + k) / num_rows j = (count + k) % num_cols temp = im[slide_window*i:slide_window*i+crop_size, \ slide_window*j:slide_window*j+crop_size, ::-1] w2, h2, _ = temp.shape if w2 < crop_size or h2 < crop_size: result = np.zeros((crop_size, crop_size, 3)) result[:w2, :h2] = temp temp = result M = cv2.getRotationMatrix2D(crop_center, rot, 1.0) temp = cv2.warpAffine(temp, M, (crop_size, crop_size)) images.append(temp) score_map, geo_map, training_mask = icdar.generate_rbox( (int(crop_size), int(crop_size)), np.array([]), np.array([])) data2.append(score_map[::4, ::4, np.newaxis].astype( np.float32)) data3.append(geo_map[::4, ::4, :].astype( np.float32)) data4.append(training_mask[::4, ::4, np.newaxis].astype( np.float32)) score, geometry = sess.run( [f_score, f_geometry], feed_dict={ input_images: images, 
input_score_maps: data2, input_geo_maps: data3, input_training_masks: data4 }) for k in range(16): i = (count + k) / num_rows j = (count + k) % num_cols boxes = detect(score_map=score[j], geo_map=geometry[j], score_map_thresh=0.01, box_thresh=0.01, nms_thres=0.01) if boxes is not None: boxes = boxes[:, :8].reshape((-1, 4, 2)) for box in boxes: M_inv = cv2.getRotationMatrix2D( crop_center, -1 * rot, 1) box[0] = M_inv.dot( np.array((box[0, 0], box[0, 1]) + (1, ))) box[1] = M_inv.dot( np.array((box[1, 0], box[1, 1]) + (1, ))) box[2] = M_inv.dot( np.array((box[2, 0], box[2, 1]) + (1, ))) box[3] = M_inv.dot( np.array((box[3, 0], box[3, 1]) + (1, ))) box = sort_poly(box.astype(np.int32)) box[0, 0] = box[0, 0] + j * slide_window box[0, 1] = box[0, 1] + i * slide_window box[1, 0] = box[1, 0] + j * slide_window box[1, 1] = box[1, 1] + i * slide_window box[2, 0] = box[2, 0] + j * slide_window box[2, 1] = box[2, 1] + i * slide_window box[3, 0] = box[3, 0] + j * slide_window box[3, 1] = box[3, 1] + i * slide_window boxes_one_rot.append(box) boxes_single_rot = np.zeros((len(boxes_one_rot), 9)) boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape( (-1, 8)) boxes_single_rot[:, 8] = 1 labels += boxes_single_rot.tolist() boxes = lanms.merge_quadrangle_n9(np.array(labels), nms_thres) annotation = np.load( "/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" + "D0117-5755036" + ".npy").item() ### Compute the TP, FP, FN info for each image count_right_cache = 0 boxes = boxes[:, :8].reshape((-1, 4, 2)) num_true_pos = len(annotation) for box in boxes: box = sort_poly(box.astype(np.int32)) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm( box[3] - box[0]) < 5: continue k = 0 idx = 0 count_wrong += 1 while (idx < num_true_pos): if k in annotation: proposed_label = annotation[k]['vertices'] if len(proposed_label) == 4: x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], proposed_label[1][0], proposed_label[1][1], \ proposed_label[2][0], proposed_label[2][1], proposed_label[3][0], proposed_label[3][1] if (checkIOU(box, [[x0, y0], [x1, y1], [x2, y2], [x3, y3]]) == True): count_right_cache += 1 count_wrong -= 1 break idx += 1 k += 1 count_posNotDetected += num_true_pos - count_right_cache count_right += count_right_cache precision = (float)(count_right) / (float)( count_right + count_wrong) # TP / TP + FP recall = (float)(count_right) / (float)( count_right + count_posNotDetected) # TP / TP + FN fscore = 2 * (precision * recall) / (precision + recall) print "Precision, recall, fscore: " + str( precision) + ", " + str(recall) + ", " + str(fscore)
def main(): parser = argparse.ArgumentParser( description='Preprocess imagenet dataset for qvis') parser.add_argument('--datapath', type=str, help='location of imagenet dataset') parser.add_argument('--modelpath', type=str, help='location of tensorflow-slim model') parser.add_argument('--batch_size', type=int, help='batch size', default=32) parser.add_argument('--pca', action='store_true') parser.add_argument('--fvec', action='store_true') args = parser.parse_args() if args.fvec: transfer_to_fvecs() return if args.pca: do_pca() return # infer resnet config = tf.ConfigProto() # config.operation_timeout_in_ms = 6000 dataset = get_dataset(args.datapath) # from tensorflow.python.training import input as tf_input # from tensorflow.contrib.slim.python.slim.data import parallel_reader # data_files = parallel_reader.get_data_files(args.datapath) # print(len(data_files), 'files.') # filename_queue = tf_input.string_input_producer(data_files, num_epochs=1, shuffle=False, name='filenames') # reader = tf.TFRecordReader() # key, value = reader.read(filename_queue) # dvalue = dataset.decoder.decode(value) # with tf.Session(config=config) as sess: # ini_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) # sess.run(ini_op) # coord = tf.train.Coordinator() # threads = tf.train.start_queue_runners(coord=coord) # counter = 0 # while True: # k, v = sess.run([key, dvalue]) # counter += 1 # # print(k, v) # print(k, counter) # return provider = slim.dataset_data_provider.DatasetDataProvider(dataset, shuffle=False, num_epochs=1) images, labels = provider.get(['image', 'label']) # import urllib # url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg' # image_string = urllib.request.urlopen(url).read() # image = tf.image.decode_jpeg(image_string, channels=3) processed_images = preprocess_for_eval(images, 224, 224) # processed_images = tf.expand_dims(processed_images, 0) # Batch up processed_images, labels = tf.train.batch([processed_images, labels], batch_size=args.batch_size, num_threads=8, capacity=2 * args.batch_size, allow_smaller_final_batch=True) with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, endpoints = resnet_v1.resnet_v1_50(processed_images, num_classes=1000, scope='resnet_v1_50', is_training=False) pool5 = math_ops.reduce_mean(endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=True) vectors = tf.squeeze(pool5, axis=[1, 2]) init_fn = slim.assign_from_checkpoint_fn(args.modelpath, slim.get_model_variables()) vectors_to_save = [] labels_to_save = [] with tf.Session(config=config) as sess: ini_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(ini_op) coord = tf.train.Coordinator() thread = tf.train.start_queue_runners(sess=sess, coord=coord) init_fn(sess) # prob = tf.squeeze(logits, axis=[1, 2]) # probabilities = tf.nn.softmax(prob, dim=-1) counter = 0 while True: try: vector, label = sess.run([vectors, labels]) except OutOfRangeError as e: break print(vector.shape) vectors_to_save.append(vector) labels_to_save.append(label) counter += vector.shape[0] print(counter) # results, gtlabel = sess.run([probabilities, labels]) # print(sorted(enumerate(results[0]), key=lambda x: -x[1])[:5], gtlabel) np.save("imagenet_resnet_v1_50_vectors.npy", np.concatenate(vectors_to_save)) np.save("imagenet_resnet_v1_50_lables.npy", np.concatenate(labels_to_save))
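The script above dumps pooled ResNet features to imagenet_resnet_v1_50_vectors.npy and imagenet_resnet_v1_50_lables.npy (filename spelling as in the code). As an illustrative follow-on sketch, a brute-force nearest-neighbor query over those saved vectors might look like this:

import numpy as np

vectors = np.load('imagenet_resnet_v1_50_vectors.npy')  # [N, 2048] pooled features
labels = np.load('imagenet_resnet_v1_50_lables.npy')    # [N] ground-truth labels
query = vectors[0]
dists = np.linalg.norm(vectors - query[None, :], axis=1)
nearest = np.argsort(dists)[:5]
print(list(zip(nearest.tolist(), labels[nearest].tolist())))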
# convolutional manner
logits, _ = vgg.vgg_16(input_image,
                       num_classes=1000,
                       is_training=False,
                       spatial_squeeze=False)
# For each pixel we get predictions for each class out of 1000 and need to pick
# the one with the highest value. To be precise, these are not probabilities,
# because we didn't apply softmax; but picking the class with the highest logit
# is equivalent to picking the highest value after applying softmax.
pred = tf.argmax(logits, dimension=3)
init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
    slim.get_model_variables('vgg_16'))
with tf.Session() as sess:
    init_fn(sess)
    segmentation, np_image = sess.run([pred, image])
# Remove the first empty dimension.
segmentation = np.squeeze(segmentation)
# Get the unique predicted classes (from 0 to 1000) and relabel the original
# predictions so that classes are numbered starting from zero.
unique_classes, relabeled_image = np.unique(segmentation, return_inverse=True)
segmentation_size = segmentation.shape
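As a small follow-on sketch, the (unique_classes, relabeled_image) pair computed above can be mapped back to the image grid to inspect which ImageNet classes were predicted and how much area each covers; the reshape is needed because np.unique returns flattened inverse indices.

relabeled_image = relabeled_image.reshape(segmentation_size)
for compact_id, class_id in enumerate(unique_classes):
    num_pixels = int(np.sum(relabeled_image == compact_id))
    print('class %d -> %d pixels' % (class_id, num_pixels))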
image_size = inception.inception_v4.default_image_size
checkpoints_dir = '/Users/zhangxin/data_public/goolenet/v4'  # inception_v4.ckpt
with tf.Graph().as_default():
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urllib.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)
    # Create the model; use the default arg scope to configure the batch norm
    # parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
    probabilities = tf.nn.softmax(logits)
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))
    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
        ]
        # plt.figure()
        # plt.imshow(np_image.astype(np.uint8))
        # plt.axis('off')
        # plt.show()
        names = imagenet.create_readable_names_for_imagenet_labels()
        for i in range(5):
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode): # Set up the model. tf.set_random_seed(args.solver.seed) task_params = args.navtask.task_params num_steps = task_params.num_steps num_goals = task_params.num_goals num_actions = task_params.num_actions num_actions_ = num_actions n_views = task_params.n_views batch_norm_is_training_op = \ tf.placeholder_with_default(batch_norm_is_training, shape=[], name='batch_norm_is_training_op') # Setup the inputs m.input_tensors = {} lstm_states = []; lstm_state_dims = []; state_names = []; updated_state_ops = []; init_state_ops = []; if args.arch.lstm_output: lstm_states += ['lstm_output'] lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions] if args.arch.lstm_ego: lstm_states += ['lstm_ego'] lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out] lstm_states += ['lstm_img'] lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out] elif args.arch.lstm_img: # An LSTM only on the image lstm_states += ['lstm_img'] lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out] else: # No LSTMs involved here. None m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \ _inputs(task_params, lstm_states, lstm_state_dims) with tf.name_scope('check_size'): is_single_step = tf.equal(tf.unstack(tf.shape(m.input_tensors['step']['imgs']), num=6)[1], 1) images_reshaped = tf.reshape(m.input_tensors['step']['imgs'], shape=[-1, task_params.img_height, task_params.img_width, task_params.img_channels], name='re_image') rel_goal_loc_reshaped = tf.reshape(m.input_tensors['step']['rel_goal_loc'], shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc') x, vars_ = get_repr_from_image( images_reshaped, task_params.modalities, task_params.data_augment, args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay, is_training) # Reshape into nice things so that these can be accumulated over time steps # for faster backprop. sh_before = x.get_shape().as_list() m.encoder_output = tf.reshape( x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:]) x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:]) # Add a layer to reduce dimensions for a fc layer. if args.arch.dim_reduce_neurons > 0: ks = 1; neurons = args.arch.dim_reduce_neurons; init_var = np.sqrt(2.0/(ks**2)/neurons) batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op m.conv_feat = slim.conv2d( x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce', weights_regularizer=slim.l2_regularizer(args.solver.wt_decay), weights_initializer=tf.random_normal_initializer(stddev=init_var)) reshape_conv_feat = slim.flatten(m.conv_feat) sh = reshape_conv_feat.get_shape().as_list() m.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views]) # Restore these from a checkpoint. if args.solver.pretrained_path is not None: m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path, vars_) else: m.init_fn = None # Hit the goal_location with a bunch of fully connected layers, to embed it # into some space. 
with tf.variable_scope('embed_goal'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op m.embed_goal, _ = tf_utils.fc_network( rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay, name='goal_embed', offset=0, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, is_training=is_training) if args.arch.embed_goal_for_state: with tf.variable_scope('embed_goal_for_state'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op m.embed_goal_for_state, _ = tf_utils.fc_network( m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:], neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay, name='goal_embed', offset=0, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, is_training=is_training) # Hit the goal_location with a bunch of fully connected layers, to embed it # into some space. with tf.variable_scope('embed_img'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op m.embed_img, _ = tf_utils.fc_network( m.reshape_conv_feat, neurons=args.arch.img_embed_neurons, wt_decay=args.solver.wt_decay, name='img_embed', offset=0, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, is_training=is_training) # For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an # LSTM, combine with image features and accumulate those in an LSTM. Finally # combine what you get from the image LSTM with the goal to output an action. if args.arch.lstm_ego: ego_reshaped = preprocess_egomotion(m.input_tensors['step']['incremental_locs'], m.input_tensors['step']['incremental_thetas']) with tf.variable_scope('embed_ego'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op m.embed_ego, _ = tf_utils.fc_network( ego_reshaped, neurons=args.arch.ego_embed_neurons, wt_decay=args.solver.wt_decay, name='ego_embed', offset=0, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, is_training=is_training) state_name, state_init_op, updated_state_op, out_op = lstm_setup( 'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step, args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals, m.input_tensors['step']['lstm_ego']) state_names += [state_name] init_state_ops += [state_init_op] updated_state_ops += [updated_state_op] # Combine the output with the vision features. m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego, m.embed_img, out_op, args.arch.img_embed_neurons[-1], args.arch.lstm_ego_out) # LSTM on these vision features. state_name, state_init_op, updated_state_op, out_op = lstm_setup( 'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step, args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals, m.input_tensors['step']['lstm_img']) state_names += [state_name] init_state_ops += [state_init_op] updated_state_ops += [updated_state_op] m.img_for_goal = out_op num_img_for_goal_neurons = args.arch.lstm_img_out elif args.arch.lstm_img: # LSTM on just the image features. 
state_name, state_init_op, updated_state_op, out_op = lstm_setup( 'lstm_img', m.embed_img, task_params.batch_size, is_single_step, args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals, m.input_tensors['step']['lstm_img']) state_names += [state_name] init_state_ops += [state_init_op] updated_state_ops += [updated_state_op] m.img_for_goal = out_op num_img_for_goal_neurons = args.arch.lstm_img_out else: m.img_for_goal = m.embed_img num_img_for_goal_neurons = args.arch.img_embed_neurons[-1] if args.arch.use_visit_count: m.embed_visit_count = visit_count_fc( m.input_tensors['step']['visit_count'], m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons, args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training) m.embed_goal = m.embed_goal + m.embed_visit_count m.combined_f = combine_setup('img_goal', args.arch.combine_type, m.img_for_goal, m.embed_goal, num_img_for_goal_neurons, args.arch.goal_embed_neurons[-1]) # LSTM on the combined representation. if args.arch.lstm_output: name = 'lstm_output' # A few fully connected layers here. with tf.variable_scope('action_pred'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op x, _ = tf_utils.fc_network( m.combined_f, neurons=args.arch.pred_neurons, wt_decay=args.solver.wt_decay, name='pred', offset=0, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout) if args.arch.lstm_output_init_state_from_goal: # Use the goal embedding to initialize the LSTM state. # UGLY CLUGGY HACK: if this is doing computation for a single time step # then this will not involve back prop, so we can use the state input from # the feed dict, otherwise we compute the state representation from the # goal and feed that in. Necessary for using goal location to generate the # state representation. m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, dim=1) state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name], lambda: m.embed_goal_for_state) state_name, state_init_op, updated_state_op, out_op = lstm_setup( name, x, task_params.batch_size, is_single_step, args.arch.lstm_output_dim, num_actions_, num_steps*num_goals, state_op) init_state_ops += [m.embed_goal_for_state] else: state_op = m.input_tensors['step'][name] state_name, state_init_op, updated_state_op, out_op = lstm_setup( name, x, task_params.batch_size, is_single_step, args.arch.lstm_output_dim, num_actions_, num_steps*num_goals, state_op) init_state_ops += [state_init_op] state_names += [state_name] updated_state_ops += [updated_state_op] out_op = tf.reshape(out_op, shape=[-1, num_actions_]) if num_actions_ > num_actions: m.action_logits_op = out_op[:,:num_actions] m.baseline_op = out_op[:,num_actions:] else: m.action_logits_op = out_op m.baseline_op = None m.action_prob_op = tf.nn.softmax(m.action_logits_op) else: # A few fully connected layers here. 
with tf.variable_scope('action_pred'): batch_norm_param = args.arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op out_op, _ = tf_utils.fc_network( m.combined_f, neurons=args.arch.pred_neurons, wt_decay=args.solver.wt_decay, name='pred', offset=0, num_pred=num_actions_, batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, is_training=is_training) if num_actions_ > num_actions: m.action_logits_op = out_op[:,:num_actions] m.baseline_op = out_op[:,num_actions:] else: m.action_logits_op = out_op m.baseline_op = None m.action_prob_op = tf.nn.softmax(m.action_logits_op) m.train_ops = {} m.train_ops['step'] = m.action_prob_op m.train_ops['common'] = [m.input_tensors['common']['orig_maps'], m.input_tensors['common']['goal_loc'], m.input_tensors['common']['rel_goal_loc_at_start']] m.train_ops['state_names'] = state_names m.train_ops['init_state'] = init_state_ops m.train_ops['updated_state'] = updated_state_ops m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op # Flat list of ops which cache the step data. m.train_ops['step_data_cache'] = [tf.no_op()] if args.solver.freeze_conv: m.train_ops['step_data_cache'] = [m.encoder_output] else: m.train_ops['step_data_cache'] = [] ewma_decay = 0.99 if is_training else 0.0 weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32, name='weight') m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \ compute_losses_multi_or( m.action_logits_op, m.input_tensors['train']['action'], weights=weight, num_actions=num_actions, data_loss_wt=args.solver.data_loss_wt, reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay) if args.solver.freeze_conv: vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_)) else: vars_to_optimize = None m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \ m.sync_optimizer = tf_utils.setup_training( m.total_loss_op, args.solver.initial_learning_rate, args.solver.steps_per_decay, args.solver.learning_rate_decay, args.solver.momentum, args.solver.max_steps, args.solver.sync, args.solver.adjust_lr_sync, args.solver.num_workers, args.solver.task, vars_to_optimize=vars_to_optimize, clip_gradient_norm=args.solver.clip_gradient_norm, typ=args.solver.typ, momentum2=args.solver.momentum2, adam_eps=args.solver.adam_eps) if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay': m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k, m.global_step_op) elif args.arch.sample_gt_prob_type == 'zero': m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32) elif args.arch.sample_gt_prob_type.split('_')[0] == 'step': step = int(args.arch.sample_gt_prob_type.split('_')[1]) m.sample_gt_prob_op = tf_utils.step_gt_prob( step, m.input_tensors['step']['step_number'][0,0,0]) m.sample_action_type = args.arch.action_sample_type m.sample_action_combine_type = args.arch.action_sample_combine_type _add_summaries(m, summary_mode, args.summary.arop_full_summary_iters) m.init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4, write_version=tf.train.SaverDef.V2) return m
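The freeze_conv branch in setup_to_run restores the encoder weights with slim.assign_from_checkpoint_fn and then excludes exactly those variables from the optimization set. A minimal sketch of that freeze pattern, under assumed scope names ('encoder', 'head') and a hypothetical checkpoint path:

import tensorflow as tf
import tensorflow.contrib.slim as slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net = slim.conv2d(images, 32, [3, 3], scope='encoder/conv1')           # pretrained part
logits = slim.fully_connected(slim.flatten(net), 10, scope='head/fc')  # new part

encoder_vars = slim.get_model_variables('encoder')
init_fn = slim.assign_from_checkpoint_fn(
    '/tmp/encoder.ckpt',                         # hypothetical checkpoint
    encoder_vars)

# Train everything except the restored encoder weights.
vars_to_optimize = list(set(tf.trainable_variables()) - set(encoder_vars))
loss = tf.reduce_mean(tf.square(logits))         # dummy loss for the sketch
train_op = tf.train.MomentumOptimizer(0.01, 0.9).minimize(
    loss, var_list=vars_to_optimize)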
def train(H, test_images): ''' Setup computation graph, run 2 prefetch data threads, and then run the main loop ''' if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir']) ckpt_file = H['save_dir'] + '/save.ckpt' with open(H['save_dir'] + '/hypes.json', 'w') as f: json.dump(H, f, indent=4) x_in = tf.placeholder(tf.float32) confs_in = tf.placeholder(tf.float32) boxes_in = tf.placeholder(tf.float32) q = {} enqueue_op = {} for phase in ['train', 'test']: dtypes = [tf.float32, tf.float32, tf.float32] grid_size = H['grid_width'] * H['grid_height'] shapes = ( [H['image_height'], H['image_width'], 3], [grid_size, H['rnn_len'], H['num_classes']], [grid_size, H['rnn_len'], 4], ) q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes) enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in)) def make_feed(d): return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'], learning_rate: H['solver']['learning_rate']} def thread_loop(sess, enqueue_op, phase, gen): for d in gen: sess.run(enqueue_op[phase], feed_dict=make_feed(d)) (config, loss, accuracy, summary_op, train_op, smooth_op, global_step, learning_rate) = build(H, q) saver = tf.train.Saver(max_to_keep=None) writer = tf.train.SummaryWriter( logdir=H['save_dir'], flush_secs=10 ) with tf.Session(config=config) as sess: tf.train.start_queue_runners(sess=sess) for phase in ['train', 'test']: # enqueue once manually to avoid thread start delay gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter']) d = gen.next() sess.run(enqueue_op[phase], feed_dict=make_feed(d)) t = threading.Thread(target=thread_loop, args=(sess, enqueue_op, phase, gen)) t.daemon = True t.start() tf.set_random_seed(H['solver']['rnd_seed']) sess.run(tf.initialize_all_variables()) writer.add_graph(sess.graph) weights_str = H['solver']['weights'] if len(weights_str) > 0: print('Restoring from: %s' % weights_str) saver.restore(sess, weights_str) else: init_fn = slim.assign_from_checkpoint_fn( '%s/data/inception_v1.ckpt' % os.path.dirname(os.path.realpath(__file__)), [x for x in tf.all_variables() if x.name.startswith('InceptionV1') and not H['solver']['opt'] in x.name]) init_fn(sess) # train model for N iterations start = time.time() max_iter = H['solver'].get('max_iter', 10000000) for i in xrange(max_iter): display_iter = H['logging']['display_iter'] adjusted_lr = (H['solver']['learning_rate'] * 0.5 ** max(0, (i / H['solver']['learning_rate_step']) - 2)) lr_feed = {learning_rate: adjusted_lr} if i % display_iter != 0: # train network batch_loss_train, _ = sess.run([loss['train'], train_op], feed_dict=lr_feed) else: # test network every N iterations; log additional info if i > 0: dt = (time.time() - start) / (H['batch_size'] * display_iter) start = time.time() (train_loss, test_accuracy, summary_str, _, _) = sess.run([loss['train'], accuracy['test'], summary_op, train_op, smooth_op, ], feed_dict=lr_feed) writer.add_summary(summary_str, global_step=global_step.eval()) print_str = string.join([ 'Step: %d', 'lr: %f', 'Train Loss: %.2f', 'Softmax Test Accuracy: %.1f%%', 'Time/image (ms): %.1f' ], ', ') print(print_str % (i, adjusted_lr, train_loss, test_accuracy * 100, dt * 1000 if i > 0 else 0)) if global_step.eval() % H['logging']['save_iter'] == 0 or global_step.eval() == max_iter - 1: saver.save(sess, ckpt_file, global_step=global_step)
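The restore branch above filters tf.all_variables() by the 'InceptionV1' name prefix and also drops optimizer slot variables by checking for the optimizer's name, since those slots never exist in a pretrained checkpoint. The same filter, isolated into a small runnable sketch (the variable and optimizer here are fabricated for illustration):

import tensorflow as tf
import tensorflow.contrib.slim as slim

with tf.variable_scope('InceptionV1'):
    w = tf.get_variable('w', [3, 3])
# Creating a Momentum slot produces a variable named 'InceptionV1/w/Momentum'.
train_op = tf.train.MomentumOptimizer(0.1, 0.9).minimize(tf.reduce_sum(w))

def restorable_variables(prefix, optimizer_name):
    # Keep variables under `prefix`, but drop optimizer slots, which are
    # absent from pretrained checkpoints.
    return [v for v in tf.global_variables()
            if v.name.startswith(prefix) and optimizer_name not in v.name]

print([v.name for v in restorable_variables('InceptionV1', 'Momentum')])
# -> ['InceptionV1/w:0']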
def train(dataset, epochs, batch_size, weight_path):
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True, batch_norm_decay=0.99)):
        im_inputs = tf.placeholder(tf.float32, [None, dataset.input_shape[0], dataset.input_shape[1], 3], name="inputs")
        # images_arg = data_augmentation(im_inputs)
        y_true = tf.placeholder(tf.float32, [None, dataset.num_classes], name="labels")
        logits, endpoints = mobilenet_v1.mobilenet_v1(im_inputs, num_classes=dataset.num_classes, is_training=True, global_pool=True)
        net_out_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_true))
        weight_loss = tf.losses.get_regularization_losses()
        # net_out_loss = tf.losses.get_losses()
        variable_summaries(net_out_loss, "net_loss")
        all_loss = weight_loss
        cost = tf.add_n(all_loss) + net_out_loss
        variable_summaries(cost, "total_loss")
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        global_step = tf.Variable(0, trainable=False)
        with tf.control_dependencies(update_ops):
            Adam_optim = tf.train.AdamOptimizer(learning_rate=0.0001)
            Momentum_optim = tf.train.MomentumOptimizer(momentum=0.9, learning_rate=0.0001)
            optim = slim.learning.create_train_op(cost, Momentum_optim, global_step=global_step)
            # Momentum_optim = tf.train.MomentumOptimizer(momentum=0.9, learning_rate=0.001).minimize(cost, global_step=global_step)
        with tf.name_scope('evaluation'):
            correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_true, 1))
            evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            variable_summaries(evaluation_step, "accuracy")
        train_writer = tf.summary.FileWriter("log", tf.get_default_graph())
        merge_summary = tf.summary.merge_all()
        vars = slim.get_model_variables()
        saver = tf.train.Saver(tf.global_variables())
        load_fn = slim.assign_from_checkpoint_fn(weight_path, tf.global_variables(), ignore_missing_vars=True)
    with tf.Session() as sess:
        print("load:", weight_path)
        saver.restore(sess, weight_path)
        for epoch in range(epochs):
            startTime = time.time()
            for iter_ in range(dataset.num_data // batch_size):
                x, y = dataset.read_data_label(batch_size)
                if iter_ % 50 == 0:
                    loss, _, train_summary, step = sess.run([cost, optim, merge_summary, global_step], feed_dict={im_inputs: x, y_true: y})
                    val_loss, validation_accuracy = sess.run([cost, evaluation_step], feed_dict={im_inputs: x, y_true: y})
                    train_writer.add_summary(train_summary, step)
                    print("epoch:{};iter:{};train_loss:{};val_loss:{};val_acc:{};step:{}".format(epoch, iter_, loss, val_loss, validation_accuracy, step))
                else:
                    _ = sess.run([optim], feed_dict={im_inputs: x, y_true: y})
            endTime = time.time()
            print("epoch_time:{}".format(endTime - startTime))
        saver.save(sess, "model/416_tree_mobilev1.ckpt")
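Note that load_fn above is built with ignore_missing_vars=True but never called; saver.restore is used instead, and a plain Saver requires the checkpoint to contain every variable in its list. The difference matters when the graph holds variables the checkpoint lacks, as in this hedged sketch (scopes and path are made up):

import tensorflow as tf
import tensorflow.contrib.slim as slim

old = slim.fully_connected(tf.placeholder(tf.float32, [1, 4]), 2, scope='old')
new = slim.fully_connected(tf.placeholder(tf.float32, [1, 4]), 2, scope='new')

# saver.restore would fail if the checkpoint lacks the 'new' variables;
# with ignore_missing_vars=True they are skipped with a warning instead,
# so they still need a regular initializer run before use.
load_fn = slim.assign_from_checkpoint_fn(
    '/tmp/old_model.ckpt',                       # hypothetical checkpoint
    slim.get_model_variables(),
    ignore_missing_vars=True)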
#loss = tf.reduce_mean(loss) with tf.Session() as sess: if FLAGS.train == True: sess.run([ tf.global_variables_initializer(), tf.local_variables_initializer() ]) var_list = [] for x in slim.get_model_variables(): if not ("MobilenetV1/AuxLogits" in x.op.name or "MobilenetV1/Logits" in x.op.name or "MobilenetV1" not in x.op.name): var_list.append(x) mobilenet_restore = slim.assign_from_checkpoint_fn( 'checkpoints/mobilenet_v1_1.0_224.ckpt', var_list, ignore_missing_vars=True) mobilenet_restore(sess) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) #saver = tf.train.Saver() #saver.restore(sess, './checkpoints/model.ckpt') #images = input_pipeline(filenames, 32, num_epochs=10000) try: step = 0 while not coord.should_stop(): result = sess.run([op1, op2, update_k]) result = sess.run([incr, global_step]) step = result[-1]
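The hand-written loop above keeps every MobilenetV1 variable except the two classification heads. slim.get_variables_to_restore expresses the same selection declaratively; a sketch with stand-in variables (the two tf.get_variable names below are fabricated to mimic the MobilenetV1 layout):

import tensorflow as tf
import tensorflow.contrib.slim as slim

# Stand-in variables mimicking the MobilenetV1 naming of the snippet above.
with tf.variable_scope('MobilenetV1'):
    with tf.variable_scope('Conv2d_0'):
        body = tf.get_variable('weights', [3, 3, 3, 8])
    with tf.variable_scope('Logits'):
        head = tf.get_variable('weights', [1, 1, 8, 5])

# Same selection as the manual loop: everything under MobilenetV1 except
# the classification heads, whose shapes differ for the new task.
var_list = slim.get_variables_to_restore(
    include=['MobilenetV1'],
    exclude=['MobilenetV1/Logits', 'MobilenetV1/AuxLogits'])
print([v.op.name for v in var_list])   # -> ['MobilenetV1/Conv2d_0/weights']

restore_fn = slim.assign_from_checkpoint_fn(
    'checkpoints/mobilenet_v1_1.0_224.ckpt',     # path from the snippet above
    var_list, ignore_missing_vars=True)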
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())
    init = tf.global_variables_initializer()
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True)
    step = 0
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is not None:
                print('continue training from previous checkpoint')
                model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)
                print(sess.run(global_step))
                # resume the loop counter from the checkpoint filename
                step = int(model_path.split('-')[-1]) - 1
            #else :
            #    print('Load the backbone, Name {}'.format(FLAGS.backbone))
            #    load_layers = tf.global_variables(scope=FLAGS.backbone)
            #    print(load_layers)
            #    saver = tf.train.Saver(load_layers)
            #    saver.restore(sess, FLAGS.backbone_ckpt)
            #    step = 0
            else:
                sess.run(init)
                #for layer in tf.global_variables(scope='Mobilenet')[:2]:
                #    print("layer name : {} mean : {}".format(layer.name, sess.run(tf.reduce_mean(layer.eval(session=sess)))))
                if FLAGS.pretrained_model_path is not None:
                    print("--------------------------------")
                    print("---Load the Pretrained Weights---")
                    print("--------------------------------")
                    variable_restore_op(sess)
                    #for layer in tf.global_variables(scope='Mobilenet')[:2]:
                    #    print("layer name : {} mean : {}".format(layer.name, sess.run(tf.reduce_mean(layer.eval(session=sess)))))
        else:
            sess.run(init)
        total_parameters = 0
        for variable in tf.trainable_variables():
            local_parameters = 1
            shape = variable.get_shape()  # getting shape of a variable
            for i in shape:
                local_parameters *= i.value  # multiplying dimension values
            total_parameters += local_parameters
        print("-----params-----", total_parameters)
        if os.name == 'nt':
            workers = 0
        else:
            workers = multiprocessing.cpu_count()
        print(" num of worker : ", workers)
        data_generator = icdar.get_batch(num_workers=workers, input_size=FLAGS.input_size, batch_size=FLAGS.batch_size_per_gpu * len(gpus))
        start = time.time()
        while step < FLAGS.max_steps:
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]})
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus)) / (time.time() - start)
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(step, ml, tl, avg_time_per_step, avg_examples_per_second))
            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
            step += 1
def main(argv=None): import os os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list if not tf.gfile.Exists(FLAGS.checkpoint_path): tf.gfile.MkDir(FLAGS.checkpoint_path) else: if not FLAGS.restore: tf.gfile.DeleteRecursively(FLAGS.checkpoint_path) tf.gfile.MkDir(FLAGS.checkpoint_path) input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps') if FLAGS.geometry == 'RBOX': input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps') else: input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps') input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True) # add summary tf.summary.scalar('learning_rate', learning_rate) opt = tf.train.AdamOptimizer(learning_rate) # opt = tf.train.MomentumOptimizer(learning_rate, 0.9) # split input_images_split = tf.split(input_images, len(gpus)) input_score_maps_split = tf.split(input_score_maps, len(gpus)) input_geo_maps_split = tf.split(input_geo_maps, len(gpus)) input_training_masks_split = tf.split(input_training_masks, len(gpus)) tower_grads = [] reuse_variables = None for i, gpu_id in enumerate(gpus): with tf.device('/gpu:%d' % gpu_id): with tf.name_scope('model_%d' % gpu_id) as scope: iis = input_images_split[i] isms = input_score_maps_split[i] igms = input_geo_maps_split[i] itms = input_training_masks_split[i] total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables) batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) reuse_variables = True grads = opt.compute_gradients(total_loss) tower_grads.append(grads) grads = average_gradients(tower_grads) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) summary_op = tf.summary.merge_all() # save moving average variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) # batch norm updates with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]): train_op = tf.no_op(name='train_op') saver = tf.train.Saver(tf.global_variables()) summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph()) init = tf.global_variables_initializer() if FLAGS.pretrained_model_path is not None: variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: if FLAGS.restore: print 'continue training from previous checkpoint' ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path) saver.restore(sess, ckpt) else: sess.run(init) if FLAGS.pretrained_model_path is not None: variable_restore_op(sess) data_generator = icdar.get_batch(num_workers=FLAGS.num_readers, input_size=FLAGS.input_size, batch_size=FLAGS.batch_size * len(gpus)) start = time.time() for step in xrange(FLAGS.max_steps): data = data_generator.next() ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], 
input_training_masks: data[4]}) if np.isnan(tl): print 'Loss diverged, stop training' break if step % 10 == 0: avg_time_per_step = (time.time() - start)/10 avg_examples_per_second = (10 * FLAGS.batch_size * len(gpus))/(time.time() - start) start = time.time() print 'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format( step, ml, tl, avg_time_per_step, avg_examples_per_second) if step % FLAGS.save_checkpoint_steps == 0: saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step) if step % FLAGS.save_summary_steps == 0: _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]}) summary_writer.add_summary(summary_str, global_step=step)
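A detail worth noting in both EAST variants: variable_restore_op is constructed before the session and invoked only after sess.run(init), so the checkpoint assignment overwrites freshly initialized values rather than leaving some variables uninitialized. A minimal sketch of that ordering (the checkpoint path is hypothetical):

import tensorflow as tf
import tensorflow.contrib.slim as slim

logits = slim.fully_connected(tf.placeholder(tf.float32, [1, 8]), 4)
init = tf.global_variables_initializer()

# Building the restore function adds its restore ops (via an internal
# Saver) to the graph, so it is created up front with everything else.
restore_fn = slim.assign_from_checkpoint_fn(
    '/tmp/pretrained.ckpt',                      # hypothetical path
    slim.get_trainable_variables(), ignore_missing_vars=True)

with tf.Session() as sess:
    sess.run(init)      # initialize everything first ...
    restore_fn(sess)    # ... then overwrite the restored subset from the ckpt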
def run_experiment(self): with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: sess.run(self.init) self.train_writer = tf.summary.FileWriter( "{}/train_logs/".format(self.log_path), graph=tf.get_default_graph()) self.validation_writer = tf.summary.FileWriter( "{}/validation_logs/".format(self.log_path), graph=tf.get_default_graph()) self.train_saver = tf.train.Saver() self.val_saver = tf.train.Saver() start_from_epoch = 0 if self.continue_from_epoch != -1: start_from_epoch = self.continue_from_epoch checkpoint = "{}train_saved_model_{}_{}.ckpt".format( self.saved_models_filepath, self.experiment_name, self.continue_from_epoch) variables_to_restore = [] for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): print(var) variables_to_restore.append(var) tf.logging.info('Fine-tuning from %s' % checkpoint) fine_tune = slim.assign_from_checkpoint_fn( checkpoint, variables_to_restore, ignore_missing_vars=True) fine_tune(sess) self.iter_done = 0 self.disc_iter = 5 self.gen_iter = 1 best_d_val_loss = np.inf if self.spherical_interpolation: dim = int(np.sqrt(self.num_generations) * 2) self.z_2d_vectors = interpolations.create_mine_grid( rows=dim, cols=dim, dim=self.z_dim, space=3, anchors=None, spherical=True, gaussian=True) self.z_vectors = interpolations.create_mine_grid( rows=1, cols=self.num_generations, dim=self.z_dim, space=3, anchors=None, spherical=True, gaussian=True) else: self.z_vectors = np.random.normal(size=(self.num_generations, self.z_dim)) self.z_2d_vectors = np.random.normal( size=(self.num_generations, self.z_dim)) with tqdm.tqdm(total=self.total_epochs - start_from_epoch) as pbar_e: for e in range(start_from_epoch, self.total_epochs): train_g_loss = [] val_g_loss = [] train_d_loss = [] val_d_loss = [] with tqdm.tqdm( total=self.total_train_batches) as pbar_train: for iter in range(self.total_train_batches): cur_sample = 0 for n in range(self.disc_iter): x_train_i, x_train_j = self.data.get_train_batch( ) x_val_i, x_val_j = self.data.get_val_batch() _, d_train_loss_value = sess.run( [ self.graph_ops["d_opt_op"], self.losses["d_losses"] ], feed_dict={ self.input_x_i: x_train_i, self.input_x_j: x_train_j, self.dropout_rate: self.dropout_rate_value, self.training_phase: True, self.random_rotate: True }) d_val_loss_value = sess.run( self.losses["d_losses"], feed_dict={ self.input_x_i: x_val_i, self.input_x_j: x_val_j, self.dropout_rate: self.dropout_rate_value, self.training_phase: False, self.random_rotate: False }) cur_sample += 1 train_d_loss.append(d_train_loss_value) val_d_loss.append(d_val_loss_value) for n in range(self.gen_iter): x_train_i, x_train_j = self.data.get_train_batch( ) x_val_i, x_val_j = self.data.get_val_batch() _, g_train_loss_value, train_summaries = sess.run( [ self.graph_ops["g_opt_op"], self.losses["g_losses"], self.summary ], feed_dict={ self.input_x_i: x_train_i, self.input_x_j: x_train_j, self.dropout_rate: self.dropout_rate_value, self.training_phase: True, self.random_rotate: True }) g_val_loss_value, val_summaries = sess.run( [self.losses["g_losses"], self.summary], feed_dict={ self.input_x_i: x_val_i, self.input_x_j: x_val_j, self.dropout_rate: self.dropout_rate_value, self.training_phase: False, self.random_rotate: False }) cur_sample += 1 train_g_loss.append(g_train_loss_value) val_g_loss.append(g_val_loss_value) if iter % ( self.tensorboard_update_interval) == 0: self.train_writer.add_summary( train_summaries, global_step=self.iter_done) self.validation_writer.add_summary( val_summaries, global_step=self.iter_done) 
self.iter_done = self.iter_done + 1 iter_out = "{}_train_d_loss: {}, train_g_loss: {}, " \ "val_d_loss: {}, val_g_loss: {}".format(self.iter_done, d_train_loss_value, g_train_loss_value, d_val_loss_value, g_val_loss_value) pbar_train.set_description(iter_out) pbar_train.update(1) total_d_train_loss_mean = np.mean(train_d_loss) total_d_train_loss_std = np.std(train_d_loss) total_g_train_loss_mean = np.mean(train_g_loss) total_g_train_loss_std = np.std(train_g_loss) print( "Epoch {}: d_train_loss_mean: {}, d_train_loss_std: {}," "g_train_loss_mean: {}, g_train_loss_std: {}".format( e, total_d_train_loss_mean, total_d_train_loss_std, total_g_train_loss_mean, total_g_train_loss_std)) total_d_val_loss_mean = np.mean(val_d_loss) total_d_val_loss_std = np.std(val_d_loss) total_g_val_loss_mean = np.mean(val_g_loss) total_g_val_loss_std = np.std(val_g_loss) print("Epoch {}: d_val_loss_mean: {}, d_val_loss_std: {}," "g_val_loss_mean: {}, g_val_loss_std: {}, ".format( e, total_d_val_loss_mean, total_d_val_loss_std, total_g_val_loss_mean, total_g_val_loss_std)) sample_generator( num_generations=self.num_generations, sess=sess, same_images=self.same_images, inputs=x_train_i, data=self.data, batch_size=self.batch_size, z_input=self.z_input, file_name="{}/train_z_variations_{}_{}.png".format( self.save_image_path, self.experiment_name, e), input_a=self.input_x_i, training_phase=self.training_phase, z_vectors=self.z_vectors, dropout_rate=self.dropout_rate, dropout_rate_value=self.dropout_rate_value) sample_two_dimensions_generator( sess=sess, same_images=self.same_images, inputs=x_train_i, data=self.data, batch_size=self.batch_size, z_input=self.z_input, file_name="{}/train_z_spherical_{}_{}".format( self.save_image_path, self.experiment_name, e), input_a=self.input_x_i, training_phase=self.training_phase, dropout_rate=self.dropout_rate, dropout_rate_value=self.dropout_rate_value, z_vectors=self.z_2d_vectors) with tqdm.tqdm(total=self.total_gen_batches) as pbar_samp: for i in range(self.total_gen_batches): x_gen_a = self.data.get_gen_batch() sample_generator( num_generations=self.num_generations, sess=sess, same_images=self.same_images, inputs=x_gen_a, data=self.data, batch_size=self.batch_size, z_input=self.z_input, file_name="{}/test_z_variations_{}_{}_{}.png". 
format(self.save_image_path, self.experiment_name, e, i), input_a=self.input_x_i, training_phase=self.training_phase, z_vectors=self.z_vectors, dropout_rate=self.dropout_rate, dropout_rate_value=self.dropout_rate_value) sample_two_dimensions_generator( sess=sess, same_images=self.same_images, inputs=x_gen_a, data=self.data, batch_size=self.batch_size, z_input=self.z_input, file_name="{}/val_z_spherical_{}_{}_{}".format( self.save_image_path, self.experiment_name, e, i), input_a=self.input_x_i, training_phase=self.training_phase, dropout_rate=self.dropout_rate, dropout_rate_value=self.dropout_rate_value, z_vectors=self.z_2d_vectors) pbar_samp.update(1) train_save_path = self.train_saver.save( sess, "{}/train_saved_model_{}_{}.ckpt".format( self.saved_models_filepath, self.experiment_name, e)) if total_d_val_loss_mean < best_d_val_loss: best_d_val_loss = total_d_val_loss_mean val_save_path = self.train_saver.save( sess, "{}/val_saved_model_{}_{}.ckpt".format( self.saved_models_filepath, self.experiment_name, e)) print("Saved current best val model at", val_save_path) #save_statistics(self.log_path, [e, total_d_train_loss_mean, total_d_val_loss_mean, # total_d_train_loss_std, total_d_val_loss_std, # total_g_train_loss_mean, total_g_val_loss_mean, # total_g_train_loss_std, total_g_val_loss_std]) pbar_e.update(1)
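Unlike the fine-tuning examples, run_experiment restores every entry in GLOBAL_VARIABLES, which is what makes continue_from_epoch a true resume: optimizer slots and counters come back along with the weights. A hedged sketch of that distinction (graph and path are invented):

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 4])
loss = tf.reduce_mean(slim.fully_connected(x, 1))
global_step = tf.train.get_or_create_global_step()
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

# Restoring GLOBAL_VARIABLES rather than just the model variables brings
# back Adam's slot variables and the step counter, so training resumes
# where it stopped; ignore_missing_vars=True tolerates variables that were
# added to the graph after the checkpoint was written.
resume_fn = slim.assign_from_checkpoint_fn(
    '/tmp/run/train_saved_model_3.ckpt',         # hypothetical checkpoint
    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
    ignore_missing_vars=True)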
def restore_model(checkpoint_paths, variables_to_restore, ignore_missing_vars=False, num_streams=1, checkpoint_style=None, special_assign_vars=None):
    all_ops = []
    if len(checkpoint_paths) == 1 and num_streams > 1:
        logging.info('Provided one checkpoint for multi-stream '
                     'network. Will use this as a saved model '
                     'with this exact multi stream network.')
        all_ops.append(slim.assign_from_checkpoint_fn(
            checkpoint_paths[0], variables_to_restore,
            ignore_missing_vars=ignore_missing_vars))
    else:
        for sid in range(num_streams):
            this_checkpoint_style = checkpoint_style.split(',')[sid] if checkpoint_style is not None else None
            checkpoint_path = checkpoint_paths[sid]
            # assert tf.gfile.Exists(checkpoint_path)
            this_stream_name = 'stream%d/' % sid
            this_checkpoint_variables = [var for var in variables_to_restore
                                         if var in slim.get_model_variables(this_stream_name)]
            if checkpoint_path.endswith('.npy'):
                vars_to_restore_names = [el.name for el in this_checkpoint_variables]
                key_name_mapper = var_name_mapper.map()
                init_weights = np.load(checkpoint_path).item()
                init_weights_final = {}
                vars_restored = []
                for key in init_weights.keys():
                    for subkey in init_weights[key].keys():
                        prefix = this_stream_name
                        if this_checkpoint_style == 'v2_withStream':
                            prefix = 'stream0/'  # because any model trained with stream will have that stream as 0
                        final_key_name = prefix + key_name_mapper(key + '/' + subkey)
                        if final_key_name not in vars_to_restore_names:
                            logging.error('Not using %s from npy' % final_key_name)
                            continue
                        target_shape = slim.get_model_variables(final_key_name)[0].get_shape().as_list()
                        pretrained_wts = init_weights[key][subkey]
                        target_shape_squeezed = np.delete(target_shape, np.where(np.array(target_shape) == 1))
                        pretrained_shape_squeezed = np.delete(pretrained_wts.shape, np.where(np.array(pretrained_wts.shape) == 1))
                        # report a mismatch if the squeezed shapes differ anywhere
                        if not np.array_equal(target_shape_squeezed, pretrained_shape_squeezed):
                            logging.error('Shape mismatch var: %s from npy [%s vs %s]' % (final_key_name, target_shape, pretrained_wts.shape))
                        init_weights_final[final_key_name] = pretrained_wts
                        vars_restored.append(final_key_name)
                init_weights = init_weights_final
                for v in vars_to_restore_names:
                    if v not in vars_restored:
                        logging.fatal('No weights found for %s' % v)
                all_ops.append(slim.assign_from_values_fn(init_weights))
            else:
                if this_checkpoint_style != 'v2_withStream':
                    all_ops.append(slim.assign_from_checkpoint_fn(
                        checkpoint_path,
                        # stripping the stream name to map variables
                        dict([('/'.join(el.name.split('/')[1:]).split(':')[0], el)
                              for el in this_checkpoint_variables]),
                        ignore_missing_vars=ignore_missing_vars))
                else:
                    all_ops.append(slim.assign_from_checkpoint_fn(
                        checkpoint_path,
                        # stripping the stream name to map variables, to stream0,
                        # as the model is v2_withStream, hence must be trained with
                        # stream0/ prefix
                        dict([('/'.join(['stream0'] + el.name.split('/')[1:]).split(':')[0], el)
                              for el in this_checkpoint_variables]),
                        ignore_missing_vars=ignore_missing_vars))
    if special_assign_vars is not None:
        all_ops.append(get_special_assigns(special_assign_vars))
    def combined(sess):
        for op in all_ops:
            op(sess)
    return combined
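The returned combined closure is the glue here: slim.assign_from_checkpoint_fn and slim.assign_from_values_fn both yield callables taking a session, so any number of them compose into one init function. A minimal standalone version of that composition:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def combine_init_fns(init_fns):
    # Each element is a callable taking a session, e.g. the return value of
    # slim.assign_from_checkpoint_fn or slim.assign_from_values_fn.
    def combined(sess):
        for fn in init_fns:
            fn(sess)
    return combined

# Hypothetical usage with two per-stream checkpoints:
# init_fn = combine_init_fns([
#     slim.assign_from_checkpoint_fn('/tmp/stream0.ckpt', stream0_vars),
#     slim.assign_from_checkpoint_fn('/tmp/stream1.ckpt', stream1_vars)])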
def train(self): def get_optimizer(loss, global_step=None, var_list=None, is_gradient_clip=False): train_op = tf.train.AdamOptimizer(self.lr) if is_gradient_clip: grads_and_vars = train_op.compute_gradients(loss, var_list=var_list) unchanged_gvs = [(grad, var) for grad, var in grads_and_vars if not 'LSTM' in var.name] rnn_grad = [ grad for grad, var in grads_and_vars if 'LSTM' in var.name ] rnn_var = [ var for grad, var in grads_and_vars if 'LSTM' in var.name ] capped_grad, _ = tf.clip_by_global_norm(rnn_grad, clip_norm=3) capped_gvs = list(zip(capped_grad, rnn_var)) train_op = train_op.apply_gradients(grads_and_vars=capped_gvs + unchanged_gvs, global_step=global_step) else: train_op = train_op.minimize(loss, global_step, var_list) return train_op global_step = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False) self.global_step = global_step # build model self.build_model() # learning rate decay self.lr = tf.train.polynomial_decay(self.learning_rate, global_step, self.max_steps, end_learning_rate=1e-6, power=0.3) tf.summary.scalar('learning_rate', self.lr) # training operators train_gnet = get_optimizer(self.loss_total, global_step, self.all_vars) # session and thread gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) self.sess = sess sess.run( tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())) # load vgg model vgg_model_path = '/home/chenli/Downloads/vgg_16.ckpt' exclude = [ 'vgg_16/fc6', 'vgg_16/pool4', 'vgg_16/conv5', 'vgg_16/pool5', 'vgg_16/fc7', 'vgg_16/global_pool', 'vgg_16/fc8/squeezed', 'vgg_16/fc8' ] vgg_vars = slim.get_variables_to_restore(include=['vgg_16'], exclude=exclude) # vgg_init_var = slim.get_variables_to_restore(include=['vgg_16/fc6']) init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars) init_fn(self.sess) # tf.initialize_variables(var_list=vgg_init_var) print('vgg s weights load done') self.saver = tf.train.Saver(max_to_keep=50, keep_checkpoint_every_n_hours=1) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) # training summary summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph, flush_secs=30) # self.load(sess, self.restore_dir, step=self.restore_step) for step in xrange(sess.run(global_step), self.max_steps + 1): start_time = time.time() # update G network _, loss_total_val = sess.run([train_gnet, self.loss_total]) duration = time.time() - start_time # print loss_value assert not np.isnan( loss_total_val), 'Model diverged with loss = NaN' if step % 5 == 0: num_examples_per_step = self.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = (%.5f; %.5f, %.5f)(%.1f data/s; %.3f s/bch)' ) print(format_str % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), step, loss_total_val, 0.0, 0.0, examples_per_sec, sec_per_batch)) if step % 20 == 0: # summary_str = sess.run(summary_op, feed_dict={inputs:batch_input, gt:batch_gt}) summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, global_step=step) # Save the model checkpoint periodically. if step > self.max_steps * 0.5: if step % 1000 == 0 or step == self.max_steps: checkpoint_path = os.path.join(self.train_dir, 'checkpoints') self.save(sess, checkpoint_path, step)
np.float32)
    processed_image = vgg_preprocessing.preprocess_image(tfimg, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)
    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # 1000 classes instead of 1001.
        logits, end_points = vgg.vgg_16(processed_images, num_classes=1000, is_training=False)
    probabilities = tf.nn.softmax(logits)
    init_fn = slim.assign_from_checkpoint_fn(
        'C:/Users/falindrith/Dropbox/Documents/research/sliders_project/vgg_16/vgg_16.ckpt',
        slim.get_model_variables('vgg_16'))
    #print (slim.get_model_variables('vgg_16'))
    feature_conv_5_3 = end_points['vgg_16/conv4/conv4_2']
    with tf.Session() as sess:
        tf.train.start_queue_runners(sess=sess)
        init_fn(sess)
        probabilities, feats = sess.run([probabilities, feature_conv_5_3])
        #probabilities = probabilities[0, 0:]
        #sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]
        np.save(outFolder + '/' + f, feats)
        tf.get_variable_scope().reuse_variables()
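Restoring with slim.get_model_variables('vgg_16') populates the whole network, after which any tensor in end_points can be fetched as a feature, not just the logits. A sketch of that feature-extraction setup (the `from nets import vgg` import assumes the tensorflow/models slim package is on the path; the checkpoint path is a placeholder):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import vgg   # assumed layout of the tensorflow/models slim package

images = tf.placeholder(tf.float32, [1, 224, 224, 3])
with slim.arg_scope(vgg.vgg_arg_scope()):
    _, end_points = vgg.vgg_16(images, num_classes=1000, is_training=False)

# end_points maps layer names to tensors, so any intermediate activation
# can be fetched as a feature once the weights are restored.
feat = end_points['vgg_16/conv4/conv4_2']
restore_fn = slim.assign_from_checkpoint_fn(
    'vgg_16.ckpt',                               # placeholder path
    slim.get_model_variables('vgg_16'))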
import inception_v1
import input_data
import tensorflow.contrib.slim as slim
import tensorflow as tf

x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
keep_prob = tf.placeholder(dtype=tf.float32)
logits = inception_v1.inception_v1(x, keep_prob, 5)
logits = tf.reshape(logits, [-1, 5])
exclusions = ['InceptionV1/Logits']
inception_except_logits = slim.get_variables_to_restore(exclude=exclusions)
CKPT_FILE = 'inception_v1.ckpt'
init_fn = slim.assign_from_checkpoint_fn(CKPT_FILE, inception_except_logits, ignore_missing_vars=True)
y = tf.nn.softmax(logits)
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])
output_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='InceptionV1/Logits')
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy, var_list=output_vars)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
flower_photos = input_data.read_data_sets('flower_photos/')
with tf.Session() as sess:
    # initialize all variables first, then overwrite the restored ones with
    # the checkpoint weights (the original training loop is omitted here)
    sess.run(tf.global_variables_initializer())
    init_fn(sess)
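Two things make this last example a head-only fine-tune: the restore list excludes InceptionV1/Logits, and var_list=output_vars restricts the optimizer to exactly those excluded variables. The hand-rolled -tf.reduce_sum(y_*tf.log(y)) is numerically fragile, though; a sketch of the same var_list idea with the stabler logits-based loss (scope names are invented):

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 16])
with tf.variable_scope('Backbone'):
    feat = slim.fully_connected(x, 8)
with tf.variable_scope('Head'):
    logits = slim.fully_connected(feat, 5, activation_fn=None)

y_ = tf.placeholder(tf.float32, [None, 5])
# Computing cross entropy from raw logits avoids the log(softmax) blow-up
# when a predicted probability underflows to zero.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))

# Only the head is trained; a restored backbone stays fixed because it is
# left out of var_list.
head_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Head')
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy, var_list=head_vars)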