def parse(self, args):
    self.cfg = self.parser.parse_args(args)

    if self.cfg.set == 'CIFAR10':
        self.cfg.num_cls = 10
        self.cfg.eval_tst = True
    elif self.cfg.set == 'CIFAR100':
        self.cfg.num_cls = 100
        self.cfg.eval_tst = True
    else:
        raise NotImplementedError('Invalid dataset {}'.format(self.cfg.set))

    self.cfg.exp_dir = osp.join(path_utils.get_checkpoint_dir(), self.cfg.name)
    os_utils.touch_dir(self.cfg.exp_dir)

    log_file = os.path.join(self.cfg.exp_dir, self.cfg.log_file)
    logging.config.dictConfig(log_utils.get_logging_dict(log_file))
    self.cfg.logger = logging.getLogger('train')

    return self.cfg
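# A minimal, self-contained sketch of the dataset -> num_cls dispatch that
# parse() performs above; BaseConfig and its flags belong to this repo, so the
# parser and names below are illustrative only.
import argparse


def parse_demo(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--set', default='CIFAR10')
    cfg = parser.parse_args(argv)
    num_cls = {'CIFAR10': 10, 'CIFAR100': 100}.get(cfg.set)
    if num_cls is None:
        raise NotImplementedError('Invalid dataset {}'.format(cfg.set))
    return cfg.set, num_cls

# Example: parse_demo(['--set', 'CIFAR100']) -> ('CIFAR100', 100)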
def main(cfg):
    # cfg.num_classes = 1001
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    output_dir = cfg.output_dir
    os_utils.touch_dir(output_dir)

    args_file = os.path.join(cfg.output_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)

    log_file = os.path.join(cfg.output_dir, cfg.log_filename + '.txt')
    logger = log_utils.create_logger(log_file)

    img_name_ext = cfg.img_name
    img_name, _ = os.path.splitext(img_name_ext)
    datasets_dir = './input_imgs'
    test_img = imageio.imread('{}/{}'.format(datasets_dir, img_name_ext))
    test_img = cv2.resize(test_img, (const.frame_height, const.frame_height))

    with tf.Graph().as_default():
        images_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=(None, const.frame_height, const.frame_height, const.num_channels),
            name='input_img')
        lbls_ph = tf.compat.v1.placeholder(
            tf.int32, shape=(None, cfg.num_classes), name='class_lbls')
        logits_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, cfg.num_classes), name='logits_lbls')
        per_class_logits_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, cfg.num_classes), name='logits_lbls')

        input_ph = nn_utils.adjust_color_space(images_ph, cfg.preprocess_func)
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=input_ph, lbls_ph=lbls_ph)

        # Replicate the network from cfg.replicate_net_at: the sub-network takes
        # the cached feature map as input instead of the raw image.
        pre_atten_feat_map_tf = tf.compat.v1.get_default_graph().get_tensor_by_name(
            cfg.replicate_net_at)
        pre_atten_feat_map_tf_shape = pre_atten_feat_map_tf.shape
        sub_feat_map_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None,
                   pre_atten_feat_map_tf_shape[1],
                   pre_atten_feat_map_tf_shape[2],
                   pre_atten_feat_map_tf_shape[3]],
            name='feat_map_input')
        sub_network_class = locate(cfg.sub_network_name)
        sub_model = sub_network_class(cfg, images_ph=sub_feat_map_ph, lbls_ph=lbls_ph)
        sub_logits = sub_model.val_logits
        logits = model.val_logits

        sess = tf.compat.v1.InteractiveSession()

        atten_filter_position = cfg.atten_filter_position
        tf_atten_var = [v for v in tf.compat.v1.global_variables()
                        if atten_filter_position.format('atten') in v.name][-1]
        ## Didn't make a difference for tf_atten_var because tf_atten_var is
        ## created using get_variable, i.e., shared.
        tf_gate_atten_var = [v for v in tf.compat.v1.global_variables()
                             if atten_filter_position.format('gate') in v.name][-1]
        # print(tf_gate_atten_var)
        # optimizer = tf.train.AdamOptimizer(0.01)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        logger.info('Learning rate {} {}'.format(cfg.learning_rate, cfg.max_iters))
        learning_rate = tf_utils.poly_lr(global_step, cfg)
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)

        class_specific = cfg.caf_variant == 'cls_specific'
        if class_specific:
            logger.info('Solving class specific optimization problem -- classification network')
            mult_logits_2 = per_class_logits_ph * sub_logits
            loss_sub = tf.reduce_sum(mult_logits_2)
            grads = optimizer.compute_gradients(loss_sub, var_list=[tf_atten_var])
            train_op = optimizer.apply_gradients(grads, global_step=global_step)
        else:
            raise NotImplementedError('cls_oblivious variant is not implemented yet')
            # train_op = optimizer.minimize(loss, var_list=[tf_atten_var])

        tf.compat.v1.global_variables_initializer().run()
        ckpt_file = tf.train.latest_checkpoint(output_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        saver = tf.compat.v1.train.Saver()  # saves variables learned during training
        load_model_msg = model.load_model(output_dir, ckpt_file, sess, saver,
                                          load_logits=True)
        logger.info(load_model_msg)

        class_predictions, ground_logits = sess.run(
            [model.val_class_prediction, logits],
            feed_dict={images_ph: np.expand_dims(test_img, 0)})
        class_predictions = class_predictions[0]
        # print('Class Prediction {}'.format(imagenet_lbls[class_predictions]))

        k = 1
        top_k = np.argsort(np.squeeze(ground_logits))[::-1][:k]
        # top_k = [235, 282, 94, 1, 225]
        logger.info('Top K={} {}'.format(k, [imagenet_lbls[i] for i in top_k]))

        filter_type = cfg.filter_type
        if filter_type == 'gauss':
            rand_initializer = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1]))
        else:
            rand_initializer = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1], 1))

        close_gate = tf.compat.v1.assign(tf_gate_atten_var, False)
        open_gate = tf.compat.v1.assign(tf_gate_atten_var, True)
        random_init = tf.compat.v1.assign(tf_atten_var, rand_initializer)
        lr_reset = tf.compat.v1.assign(global_step, 0)

        MAX_INT = np.iinfo(np.int16).max
        # output_dir = cfg.output_dir
        for top_i in top_k:
            # top_i = 207  # To control which top_i to work on directly
            sess.run([open_gate, random_init, lr_reset])
            # sess.run(open_gate)
            iteration = 0
            prev_loss = MAX_INT
            event_gif_images = []
            per_class_maximization = np.ones((1, cfg.num_classes))
            per_class_maximization[0, top_i] = -1
            while iteration < cfg.max_iters:
                if iteration == 0:
                    # Cache the feature map once with the attention gate closed.
                    sess.run([close_gate])
                    _pre_atten_feat_map_tf, _atten_var = sess.run(
                        [pre_atten_feat_map_tf, tf_atten_var],
                        feed_dict={
                            # sub_feat_map_ph: _pre_atten_feat_map_tf,
                            images_ph: np.expand_dims(test_img, 0),
                            per_class_logits_ph: per_class_maximization
                        })
                    sess.run([open_gate])

                # Optimize the attention filter on the cached feature map.
                _atten_var, _sub_logits, _loss, _ = sess.run(
                    [tf_atten_var, sub_logits, loss_sub, train_op],
                    feed_dict={
                        sub_feat_map_ph: _pre_atten_feat_map_tf,
                        # images_ph: np.expand_dims(img_crops[crop_idx, :, :, :], 0),
                        per_class_logits_ph: per_class_maximization
                    })

                if iteration % 50 == 0:
                    logger.info('Iter {0:2d}: {1:.5f} Top {2:3d} {3}'.format(
                        iteration, _loss, top_i, imagenet_lbls[top_i]))
                    # print(np.round(np.reshape(_atten_var, (7, 7)), 2))
                    if cfg.save_gif:
                        frame_mask = normalize_filter(filter_type, _atten_var,
                                                      tf_atten_var.shape[0],
                                                      tf_atten_var.shape[1])
                        if class_specific:
                            # heatmap_utils.save_heatmap(frame_mask, save=output_dir + img_name + '_msk_cls_{}_{}.png'.format(top_i, filter_type))
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0, frame_mask, alpha=0.7,
                                save=output_dir + img_name + '_cls_{}_{}.png'.format(top_i, filter_type),
                                axis='off', cmap='bwr')
                        else:
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0, frame_mask, alpha=0.7,
                                save=output_dir + img_name + '_{}.png'.format(filter_type),
                                axis='off', cmap='bwr')
                        fig = plt.gcf()
                        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
                        w, h = fig.canvas.get_width_height()
                        data_img = data.reshape((h, w, 3))
                        event_gif_images.append(data_img)
                        # imageio.imwrite(dump_dir + '{}_test.jpg'.format(iteration), data_img)
                        plt.close()

                if np.abs(_loss - prev_loss) < 10e-5:
                    break
                prev_loss = _loss
                iteration += 1

            frame_mask = normalize_filter(filter_type, _atten_var,
                                          tf_atten_var.shape[0], tf_atten_var.shape[1])
            if class_specific:
                # imageio.imwrite(output_dir + img_name + '_msk_cls_{}_{}.png'.format(top_i, filter_type), frame_mask)
                heatmap_utils.apply_heatmap(
                    test_img / 255.0, frame_mask, alpha=0.6,
                    save=output_dir + img_name + '_cls_{}_{}.png'.format(top_i, filter_type),
                    axis='off', cmap='bwr')
            else:
                heatmap_utils.apply_heatmap(
                    test_img / 255.0, frame_mask, alpha=0.6,
                    save=output_dir + img_name + '_{}.png'.format(filter_type),
                    axis='off', cmap='bwr')

            if cfg.save_gif:
                if class_specific:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            top_i, atten_filter_position[:-2].format('').replace('/', '')),
                        event_gif_images, duration=1.0)
                else:
                    imageio.mimsave(
                        output_dir + img_name + '_{}_{}.gif'.format(
                            filter_type, atten_filter_position[:-2].format('').replace('/', '')),
                        event_gif_images, duration=1.0)
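# The loop above anneals Adam's step size with tf_utils.poly_lr. A hedged,
# self-contained sketch of such a polynomial decay, built on the stock TF-1.x
# helper; the end learning rate and power below are assumptions, not the
# repo's exact values:
import tensorflow as tf


def poly_lr_sketch(global_step, base_lr=0.01, max_iters=1000, power=0.9):
    # lr(step) = (base_lr - end_lr) * (1 - step / max_iters) ** power + end_lr
    return tf.compat.v1.train.polynomial_decay(
        learning_rate=base_lr,
        global_step=global_step,
        decay_steps=max_iters,
        end_learning_rate=1e-6,
        power=power)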
def main(argv):
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)
    if not os.path.exists(args.dataset):
        return

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings.h5'

    os_utils.touch_dir(os.path.join(args.experiment_root, args.foldername))
    log_file = os.path.join(args.experiment_root, args.foldername, "embed")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('embed')

    args.filename = os.path.join(args.experiment_root, args.foldername, args.filename)
    var_filepath = os.path.join(args.experiment_root, args.foldername,
                                args.filename[:-3] + '_var.txt')

    # Load the args from the original experiment.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if os.path.isfile(args_file):
        if not args.quiet:
            print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        # Add arguments from training.
        for key, value in args_resumed.items():
            args.__dict__.setdefault(key, value)
        # A couple of special cases and sanity checks.
        if (args_resumed['crop_augment']) == (args.crop_augment is None):
            print('WARNING: crop augmentation differs between training and '
                  'evaluation.')
        args.image_root = args.image_root or args_resumed['image_root']
    else:
        raise IOError('`args.json` could not be found in: {}'.format(args_file))

    # Check that a proper aggregator is provided if augmentation is used.
    if args.flip_augment or args.crop_augment == 'five':
        if args.aggregator is None:
            print('ERROR: Test-time augmentation is performed but no aggregator '
                  'was specified.')
            exit(1)
    else:
        if args.aggregator is not None:
            print('ERROR: No test-time augmentation that needs aggregating is '
                  'performed but an aggregator was specified.')
            exit(1)

    if not args.quiet:
        print('Evaluating using the following parameters:')
        for key, value in sorted(vars(args).items()):
            print('{}: {}'.format(key, value))

    # Load the data from the CSV file.
    _, data_fids = common.load_dataset(args.dataset, args.image_root)

    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(
        lambda fid: common.fid_to_image(
            fid, tf.constant('dummy'), image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    # `modifiers` is a list of strings that keeps track of which augmentations
    # have been applied, so that a human can understand it later on.
    modifiers = ['original']
    if args.flip_augment:
        dataset = dataset.map(flip_augment)
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for m in ['', '_flip'] for o in modifiers]

    if args.crop_augment == 'center':
        dataset = dataset.map(lambda im, fid, pid:
                              (five_crops(im, net_input_size)[0], fid, pid))
        modifiers = [o + '_center' for o in modifiers]
    elif args.crop_augment == 'five':
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.stack(five_crops(im, net_input_size)),
                               tf.stack([fid] * 5), tf.stack([pid] * 5)))
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for o in modifiers for m in
                     ['_center', '_top_left', '_top_right',
                      '_bottom_left', '_bottom_right']]
    elif args.crop_augment == 'avgpool':
        modifiers = [o + '_avgpool' for o in modifiers]
    else:
        modifiers = [o + '_resize' for o in modifiers]

    # Group it back into PK batches.
    dataset = dataset.batch(args.batch_size)

    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)

    # images, _, _ = dataset.make_one_shot_iterator().get_next()
    # init_iter = dataset.make_initializable_iterator()
    init_iter = tf.data.Iterator.from_structure(dataset.output_types,
                                                dataset.output_shapes)
    images, _, _ = init_iter.get_next()
    iter_init_op = init_iter.make_initializer(dataset)

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    images_ph = tf.placeholder(dataset.output_types[0], dataset.output_shapes[0])
    endpoints, body_prefix = model.endpoints(images_ph, is_training=False)
    with tf.name_scope('head'):
        endpoints = head.head(endpoints, args.embedding_dim, is_training=False)

    gpu_options = tf.GPUOptions(allow_growth=True)
    gpu_config = tf.ConfigProto(gpu_options=gpu_options)
    with h5py.File(args.filename, 'w') as f_out, \
            tf.Session(config=gpu_config) as sess:
        # Initialize the network/load the checkpoint.
        if args.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(args.experiment_root)
        else:
            checkpoint = os.path.join(args.experiment_root, args.checkpoint)
        if not args.quiet:
            print('Restoring from checkpoint: {}'.format(checkpoint))
        tf.train.Saver().restore(sess, checkpoint)

        # Go ahead and embed the whole dataset, with all augmented versions too.
        emb_storage = np.zeros(
            (len(data_fids) * len(modifiers), args.embedding_dim), np.float32)

        ## sess.run(init_iter.initializer)
        sess.run(iter_init_op)
        for start_idx in count(step=args.batch_size):
            try:
                current_imgs = sess.run(images)
                batch_embedding = endpoints['emb']
                emb = sess.run(batch_embedding, feed_dict={images_ph: current_imgs})
                emb_storage[start_idx:start_idx + len(emb)] += emb
                print('\rEmbedded batch {}-{}/{}'.format(
                    start_idx, start_idx + len(emb), len(emb_storage)),
                    flush=True, end='')
            except tf.errors.OutOfRangeError:
                break  # This just indicates the end of the dataset.

        if not args.quiet:
            print("Done with embedding, aggregating augmentations...", flush=True)

        if len(modifiers) > 1:
            # Pull out the augmentations into a separate first dimension.
            emb_storage = emb_storage.reshape(len(data_fids), len(modifiers), -1)
            emb_storage = emb_storage.transpose((1, 0, 2))  # (Aug, FID, 128D)

            # Store the embedding of all individual variants too.
            emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage)

            # Aggregate according to the specified parameter.
            emb_storage = AGGREGATORS[args.aggregator](emb_storage)

        # Store the final embeddings.
        emb_dataset = f_out.create_dataset('emb', data=emb_storage)

        # Store information about the produced augmentation and, in case no crop
        # augmentation was used, whether the images are resized or avg pooled.
        f_out.create_dataset('augmentation_types',
                             data=np.asarray(modifiers, dtype='|S'))
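# AGGREGATORS above reduces the (Aug, FID, dim) embedding array over the
# augmentation axis. A minimal sketch of such a lookup table; the exact keys
# the repo registers are an assumption:
import numpy as np

AGGREGATORS_SKETCH = {
    'mean': lambda emb: np.mean(emb, axis=0),
    'max': lambda emb: np.max(emb, axis=0),
    # Average, then re-project onto the unit sphere.
    'normalized_mean': lambda emb: (np.mean(emb, axis=0) /
                                    np.linalg.norm(np.mean(emb, axis=0),
                                                   axis=-1, keepdims=True)),
}

# Example: AGGREGATORS_SKETCH['mean'](np.ones((5, 3, 128))).shape == (3, 128)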
def main(argv):
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu

    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)

    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)

    # os_utils.touch_dir(save_model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['csv_file'] = cfg.train_csv_file
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    train_iter = img_generator_class(args)

    args['batch_size'] = cfg.batch_size
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    trn_images, trn_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    with tf.Graph().as_default():
        if cfg.train_mode in ('semi_hard', 'hard', 'cntr'):
            train_dataset = TripletTupleLoader(trn_images, trn_lbls, cfg).dataset
        elif cfg.train_mode == 'vanilla':
            train_dataset = QuickTupleLoader(trn_images, trn_lbls, cfg,
                                             is_training=True, shuffle=True,
                                             repeat=True).dataset
        else:
            raise NotImplementedError('{} is not a valid train mode'.format(cfg.train_mode))

        val_dataset = QuickTupleLoader(val_imgs, val_lbls, cfg,
                                       is_training=False, repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)

        # Which loss fn to impose. For example, softmax only is applied in vanilla
        # mode, while softmax + semi-hard triplet is applied in semi_hard mode.
        if cfg.train_mode == 'semi_hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim, n_input=channels, n_h=h, n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_semi.triplet_semihard_loss(gt_lbls, embedding, margin)
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(metric_loss)
        elif cfg.train_mode == 'hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim, n_input=channels, n_h=h, n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(metric_loss)
        elif cfg.train_mode == 'cntr':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim, n_input=channels, n_h=h, n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            CENTER_LOSS_LAMBDA = 0.003
            CENTER_LOSS_ALPHA = 0.5
            num_fg_classes = cfg.num_classes
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            center_loss_order, centroids, centers_update_op, appear_times, diff = \
                center_loss.get_center_loss(embedding, gt_lbls, CENTER_LOSS_ALPHA,
                                            num_fg_classes)
            # sample_centroid = tf.reshape(tf.gather(centroids, gt_lbls), [-1, config.emb_dim])
            # center_loss_order = center_loss.center_loss(sample_centroid, embedding)
            logger.info('Center loss lambda {}'.format(CENTER_LOSS_LAMBDA))
            total_loss = model.train_loss + CENTER_LOSS_LAMBDA * tf.reduce_mean(center_loss_order)
        elif cfg.train_mode == 'vanilla':
            total_loss = model.train_loss

        logger.info('Train Mode {}'.format(cfg.train_mode))

        # variables_to_train = model.var_2_train()
        # logger.info('variables_to_train ' + str(variables_to_train))
        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated gradient
            ## Create a list of shadow variables with the same shapes as the
            ## trainable ones, initialized with zeros.
            accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False)
                          for tv in trainable_vars]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if cfg.train_mode == const.Train_Mode.CNTR:
            update_ops.append(centers_update_op)
        # print(update_ops)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated gradient
                # grads = tf.Print(grads, [grads], 'Grad Print')
                grads = optimizer.compute_gradients(total_loss, trainable_vars)
                ## Add each variable's gradient to its shadow accumulator (works
                ## because accum_vars and grads are in the same order).
                accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads)]
                iter_size = cfg.caffe_iter_size
                # Define the training step (applies the averaged accumulated gradients).
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1]) for i, gv in enumerate(grads)],
                    global_step=global_step)
            else:
                grads = optimizer.compute_gradients(total_loss)
                train_op = optimizer.apply_gradients(grads, global_step=global_step)

        sess = tf.InteractiveSession()
        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        tb_path = save_model_dir
        logger.info(tb_path)
        start_iter = tb_utils.get_latest_iteration(tb_path)

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()  # saves variables learned during training
        ckpt_file = tf.train.latest_checkpoint(save_model_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess, saver,
                                          load_logits=False)
        logger.info(load_model_msg)

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)

        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy', model.val_accumulated_accuracy)

        best_model_step = 0
        best_acc = 0
        logger.info('Start Training from {}, till {}'.format(start_iter, cfg.train_iters))

        # Start Training
        for step in range(start_iter + 1, cfg.train_iters + 1):
            start_time_train = time.time()

            # Update network weights while supporting caffe_iter_size
            for mini_batch in range(cfg.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                model_loss_value, accuracy_value, _ = sess.run(
                    [model.train_loss, model.train_accuracy, accum_ops], feed_dict)

            feed_dict = {handle: training_handle}
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)

            if cfg.caffe_iter_size > 1:  ## Accumulated gradient: reset the accumulators
                sess.run(zero_ops)

            train_time = time.time() - start_time_train

            if step == 1 or step % cfg.logging_threshold == 0:
                logger.info('i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.format(
                    step, model_loss_value, accuracy_value, train_time))

            if step % cfg.test_interval == 0:
                run_metadata = tf.RunMetadata()
                tf.local_variables_initializer().run()
                sess.run(validation_iterator.initializer)
                _val_acc_op = 0
                while True:
                    try:
                        # Eval network on validation/testing split
                        feed_dict = {handle: validation_handle}
                        val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, \
                            c_cnf_mat, macro_acc = sess.run(
                                [val_loss, model.val_accuracy, model_acc_op, val_acc_op,
                                 model.val_accumulated_accuracy, model.val_confusion_mat,
                                 model.val_per_class_acc_acc], feed_dict)
                    except tf.errors.OutOfRangeError:
                        logger.info('Val Acc {0}, Macro Acc: {1}'.format(_val_acc, macro_acc))
                        break

                train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
                train_writer.add_summary(val_loss_op, step)
                train_writer.add_summary(_val_acc_op, step)
                train_writer.add_summary(accuracy_op, step)
                train_writer.flush()

                if step % 100 == 0:
                    saver.save(sess, ckpt_file)
                if best_acc < _val_acc:
                    saver.save(sess, ckpt_file + 'best')
                    best_acc = _val_acc
                    best_model_step = step
                logger.info('Best Acc {0} at {1} == {2}'.format(
                    best_acc, best_model_step, model_basename))

        logger.info('Triplet loss lambda {}'.format(cfg.triplet_loss_lambda))
        logger.info('Mode {}'.format(cfg.train_mode))
        logger.info('Loop complete')
        sess.close()
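# Standalone sketch of the accumulated-gradient ("caffe_iter_size") pattern
# used above: gradients from several mini-batches are summed into
# non-trainable shadow variables, applied once (divided by the accumulation
# count), then zeroed. TF-1.x style; the function name is illustrative.
import tensorflow.compat.v1 as tf


def build_accum_train_ops(loss, optimizer, iter_size):
    tvars = tf.trainable_variables()
    accum_vars = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
                  for v in tvars]
    zero_ops = [a.assign(tf.zeros_like(a)) for a in accum_vars]
    grads = optimizer.compute_gradients(loss, tvars)
    # Sum each mini-batch gradient into its shadow variable.
    accum_ops = [accum_vars[i].assign_add(g) for i, (g, _) in enumerate(grads)]
    # One training step applies the averaged accumulated gradients.
    train_op = optimizer.apply_gradients(
        [(accum_vars[i] / iter_size, v) for i, (_, v) in enumerate(grads)])
    return zero_ops, accum_ops, train_op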
def main(argv):
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['csv_file'] = cfg.train_csv_file
    train_iter = img_generator_class(args)
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    # Where to save the trained model
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)

    ## Log experiment
    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)

    with tf.Graph().as_default():
        # Create train and val datasets following the tensorflow Data API.
        ## A dataset element is an (image, label) pair.
        train_dataset = TensorflowTupleLoader(train_imgs, train_lbls, cfg,
                                              is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs, val_lbls, cfg,
                                            is_training=False,
                                            batch_size=cfg.batch_size,
                                            repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()

        ## Load a pretrained network {resnet_v2 or densenet161} based on
        ## cfg.network_name.
        network_class = locate(cfg.network_name)
        model = network_class(cfg, is_training=True, images_ph=images_ph,
                              lbls_ph=lbls_ph)

        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated gradient
            ## Create a list of shadow variables with the same shapes as the
            ## trainable ones, initialized with zeros.
            accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()),
                                      trainable=False)
                          for tv in trainable_vars]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated gradient
                grads = optimizer.compute_gradients(model.train_loss, trainable_vars)
                ## Add each variable's gradient to its shadow accumulator (works
                ## because accum_vars and grads are in the same order).
                accum_ops = [accum_vars[i].assign_add(gv[0])
                             for i, gv in enumerate(grads)]
                iter_size = cfg.caffe_iter_size
                # Define the training step (applies the averaged accumulated gradients).
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1]) for i, gv in enumerate(grads)],
                    global_step=global_step)
            else:
                # If accumulated gradient is disabled, do regular training.
                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads, global_step=global_step)

        logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     print('trainable_variables: {0} \t {1}'.format(str(v.name), str(v.shape)))

        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        # now = datetime.now()
        # if config.tensorboard_file is None:
        #     tb_path = config.tensorboard_dir + now.strftime("%Y%m%d-%H%M%S")
        # else:
        #     tb_path = config.tensorboard_dir + config.tensorboard_file
        start_iter = 1  # No resume in this code version
        # train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        saver = tf.train.Saver()  # saves variables learned during training
        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)
        print('Model Path ', ckpt_file)

        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess, saver,
                                          is_finetuning=True)
        logger.info(load_model_msg)

        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)

        logger.info('Start Training ***********')
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, cfg.train_iters + 1):
            start_time_train = time.time()
            feed_dict = {handle: training_handle}

            ## Here is where training and backpropagation start.
            # In case accumulated gradient is enabled, i.e. cfg.caffe_iter_size > 1,
            # accumulate gradients for all but the last mini-batch.
            for mini_batch in range(cfg.caffe_iter_size - 1):
                sess.run(accum_ops, feed_dict)

            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)

            # In case accumulated gradient is enabled, reset the shadow variables.
            if cfg.caffe_iter_size > 1:
                sess.run(zero_ops)
            ## Here is where training and backpropagation end.

            train_time = time.time() - start_time_train

            if current_iter % cfg.logging_threshold == 0 or current_iter == 1:
                logger.info('i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.format(
                    current_iter, model_loss_value, accuracy_value, train_time))

            if current_iter % cfg.test_interval == 0:
                # run_metadata = tf.RunMetadata()
                tf.local_variables_initializer().run()
                sess.run(validation_iterator.initializer)
                while True:
                    try:
                        feed_dict = {handle: validation_handle}
                        val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, \
                            _val_acc, c_cnf_mat = sess.run(
                                [val_loss, model.val_accuracy, model_acc_op,
                                 val_acc_op, model.val_accumulated_accuracy,
                                 model.val_confusion_mat], feed_dict)
                    except tf.errors.OutOfRangeError:
                        logger.info('Val Acc {0}'.format(_val_acc))
                        break

                # train_writer.add_run_metadata(run_metadata, 'step%03d' % current_iter)
                # train_writer.add_summary(val_loss_op, current_iter)
                # train_writer.add_summary(_val_acc_op, current_iter)
                # train_writer.add_summary(accuracy_op, current_iter)
                # train_writer.flush()

                if current_iter % cfg.logging_threshold == 0:
                    saver.save(sess, ckpt_file)
                if best_acc < _val_acc:
                    saver.save(sess, ckpt_file + 'best')
                    best_acc = _val_acc
                    best_model_step = current_iter
                ## Early-stopping style bookkeeping.
                logger.info('Best Acc {0} at {1} == {2}'.format(
                    best_acc, best_model_step, model_basename))

        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        sess.close()
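# Minimal sketch of the feedable-iterator pattern both training loops rely on:
# a single string `handle` placeholder switches one graph between the train
# and validation datasets without rebuilding anything (TF-1.x style).
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

train_ds = tf.data.Dataset.from_tensor_slices(list(range(8))).repeat().batch(2)
val_ds = tf.data.Dataset.from_tensor_slices(list(range(4))).batch(2)

handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, train_ds.output_types, train_ds.output_shapes)
batch = iterator.get_next()

train_iter = train_ds.make_one_shot_iterator()
val_iter = val_ds.make_initializable_iterator()

with tf.Session() as sess:
    train_handle = sess.run(train_iter.string_handle())
    val_handle = sess.run(val_iter.string_handle())
    print(sess.run(batch, {handle: train_handle}))  # a train batch
    sess.run(val_iter.initializer)
    print(sess.run(batch, {handle: val_handle}))    # a validation batch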
def main(argv):
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)
    if not os.path.exists(args.dataset):
        return

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings.h5'

    os_utils.touch_dir(os.path.join(args.experiment_root, args.foldername))
    log_file = os.path.join(args.experiment_root, args.foldername, "embed")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('embed')

    args.filename = os.path.join(args.experiment_root, args.foldername, args.filename)
    var_filepath = os.path.join(args.experiment_root, args.foldername,
                                args.filename[:-3] + '_var.txt')

    # Load the args from the original experiment.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if os.path.isfile(args_file):
        if not args.quiet:
            print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        # Add arguments from training.
        for key, value in args_resumed.items():
            args.__dict__.setdefault(key, value)
        # A couple of special cases and sanity checks.
        if (args_resumed['crop_augment']) == (args.crop_augment is None):
            print('WARNING: crop augmentation differs between training and '
                  'evaluation.')
        args.image_root = args.image_root or args_resumed['image_root']
    else:
        raise IOError('`args.json` could not be found in: {}'.format(args_file))

    # Check that a proper aggregator is provided if augmentation is used.
    if args.flip_augment or args.crop_augment == 'five':
        if args.aggregator is None:
            print('ERROR: Test-time augmentation is performed but no aggregator '
                  'was specified.')
            exit(1)
    else:
        if args.aggregator is not None:
            print('ERROR: No test-time augmentation that needs aggregating is '
                  'performed but an aggregator was specified.')
            exit(1)

    if not args.quiet:
        print('Evaluating using the following parameters:')
        for key, value in sorted(vars(args).items()):
            print('{}: {}'.format(key, value))

    # Load the data from the CSV file.
    _, data_fids = common.load_dataset(args.dataset, args.image_root)

    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(
        lambda fid: common.fid_to_image(
            fid, tf.constant('dummy'), image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    # `modifiers` is a list of strings that keeps track of which augmentations
    # have been applied, so that a human can understand it later on.
    modifiers = ['original']
    if args.flip_augment:
        dataset = dataset.map(flip_augment)
        dataset = dataset.unbatch()
        modifiers = [o + m for m in ['', '_flip'] for o in modifiers]

    if args.crop_augment == 'center':
        dataset = dataset.map(lambda im, fid, pid:
                              (five_crops(im, net_input_size)[0], fid, pid))
        modifiers = [o + '_center' for o in modifiers]
    elif args.crop_augment == 'five':
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.stack(five_crops(im, net_input_size)),
                               tf.stack([fid] * 5), tf.stack([pid] * 5)))
        dataset = dataset.unbatch()
        modifiers = [o + m for o in modifiers for m in
                     ['_center', '_top_left', '_top_right',
                      '_bottom_left', '_bottom_right']]
    elif args.crop_augment == 'avgpool':
        modifiers = [o + '_avgpool' for o in modifiers]
    else:
        modifiers = [o + '_resize' for o in modifiers]

    emb_model = EmbeddingModel(args)

    # Group it back into PK batches.
    dataset = dataset.batch(args.batch_size)
    dataset = dataset.map(lambda im, fid, pid:
                          (emb_model.preprocess_input(im), fid, pid))

    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)

    tf.keras.backend.set_learning_phase(0)

    with h5py.File(args.filename, 'w') as f_out:
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), net=emb_model)
        manager = tf.train.CheckpointManager(
            ckpt, osp.join(args.experiment_root, 'tf_ckpts'), max_to_keep=1)
        ckpt.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            print("Restored from {}".format(manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        emb_storage = np.zeros(
            (len(data_fids) * len(modifiers), args.embedding_dim), np.float32)

        # for batch_idx, batch in enumerate(dataset):
        dataset_iter = iter(dataset)
        for start_idx in count(step=args.batch_size):
            try:
                images, _, _ = next(dataset_iter)
                emb = emb_model(images)
                emb_storage[start_idx:start_idx + len(emb)] += emb
                print('\rEmbedded batch {}-{}/{}'.format(
                    start_idx, start_idx + len(emb), len(emb_storage)),
                    flush=True, end='')
            except StopIteration:
                break  # This just indicates the end of the dataset.

        if not args.quiet:
            print("Done with embedding, aggregating augmentations...", flush=True)

        if len(modifiers) > 1:
            # Pull out the augmentations into a separate first dimension.
            emb_storage = emb_storage.reshape(len(data_fids), len(modifiers), -1)
            emb_storage = emb_storage.transpose((1, 0, 2))  # (Aug, FID, 128D)

            # Store the embedding of all individual variants too.
            emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage)

            # Aggregate according to the specified parameter.
            emb_storage = AGGREGATORS[args.aggregator](emb_storage)

        # Store the final embeddings.
        emb_dataset = f_out.create_dataset('emb', data=emb_storage)

        # Store information about the produced augmentation and, in case no crop
        # augmentation was used, whether the images are resized or avg pooled.
        f_out.create_dataset('augmentation_types',
                             data=np.asarray(modifiers, dtype='|S'))
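# Hedged sketch of the object-based checkpointing used above, on a toy Keras
# model instead of the repo's EmbeddingModel; the directory name is arbitrary.
import tensorflow as tf

net = tf.keras.Sequential([tf.keras.layers.Dense(8)])
ckpt = tf.train.Checkpoint(step=tf.Variable(1), net=net)
manager = tf.train.CheckpointManager(ckpt, './tf_ckpts_demo', max_to_keep=1)
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print('Restored from {}'.format(manager.latest_checkpoint))
else:
    print('Initializing from scratch.')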
def directories(cfg, state='train'):
    # Select the directories according to the dataset.
    if cfg.dataset == 'cub':
        dataset_dir = 'CUB_200_2011'
        dataset_file = 'cub_train'
        test_file = 'cub_test'
    elif cfg.dataset == 'bags':
        dataset_dir = 'BAGS'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    elif cfg.dataset == 'bags_40':
        dataset_dir = 'BAGS_40'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    elif cfg.dataset == 'bags_40_v2':
        dataset_dir = 'BAGS_40_v2'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    else:
        raise NotImplementedError('Dataset {} does not exist'.format(cfg.dataset))

    cfg.dirs = Namespace()
    cfg.dirs.csv_file = os.path.join(const.dataset_dir, dataset_dir, dataset_file + '.csv')
    cfg.dirs.images = os.path.join(const.dataset_dir, dataset_dir, 'images')
    cfg.dirs.trained_models = const.trained_models_dir

    # Build the name under which the embedding model's artifacts are stored.
    if cfg.model.fit.loss == "angular_loss":
        exp_name = [cfg.dataset, cfg.model.name, cfg.model.head,
                    cfg.model.fit.optimizer, cfg.model.fit.loss,
                    'alpha_{}'.format(cfg.model.fit.alpha)]
    else:
        exp_name = [cfg.dataset, cfg.model.name, cfg.model.head,
                    cfg.model.fit.optimizer, cfg.model.fit.loss,
                    'm_{}'.format(cfg.model.fit.margin)]
    cfg.model_name = '_'.join(exp_name)

    # Checkpoint directory.
    cfg.dirs.checkpoint = os.path.join(const.experiment_root_dir, cfg.model_name, 'tf_ckpts')
    os_utils.touch_dir(cfg.dirs.checkpoint)

    # Trained-models directory.
    cfg.dirs.trained = os.path.join(const.trained_models_dir, cfg.model_name)
    os_utils.touch_dir(cfg.dirs.trained)

    # Directories for the TensorBoard data.
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    cfg.dirs.train_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'train', current_time)
    os_utils.touch_dir(cfg.dirs.train_log)
    cfg.dirs.test_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'test', current_time)
    os_utils.touch_dir(cfg.dirs.test_log)
    cfg.dirs.eval_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'eval', current_time)
    os_utils.touch_dir(cfg.dirs.eval_log)

    # Location of the logs.
    cfg.dirs.logs = os.path.join(const.experiment_root_dir, "train")

    # When the model is going to be tested.
    if state == 'test':
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.test_file = os.path.join(const.dataset_dir, dataset_dir, test_file + '.csv')
        # Create the directory.
        os_utils.touch_dir(cfg.dirs.embeddings)
    elif state == 'emb':
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.emb_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'emb', current_time)
        os_utils.touch_dir(cfg.dirs.emb_log)
        # Create the directory.
        os_utils.touch_dir(cfg.dirs.embeddings)
    elif state == 'production':
        if cfg.dataset == 'cub':
            labels_file = 'classes.txt'
        elif cfg.dataset in ('bags', 'bags_40', 'bags_40_v2'):
            labels_file = 'bags_labels.csv'
        # File used to recover the names of the image labels.
        cfg.dirs.labels_file = os.path.join(const.dataset_dir, dataset_dir, labels_file)
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.cbir_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'cbir', current_time)
        os_utils.touch_dir(cfg.dirs.cbir_log)

    return cfg
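# Worked example of the experiment-name convention assembled above, with
# hypothetical cfg values (the real fields come from cfg.model.*):
parts = ['cub', 'resnet50', 'triplet_head', 'adam', 'triplet_loss', 'm_{}'.format(0.2)]
assert '_'.join(parts) == 'cub_resnet50_triplet_head_adam_triplet_loss_m_0.2'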