def main(argv):
    """Entry point: resolve data/snapshot paths from FLAGS and dispatch to
    train() or test().

    Expects module-level FLAGS with: gpu, vf_h, vf_w, dataset, setname,
    modelname, sfolder, mode. Raises ValueError for an unknown dataset or mode.
    """
    # Variable parameters
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    # 64x64 visual features correspond to the 512x512 "quadsplit" data layout.
    if FLAGS.vf_h == 64 and FLAGS.vf_w == 64:
        data_folder = '../quadsplit/data/' + FLAGS.dataset + '/' + FLAGS.setname + '_batch'
    else:
        data_folder = './data/' + FLAGS.dataset + '/' + FLAGS.setname + '_batch'
    data_prefix = FLAGS.dataset + '_' + FLAGS.setname
    reader = data_reader.DataReader(data_folder, data_prefix)
    # %d is filled with the iteration number when a snapshot is saved.
    snapshot_file = os.path.join(
        FLAGS.sfolder,
        FLAGS.dataset + '_' + FLAGS.modelname + '_iter_%d.tfmodel')
    if FLAGS.dataset in ['unc', 'unc+', 'Gref']:
        visual_feat_dir = '../data/coco/visual_feat/'
    elif FLAGS.dataset == 'referit':
        visual_feat_dir = '../data/referit/visual_feat/'
    else:
        # FIX: was `% dataset`, an undefined name (NameError when reached).
        raise ValueError('Unknown dataset %s' % FLAGS.dataset)
    if FLAGS.vf_h == 64 and FLAGS.vf_w == 64:
        # Strip the trailing '/' before appending the resolution suffix.
        visual_feat_dir = visual_feat_dir[:-1] + '_512x512/'
    if FLAGS.mode == 'train':
        if not os.path.isdir(FLAGS.sfolder):
            os.makedirs(FLAGS.sfolder)
        train(reader, snapshot_file, visual_feat_dir)
    elif FLAGS.mode == 'test':
        test(reader, snapshot_file, visual_feat_dir)
    else:
        raise ValueError('Invalid mode: %s' % FLAGS.mode)
def train(self):
    """Training loop for the image classifier.

    Initializes variables, then runs until `data_reader.epoch` reaches
    `self.max_epoch`. Every 100th step evaluates on a test batch (no
    optimizer update); otherwise runs one optimization step. Summaries go
    to `<log_dir>/train` and `<log_dir>/test`; a checkpoint is written
    every 3268 steps.
    """
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)
    saver = tf.train.Saver()
    # 163446 = dataset size; 0.8 = train split — presumably CACD; TODO confirm.
    data_reader = dr.DataReader(self.data_dir, 163446, self.batch_size, 0.8,
                                reproducible=True)
    tf.summary.image('image', self.image_in, 10)
    summary_op = tf.summary.merge_all()
    writer_train = tf.summary.FileWriter(self.log_dir + '/train',
                                         self.sess.graph)
    writer_test = tf.summary.FileWriter(self.log_dir + '/test',
                                        self.sess.graph)
    step = 1
    while data_reader.epoch < self.max_epoch:
        if step % 100 == 0:
            # Evaluation step: test batch, no optimizer op in the fetch list.
            images, label = data_reader.next_batch(phase_train=False)
            reshaped_image = np.reshape(images,
                                        [self.batch_size, 250, 250, 3])
            feed_dict = {
                self.image_in: reshaped_image,
                self.label_in: label
            }
            start_time = time.time()
            # Renamed from `sum` — shadowing the builtin.
            err, acc, summary_str = self.sess.run(
                [self.loss, self.accuracy, summary_op],
                feed_dict=feed_dict)
            duration = time.time() - start_time
            print(
                'Epoch:%d/%d\tTime:%.3f\tLoss:%2.4f\tAcc:%2.4f\t@[TEST]' %
                (data_reader.current_test_batch_index, data_reader.epoch,
                 duration, err, acc))
            writer_test.add_summary(summary_str, step)
        else:
            images, label = data_reader.next_batch(phase_train=True)
            reshaped_image = np.reshape(images,
                                        [self.batch_size, 250, 250, 3])
            feed_dict = {
                self.image_in: reshaped_image,
                self.label_in: label
            }
            start_time = time.time()
            err, acc, summary_str, _ = self.sess.run(
                [self.loss, self.accuracy, summary_op, self.opt],
                feed_dict=feed_dict)
            duration = time.time() - start_time
            print('Epoch:%d/%d\tTime:%.3f\tLoss:%2.4f\tAcc:%2.4f\t' %
                  (data_reader.current_train_batch_index, data_reader.epoch,
                   duration, err, acc))
            writer_train.add_summary(summary_str, step)
        if step % 3268 == 0:
            if not os.path.exists(self.model_dir):
                os.makedirs(self.model_dir)
            saver.save(self.sess, self.model_dir, step)
        # FIX: was `step += 12`. With step starting at 1 and incrementing by
        # 12, step is always ≡ 1 (mod 4), so `step % 100 == 0` and
        # `step % 3268 == 0` (both multiples of 4) could NEVER be true — the
        # test-evaluation branch and checkpointing were unreachable.
        step += 1
def testDataReader(self):
    """Smoke-test the DataReader: draw two training batches and dump each
    (overwriting the previous) to 'testDataReader.mat' for inspection."""
    n_per_batch = 5
    reader = data_reader.DataReader(
        '/home/bingzhang/Documents/Dataset/CACD/data', 163446, n_per_batch,
        0.8, True)
    # print reader.train_indices_set
    # print reader.test_indices_set
    for _ in range(2):
        images, labels = reader.next_batch(phase_train=True)
        images = np.reshape(images, [n_per_batch, 250, 250, 3])
        sio.savemat('testDataReader.mat', {'im': images, 'label': labels})
grads_and_vars = solver.compute_gradients(total_loss, var_list=train_var_list) # Apply learning rate multiplication to gradients grads_and_vars = [ ((g if var_lr_mult[v] == 1 else tf.multiply(var_lr_mult[v], g)), v) for g, v in grads_and_vars ] # Apply gradients train_step = solver.apply_gradients(grads_and_vars, global_step=global_step) ################################################################################ # Initialize parameters and load data ################################################################################ snapshot_loader = tf.train.Saver(tf.trainable_variables()) # Load data reader = data_reader.DataReader(data_folder, data_prefix) snapshot_saver = tf.train.Saver() sess = tf.Session() # Run Initialization operations sess.run(tf.global_variables_initializer()) snapshot_loader.restore(sess, pretrained_model) ################################################################################ # Optimization loop ################################################################################ cls_loss_avg = 0 avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0 decay = 0.99
def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder,
          conv5, model_name, stop_iter, pre_emb=False):
    """Train a referring-segmentation model (selected by `model_name`).

    Args:
        max_iter: total optimization iterations.
        snapshot: save a checkpoint every `snapshot` iterations.
        dataset, setname: select the '<dataset>/<setname>_batch/' data folder.
        mu: per-channel mean subtracted from images (BGR order after flip).
        lr, bs: start learning rate and batch size.
        tfmodel_folder: output folder for .tfmodel snapshots.
        conv5: passed through to the model constructor.
        stop_iter: early-stop iteration (breaks out of the loop).
        pre_emb: if True, build the model with pretrained word embeddings.
    """
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    snapshot_file = os.path.join(tfmodel_folder,
                                 dataset + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)
    # Exponentially-decayed running averages for logging.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    emb_name = 'referit' if dataset == 'referit' else 'Gref'
    if pre_emb:
        print("Use pretrained Embeddings.")
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5,
                                       emb_name=emb_name)
    else:
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5)
    weights = './data/weights/deeplab_resnet_init.ckpt'
    print("Loading pretrained weights from {}".format(weights))
    # Restore only the DeepLab-ResNet backbone variables from the checkpoint.
    load_var = {var.op.name: var for var in tf.global_variables()
                if var.name.startswith('res') or var.name.startswith('bn')
                or var.name.startswith('conv1')}
    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=4)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, weights)
    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    # Pre-allocated batch buffers, refilled in place each iteration.
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    valid_idx_batch = np.zeros((bs, 1), dtype=np.int32)
    reader = data_reader.DataReader(data_folder, data_prefix)
    # for time calculate
    last_time = time.time()
    time_avg = MovingAverage()
    for n_iter in range(max_iter):
        # Assemble one mini-batch by drawing bs samples from the reader.
        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32),
                                  axis=2)
            # RGB -> BGR, then subtract the channel mean.
            im = im[:, :, ::-1]
            im -= mu
            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask
            # Index of the first non-zero (non-padding) token of the sentence.
            for idx in range(text.shape[0]):
                if text[idx] != 0:
                    valid_idx_batch[n_batch, :] = idx
                    break
        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate,
             model.pred, model.target],
            feed_dict={
                model.words: text_batch,  # np.expand_dims(text, axis=0),
                model.im: image_batch,  # np.expand_dims(im, axis=0),
                model.target_fine: mask_batch,  # np.expand_dims(mask, axis=0)
                model.valid_idx: valid_idx_batch
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        # timing
        cur_time = time.time()
        elapsed = cur_time - last_time
        last_time = cur_time
        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos,
                     avg_accuracy_neg))
            time_avg.add(elapsed)
            print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s'
                  % (n_iter, elapsed, time_avg.get_avg(), model_name))
        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))
        if (n_iter + 1) >= stop_iter:
            print('stop training at iter ' + str(stop_iter))
            break
    print('Optimization done.')
def train(self):
    """Build the model, then run the full training loop.

    Logs loss/accuracy every iteration, writes TensorBoard summaries every
    `self.log_step` iterations, and checkpoints every
    `self.checkpoint_step` iterations (and at the final iteration).
    """
    # Build model, and get train_op
    self.build_model()
    train_op = self.train_op(self.total_loss, self.get_train_var_list())
    reader = data_reader.DataReader(self.data_folder, self.data_prefix)
    # Exponentially-decayed running averages for logging.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    # Accuracy placeholders — accuracies are computed in numpy and fed back
    # in purely so they can be written as TensorBoard scalars.
    acc_all, acc_pos, acc_neg = tf.placeholder(
        tf.float32, shape=()), tf.placeholder(tf.float32,
                                              shape=()), tf.placeholder(
                                                  tf.float32, shape=())
    acc_all_avg, acc_pos_avg, acc_neg_avg = tf.placeholder(
        tf.float32, shape=()), tf.placeholder(tf.float32,
                                              shape=()), tf.placeholder(
                                                  tf.float32, shape=())
    # Add summary for tensorboard; 'train' summaries are fetched with the
    # training step, 'acc' summaries only every log_step iterations.
    tf.summary.scalar('loss', self.cls_loss, ['train'])
    tf.summary.scalar('learning_rate', self.learning_rate, ['train'])
    tf.summary.scalar('accuracy_all', acc_all, ['acc'])
    tf.summary.scalar('accuracy_positive', acc_pos, ['acc'])
    tf.summary.scalar('accuracy_negative', acc_neg, ['acc'])
    tf.summary.scalar('accuracy_all_average', acc_all_avg, ['acc'])
    tf.summary.scalar('accuracy_positive_average', acc_pos_avg, ['acc'])
    tf.summary.scalar('accuracy_negative_average', acc_neg_avg, ['acc'])
    train_summary = tf.summary.merge_all(key='train')
    acc_summary = tf.summary.merge_all(key='acc')
    # tf.train.Saver is used to save and load intermediate models.
    self.saver = tf.train.Saver(max_to_keep=50,
                                keep_checkpoint_every_n_hours=1)
    sess = tf.Session()
    self.sess = sess
    # Init train_writer
    train_writer = tf.summary.FileWriter(self.log_folder, sess.graph)
    # Run initialization operations
    sess.run(tf.global_variables_initializer())
    self.initialize(sess)
    self.log_info()
    for n_iter in range(1, self.max_iter + 1):
        batch = reader.read_batch()
        text_seq_val = batch['text_seq_batch']
        imcrop_val = batch['imcrop_batch'].astype(
            np.float32) - self.channel_mean
        label_val = batch['label_fine_batch'].astype(np.float32)
        start_time = time.time()
        # Forward and Backward pass
        scores_val, cls_loss_val, _, lr_val, train_sum = sess.run(
            [
                self.scores, self.cls_loss, train_op, self.learning_rate,
                train_summary
            ],
            feed_dict={
                self.text_seq_batch: text_seq_val,
                self.imcrop_batch: imcrop_val,
                self.label_batch: label_val
            })
        duration = time.time() - start_time
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (
            1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (
            1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (
            1 - decay) * accuracy_neg
        # log accuracy per iter
        num_examples_per_step = self.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: iter %d, %.1f data/sec; %.3f '
                      'sec/batch')
        print(format_str % (datetime.now(), n_iter, examples_per_sec,
                            sec_per_batch))
        print(
            '\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f, lr = %f'
            % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
        print(
            '\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
            % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
        print(
            '\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
            % (n_iter, avg_accuracy_all, avg_accuracy_pos,
               avg_accuracy_neg))
        if n_iter % self.log_step == 0:
            # Fill in the accuracy placeholders and emit the 'acc' summaries.
            acc_sum = sess.run(acc_summary,
                               feed_dict={
                                   acc_all: accuracy_all,
                                   acc_pos: accuracy_pos,
                                   acc_neg: accuracy_neg,
                                   acc_all_avg: avg_accuracy_all,
                                   acc_pos_avg: avg_accuracy_pos,
                                   acc_neg_avg: avg_accuracy_neg
                               })
            train_writer.add_summary(train_sum, n_iter)
            train_writer.add_summary(acc_sum, n_iter)
        if n_iter % self.checkpoint_step == 0 or n_iter >= self.max_iter:
            checkpoint_path = os.path.join(self.log_folder, 'checkpoints')
            self.save(checkpoint_path, n_iter)
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu, lr,
          bs, tfmodel_folder, conv5, re_iter):
    """Train the CMSA model, optionally resuming from iteration `re_iter`.

    When `re_iter` is None, training starts fresh from the DeepLab-ResNet
    init checkpoint; otherwise the snapshot saved at `re_iter` is restored
    and the iteration counter continues from there.

    NOTE(review): the `tfmodel_folder` parameter is immediately overwritten
    with a hardcoded path below, so the argument is effectively ignored.
    """
    iters_per_log = 50000
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    tfmodel_folder = './' + dataset + '/tfmodel/CMSA/'
    snapshot_file = os.path.join(
        tfmodel_folder,
        dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)
    # Exponentially-decayed running averages for logging.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    model = CMSA_model(mode='train', vocab_size=vocab_size, weights=weights,
                       start_lr=lr, batch_size=bs, conv5=conv5)
    if re_iter is None:
        # Fresh start: restore only the backbone variables from the
        # DeepLab-ResNet init checkpoint.
        pretrained_model = 'models/deeplab_resnet_init.ckpt'
        #pretrained_model = 'models/deeplab_resnet.ckpt'
        load_var = {
            var.op.name: var
            for var in tf.global_variables()
            if var.name.startswith('res') or var.name.startswith('bn')
            or var.name.startswith('conv1')
        }
        snapshot_loader = tf.train.Saver(load_var)
        snapshot_saver = tf.train.Saver(max_to_keep=1000)
        re_iter = 0
    else:
        # Resume: restore ALL variables from the snapshot at re_iter.
        print('resume from %d' % re_iter)
        pretrained_model = os.path.join(
            tfmodel_folder, dataset + '_' + weights + '_' + modelname +
            '_iter_' + str(re_iter) + '.tfmodel')
        snapshot_loader = tf.train.Saver()
        snapshot_saver = tf.train.Saver(max_to_keep=1000)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)
    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    # Pre-allocated batch buffers, refilled in place each iteration.
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    reader = data_reader.DataReader(data_folder, data_prefix)
    for n_iter in range(max_iter - re_iter):
        # Shift the loop counter so logging/snapshots continue from re_iter.
        n_iter += re_iter
        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32),
                                  axis=2)
            # RGB -> BGR, then subtract the channel mean.
            im = im[:, :, ::-1]
            im -= mu
            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask
        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [
                model.train_step, model.cls_loss, model.learning_rate,
                model.pred, model.target
            ],
            feed_dict={
                model.words: text_batch,
                model.im: image_batch,
                model.target_fine: mask_batch
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            #print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
            #      % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print(
                'iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                % (n_iter, avg_accuracy_all, avg_accuracy_pos,
                   avg_accuracy_neg))
        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))
    print('Optimization done.')
def test(modelname, iter, dataset, weights, setname, dcrf, mu,
         tfmodel_folder):
    """Evaluate a released CMSA model: precision@{0.5..0.9}, overall and
    mean IoU, optionally with DenseCRF post-processing.

    NOTE(review): the `tfmodel_folder` and `iter` parameters are unused —
    the checkpoint path is hardcoded to the '_release' model below.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    tfmodel_folder = './' + dataset + '/tfmodel/CMSA'
    pretrained_model = os.path.join(
        tfmodel_folder, dataset + '_' + modelname + '_release' + '.tfmodel')
    # Threshold on the (pre-sigmoid) upsampled score map; ~0 keeps any
    # positive response.
    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112
    IU_result = list()
    model = CMSA_model(H=H, W=W, mode='eval', vocab_size=vocab_size,
                       weights=weights)
    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)
    NN = reader.num_batch
    print('test in', dataset, setname)
    for n_iter in range(reader.num_batch):
        # Crude progress bar: one mark per 2% of the data.
        # NOTE(review): raises ZeroDivisionError if NN < 50, and under
        # Python 3 `n_iter / (NN // 50)` is a float (prints e.g. '1.0') —
        # looks written for Python 2 integer division; verify.
        if n_iter % (NN // 50) == 0:
            if n_iter / (NN // 50) % 5 == 0:
                sys.stdout.write(str(n_iter / (NN // 50) // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()
        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        # Resize/pad to HxW, then RGB -> BGR and mean-subtract for the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu
        scores_val, up_val, sigm_val = sess.run(
            [model.pred, model.up, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })
        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        # Map the HxW prediction back to the original mask resolution.
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            # Unary potentials: negative log-probability for fg/bg.
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])
        # NOTE: `U` here (union) intentionally overwrites the CRF unary `U`.
        I, U = eval_tools.compute_mask_IU(predicts, mask)
        IU_result.append({'batch_no': n_iter, 'I': I, 'U': U})
        mean_IoU += float(I) / U
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I / cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I / U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            mean_dcrf_IoU += float(I_dcrf) / U_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf
                                                 >= eval_seg_iou)
        # print(msg)
        seg_total += 1
    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U,
                                                         mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou]/seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
def test(iter, dataset, visualize, setname, dcrf, mu, tfmodel_folder,
         pre_emb=False, use_tree=False, neg_num=0.1):
    """Evaluate an LSCM checkpoint saved at iteration `iter`.

    Reports precision@{0.5..0.9}, overall and mean IoU; optional DenseCRF
    post-processing and optional visualization of predictions.

    Args:
        use_tree: if True, also feed the dependency-tree graph/height inputs.
        neg_num: when != 0.1 (the sentinel default), values < 0.5 in the
            graph adjacency are replaced with this number.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    if visualize:
        save_dir = './' + dataset + '/visualization/' + str(iter) + '/'
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    weights = os.path.join(tfmodel_folder,
                           dataset + '_iter_' + str(iter) + '.tfmodel')
    # Threshold on the upsampled score map; ~0 keeps any positive response.
    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8226 if dataset == 'referit' else 21692
    emb_name = 'referit' if dataset == 'referit' else 'Gref'
    IU_result = list()
    if pre_emb:
        # use pretrained embbeding
        print("Use pretrained Embeddings.")
        model = LSCM_model(num_steps=30, H=H, W=W, mode='eval',
                           vocab_size=vocab_size, emb_name=emb_name)
    else:
        model = LSCM_model(num_steps=30, H=H, W=W, mode='eval',
                           vocab_size=vocab_size)
    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, weights)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)
    NN = reader.num_batch
    for n_iter in range(reader.num_batch):
        # Crude progress bar: one mark per 2% of the data.
        # NOTE(review): ZeroDivisionError if NN < 50; `/` yields a float
        # under Python 3 — looks written for Python 2; verify.
        if n_iter % (NN // 50) == 0:
            if n_iter / (NN // 50) % 5 == 0:
                sys.stdout.write(str(n_iter / (NN // 50) // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()
        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        valid_idx = np.zeros([1], dtype=np.int32)
        graph = batch['graph_batch']
        height = batch['height_batch']
        # Index of the first non-zero (non-padding) token of the sentence.
        for idx in range(text.shape[0]):
            if text[idx] != 0:
                valid_idx[0] = idx
                break
        # 0.1 acts as a "not set" sentinel for neg_num.
        if neg_num != 0.1:
            graph[graph < 0.5] = neg_num
        # Resize/pad to HxW, then RGB -> BGR and mean-subtract for the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu
        if use_tree:
            scores_val, up_val, sigm_val = sess.run(
                [model.pred, model.up, model.sigm],
                feed_dict={
                    model.words: np.expand_dims(text, axis=0),
                    model.im: np.expand_dims(proc_im_, axis=0),
                    model.valid_idx: np.expand_dims(valid_idx, axis=0),
                    model.graph_adj: np.expand_dims(graph, axis=0),
                    model.tree_height: np.expand_dims(height, axis=0)
                })
        else:
            scores_val, up_val, sigm_val = sess.run(
                [model.pred, model.up, model.sigm],
                feed_dict={
                    model.words: np.expand_dims(text, axis=0),
                    model.im: np.expand_dims(proc_im_, axis=0),
                    model.valid_idx: np.expand_dims(valid_idx, axis=0)
                })
        # scores_val = np.squeeze(scores_val)
        # pred_raw = (scores_val >= score_thresh).astype(np.float32)
        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        # Map the HxW prediction back to the original mask resolution.
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            # Unary potentials: negative log-probability for fg/bg.
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])
        if visualize:
            sent = batch['sent_batch'][0]
            visualize_seg(im, mask, predicts, sent)
            if dcrf:
                visualize_seg(im, mask, predicts_dcrf, sent)
        # NOTE: `U` here (union) intentionally overwrites the CRF unary `U`.
        I, U = eval_tools.compute_mask_IU(predicts, mask)
        IU_result.append({'batch_no': n_iter, 'I': I, 'U': U})
        mean_IoU += float(I) / U
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I / cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I / U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            mean_dcrf_IoU += float(I_dcrf) / U_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf
                                                 >= eval_seg_iou)
        # print(msg)
        seg_total += 1
    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou] / seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U,
                                                         mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou] / seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
def test(modelname, iter, dataset, visualize, weights, setname, dcrf, mu):
    """Evaluate a BRI checkpoint saved at iteration `iter`.

    Reports precision@{0.5..0.9} and overall IoU, optionally with DenseCRF
    post-processing.

    NOTE(review): `save_dir` is created when `visualize` is True but never
    used afterwards in this function.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    if visualize:
        save_dir = './' + dataset + '/visualization/' + modelname + '_' + str(iter) + '/'
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    pretrained_model = './' + dataset + '/tfmodel_BRI/' + dataset + '_' + weights + '_' + modelname + '_iter_' + str(iter) + '.tfmodel'
    # Threshold on the upsampled score map; ~0 keeps any positive response.
    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112
    if modelname == 'BRI':
        model = BRI_model(H=H, W=W, mode='eval', vocab_size=vocab_size,
                          weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))
    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)
    for n_iter in range(reader.num_batch):
        batch = reader.read_batch()
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        # Resize/pad to HxW, then RGB -> BGR and mean-subtract for the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:,:,::-1]
        proc_im_ -= mu
        scores_val, up_val, sigm_val = sess.run([model.pred, model.up, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })
        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        # Map the HxW prediction back to the original mask resolution.
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = Dcrf.DenseCRF2D(W, H, 2)
            # Unary potentials: negative log-probability for fg/bg.
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape((H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(pred_raw_dcrf,
                                                          mask.shape[0],
                                                          mask.shape[1])
        # NOTE: `U` here (union) intentionally overwrites the CRF unary `U`.
        I, U = eval_tools.compute_mask_IU(predicts, mask)
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I/cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I/U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf/cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf/U_dcrf >= eval_seg_iou)
        print(msg)
        seg_total += 1
    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou]/seg_total)
        result_str += 'overall IoU = %f\n' % (cum_I_dcrf/cum_U_dcrf)
        print(result_str)
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu):
    """Train the BRI model from ResNet/DeepLab pretrained weights.

    Args:
        modelname: must be 'BRI'.
        max_iter: total optimization iterations.
        snapshot: snapshot interval (only applied after iteration 400000).
        dataset, setname: select the '<dataset>/<setname>_batch/' data folder.
        weights: 'resnet' or 'deeplab' — selects the pretrained checkpoint.
        mu: per-channel mean subtracted from images (BGR order after flip).

    Raises:
        ValueError: for an unknown model name or weights choice.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    tfmodel_folder = './' + dataset + '/tfmodel_BRI/'
    snapshot_file = tfmodel_folder + dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel'
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)
    # Exponentially-decayed running averages for logging.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    if modelname == 'BRI':
        model = BRI_model(mode='train', vocab_size=vocab_size,
                          weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))
    if weights == 'resnet':
        pretrained_model = './external/TF-resnet/model/ResNet101_init.tfmodel'
        load_var = {var.op.name: var for var in tf.global_variables()
                    if var.op.name.startswith('ResNet')}
    elif weights == 'deeplab':
        pretrained_model = './external/TF-deeplab/model/ResNet101_train.tfmodel'
        load_var = {var.op.name: var for var in tf.global_variables()
                    if var.op.name.startswith('DeepLab/group')}
    else:
        # FIX: previously fell through with `pretrained_model`/`load_var`
        # unbound, crashing below with a confusing NameError.
        raise ValueError('Unknown weights %s' % (weights))
    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=1000)
    # FIX: `config` was referenced here but never defined in this function
    # (guaranteed NameError). Define it as the sibling train()/test()
    # functions in this file do.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix)
    for n_iter in range(max_iter):
        batch = reader.read_batch()
        text = batch['text_batch']
        im = batch['im_batch'].astype(np.float32)
        mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)
        # RGB -> BGR, then subtract the channel mean.
        im = im[:, :, ::-1]
        im -= mu
        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate,
             model.pred, model.target],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(im, axis=0),
                model.target_fine: np.expand_dims(mask, axis=0)
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        if (n_iter + 1) % 100 == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        if (n_iter + 1) % 100 == 0:
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos,
                     avg_accuracy_neg))
        # Save snapshot — only once training has passed 400k iterations.
        if (n_iter + 1) >= 400000:
            if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
                snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
                print('snapshot saved to ' + snapshot_file % (n_iter + 1))
        gc.collect()
    print('Optimization done.')
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu, lr,
          bs, tfmodel_folder, conv5):
    """Train an LSTM or RMI referring-segmentation model.

    Args:
        modelname: 'LSTM' or 'RMI'.
        max_iter: total optimization iterations.
        snapshot: save a checkpoint every `snapshot` iterations.
        dataset, setname: select the '<dataset>/<setname>_batch/' data folder.
        weights: 'resnet' or 'deeplab' pretrained backbone.
        mu: per-channel mean subtracted from images (BGR order after flip).
        lr, bs: start learning rate and batch size.
        tfmodel_folder: output folder for .tfmodel snapshots.
        conv5: passed through to LSTM_model.

    Raises:
        ValueError: for an unknown model name.
    """
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    # tfmodel_folder = './' + dataset + '/tfmodel/'
    # tfmodel_folder = './%s/ckpt_lr%.5f_bs%d/' % (dataset, lr, bs)
    snapshot_file = os.path.join(
        tfmodel_folder,
        dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)
    # Exponentially-decayed running averages for logging.
    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    if modelname == 'LSTM':
        model = LSTM_model(mode='train', vocab_size=vocab_size,
                           weights=weights, start_lr=lr, batch_size=bs,
                           conv5=conv5)
    elif modelname == 'RMI':
        model = RMI_model(mode='train', vocab_size=vocab_size,
                          weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))
    # Select the pretrained backbone checkpoint and the variables to restore.
    # NOTE(review): if `weights` is neither 'resnet' nor 'deeplab',
    # `load_var`/`pretrained_model` stay unbound and the code below raises
    # NameError.
    if weights == 'resnet':
        pretrained_model = './external/TF-resnet/model/ResNet101_init.tfmodel'
        load_var = {
            var.op.name: var
            for var in tf.global_variables()
            if var.op.name.startswith('ResNet')
        }
    elif weights == 'deeplab':
        pretrained_model = '/data/ryli/text_objseg/tensorflow-deeplab-resnet/models/deeplab_resnet_init.ckpt'
        load_var = {
            var.op.name: var
            for var in tf.global_variables()
            if var.name.startswith('res') or var.name.startswith('bn')
            or var.name.startswith('conv1')
        }
    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=1000)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)
    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    # Pre-allocated batch buffers, refilled in place each iteration.
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    reader = data_reader.DataReader(data_folder, data_prefix)
    for n_iter in range(max_iter):
        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32),
                                  axis=2)
            # RGB -> BGR, then subtract the channel mean.
            im = im[:, :, ::-1]
            im -= mu
            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask
        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [
                model.train_step, model.cls_loss, model.learning_rate,
                model.pred, model.target
            ],
            feed_dict={
                model.words: text_batch,  #np.expand_dims(text, axis=0),
                model.im: image_batch,  #np.expand_dims(im, axis=0),
                model.target_fine: mask_batch  #np.expand_dims(mask, axis=0)
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print(
                'iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                % (n_iter, avg_accuracy_all, avg_accuracy_pos,
                   avg_accuracy_neg))
        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))
    print('Optimization done.')