def test_guided_backprop():
    # placeholder for input image
    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])

    # initialize input dataflow
    # change '.png' to other image types if other types of images are used
    input_im = ImageFromFile('.png', data_dir=IMPATH,
                             num_channel=3, shuffle=False)
    # batch size has to be one
    input_im.set_batch_size(1)

    # initialize guided back propagation class
    # use VGG19 as an example
    # images will be rescaled to smallest side = 224 if is_rescale=True
    model = GuideBackPro(vis_model=VGG19_FCN(is_load=False, is_rescale=True))

    # get op to compute the guided back propagation map
    # (final output with respect to the input image)
    back_pro_op = model.get_visualization(image)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        im = input_im.next_batch()[0]
        guided_backpro, label, o_im = sess.run(
            [back_pro_op, model.pre_label, model.input_im],
            feed_dict={image: im})
        print(label)

    tf.reset_default_graph()
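
# For reference, a minimal sketch of the guided-ReLU trick that a class
# like GuideBackPro typically relies on (assuming TF 1.x as elsewhere in
# this repo; the 'GuidedRelu' name and this registration are
# illustrative, not the library's actual internals): gradients flow back
# only where both the forward ReLU output and the incoming gradient are
# positive.
@tf.RegisterGradient('GuidedRelu')
def _guided_relu_grad(op, grad):
    pos_out = tf.cast(op.outputs[0] > 0., tf.float32)
    pos_grad = tf.cast(grad > 0., tf.float32)
    return pos_out * pos_grad * grad

# Usage sketch: build the model inside the override scope, then
# tf.gradients(class_score, image) yields the guided backprop map:
# with tf.get_default_graph().gradient_override_map({'Relu': 'GuidedRelu'}):
#     ...build VGG19 and call tf.gradients...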
def train():
    FLAGS = get_args()
    style_name = os.path.splitext(FLAGS.styleim)[0]
    style_im = scipy.misc.imread('../data/{}'.format(FLAGS.styleim))
    style_im = [imagetool.resize_image_with_smallest_side(style_im, 512)]
    style_shape = [style_im[0].shape[0], style_im[0].shape[1]]

    train_data = ImageFromFile(ext_name='.jpg',
                               data_dir=DATA_PATH,
                               num_channel=3,
                               shuffle=True,
                               batch_dict_name=['im'],
                               pf=imagetool.im_normalize)
    train_data.setup(epoch_val=0, batch_size=FLAGS.batch)

    test_im = scipy.misc.imread('../data/cat.png')
    test_im = [test_im]

    train_model = FastStyle(content_size=256,
                            style_size=style_shape,
                            c_channel=3,
                            s_channel=3,
                            vgg_path=VGG_PATH,
                            s_weight=FLAGS.style,
                            c_weight=FLAGS.content,
                            tv_weight=FLAGS.tv)
    train_model.create_train_model()

    generate_model = FastStyle(c_channel=3)
    generate_model.create_generate_model()

    writer = tf.summary.FileWriter(SAVE_PATH)
    saver = tf.train.Saver(var_list=tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='style_net'))

    sessconfig = tf.ConfigProto()
    sessconfig.gpu_options.allow_growth = True
    with tf.Session(config=sessconfig) as sess:
        sess.run(tf.global_variables_initializer(),
                 feed_dict={train_model.style_image: style_im})
        writer.add_graph(sess.graph)
        # 400 outer loops x 100 iterations = 40000 training steps
        for i in range(400):
            train_model.train(sess, train_data,
                              num_iteration=100,
                              summary_writer=writer)
            generate_model.generate(sess, test_im, summary_writer=writer)
            saver.save(sess,
                       '{}{}_step_{}'.format(SAVE_PATH, style_name, i))
    writer.close()
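
# For reference, a minimal sketch of the Gram-matrix style loss that a
# FastStyle-like model computes from VGG feature maps (assuming TF 1.x;
# the function names here are illustrative, not the repo's API).
def gram_matrix(feat):
    # feat: [N, H, W, C] feature map from one VGG layer
    shape = tf.shape(feat)
    h, w, c = shape[1], shape[2], shape[3]
    flat = tf.reshape(feat, [shape[0], h * w, c])
    # [N, C, C] channel-correlation matrix, normalized by layer size
    return tf.matmul(flat, flat, transpose_a=True) \
        / tf.cast(h * w * c, tf.float32)

def style_layer_loss(style_feat, transfer_feat):
    # squared distance between the style and transfer Gram matrices
    return tf.reduce_mean(tf.squared_difference(
        gram_matrix(style_feat), gram_matrix(transfer_feat)))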
def get_predict_config(FLAGS):
    dataset_test = ImageFromFile(FLAGS.type,
                                 data_dir=config_path.test_data_dir,
                                 shuffle=False,
                                 resize=224,
                                 num_channel=NUM_CHANNEL)
    # dataset_test = ImageLabelFromFolder('.jpg',
    #                                     data_dir=config_path.data_dir,
    #                                     num_class=FLAGS.nclass,
    #                                     resize=224,
    #                                     num_channel=NUM_CHANNEL)
    prediction_list = [
        # PredictionScalar(['pre_label'], ['label']),
        # PredictionMeanScalar('accuracy/result', 'test_accuracy'),
        PredictionMat('classmap/result', ['test']),
        PredictionOverlay(['classmap/result', 'image'], ['map', 'image'],
                          color=True, merge_im=True),
        PredictionImage(['image'], ['image'], color=True, merge_im=True)]

    return PridectConfig(
        dataflow=dataset_test,
        model=VGGCAM(num_class=FLAGS.nclass,
                     inspect_class=FLAGS.label,
                     is_load=True,
                     pre_train_path=config_path.vgg_dir),
        model_name=FLAGS.model,
        predictions=prediction_list,
        batch_size=FLAGS.bsize,
        default_dirs=config_path)
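
# For reference, a minimal sketch of the class activation map that the
# 'classmap/result' node is assumed to produce (names here are
# illustrative): the last conv feature map weighted by the inspected
# class's weights in the final fully connected layer.
def class_activation_map(conv_feat, fc_weights, class_id):
    # conv_feat: [1, H, W, C]; fc_weights: [C, num_class]
    class_w = fc_weights[:, class_id]                  # [C]
    cam = tf.reduce_sum(conv_feat * class_w, axis=-1)  # [1, H, W]
    # resize cam to the 224x224 input before overlaying on the image
    return cam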
def test_gradcam():
    # merge several output images into one large image
    merge_im = 1
    grid_size = np.ceil(merge_im**0.5).astype(int)

    # class labels for Grad-CAM generation
    # 355 llama, 543 dumbbell, 605 iPod, 515 hat, 99 goose,
    # 283 tiger cat, 282 tabby cat, 233 border collie, 242 boxer
    # class_id = [355, 543, 605, 515]
    class_id = [283, 242]

    # initialize Grad-CAM, using VGG19
    gcam = ClassifyGradCAM(
        vis_model=VGG19_FCN(is_load=False, is_rescale=True))
    gbackprob = GuideBackPro(
        vis_model=VGG19_FCN(is_load=False, is_rescale=True))

    # placeholder for input image
    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])

    # create VGG19 model
    gcam.create_model(image)
    gcam.setup_graph()

    # generate class map and prediction label ops
    map_op = gcam.get_visualization(class_id=class_id)
    label_op = gcam.pre_label
    back_pro_op = gbackprob.get_visualization(image)

    # initialize input dataflow
    # change '.png' to other image types if other types of images are used
    input_im = ImageFromFile('.png', data_dir=IMPATH,
                             num_channel=3, shuffle=False)
    input_im.set_batch_size(1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cnt = 0
        merge_cnt = 0
        o_im_list = []
        im = input_im.next_batch()[0]
        gcam_map, b_map, label, o_im = sess.run(
            [map_op, back_pro_op, label_op, gcam.input_im],
            feed_dict={image: im})
        print(label)
        o_im_list.extend(o_im)
        for idx, cid, cmap in zip(count(), gcam_map[1], gcam_map[0]):
            overlay_im = image_overlay(cmap, o_im)
            weight_im = image_weight_mask(b_map[0], cmap)
            try:
                weight_im_list[idx].append(weight_im)
                overlay_im_list[idx].append(overlay_im)
            except NameError:
                # create the per-class result lists on the first pass
                gcam_class_id = gcam_map[1]
                weight_im_list = [[] for i in range(len(gcam_class_id))]
                overlay_im_list = [[] for i in range(len(gcam_class_id))]
                weight_im_list[idx].append(weight_im)
                overlay_im_list[idx].append(overlay_im)

    tf.reset_default_graph()
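
# For reference, a minimal sketch of the core Grad-CAM computation a
# class like ClassifyGradCAM performs (assuming TF 1.x; names are
# illustrative): channel weights come from spatially averaged gradients
# of the class score, and the ReLU keeps only positive evidence.
def grad_cam(class_score, conv_feat):
    # class_score: scalar logit of the target class (e.g. 283)
    # conv_feat:   [1, H, W, C] last conv-layer feature map
    grads = tf.gradients(class_score, conv_feat)[0]
    # global average pooling of gradients -> per-channel weights
    weights = tf.reduce_mean(grads, axis=[1, 2], keepdims=True)  # [1,1,1,C]
    cam = tf.nn.relu(tf.reduce_sum(weights * conv_feat, axis=-1))
    return cam  # [1, H, W]; upsample to input size for image_overlay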
    parser.add_argument('--type', default='.jpg', type=str,
                        help='image file extension')
    return parser.parse_args()


if __name__ == '__main__':
    FLAGS = get_args()

    model = GoogleNet(is_load=True, pre_train_path=conf.PARA_DIR)

    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    test_data = ImageFromFile(FLAGS.type,
                              data_dir=conf.DATA_DIR,
                              num_channel=3)
    display_data(test_data, 'test_data')
    word_dict = get_word_list('../data/imageNetLabel.txt')

    model.create_model([image, 1])
    test_op = tf.nn.top_k(tf.nn.softmax(model.layer['output']),
                          k=5, sorted=True)
    input_op = model.layer['input']

    writer = tf.summary.FileWriter(conf.SAVE_DIR)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
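        # A sketch (not part of the original, truncated script) of how
        # the top-5 op above could be evaluated; assumes word_dict maps
        # an ImageNet class index to its label string.
        while test_data.epochs_completed < 1:
            batch_im = test_data.next_batch()[0]
            probs, labels = sess.run(test_op, feed_dict={image: batch_im})
            for p, cid in zip(probs[0], labels[0]):
                print('{}: {:.4f}'.format(word_dict[cid], p))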
def get_config(FLAGS):
    # data for training
    dataset_train = ImageLabelFromFolder(FLAGS.type,
                                         data_dir=config_path.data_dir,
                                         num_class=FLAGS.nclass,
                                         resize=224,
                                         num_channel=NUM_CHANNEL)
    # Print image class names and labels
    # print(dataset_train.label_dict)

    # Since the aim of training is visualization of the class map, all the
    # images are used for training. Using the training set as the validation
    # set is only a check that training works correctly.
    dataset_val = ImageLabelFromFolder(FLAGS.type,
                                       data_dir=config_path.data_dir,
                                       num_class=FLAGS.nclass,
                                       resize=224,
                                       num_channel=NUM_CHANNEL)

    # Check accuracy during training using the training set
    inference_list_validation = InferScalars('accuracy/result',
                                             'test_accuracy')

    training_callbacks = [
        ModelSaver(periodic=100),
        TrainSummary(key='train', periodic=50),
        FeedInferenceBatch(dataset_val, batch_count=10, periodic=100,
                           inferencers=inference_list_validation),
        CheckScalar(['accuracy/result', 'loss/result'], periodic=10)]

    inspect_class = None
    if FLAGS.label > 0:
        inspect_class = FLAGS.label

    # Images used to infer the class activation map during training
    dataset_test = ImageFromFile(FLAGS.type,
                                 data_dir=config_path.infer_data_dir,
                                 shuffle=False,
                                 resize=224,
                                 num_channel=NUM_CHANNEL)

    # Check the class activation map during training
    inference_list_test = [
        InferOverlay(['classmap/result', 'image'], ['map', 'image'],
                     color=True),
        InferImages('classmap/result', 'map', color=True)]
    training_callbacks += [FeedInference(dataset_test, periodic=50,
                                         infer_batch_size=1,
                                         inferencers=inference_list_test)]

    return TrainConfig(
        dataflow=dataset_train,
        model=VGGCAM(num_class=FLAGS.nclass,
                     inspect_class=inspect_class,
                     learning_rate=0.001,
                     is_load=True,
                     pre_train_path=config_path.vgg_dir),
        monitors=TFSummaryWriter(),
        callbacks=training_callbacks,
        batch_size=FLAGS.bsize,
        max_epoch=25,
        summary_periodic=50,
        default_dirs=config_path)
    parser.add_argument('-cid', '--class_id', type=int, default=None,
                        help='Assign class id! Default = None')
    parser.add_argument('-t', '--top', type=int, default=1,
                        help='Guided backpropagation for the top-k'
                             ' activations. Default is 1.')
    return parser.parse_args()


if __name__ == '__main__':
    FLAGS = get_parse()

    # placeholder for input image
    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])

    # initialize input dataflow
    # change '.png' to other image types if other types of images are used
    input_im = ImageFromFile(FLAGS.imtype,
                             data_dir=config.im_path,
                             num_channel=3,
                             shuffle=False)
    # batch size has to be one
    input_im.set_batch_size(1)

    # initialize guided back propagation class
    # use VGG19 as an example
    # images will be rescaled to smallest side = 224 if is_rescale=True
    # class_id is not None: return the guided backpropagation maps for
    #   images recognized as the specified class
    # class_id is None: report the top-k predicted classes (out of
    #   ImageNet's default 1000) with their probabilities, each explained
    #   by guided backpropagation
    model = GuideBackPro(vis_model=VGG19_FCN(is_load=True,
                                             pre_train_path=config.vgg_path,
                                             is_rescale=True),
                         class_id=FLAGS.class_id,
                         top=FLAGS.top)

    # get op to compute guided back propagation map
    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])

    # create VGG19 model
    gcam.create_model(image)
    gcam.setup_graph()

    # generate class map and prediction label ops
    map_op = gcam.get_visualization(class_id=class_id)
    label_op = gcam.pre_label
    back_pro_op = gbackprob.get_visualization(image)

    # initialize input dataflow
    # change '.png' to other image types if other types of images are used
    input_im = ImageFromFile('.png', data_dir=IM_PATH,
                             num_channel=3, shuffle=False)
    input_im.set_batch_size(1)

    writer = tf.summary.FileWriter(SAVE_DIR)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)

        cnt = 0
        merge_cnt = 0
        # weight_im_list = [[] for i in range(len(class_id))]
        o_im_list = []
        while input_im.epochs_completed < 1:
            im = input_im.next_batch()[0]
def get_config(FLAGS):
    # data for training
    dataset_train = ImageLabelFromFolder(FLAGS.type,
                                         data_dir=config_path.data_dir,
                                         num_class=FLAGS.nclass,
                                         resize=224,
                                         num_channel=NUM_CHANNEL)
    # Print image class names and labels
    # print(dataset_train.label_dict)

    # Since the aim of training is visualization of the class map, all the
    # images are used for training. Using the training set as the validation
    # set is only a check that training works correctly.
    dataset_val = ImageLabelFromFolder(FLAGS.type,
                                       data_dir=config_path.val_data_dir,
                                       num_class=FLAGS.nclass,
                                       resize=224,
                                       num_channel=NUM_CHANNEL)

    # Check accuracy during training using the training set
    inference_list_validation = InferScalars('accuracy/result',
                                             'test_accuracy')

    training_callbacks = [
        # save the model to config.checkpoint_dir every 100 training steps
        ModelSaver(periodic=100),
        # write training accuracy, loss, etc. to the summary every 50
        # training steps for later inspection with TensorBoard
        TrainSummary(key='train', periodic=50),
        # evaluate accuracy and loss on dataset_val (same as
        # dataset_train) every 100 training steps
        FeedInferenceBatch(dataset_val, batch_count=10, periodic=100,
                           inferencers=inference_list_validation),
        # print accuracy and loss every 10 training steps
        CheckScalar(['accuracy/result', 'loss/result'], periodic=10)]

    inspect_class = None
    if FLAGS.label > 0:
        # specify inspect_class during training (e.g. ImageNet 282 is
        # tiger cat) to record how the CAM changes across epochs
        inspect_class = FLAGS.label

    # Images used to infer the class activation map during training
    dataset_test = ImageFromFile(FLAGS.infer_type,
                                 data_dir=config_path.infer_data_dir,
                                 shuffle=False,
                                 resize=224,
                                 num_channel=NUM_CHANNEL)

    # Check the class activation map during training
    inference_list_test = [
        InferOverlay(['classmap/result', 'image'], ['map', 'image'],
                     color=True),
        InferImages('classmap/result', 'map', color=True)]
    # run inference on the infer images every 50 training steps and save
    # the results to config.infer_dir
    training_callbacks += [FeedInference(dataset_test, periodic=50,
                                         infer_batch_size=1,
                                         inferencers=inference_list_test)]

    return TrainConfig(
        dataflow=dataset_train,
        model=VGGCAM(num_class=FLAGS.nclass,
                     inspect_class=inspect_class,
                     learning_rate=0.001,
                     is_load=True,
                     pre_train_path=config_path.vgg_dir),
        monitors=TFSummaryWriter(),
        callbacks=training_callbacks,
        batch_size=FLAGS.bsize,
        max_epoch=25,
        summary_periodic=50,
        default_dirs=config_path)
    parser.add_argument('--id', type=int, default=None,
                        help='feature map id')
    return parser.parse_args()


def im_scale(im):
    return uim.im_rescale(im, [IM_SIZE, IM_SIZE])


if __name__ == '__main__':
    FLAGS = get_parse()

    input_im = ImageFromFile(FLAGS.imtype,
                             data_dir=config.im_path,
                             num_channel=3,
                             shuffle=False,
                             pf=im_scale)
    input_im.set_batch_size(1)
    print('size', input_im.size())

    vizmodel = DeconvBaseVGG19(config.vgg_path,
                               feat_key=FLAGS.feat,
                               pick_feat=FLAGS.id)

    vizmap = vizmodel.layers['deconvim']
    # print('vizmap', vizmap)
    # 4D tensor [N, H, W, C] of the layer selected by FLAGS.feat
    feat_op = vizmodel.feats
    # maximum activation of the picked feature map for that layer
    max_act_op = vizmodel.max_act

    act_size = vizmodel.receptive_size[FLAGS.feat]
    act_scale = vizmodel.stride[FLAGS.feat]
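
# For reference, a minimal numpy sketch (function name illustrative) of
# how act_size and act_scale locate the input patch behind the strongest
# activation: the argmax position in the feature map is mapped back to
# input coordinates by the layer stride, and the crop spans one
# receptive field.
def max_act_patch(feat_map, act_size, act_scale):
    # feat_map: [H, W] activations of the picked channel
    r, c = np.unravel_index(np.argmax(feat_map), feat_map.shape)
    top, left = r * act_scale, c * act_scale
    return top, left, top + act_size, left + act_size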
                        default='.jpg', type=str,
                        help='image file extension')
    return parser.parse_args()


if __name__ == '__main__':
    FLAGS = get_args()

    model = GoogleNet(is_load=True, pre_train_path=conf.PARA_DIR)

    image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    test_data = ImageFromFile(
        # ed: FLAGS.type comes from argparse, which is awkward to use in
        # IPython, so the extension is hard-coded instead
        # FLAGS.type,
        '.jpg',
        data_dir=conf.DATA_DIR,
        num_channel=3)
    display_data(test_data, 'test_data')
    word_dict = get_word_list('../data/imageNetLabel.txt')

    model.create_model([image, 1])
    test_op = tf.nn.top_k(tf.nn.softmax(model.layer['output']),
                          k=5, sorted=True)
    input_op = model.layer['input']