def detect_image_folder(image_folder, output_path, model_config_path, model_checkpoint_path):
    """Run object detection on a folder of images and save the results to a CSV."""
    img_reader = ImageReader(image_folder)
    model = Detector(model_config_path, model_checkpoint_path)
    results = model.detect_images(img_reader.load_images(), img_reader.filenames)
    results.to_csv(output_path, index=False)
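# A minimal usage sketch with hypothetical paths (Detector and ImageReader come from
# the surrounding project; detect_images is assumed to return a pandas DataFrame,
# which is what the to_csv call above implies):
detect_image_folder(
    image_folder='data/images',                        # hypothetical input folder
    output_path='results/detections.csv',              # hypothetical output file
    model_config_path='configs/detector.yaml',         # hypothetical config
    model_checkpoint_path='checkpoints/detector.pth')  # hypothetical checkpoint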
def eval(net, data_dict, ensemble_num, recalls):
    net.eval()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'test'))
    data_loader = DataLoader(data_set, BATCH_SIZE, shuffle=False, num_workers=8)
    features = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            out = net(inputs.to(DEVICE))
            out = F.normalize(out)
            features.append(out.cpu())
    features = torch.cat(features, 0)
    torch.save(features, 'results/{}_test_features_{:03}.pth'.format(DATA_NAME, ensemble_num))
    # load the feature vectors of all ensemble members and concatenate along the feature dimension
    features = [torch.load('results/{}_test_features_{:03}.pth'.format(DATA_NAME, d))
                for d in range(1, ensemble_num + 1)]
    features = torch.cat(features, 1)
    acc_list = recall(features, data_set.labels, rank=recalls)
    desc = ''
    for index, recall_id in enumerate(recalls):
        desc += 'R@{}:{:.2f}% '.format(recall_id, acc_list[index] * 100)
    print(desc)
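# The recall() helper above is project-specific. A minimal sketch of what a
# cosine-similarity Recall@K over L2-normalised features could look like
# (an assumption, not the project's exact implementation):
import torch

def recall_at_k(features, labels, rank=(1, 2, 4, 8)):
    labels = torch.as_tensor(labels)
    sims = features @ features.t()              # cosine similarity (features are normalised)
    sims.fill_diagonal_(float('-inf'))          # exclude self-matches
    idx = sims.topk(max(rank), dim=-1).indices  # nearest neighbours per query
    hits = labels[idx] == labels.unsqueeze(-1)  # does a neighbour share the query's label?
    return [hits[:, :k].any(dim=-1).float().mean().item() for k in rank]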
def test_setup(self):
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.valid_data_list, None,
                             False, False, self.conf.ignore_label, IMG_MEAN, self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)
    self.pred = tf.reshape(pred, [-1, ])
    # Labels
    gt = tf.reshape(self.label_batch, [-1, ])
    # Ignore all labels greater than or equal to n_classes.
    temp = tf.less_equal(gt, self.conf.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    # Pixel accuracy
    self.accu, self.accu_update_op = tcm.streaming_accuracy(self.pred, gt, weights=weights)
    # mIoU
    self.mIoU, self.mIou_update_op = tcm.streaming_mean_iou(self.pred, gt, self.conf.num_classes, weights)
    # Confusion matrix
    self.confusion_matrix = tcm.confusion_matrix(self.pred, gt, num_classes=self.conf.num_classes, weights=weights)
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def predict_setup(self):
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions: re-weight the two-class softmax output by a configurable prior.
    global raw_output_
    raw_output = tf.nn.softmax(net.outputs, axis=-1)
    prior1 = self.conf.prior
    prior = 1 - prior1
    class0, class1 = tf.split(raw_output, 2, -1)
    # Compute the common denominator first, so the re-weighted class0 does not
    # leak into the normalisation of class1.
    denom = class0 * prior + class1 * prior1
    class0 = (class0 * prior) / denom
    class1 = (class1 * prior1) / denom
    raw_output = tf.concat([class0, class1], -1)
    raw_output_ = raw_output
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)
    # Create output directories
    if not os.path.exists(self.conf.out_dir + '/prediction' + '/' + str(prior1)):
        # os.makedirs(self.conf.out_dir)
        os.makedirs(self.conf.out_dir + '/prediction' + '/' + str(prior1))
        if self.conf.visual:
            os.makedirs(self.conf.out_dir + '/visual_prediction' + '/' + str(prior1))
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
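# The re-weighting above is a Bayes-style adjustment of the two class posteriors by a
# configurable prior. The same idea as a standalone numpy sketch (illustrative values,
# not the project's code):
import numpy as np

def reweight_binary_posterior(p, prior1):
    """p: array [..., 2] of softmax scores; prior1: prior probability of class 1."""
    prior0 = 1.0 - prior1
    p0, p1 = p[..., 0] * prior0, p[..., 1] * prior1
    denom = p0 + p1
    return np.stack([p0 / denom, p1 / denom], axis=-1)

# e.g. a pixel scored (0.6, 0.4) with prior1 = 0.2 is pushed towards class 0:
# reweight_binary_posterior(np.array([0.6, 0.4]), 0.2) -> [0.857..., 0.142...]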
def test_setup(self):
    # Create queue coordinator.
    num_layers = 50
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    # Create network
    net, end_points = deeplabv3(self.image_batch,
                                num_classes=self.conf.num_classes,
                                depth=num_layers,
                                is_training=True)
    raw_output = end_points['resnet{}/logits'.format(num_layers)]
    # Predictions
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)
    self.pred = tf.reshape(pred, [-1, ])
    # Labels
    gt = tf.reshape(self.label_batch, [-1, ])
    # Ignore all labels greater than or equal to n_classes.
    temp = tf.less_equal(gt, self.conf.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(self.pred, gt, weights=weights)
    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def embed_url_image(self, image_url, id=None):
    try:
        image = ImageReader.read_from_url(image_url)
        return self.embed_pil_image(image, id)
    except Exception as e:
        print(f'Failed to create embeddings for id {id}')
        print(e)
        return None
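# ImageReader.read_from_url is project-specific. A plausible minimal implementation,
# assuming requests plus Pillow (an assumption, not the project's actual code):
import io
import requests
from PIL import Image

def read_from_url(image_url, timeout=10):
    resp = requests.get(image_url, timeout=timeout)
    resp.raise_for_status()  # surface HTTP errors to the caller
    return Image.open(io.BytesIO(resp.content)).convert('RGB')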
def predict_setup(self):
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2])
    image_batch_075 = tf.image.resize_images(
        image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)),
                               tf.to_int32(tf.multiply(w_orig, 0.75))]))
    image_batch_05 = tf.image.resize_images(
        image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)),
                               tf.to_int32(tf.multiply(w_orig, 0.5))]))
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50")
        sys.exit(-1)
    else:
        with tf.variable_scope('', reuse=False):
            net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions: fuse the raw outputs of the three scales by element-wise max.
    raw_output100 = net.outputs
    raw_output075 = net075.outputs
    raw_output05 = net05.outputs
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ])]), axis=0)
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)
    # Create output directories
    if not os.path.exists(self.conf.out_dir):
        os.makedirs(self.conf.out_dir)
        os.makedirs(self.conf.out_dir + '/prediction')
        if self.conf.visual:
            os.makedirs(self.conf.out_dir + '/visual_prediction')
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def predict_setup(self):
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions: keep the per-class softmax scores instead of taking the argmax.
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    print("Before argmax: ", type(raw_output))
    # array = raw_output.eval(session=self.sess)
    # print("Array is:", array)
    # raw_output = tf.argmax(raw_output, axis=3)
    print("After argmax: ", raw_output)
    raw_output_sm = tf.nn.softmax(raw_output)
    self.pred = tf.cast(tf.expand_dims(raw_output_sm, dim=3), tf.float32)
    print("Prediction is: ", self.pred)
    # Create output directories
    if not os.path.exists(self.conf.out_dir):
        os.makedirs(self.conf.out_dir)
        os.makedirs(self.conf.out_dir + '/prediction')
        if self.conf.visual:
            os.makedirs(self.conf.out_dir + '/visual_prediction')
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def get_data_queue(args, coord, is_training=True):
    h, w = map(int, args.input_size.split(','))
    input_size_img = (h, w)
    # integer division so the label size stays an int under Python 3
    input_size_label = (h // FEATSTRIDE, w // FEATSTRIDE)
    # Load reader.
    if is_training:
        with tf.name_scope("create_train_inputs"):
            reader_train = ImageReader(
                args.data_dir, args.data_train_list, input_size_img,
                input_size_label, RANDOM_SCALE, IMG_MEAN, coord)
            image_batch_train, label_batch_train = reader_train.dequeue(args.batch_size)
        return image_batch_train, label_batch_train
    else:
        with tf.name_scope("create_val_inputs"):
            reader_val = ImageReader(
                args.data_dir, args.data_val_list, input_size_img,
                input_size_label, False, IMG_MEAN, coord)
            image_batch_val, label_batch_val = reader_val.dequeue(args.batch_size, is_training=False)
        return image_batch_val, label_batch_val
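# A minimal usage sketch (args, FEATSTRIDE, RANDOM_SCALE and IMG_MEAN are assumed to be
# defined by the surrounding script):
coord = tf.train.Coordinator()
train_images, train_labels = get_data_queue(args, coord, is_training=True)
val_images, val_labels = get_data_queue(args, coord, is_training=False)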
def query_upload_image():
    image_file = request.files['file']
    radius = json.loads(request.form['radius'])
    entity_types = json.loads(request.form['type'])
    entity_types = [ent_type['value'] for ent_type in entity_types]
    query_types = [typ
                   for ent_type in entity_types
                   for typ in list(ENTITY_TYPES[ent_type].keys())]
    pil_image = ImageReader.read_and_resize(image_file.stream)
    return geowine.retrieve_entities_with_pil_image(image=pil_image, radius=radius, entity_type=query_types)
def predict_setup(self):
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.test_data_list, None,
                             False, False, self.conf.ignore_label, IMG_MEAN, self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions.
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)
    # Create output directories
    if not os.path.exists(self.conf.out_dir):
        os.makedirs(self.conf.out_dir)
        os.makedirs(self.conf.out_dir + '/prediction')
        if self.conf.visual:
            os.makedirs(self.conf.out_dir + '/visual_prediction')
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def train(net, data_dict, optim):
    net.train()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'train'))
    data_loader = DataLoader(data_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
    l_data, t_data, n_data = 0.0, 0, 0
    for inputs, labels in data_loader:
        optim.zero_grad()
        out = net(inputs.to(DEVICE))
        loss = criterion(out, labels.to(DEVICE))
        print('loss:{:.4f}'.format(loss.item()), end='\r')
        loss.backward()
        optim.step()
        _, pred = torch.max(out, 1)
        l_data += loss.item()
        t_data += torch.sum(pred.cpu() == labels).item()
        n_data += len(labels)
    return l_data / n_data, t_data / n_data
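# A minimal sketch of the epoch loop these train()/eval() helpers are written for
# (NUM_EPOCHS, train_data_dict and optim are hypothetical stand-ins for the
# surrounding script's globals):
for epoch in range(1, NUM_EPOCHS + 1):
    train_loss, train_acc = train(net, train_data_dict, optim)
    print('Epoch {}: loss {:.4f}, accuracy {:.4f}'.format(epoch, train_loss, train_acc))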
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Input size
    input_size = (self.conf.input_height, self.conf.input_width)
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.data_list,
            input_size,
            self.conf.random_scale,
            self.conf.random_mirror,
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'resnet_v1' in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'resnet_v1' in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]
    decoder_w_trainable = [v for v in decoder_trainable
                           if 'weights' in v.name or 'gamma' in v.name]  # lr * 10.0
    decoder_b_trainable = [v for v in decoder_trainable
                           if 'biases' in v.name or 'beta' in v.name]    # lr * 20.0
    # Check
    assert len(all_trainable) == len(decoder_trainable) + len(encoder_trainable)
    assert len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable)
    # Network raw output
    raw_output = net.outputs  # [batch_size, h, w, 21]
    # Output size
    output_shape = tf.shape(raw_output)
    output_size = (output_shape[1], output_shape[2])
    # Ground truth: ignore all labels greater than or equal to n_classes
    label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)
    # Pixel-wise softmax cross-entropy loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    # L2 regularization
    l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name]
    # Loss function
    self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
    # We use several optimizers here in order to handle the different lr_mult values
    # (a Caffe-style parameter that controls the actual lr for each layer).
    opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
    opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
    opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
    # To make sure each layer gets updated by a different lr, we do not use 'minimize' here.
    # Instead, we separate the steps: compute_grads + update_params.
    # Compute grads
    grads = tf.gradients(self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable)
    grads_encoder = grads[:len(encoder_trainable)]
    grads_decoder_w = grads[len(encoder_trainable):(len(encoder_trainable) + len(decoder_w_trainable))]
    grads_decoder_b = grads[(len(encoder_trainable) + len(decoder_w_trainable)):]
    # Update params
    train_op_conv = opt_encoder.apply_gradients(zip(grads_encoder, encoder_trainable))
    train_op_fc_w = opt_decoder_w.apply_gradients(zip(grads_decoder_w, decoder_w_trainable))
    train_op_fc_b = opt_decoder_b.apply_gradients(zip(grads_decoder_b, decoder_b_trainable))
    # Finally, get the train_op!
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # for collecting moving_mean and moving_variance
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)
    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, dim=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [self.image_batch, 2, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [self.label_batch, 2, self.conf.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [self.pred, 2, self.conf.num_classes], tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=2)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(self.conf.logdir, graph=tf.get_default_graph())
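# The 'poly' schedule above decays the base learning rate towards zero over num_steps.
# The same formula as a standalone sketch (plain Python, illustrative values):
def poly_lr(base_lr, curr_step, num_steps, power=0.9):
    return base_lr * (1 - curr_step / num_steps) ** power

# e.g. poly_lr(2.5e-4, 10000, 20000) ~= 1.34e-4, halfway through training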
data_path, data_name, crop_type, backbone_type = opt.data_path, opt.data_name, opt.crop_type, opt.backbone_type
gd_config, feature_dim, smoothing, temperature = opt.gd_config, opt.feature_dim, opt.smoothing, opt.temperature
margin, recalls, batch_size = opt.margin, [int(k) for k in opt.recalls.split(',')], opt.batch_size
num_epochs = opt.num_epochs
save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(data_name, crop_type, backbone_type, gd_config,
                                                    feature_dim, smoothing, temperature, margin, batch_size)
results = {'train_loss': [], 'train_accuracy': []}
for recall_id in recalls:
    results['test_recall@{}'.format(recall_id)] = []

# dataset loader
train_data_set = ImageReader(data_path, data_name, 'train', crop_type)
train_sample = MPerClassSampler(train_data_set.labels, batch_size)
train_data_loader = DataLoader(train_data_set, batch_sampler=train_sample, num_workers=8)
test_data_set = ImageReader(data_path, data_name, 'query' if data_name == 'isc' else 'test', crop_type)
test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False, num_workers=8)
eval_dict = {'test': {'data_loader': test_data_loader}}
if data_name == 'isc':
    gallery_data_set = ImageReader(data_path, data_name, 'gallery', crop_type)
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Input size
    h, w = (self.conf.input_height, self.conf.input_width)
    input_size = (h, w)
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.data_list, input_size,
                             self.conf.random_scale, self.conf.random_mirror,
                             self.conf.ignore_label, IMG_MEAN, self.coord)
        self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
    image_batch_075 = tf.image.resize_images(self.image_batch, [int(h * 0.75), int(w * 0.75)])
    image_batch_05 = tf.image.resize_images(self.image_batch, [int(h * 0.5), int(w * 0.5)])
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        with tf.variable_scope('', reuse=False):
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
        with tf.variable_scope('', reuse=True):
            net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, True)
        with tf.variable_scope('', reuse=True):
            net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
    else:
        with tf.variable_scope('', reuse=False):
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, True, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, True, self.conf.encoder_name)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'resnet_v1' in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'resnet_v1' in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]
    decoder_w_trainable = [v for v in decoder_trainable
                           if 'weights' in v.name or 'gamma' in v.name]  # lr * 10.0
    decoder_b_trainable = [v for v in decoder_trainable
                           if 'biases' in v.name or 'beta' in v.name]    # lr * 20.0
    # Check
    assert len(all_trainable) == len(decoder_trainable) + len(encoder_trainable)
    assert len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable)
    # Network raw output, fused over three scales by element-wise max
    raw_output100 = net.outputs
    raw_output075 = net075.outputs
    raw_output05 = net05.outputs
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ])]), axis=0)
    # Ground truth: ignore all labels greater than or equal to n_classes
    label_proc = prepare_label(self.label_batch, tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=self.conf.num_classes, one_hot=False)  # [batch_size, h, w]
    label_proc075 = prepare_label(self.label_batch, tf.stack(raw_output075.get_shape()[1:3]),
                                  num_classes=self.conf.num_classes, one_hot=False)
    label_proc05 = prepare_label(self.label_batch, tf.stack(raw_output05.get_shape()[1:3]),
                                 num_classes=self.conf.num_classes, one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1, ])
    raw_gt075 = tf.reshape(label_proc075, [-1, ])
    raw_gt05 = tf.reshape(label_proc05, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
    indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1)
    indices05 = tf.squeeze(tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
    gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)
    raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
    raw_prediction100 = tf.reshape(raw_output100, [-1, self.conf.num_classes])
    raw_prediction075 = tf.reshape(raw_output075, [-1, self.conf.num_classes])
    raw_prediction05 = tf.reshape(raw_output05, [-1, self.conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)
    prediction100 = tf.gather(raw_prediction100, indices)
    prediction075 = tf.gather(raw_prediction075, indices075)
    prediction05 = tf.gather(raw_prediction05, indices05)
    # Pixel-wise softmax cross-entropy losses, one per scale plus the fused output
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
    loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
    loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
    # L2 regularization
    l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name]
    # Loss function
    self.reduced_loss = (tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075)
                         + tf.reduce_mean(loss05) + tf.add_n(l2_losses))
    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
    # We use several optimizers here in order to handle the different lr_mult values
    # (a Caffe-style parameter that controls the actual lr for each layer).
    opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
    opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
    opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
    # Gradient accumulation:
    # define variables to accumulate gradients across sub-batches.
    accum_grads = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
                   for v in encoder_trainable + decoder_w_trainable + decoder_b_trainable]
    # Define an operation to clear the accumulated gradients for the next batch.
    self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
    # To make sure each layer gets updated by a different lr, we do not use 'minimize' here.
    # Instead, we separate the steps: compute_grads + update_params.
    # Compute grads
    grads = tf.gradients(self.reduced_loss,
                         encoder_trainable + decoder_w_trainable + decoder_b_trainable)
    # Accumulate and normalise the gradients.
    self.accum_grads_op = [accum_grads[i].assign_add(grad / self.conf.grad_update_every)
                           for i, grad in enumerate(grads)]
    grads_encoder = accum_grads[:len(encoder_trainable)]
    grads_decoder_w = accum_grads[len(encoder_trainable):(len(encoder_trainable) + len(decoder_w_trainable))]
    grads_decoder_b = accum_grads[(len(encoder_trainable) + len(decoder_w_trainable)):]
    # Update params
    train_op_conv = opt_encoder.apply_gradients(zip(grads_encoder, encoder_trainable))
    train_op_fc_w = opt_decoder_w.apply_gradients(zip(grads_decoder_w, decoder_w_trainable))
    train_op_fc_b = opt_decoder_b.apply_gradients(zip(grads_decoder_b, decoder_b_trainable))
    # Finally, get the train_op!
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # for collecting moving_mean and moving_variance
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)
    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, dim=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [self.image_batch, 1, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [self.label_batch, 1, self.conf.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [self.pred, 1, self.conf.num_classes], tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=20)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(self.conf.logdir, graph=tf.get_default_graph())
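# A minimal sketch of how the accumulation ops above are typically driven from a
# session loop (trainer, sess and feed_dict are hypothetical stand-ins for the
# surrounding training script):
sess.run(trainer.zero_op)                        # clear the accumulators
for _ in range(trainer.conf.grad_update_every):  # accumulate over several sub-batches
    sess.run(trainer.accum_grads_op, feed_dict=feed_dict)
sess.run(trainer.train_op, feed_dict=feed_dict)  # apply the averaged gradients once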
def test_setup(self):
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    h_orig, w_orig = tf.to_float(tf.shape(self.image_batch)[1]), tf.to_float(tf.shape(self.image_batch)[2])
    image_batch_075 = tf.image.resize_images(
        self.image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)),
                                    tf.to_int32(tf.multiply(w_orig, 0.75))]))
    image_batch_05 = tf.image.resize_images(
        self.image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)),
                                    tf.to_int32(tf.multiply(w_orig, 0.5))]))
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        with tf.variable_scope('', reuse=False):
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
        with tf.variable_scope('', reuse=True):
            net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, False)
        with tf.variable_scope('', reuse=True):
            net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, False)
    else:
        with tf.variable_scope('', reuse=False):
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name)
        with tf.variable_scope('', reuse=True):
            net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions: fuse the raw outputs of the three scales by element-wise max.
    raw_output100 = net.outputs
    raw_output075 = net075.outputs
    raw_output05 = net05.outputs
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ])]), axis=0)
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)
    self.pred = tf.reshape(pred, [-1, ])
    # Labels
    gt = tf.reshape(self.label_batch, [-1, ])
    # Ignore all labels greater than or equal to n_classes.
    temp = tf.less_equal(gt, self.conf.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(self.pred, gt, weights=weights)
    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)
    # Confusion matrix
    self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
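# The multi-scale fusion above reduces three per-scale score maps to one by taking
# the element-wise maximum after resizing them to a common resolution. The same idea
# in numpy (illustrative shapes and random data only):
import numpy as np

scores_100 = np.random.rand(1, 64, 64, 21)  # full resolution
scores_075 = np.random.rand(1, 64, 64, 21)  # 0.75 scale, already resized back to 64x64
scores_05 = np.random.rand(1, 64, 64, 21)   # 0.5 scale, already resized back to 64x64
fused = np.max(np.stack([scores_100, scores_075, scores_05]), axis=0)
pred = np.argmax(fused, axis=-1)             # per-pixel class decision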
def train_setup(self, reuse=False):
    tf.set_random_seed(self.conf.random_seed)
    num_layers = 50
    # -------------------------------------------------------------------------
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    self.n_gpu = self.conf.n_gpu
    # Input size
    self.input_size = (self.conf.input_height, self.conf.input_width)
    j_step = 0
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.data_list, self.input_size,
                             self.conf.random_scale, self.conf.random_mirror,
                             self.conf.ignore_label, IMG_MEAN, self.coord)
        image_data, image_label = reader.dequeue(self.conf.batch_size)
        self.image_data = image_data
    # Split the batch across the available GPUs (tf.split changed its argument
    # order in TF 1.x).
    if tf.__version__.startswith('1.'):
        split_train_data_node = tf.split(image_data, self.n_gpu)
        split_train_labels_node = tf.split(image_label, self.n_gpu)
    else:
        split_train_data_node = tf.split(0, self.n_gpu, image_data)
        split_train_labels_node = tf.split(0, self.n_gpu, image_label)
    with tf.variable_scope(tf.get_variable_scope()):
        all_loss = []
        for device_index, (i, self.image_batch, self.label_batch) in enumerate(
                zip([1], split_train_data_node, split_train_labels_node)):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % ("gpu", i)) as scope:
                    if j_step == 0:
                        j_step = 1
                    else:
                        reuse = True
                    # net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
                    #                          is_training=self.conf.is_training, reuse=reuse)
                    net, end_points = deeplabv3(
                        self.image_batch,
                        num_classes=self.conf.num_classes,
                        depth=num_layers,
                        is_training=True,
                        reuse=reuse)
                    # Network raw output [batch_size, 41, 41, num_classes]
                    self.raw_output = end_points['gpu_{}/resnet{}/logits'.format(i, num_layers)]
                    output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)
                    label_proc = prepare_label(self.label_batch, output_size,
                                               num_classes=self.conf.num_classes,
                                               one_hot=False)  # [batch_size, 41, 41]
                    raw_gt = tf.reshape(label_proc, [-1, ])
                    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
                    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                    raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
                    # Gather only the pixels with valid labels so logits and labels align.
                    prediction = tf.gather(raw_prediction, indices)
                    # Pixel-wise softmax cross-entropy loss
                    # (a Dice loss via tl.cost.dice_coe was an earlier alternative here)
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                    # L2 regularization
                    l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v)
                                 for v in tf.trainable_variables() if 'weights' in v.name]
                    # Loss function
                    all_loss.append(tf.reduce_mean(loss) + tf.add_n(l2_losses))
                    tf.get_variable_scope().reuse_variables()
    # Variables that load from the pre-trained model.
    # For training, the last few layers should not be loaded.
    if self.conf.pretrain_file is not None:
        restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
        original_step = int(self.conf.pretrain_file.split("-")[-1])
    else:
        original_step = 0
    num_steps = self.conf.num_steps + original_step
    # Trainable variables.
    # Note that is_training=False still updates the BN parameters gamma (scale) and
    # beta (offset) if they are present in the var_list of the optimiser definition,
    # so we remove them from the list.
    all_trainable = [v for v in tf.trainable_variables()
                     if 'beta' not in v.name and 'gamma' not in v.name]
    # Fine-tune part
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    # ASPP part
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    # fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    # fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]   # lr * 20.0
    # Loss function: average the per-GPU losses.
    self.reduced_loss = tf.add_n(all_loss) / self.n_gpu
    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    self.loss_trans = tf.placeholder(dtype=tf.float32, shape=())
    self.final_loss = (self.reduced_loss + self.loss_trans) / 2
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / num_steps), self.conf.power))
    self.learning_rate = learning_rate
    # A single Adam optimizer is used here; the commented Momentum variants with
    # per-group lr_mult (a Caffe-style per-layer learning-rate multiplier) were
    # the original alternatives.
    opt = tf.train.AdamOptimizer(learning_rate, self.conf.momentum, 0.98)
    # opt = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
    # opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
    # opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
    # To make sure each layer gets updated by a different lr, we do not use 'minimize' here.
    # Instead, we separate the steps: compute_grads + update_params.
    # Compute grads
    grads_conv = tf.gradients(self.final_loss, conv_trainable)
    grads_conv = grads_conv[:len(conv_trainable)]
    # grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))]
    # grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
    # Update params
    train_op_conv = opt.apply_gradients(zip(grads_conv, conv_trainable))
    # train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    # train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    # Finally, get the train_op!
    self.train_op = train_op_conv
    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
    # Loader for loading the pre-trained model
    if self.conf.pretrain_file is not None:
        self.loader = tf.train.Saver(var_list=restore_var)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label = reader.image, reader.label image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. # Create network. if args.encoder_name not in ['res101', 'res50']: print('encoder_name ERROR!') print("Please input: res101, res50") sys.exit(-1) else: net = ResNet_segmentation(image_batch, args.num_classes, False, args.encoder_name) # predictions raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) pred = tf.reshape(pred, [ -1, ]) # labels gt = tf.reshape(label_batch, [ -1, ]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, args.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. pred = tf.reshape(pred, [ -1, ]) #groud truth gt = tf.reshape(label_batch, [ -1, ]) indexes = tf.less_equal(gt, args.num_classes - 1) gt = tf.where(indexes, gt, tf.cast(temp, tf.uint8)) weights = tf.cast( indexes, tf.int32) # Ignoring all labels greater than or equal to n_classes. # mIoU mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( pred, gt, num_classes=args.num_classes, weights=weights) # Pixel accuracy accu, accu_update_op = tf.contrib.metrics.streaming_accuracy( pred, gt, weights=weights) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(args.num_steps): preds, _, _ = sess.run([pred, update_op, accu_update_op]) if step % 100 == 0: print('step {:d}'.format(step)) print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) print('Pixel Accuracy: {:.3f}'.format(accu.eval(session=sess))) coord.request_stop() coord.join(threads)
margin, recalls, batch_size = opt.margin, [int(k) for k in opt.recalls.split(',')], opt.batch_size
num_epochs = opt.num_epochs
save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}'.format(data_name, backbone_type, gd_config, feature_dim,
                                                 smoothing, temperature, margin, batch_size)
results = {'train_loss': [], 'train_accuracy': []}
for recall_id in recalls:
    results['test_recall@{}'.format(recall_id)] = []
process_sop_data(opt.data_dir, opt.df_path)

# dataset loader
train_data_set = ImageReader(data_path, data_name, 'train')
train_sample = MPerClassSampler(train_data_set.labels, batch_size)
train_data_loader = DataLoader(train_data_set, batch_sampler=train_sample,
                               num_workers=opt.workers, pin_memory=True)
test_data_set = ImageReader(data_path, data_name, 'test')
test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False,
                              num_workers=opt.workers, pin_memory=True)
eval_dict = {'test': {'data_loader': test_data_loader}}

# model setup, model profile, optimizer config and loss definition
model = Model(backbone_type,
def test_setup(self):
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,   # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
    self.image_batch = tf.identity(self.image_batch, name='image_batch')
    self.image_batch -= IMG_MEAN
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name)
    # Predictions
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)
    self.pred = tf.reshape(pred, [-1, ], name="predictions")
    # Labels
    gt = tf.reshape(self.label_batch, [-1, ])
    # Ignore all labels greater than or equal to n_classes.
    temp = tf.less_equal(gt, self.conf.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(self.pred, gt, weights=weights)
    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)
    # Counts for the F1 score
    pred = tf.cast(self.pred, tf.int32)
    gt = tf.cast(gt, tf.int32)
    self.areaOverlap = tf.count_nonzero(pred * gt)
    self.areaGTObj = tf.count_nonzero(gt)
    self.areaPredicted = tf.count_nonzero(pred)
    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
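# The three counters above are enough to derive precision, recall and F1 outside the
# graph once they have been evaluated in a session. A minimal post-processing sketch
# (plain Python, assumed usage):
def f1_from_counts(area_overlap, area_gt, area_pred, eps=1e-12):
    precision = area_overlap / (area_pred + eps)
    recall = area_overlap / (area_gt + eps)
    return 2 * precision * recall / (precision + recall + eps)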
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()
    # Input size
    input_size = (self.conf.input_height, self.conf.input_width)
    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.data_list, input_size,
                             self.conf.random_scale, self.conf.random_mirror,
                             self.conf.ignore_label, IMG_MEAN, self.coord)
        self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name)
        # Variables that load from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'resnet_v1' in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tune part
        encoder_trainable = [v for v in all_trainable if 'resnet_v1' in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]
    decoder_w_trainable = [v for v in decoder_trainable
                           if 'weights' in v.name or 'gamma' in v.name]  # lr * 10.0
    decoder_b_trainable = [v for v in decoder_trainable
                           if 'biases' in v.name or 'beta' in v.name]    # lr * 20.0
    # Check
    assert len(all_trainable) == len(decoder_trainable) + len(encoder_trainable)
    assert len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable)
    # Network raw output
    raw_output = net.outputs  # [batch_size, h, w, 21]
    # Output size
    output_shape = tf.shape(raw_output)
    output_size = (output_shape[1], output_shape[2])
    # Ground truth: ignore all labels greater than or equal to n_classes
    label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)
    # Pixel-wise softmax cross-entropy loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    # L2 regularization
    l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name]
    # Loss function
    self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
    # We use several optimizers here in order to handle the different lr_mult values
    # (a Caffe-style parameter that controls the actual lr for each layer).
    opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
    opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
    opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
    # To make sure each layer gets updated by a different lr, we do not use 'minimize' here.
    # Instead, we separate the steps: compute_grads + update_params.
    # Compute grads
    grads = tf.gradients(self.reduced_loss,
                         encoder_trainable + decoder_w_trainable + decoder_b_trainable)
    grads_encoder = grads[:len(encoder_trainable)]
    grads_decoder_w = grads[len(encoder_trainable):(len(encoder_trainable) + len(decoder_w_trainable))]
    grads_decoder_b = grads[(len(encoder_trainable) + len(decoder_w_trainable)):]
    # Update params
    train_op_conv = opt_encoder.apply_gradients(zip(grads_encoder, encoder_trainable))
    train_op_fc_w = opt_decoder_w.apply_gradients(zip(grads_decoder_w, decoder_w_trainable))
    train_op_fc_b = opt_decoder_b.apply_gradients(zip(grads_decoder_b, decoder_b_trainable))
    # Finally, get the train_op!
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # for collecting moving_mean and moving_variance
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)
    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)
    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, dim=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [self.image_batch, 2, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [self.label_batch, 2, self.conf.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [self.pred, 2, self.conf.num_classes], tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=2)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(self.conf.logdir, graph=tf.get_default_graph())
def _read_image_from_path(self, path):
    return ImageReader.read_from_path(path)
class BatchGenerator(Sequence):
    def __init__(self, images, config, norm=None, shuffle=True):
        self.images = images
        # self.true_box_buffer = config['true_box_buffer']  # Maximum objects per box!!
        self.batch_size = config['batch_size']
        self.anchors = config['anchors']
        self.nb_anchors = len(config['anchors'])
        self.img_w, self.img_h = config['image_shape']
        self.grid = config['grid']
        self.img_encoder = ImageReader(img_width=self.img_w, img_height=self.img_h,
                                       norm=norm, grid=self.grid)
        self.labels = np.array(config['labels'])
        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.images)

    def __getitem__(self, idx):
        '''
        Arguments
        ---------
        idx : [int] non-negative integer value, e.g. 0

        Returns
        -------
        x_batch: [np.array] Array of shape (BATCH_SIZE, IMAGE_H, IMAGE_W, N channels).
            x_batch[iframe,:,:,:] contains the iframe-th frame of size (IMAGE_H, IMAGE_W).
        y_batch: [np.array] Array of shape (BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes),
            where BOX is the number of anchor boxes.
            y_batch[iframe,igrid_h,igrid_w,ianchor,:4] contains
            (center_x, center_y, center_w, center_h) of the ianchor-th anchor at grid cell
            (igrid_h, igrid_w) if an object exists in this (grid cell, anchor) pair,
            else it simply contains 0. The bbox center coordinates (x, y) are given between
            0 and 1 relative to the cell's origin (i.e. 0.4 means 40% from the cell's origin)
            and its dimensions relative to the cell's size (i.e. 3.4 means 3.4 times the
            cell's grid).
            y_batch[iframe,igrid_h,igrid_w,ianchor,4] contains 1 if an object exists in this
            (grid cell, anchor) pair, else 0.
            y_batch[iframe,igrid_h,igrid_w,ianchor,5 + iclass] contains 1 if the iclass-th
            class object exists in this (grid cell, anchor) pair, else 0.
        b_batch: [np.array] Array of shape (BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4).
            b_batch[iframe,1,1,1,ibuffer,:] contains the ibuffer-th object's
            (center_x, center_y, center_w, center_h) in the iframe-th frame.
            If ibuffer > N objects in the iframe-th frame, the values are simply 0.
            TRUE_BOX_BUFFER has to be some large number, so that the frame with the biggest
            number of objects can also record all of them. The order of the objects does
            not matter. This is just a hack to easily calculate the loss.
        '''
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size
        if r_bound > len(self.images):
            r_bound = len(self.images)
            l_bound = r_bound - self.batch_size
        instance_count = 0
        # Prepare storage for outputs
        x_batch = np.zeros((r_bound - l_bound, self.img_h, self.img_w, 3))  # Input images
        y_batch = np.zeros((r_bound - l_bound, self.grid[1], self.grid[0],
                            self.nb_anchors, 5 + len(self.labels)))
        # b_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.true_box_buffer, 4))
        grid_width = float(self.img_w) / self.grid[0]
        grid_height = float(self.img_h) / self.grid[1]
        iou_vfunc = np.frompyfunc(
            lambda w1, h1, w2, h2: calculate_IOU(np.array([w1, h1]), np.array([w2, h2])), 4, 1)
        for train_instance in self.images[l_bound:r_bound]:
            # Resize image
            img, all_objs = self.img_encoder.fit_data(train_instance)
            # Construct output from object's x, y, w, h
            true_box_index = 0
            for obj in all_objs:
                if (obj['xmax'] > obj['xmin']) and (obj['ymax'] > obj['ymin']) and (obj['name'] in self.labels):
                    center_x, center_y, center_w, center_h = self.img_encoder.abs2grid(obj)
                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))
                    # Now we save the center position relative to the cell's origin in y_batch
                    center_x -= grid_x
                    center_y -= grid_y
                    if (grid_x < self.grid[0]) and (grid_y < self.grid[1]):
                        obj_idx = self.labels.tolist().index(obj['name'])
                        ious = iou_vfunc(self.anchors[:, 0], self.anchors[:, 1], center_w, center_h)
                        best_anchor_id = np.argmax(ious)
                        # Assign ground-truth x, y, w, h, confidence and class probs to y_batch.
                        # It can happen that the same grid cell contains two similarly shaped
                        # objects; the same anchor box is then selected as the best anchor by
                        # multiple objects, and the earlier object is overwritten.
                        # As stated in the paper, width and height are predicted relative
                        # to the anchor's dimensions.
                        center_w = center_w * (grid_width / self.anchors[best_anchor_id, 0])
                        center_h = center_h * (grid_height / self.anchors[best_anchor_id, 1])
                        bbox = [center_x, center_y, center_w, center_h]
                        # center_x, center_y, w, h and 1, because the ground-truth confidence is always 1
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id, 0:4] = bbox
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id, 4] = 1
                        # Class probability for the detected object
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id, 5 + obj_idx] = 1
                        # Assign the true bbox to b_batch
                        # b_batch[instance_count, 0, 0, 0, true_box_index] = bbox
                        # true_box_index = (true_box_index + 1) % self.true_box_buffer
                else:
                    print("Omitting image {} because of inconsistent labeling..".format(train_instance['filename']))
            x_batch[instance_count] = img
            instance_count += 1
        # return [x_batch, b_batch], y_batch
        return x_batch, y_batch

    def __len__(self):
        return int(np.ceil(float(len(self.images)) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.images)
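# calculate_IOU is imported from elsewhere in the project. For anchor matching it is
# usually the IoU of two boxes that share the same center, i.e. computed from the
# (w, h) pairs alone. A plausible minimal sketch (an assumption, not the project's code):
import numpy as np

def calculate_IOU(box1_wh, box2_wh):
    # Overlap of two co-centered rectangles is the product of the smaller sides.
    intersection = min(box1_wh[0], box2_wh[0]) * min(box1_wh[1], box2_wh[1])
    union = box1_wh[0] * box1_wh[1] + box2_wh[0] * box2_wh[1] - intersection
    return intersection / union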
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    h, w = (self.conf.input_height, self.conf.input_width)
    input_size = (h, w)

    # Devices
    gpu_list = get_available_gpus()
    zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], []
    previous_crf_names = []
    restore_vars = []
    self.loaders = []
    self.im_list = []

    for i in range(len(gpu_list)):
        with tf.device(gpu_list[i]):
            # Load reader
            with tf.name_scope("create_inputs"):
                reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                     input_size, self.conf.random_scale,
                                     self.conf.random_mirror,
                                     self.conf.ignore_label, IMG_MEAN,
                                     self.coord)
                self.image_batch, self.label_batch = reader.dequeue(
                    self.conf.batch_size)
                self.im_list.append(self.image_batch)
                image_batch_075 = tf.image.resize_images(
                    self.image_batch, [int(h * 0.75), int(w * 0.75)])
                image_batch_05 = tf.image.resize_images(
                    self.image_batch, [int(h * 0.5), int(w * 0.5)])

            # Create network
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes, True,
                                 rescale075=False, rescale05=False,
                                 crf_type=self.conf.crf_type)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    True, rescale075=True, rescale05=False,
                                    crf_type=self.conf.crf_type)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True,
                                   rescale075=False, rescale05=True,
                                   crf_type=self.conf.crf_type)

            # Variables that are loaded from the pre-trained model.
            restore_var = [
                v for v in tf.global_variables()
                if ('fc' not in v.name and 'crfrnn' not in v.name)
            ]
            restore_vars.append(restore_var)

            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part: drop CRF variables already claimed by a previous
            # device (as a list comprehension, so we do not mutate the list
            # while iterating over it).
            all_trainable = [
                v for v in all_trainable if v.name not in previous_crf_names
            ]
            crf_trainable = [
                v for v in all_trainable
                if ('crfrnn' in v.name and v.name not in previous_crf_names)
            ]
            previous_crf_names.extend(v.name for v in crf_trainable)
            encoder_trainable = [
                v for v in all_trainable
                if 'fc' not in v.name and 'crfrnn' not in v.name
            ]  # lr * 1.0
            # Remove encoder_trainable from all_trainable
            #all_trainable = [v for v in all_trainable if v not in encoder_trainable]

            # Decoder part
            decoder_trainable = [
                v for v in all_trainable
                if 'fc' in v.name and 'crfrnn' not in v.name
            ]
            decoder_w_trainable = [
                v for v in decoder_trainable
                if ('weights' in v.name or 'gamma' in v.name)
                and 'crfrnn' not in v.name
            ]  # lr * 10.0
            decoder_b_trainable = [
                v for v in decoder_trainable
                if ('biases' in v.name or 'beta' in v.name)
                and 'crfrnn' not in v.name
            ]  # lr * 20.0
            # Check
            assert len(all_trainable) == (len(encoder_trainable) +
                                          len(decoder_trainable) +
                                          len(crf_trainable))
            assert len(decoder_trainable) == (len(decoder_w_trainable) +
                                              len(decoder_b_trainable))

            # Network raw output
            raw_output100 = net.outputs
            raw_output075 = net075.outputs
            raw_output05 = net05.outputs
            raw_output = tf.reduce_max(tf.stack([
                raw_output100,
                tf.image.resize_images(raw_output075,
                                       tf.shape(raw_output100)[1:3]),
                tf.image.resize_images(raw_output05,
                                       tf.shape(raw_output100)[1:3])
            ]), axis=0)

            # Ground truth: ignoring all labels greater than or equal to n_classes
            label_proc = prepare_label(self.label_batch,
                                       tf.stack(raw_output.get_shape()[1:3]),
                                       num_classes=self.conf.num_classes,
                                       one_hot=True)  # [batch_size, h, w]
            label_proc075 = prepare_label(
                self.label_batch, tf.stack(raw_output075.get_shape()[1:3]),
                num_classes=self.conf.num_classes, one_hot=True)
            label_proc05 = prepare_label(
                self.label_batch, tf.stack(raw_output05.get_shape()[1:3]),
                num_classes=self.conf.num_classes, one_hot=True)

            raw_gt = tf.reshape(label_proc, [-1, ])
            raw_gt075 = tf.reshape(label_proc075, [-1, ])
            raw_gt05 = tf.reshape(label_proc05, [-1, ])

            indices = tf.squeeze(
                tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
            indices075 = tf.squeeze(
                tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1)
            indices05 = tf.squeeze(
                tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1)

            gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
            gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
            gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

            raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
            raw_prediction100 = tf.reshape(raw_output100,
                                           [-1, self.conf.num_classes])
            raw_prediction075 = tf.reshape(raw_output075,
                                           [-1, self.conf.num_classes])
            raw_prediction05 = tf.reshape(raw_output05,
                                          [-1, self.conf.num_classes])

            # (gt*/prediction* are only consumed by the commented-out sparse
            # losses below.)
            prediction = tf.gather(raw_prediction, indices)
            prediction100 = tf.gather(raw_prediction100, indices)
            prediction075 = tf.gather(raw_prediction075, indices075)
            prediction05 = tf.gather(raw_prediction05, indices05)

            # Pixel-wise softmax cross-entropy losses
            #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
            loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=raw_prediction,
                labels=tf.reshape(label_proc[0],
                                  (h * w, self.conf.num_classes)))
            '''
            coefficients = [0.01460247, 1.25147725, 2.88479363, 1.20348121, 1.65261654,
                            1.67514772, 0.62338799, 0.7729363,  0.42038501, 0.98557268,
                            1.31867536, 0.85313332, 0.67227604, 1.21317965, 1.,
                            0.24263748, 1.80877607, 1.3082213,  0.79664027, 0.72543945,
                            1.27823374]
            '''
            #loss = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction)
            #loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
            loss100 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=raw_prediction100,
                labels=tf.reshape(label_proc[0],
                                  (h * w, self.conf.num_classes)))
            #loss100 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction100)
            #loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
            loss075 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=raw_prediction075,
                labels=tf.reshape(label_proc075[0],
                                  (int(h * 0.75) * int(w * 0.75),
                                   self.conf.num_classes)))
            #loss075 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes)), logits=raw_prediction075)
            #loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
            loss05 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=raw_prediction05,
                labels=tf.reshape(label_proc05[0],
                                  (int(h * 0.5) * int(w * 0.5),
                                   self.conf.num_classes)))
            #loss05 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)), logits=raw_prediction05)

            # L2 regularization
            l2_losses = [
                self.conf.weight_decay * tf.nn.l2_loss(v)
                for v in all_trainable if 'weights' in v.name
            ]

            # Loss function
            self.reduced_loss = (tf.reduce_mean(loss) +
                                 tf.reduce_mean(loss100) +
                                 tf.reduce_mean(loss075) +
                                 tf.reduce_mean(loss05) + tf.add_n(l2_losses))

            # Define optimizers
            # 'poly' learning rate
            base_lr = tf.constant(self.conf.learning_rate)
            self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
            learning_rate = tf.scalar_mul(
                base_lr,
                tf.pow((1 - self.curr_step / self.conf.num_steps),
                       self.conf.power))
            # We use several optimizers here to handle the different lr_mult
            # values, a Caffe-style mechanism that controls the actual lr for
            # each layer.
            opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                     self.conf.momentum)
            opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                       self.conf.momentum)
            opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                       self.conf.momentum)
            opt_crf = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)

            # Gradient accumulation
            # Define variables to accumulate gradients into.
            accum_grads = [
                tf.Variable(tf.zeros_like(v.initialized_value()),
                            trainable=False)
                for v in encoder_trainable + decoder_w_trainable +
                decoder_b_trainable + crf_trainable
            ]
            # Define an operation to clear the accumulated gradients for the
            # next batch.
            self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
            # To make sure each layer gets updated with a different lr, we do
            # not use 'minimize' here. Instead, we separate the steps
            # compute_grads + update_params.
            # Compute grads
            grads = tf.gradients(
                self.reduced_loss,
                encoder_trainable + decoder_w_trainable +
                decoder_b_trainable + crf_trainable)
            # Accumulate and normalise the gradients.
            self.accum_grads_op = [
                accum_grads[i].assign_add(grad / self.conf.grad_update_every)
                for i, grad in enumerate(grads)
            ]

            grads_encoder = accum_grads[:len(encoder_trainable)]
            grads_decoder_w = accum_grads[len(encoder_trainable):
                                          len(encoder_trainable) +
                                          len(decoder_w_trainable)]
            grads_decoder_b = accum_grads[(len(encoder_trainable) +
                                           len(decoder_w_trainable)):
                                          (len(encoder_trainable) +
                                           len(decoder_w_trainable) +
                                           len(decoder_b_trainable))]
            grads_crf = accum_grads[len(encoder_trainable) +
                                    len(decoder_w_trainable) +
                                    len(decoder_b_trainable):]  # CRF gradients sit at the end

            zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
            zip_decoder_b.append(list(zip(grads_decoder_b, decoder_b_trainable)))
            zip_decoder_w.append(list(zip(grads_decoder_w, decoder_w_trainable)))
            zip_crf.append(list(zip(grads_crf, crf_trainable)))

    avg_grads_encoder = average_gradients(zip_encoder)
    avg_grads_decoder_w = average_gradients(zip_decoder_w)
    avg_grads_decoder_b = average_gradients(zip_decoder_b)
    avg_grads_crf = average_gradients(zip_crf)

    for i in range(len(gpu_list)):
        with tf.device(gpu_list[i]):
            # Update params
            train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
            train_op_fc_w = opt_decoder_w.apply_gradients(avg_grads_decoder_w)
            train_op_fc_b = opt_decoder_b.apply_gradients(avg_grads_decoder_b)
            train_op_crf = opt_crf.apply_gradients(avg_grads_crf)

    # Finally, get the train_op!
    update_ops = tf.get_collection(
        tf.GraphKeys.UPDATE_OPS)  # for collecting moving_mean and moving_variance
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_fc_w, train_op_fc_b,
                                 train_op_crf)  # train_op_conv

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)

    # Loader for loading the pre-trained model
    for i in range(len(gpu_list)):
        with tf.device(gpu_list[i]):
            self.loaders.append(tf.train.Saver(var_list=restore_vars[i]))
            #self.loaders.append(tf.train.Saver(var_list=tf.global_variables()))

    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, axis=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [self.image_batch, 1, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels,
                                [self.label_batch, 1, self.conf.num_classes],
                                tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [self.pred, 1, self.conf.num_classes], tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=1)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(self.conf.logdir,
                                                graph=tf.get_default_graph())
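# The accumulation ops defined above (zero_op, accum_grads_op, train_op) need a
# matching session loop. A minimal sketch, assuming `trainer` is this object
# and its sess/conf are set up elsewhere (these names are assumptions, not
# this repo's confirmed API):
for step in range(trainer.conf.num_steps):
    trainer.sess.run(trainer.zero_op)                # 1) clear accumulators
    for _ in range(trainer.conf.grad_update_every):  # 2) each call adds
        trainer.sess.run(trainer.accum_grads_op,     #    grad / grad_update_every,
                         feed_dict={trainer.curr_step: step})  # so the sum is the mean
    trainer.sess.run(trainer.train_op,               # 3) apply averaged grads once
                     feed_dict={trainer.curr_step: step})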
def main():
    args = get_arguments()
    print('SETUP TrainConfig...')
    train_cfg = TrainConfig(args)
    train_cfg.display()
    # print('SETUP EvalConfig...')
    eval_cfg = EvalConfig(args)
    # eval_cfg.display()
    train_reader = ImageReader(train_cfg)
    eval_reader = ImageReader(eval_cfg)
    train_net = ICNet(train_cfg, train_reader, eval_reader)
    _train_op, _losses, _summaries, _Preds, _IoUs, _Images = train_net.optimizer()
    vis = Visualizer(eval_cfg)

    global_step = train_net.start_step
    epoch_step = int(len(train_reader.attribute_list) / train_cfg.BATCH_SIZE + 0.5)
    start_epoch = int(global_step / epoch_step)
    save_step = int(epoch_step * train_cfg.SAVE_PERIOD)
    all_steps = int(len(eval_reader.attribute_list) / eval_cfg.BATCH_SIZE)
    g_eval_step = 0
    train_fd = {train_net.handle: train_net.train_handle}
    eval_fd = {train_net.handle: train_net.eval_handle}

    for epoch in range(start_epoch, train_cfg.TRAIN_EPOCHS):
        epoch_loss = None
        start_batch = global_step % epoch_step
        print(f'Start batch - {start_batch}')
        print(f'Epoch step - {epoch_step}')
        for steps in range(start_batch, epoch_step):
            start_time = time.time()
            _, losses = train_net.sess.run([_train_op, _losses],
                                           feed_dict=train_fd)
            if epoch_loss is None:
                epoch_loss = np.array(losses)
            else:
                epoch_loss += np.array(losses)
            if global_step % save_step == 0:
                train_net.save(global_step)
            global_step += 1
            duration = time.time() - start_time
            msg = (f'step {global_step} \t total loss = {losses[3]:.3f}, '
                   f'sub4 = {losses[0]:.3f}, sub24 = {losses[1]:.3f}, '
                   f'sub124 = {losses[2]:.3f}, val_loss: {losses[4]:.3f} '
                   f'({duration:.3f} sec/step)')
            print(msg)
        epoch_loss /= (epoch_step - start_batch)

        accuracy = None
        for steps in range(all_steps - 1):
            start_time = time.time()
            IoUs = train_net.sess.run(_IoUs, feed_dict=eval_fd)
            if accuracy is None:
                accuracy = np.array(IoUs)
            else:
                accuracy += np.array(IoUs)
            g_eval_step += 1
            duration = time.time() - start_time
            msg = (f'step {steps} \t mean_IoU = {IoUs[0]:.3f}, '
                   f'Person_IoU = {IoUs[1]:.3f}, Rider_IoU = {IoUs[2]:.3f}, '
                   f'({duration:.3f} sec/step)')
            print(msg)
        # Run the final eval batch, keeping the predictions for visualisation.
        IoUs, Preds, Images = train_net.sess.run([_IoUs, _Preds, _Images],
                                                 feed_dict=eval_fd)
        accuracy += np.array(IoUs)
        accuracy /= all_steps
        g_eval_step += 1
        vis.save_and_show(Images, Preds, g_eval_step)

        feed_dict = {train_net.sum_loss: epoch_loss,
                     train_net.sum_acc: accuracy}
        summaries = train_net.sess.run(_summaries, feed_dict=feed_dict)
        train_net.writer.add_summary(summaries, epoch)
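# Resume bookkeeping in main(), worked through with hypothetical numbers:
# if epoch_step = 1000 batches/epoch and a checkpoint left global_step = 2500,
# then start_epoch = 2500 // 1000 = 2 (resume in the third epoch) and
# start_batch = 2500 % 1000 = 500 (skip the first 500 batches of that epoch).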
def _read_image_from_url(self, url):
    return ImageReader.read_from_url(url)
parser.add_argument('--warm_up', default=2, type=int,
                    help='number of warm-up epochs')
parser.add_argument('--recalls', default='1,2,4,8', type=str,
                    help='comma-separated recall@k values to evaluate')

opt = parser.parse_args()
# args parse
data_path, data_name, backbone_type = opt.data_path, opt.data_name, opt.backbone_type
feature_dim, batch_size, num_epochs = opt.feature_dim, opt.batch_size, opt.num_epochs
warm_up, recalls = opt.warm_up, [int(k) for k in opt.recalls.split(',')]
save_name_pre = '{}_{}_{}'.format(data_name, backbone_type, feature_dim)

results = {'train_loss': [], 'train_accuracy': []}
for recall_id in recalls:
    results['test_recall@{}'.format(recall_id)] = []

# dataset loader
train_data_set = ImageReader(data_path, data_name, 'train', backbone_type)
train_data_loader = DataLoader(train_data_set, batch_size, shuffle=True,
                               num_workers=8)
test_data_set = ImageReader(data_path, data_name, 'test', backbone_type)
test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False,
                              num_workers=8)

# model setup, optimizer config and loss definition
model = Model(backbone_type, feature_dim, len(train_data_set.class_to_idx)).cuda()
optimizer = AdamW([{'params': model.backbone.parameters()},
                   {'params': model.refactor.parameters()},
                   {'params': model.fc.parameters(), 'lr': 1e-2}],
                  lr=1e-4, weight_decay=1e-4)
lr_scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
loss_criterion = ProxyAnchorLoss()
data_base = {'test_images': test_data_set.images,
             'test_labels': test_data_set.labels}

best_recall = 0.0
for epoch in range(1, num_epochs + 1):
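# For context: `recalls` above drives the test_recall@k metrics collected in
# `results`. A minimal, illustrative sketch of how recall@k is commonly
# computed from L2-normalised embeddings (this is NOT the repo's own `recall`
# helper, just an assumption-labeled stand-in):
import torch

def recall_at_k(features, labels, ks=(1, 2, 4, 8)):
    """features: [N, D] L2-normalised tensor; labels: [N] class ids."""
    sims = features @ features.t()          # cosine similarities
    sims.fill_diagonal_(-float('inf'))      # a query must not match itself
    _, nn_idx = sims.topk(max(ks), dim=-1)  # nearest-neighbour indices
    labels = torch.as_tensor(labels)
    hits = labels[nn_idx] == labels.unsqueeze(-1)   # [N, max(ks)] hit mask
    return [hits[:, :k].any(dim=-1).float().mean().item() for k in ks]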
def full_run_single(video_id, video_dir, static_dir, frame_by_frame_results_dir,
                    static_results_dir, crop_boxes_dir, ignore_mask_dir,
                    detector_config_path, detector_model_path, reid_model_path,
                    reid_model_backbone, crop_results_dir, anomaly_results_dir,
                    bg_interval=4, bg_alpha=0.05, bg_start_frame=1,
                    bg_threshold=5, raw_detect_interval=30, crop_min_obj_size=8,
                    crop_row_capacity=3, crop_box_aspect_ratio=2,
                    ignore_count_thresh=0.08, ignore_area_thresh=2000,
                    ignore_score_thresh=0.1, ignore_gau_sigma=3,
                    abnormal_duration_thresh=60, detect_duration_thresh=6,
                    undetect_duration_thresh=8, bbox_score_thresh=0.3,
                    light_thresh=0.8, anomaly_thresh=0.8, similarity_thresh=0.95,
                    suspicious_duration_thresh=18, detector_verbose_interval=20,
                    verbose=True):
    """
    Runs the full anomaly detection pipeline on a video.

    video_id: video id/name
    video_dir: folder the video is in
    static_dir: folder to put the background images in
    frame_by_frame_results_dir: folder to put the raw video detection results in
    static_results_dir: folder to put the background image detection results in
    crop_boxes_dir: folder to put the crop boxes in
    ignore_mask_dir: folder to put the ignore region mask in
    detector_config_path: path to the detector configuration file
    detector_model_path: path to the detector model checkpoint
    reid_model_path: path to the re-ID model checkpoint
    reid_model_backbone: re-ID model backbone, e.g. "resnet50"
    crop_results_dir: folder to put the cropped background detection results in
    anomaly_results_dir: folder to put the anomaly detection results in
    bg_interval, bg_alpha, bg_start_frame, bg_threshold: see calc_bg_full_video function
    raw_detect_interval: number of frames between detections on the raw video
    crop_min_obj_size, crop_row_capacity, crop_box_aspect_ratio: see create_crop_boxes function
    ignore_count_thresh, ignore_area_thresh, ignore_score_thresh, ignore_gau_sigma:
        see create_ignore_mask function
    abnormal_duration_thresh, detect_duration_thresh, undetect_duration_thresh,
    bbox_score_thresh, light_thresh, anomaly_thresh, similarity_thresh,
    suspicious_duration_thresh: see get_anomalies function
    detector_verbose_interval: detector progress printing interval
    verbose: verbose printing
    """
    # Set up file paths
    video_path = os.path.join(video_dir, f"{video_id}.mp4")
    static_images_folder = os.path.join(static_dir, f"{video_id}")
    fbf_results_path = os.path.join(frame_by_frame_results_dir, f"{video_id}.csv")
    static_results_path = os.path.join(static_results_dir, f"{video_id}.csv")
    crop_boxes_path = os.path.join(crop_boxes_dir, f"{video_id}.csv")
    crop_results_path = os.path.join(crop_results_dir, f"{video_id}.csv")
    ignore_mask_path = os.path.join(ignore_mask_dir, f"{video_id}.npy")
    anomaly_results_path = os.path.join(anomaly_results_dir, f"{video_id}.csv")

    # Create folders
    os.makedirs(static_images_folder, exist_ok=True)
    os.makedirs(frame_by_frame_results_dir, exist_ok=True)
    os.makedirs(static_results_dir, exist_ok=True)
    os.makedirs(crop_boxes_dir, exist_ok=True)
    os.makedirs(crop_results_dir, exist_ok=True)
    os.makedirs(ignore_mask_dir, exist_ok=True)
    os.makedirs(anomaly_results_dir, exist_ok=True)

    # Read video
    raw_video = VideoReader(video_path)

    # Background modeling
    print("Creating background...")
    calc_bg_full_video(video_path, static_images_folder, bg_interval, bg_alpha,
                       bg_start_frame, bg_threshold, verbose)

    # Detection
    detector = Detector(detector_config_path, detector_model_path,
                        detector_verbose_interval, class_restrictions=None)
    # class_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
    #                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
    #                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
    #                'tvmonitor')
    # detector.model.CLASSES = class_names
    # detector.class_labels = class_names

    ## Raw video
    print("Detecting raw video...")
    raw_images, raw_frame_nums = raw_video.load_video(raw_detect_interval)
    fbf_results = detector.detect_images(raw_images, raw_frame_nums)
    fbf_results.to_csv(fbf_results_path, index=False)

    ## Static images
    static_reader = ImageReader(static_images_folder)
    static_frame_names = list(map(lambda f: int(f[:-4]),
                                  static_reader.filenames))  # "123.jpg" -> 123
    print("Detecting background...")
    static_results = detector.detect_images(static_reader.load_images(),
                                            static_frame_names)
    static_results.to_csv(static_results_path, index=False)

    # Perspective cropping
    print("Creating crop boxes...")
    create_crop_boxes(fbf_results_path, crop_boxes_path, raw_video.img_shape,
                      crop_min_obj_size, crop_row_capacity,
                      crop_box_aspect_ratio)  # either static/fbf results should work
    # This should be usable in place of the normal static images, but it does
    # not look feasible at the moment: the detection time is far too long.
    crop_boxes = pd.read_csv(crop_boxes_path).values
    print("Detecting cropped background...")
    crop_detect_results = detector.detect_images(static_reader.load_images(),
                                                 static_frame_names,
                                                 crop_boxes=crop_boxes)
    crop_detect_results.to_csv(crop_results_path)

    # Ignore region
    print("Creating ignore mask...")
    create_ignore_mask(fbf_results_path, ignore_mask_path, raw_video.img_shape,
                       ignore_count_thresh, ignore_area_thresh,
                       ignore_score_thresh, ignore_gau_sigma)

    # Detect anomalies
    print("Detecting anomalies...")
    anomalies = get_anomalies_preprocessed(
        video_path, reid_model_path, fbf_results_path, static_results_path,
        ignore_mask_path, reid_model_backbone, bg_start_frame, bg_interval,
        abnormal_duration_thresh, detect_duration_thresh,
        undetect_duration_thresh, bbox_score_thresh, light_thresh,
        anomaly_thresh, similarity_thresh, suspicious_duration_thresh, verbose)

    if anomalies is not None:
        anomaly_event_times = get_overlapping_time(anomalies)
        # Save results
        print("Saving results...")
        anomalies.to_csv(anomaly_results_path, index=False)
        return anomalies, anomaly_event_times
    else:
        return [], []
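# A hypothetical single-video invocation of the pipeline above (all paths and
# the video id are illustrative placeholders, not files shipped with the repo):
anomalies, event_times = full_run_single(
    video_id="1",
    video_dir="data/videos",
    static_dir="work/static",
    frame_by_frame_results_dir="work/fbf_results",
    static_results_dir="work/static_results",
    crop_boxes_dir="work/crop_boxes",
    ignore_mask_dir="work/ignore_masks",
    detector_config_path="configs/detector.py",
    detector_model_path="checkpoints/detector.pth",
    reid_model_path="checkpoints/reid.pth",
    reid_model_backbone="resnet50",
    crop_results_dir="work/crop_results",
    anomaly_results_dir="work/anomalies")
print(event_times)  # overlapping (start, end) times of detected anomaly events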
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    self.input_size = (self.conf.input_height, self.conf.input_width)

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                             self.input_size, self.conf.random_scale,
                             self.conf.random_mirror, self.conf.ignore_label,
                             IMG_MEAN, self.coord)
        self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)

    # Create network
    net = DeepLab_v2_Network(self.image_batch,
                             num_classes=self.conf.num_classes,
                             is_training=self.conf.is_training)
    #net = DeepLabVGGModel(self.image_batch, num_classes=self.conf.num_classes,
    #                      is_training=self.conf.is_training)

    # Network raw output
    self.raw_output = net.o  # [batch_size, 41, 41, 21]
    self.raw_output = tf.image.resize_bilinear(self.raw_output, [350, 350])
    print(tf.shape(self.image_batch))

    # Output size
    output_size = (self.raw_output.shape[1].value,
                   self.raw_output.shape[2].value)

    # Variables that are loaded from the pre-trained model.
    # For training, the last few layers should not be loaded.
    #restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]  # used when initializing from the INIT model
    restore_var = [v for v in tf.global_variables()]  # restore all parameters

    # Trainable Variables
    # Note that is_training=False still updates BN parameters gamma (scale)
    # and beta (offset) if they are present in the var_list of the optimiser
    # definition. So we remove them from the list.
    all_trainable = [v for v in tf.trainable_variables()
                     if 'beta' not in v.name and 'gamma' not in v.name]
    # Fine-tune part
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    # ASPP part
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
    # Check
    print(len(fc_trainable))
    print(len(fc_w_trainable) + len(fc_b_trainable))
    assert len(all_trainable) == len(fc_trainable) + len(conv_trainable)
    assert len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)

    # Ground truth: ignoring all labels greater than or equal to n_classes
    label_proc = prepare_label(self.label_batch, output_size,
                               num_classes=self.conf.num_classes,
                               one_hot=False)  # [batch_size, 41, 41]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax cross-entropy loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    # L2 regularization
    l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables() if 'weights' in v.name]
    # Loss function
    self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    #learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (15000 + self.curr_step) / (15000 + self.conf.num_steps)), self.conf.power))
    learning_rate = tf.scalar_mul(
        base_lr,
        tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
    # We use several optimizers here to handle the different lr_mult values,
    # a Caffe-style mechanism that controls the actual lr for each layer.
    opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                          self.conf.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                          self.conf.momentum)

    # To make sure each layer gets updated with a different lr, we do not use
    # 'minimize' here. Instead, we separate the steps
    # compute_grads + update_params.
    # Compute grads
    grads = tf.gradients(self.reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):
                       (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Update params
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    # Finally, get the train_op!
    self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    #self.train_op = tf.group(train_op_fc_w, train_op_fc_b)  # optimize only the fully-connected part

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)
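# The 'poly' schedule above decays the base rate as
#   lr(step) = base_lr * (1 - step / num_steps) ** power.
# A quick NumPy illustration with hypothetical config values:
import numpy as np

base_lr, power, num_steps = 2.5e-4, 0.9, 20000
steps = np.array([0, 5000, 10000, 15000, 19999])
print(base_lr * (1 - steps / num_steps) ** power)
# -> decays smoothly from 2.5e-4 towards 0 at num_steps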