def train(self):
    """Run the training loop from ``self.start_epoch`` up to ``self.num_epochs``.

    Each epoch performs one training pass through ``run_epoch`` followed by a
    validation step chosen by the configuration flags:

    * ``self.cmc_validation``: delegate to ``do_cmc_validation``.
    * ``self.recursive_training``: run three validation rounds, printing the
      measures of every round, then clear the validation data dict if the
      dataset offers ``clear_data_dict``.
    * ``self.valid_data is not None``: a single validation pass.
    * otherwise: validation loss 0.0 and empty measures.

    The epoch summary is printed to stdout and appended to a per-model log
    file. If ``self.save`` is set, the model is saved (and masks are saved
    when the training dataset provides ``save_masks``).
    """
    assert self.need_train
    print("starting training")
    for epoch in range(self.start_epoch, self.num_epochs):
        start = time.time()
        train_loss, train_measures = self.run_epoch(
            self.trainer.train_step, self.train_data, epoch, train=True)

        if self.cmc_validation:
            valid_loss, valid_measures = do_cmc_validation(
                self, self.test_network, self.valid_data)
        elif self.recursive_training:
            for valid_round in range(3):
                valid_loss, valid_measures = self.run_epoch(
                    self.trainer.validation_step, self.valid_data, epoch,
                    train=False)
                valid_error_string = Measures.get_error_string(
                    valid_measures, "valid")
                print("Validation ", valid_round, ": ", valid_error_string)
            # Cleared only after all rounds, so the rounds can build on each
            # other's stored outputs.
            if hasattr(self.valid_data, "clear_data_dict"):
                self.valid_data.clear_data_dict()
        elif self.valid_data is not None:
            valid_loss, valid_measures = self.run_epoch(
                self.trainer.validation_step, self.valid_data, epoch,
                train=False)
        else:
            valid_loss = 0.0
            valid_measures = {}

        end = time.time()
        elapsed = end - start
        train_error_string = Measures.get_error_string(train_measures, "train")
        valid_error_string = Measures.get_error_string(valid_measures, "valid")

        # Build the summary once so stdout and the log file get identical text.
        summary = ("epoch", epoch + 1, "finished. elapsed:", "%.5f" % elapsed,
                   "train_score:", "%.5f" % train_loss, train_error_string,
                   "valid_score:", valid_loss, valid_error_string)
        print(*summary)
        # NOTE(review): the log directory is a hard-coded absolute path; it is
        # kept unchanged here, consider making it configurable.
        log_path = ("/home/luiten/vision/youtubevos/ReID_net/logs/" +
                    self.model + ".txt")
        # The context manager closes the handle every epoch; the previous
        # ``file=open(...)`` form leaked one open file per epoch.
        with open(log_path, "a") as log_file:
            print(*summary, file=log_file)

        if self.save:
            self.save_model(epoch + 1)
            if hasattr(self.train_data, "save_masks"):
                self.train_data.save_masks(epoch + 1)
def run_epoch(self, step_fn, data, epoch, train):
    """Repeatedly call ``step_fn(epoch)`` until one epoch of ``data`` is consumed.

    ``step_fn`` returns either ``(loss_summed, measures, n_imgs)`` or a
    six-element result that additionally carries ``tags``, the argmax outputs
    and the targets; in the latter case the outputs are handed to
    ``data.set_output_as_old_label`` when the dataset provides it.

    Returns a tuple ``(loss, measures)`` where ``loss`` is the summed loss
    divided by the number of processed images (at least 1) and ``measures``
    are the accumulated measures averaged with ``for_final_result=True``.
    """
    epoch_size = data.num_examples_per_epoch()
    ignore_classes = getattr(data, "ignore_classes", None)

    loss_total = 0.0
    summed_measures = {}
    n_seen = 0
    while n_seen < epoch_size:
        step_start = time.time()
        res = step_fn(epoch)
        if len(res) <= 3:
            loss_summed, measures, n_imgs = res
        else:
            loss_summed, measures, n_imgs, tags, ys_argmax_val, targets = res
            if hasattr(data, "set_output_as_old_label"):
                data.set_output_as_old_label(tags, ys_argmax_val, epoch, targets)

        loss_total += loss_summed
        # special handling for detection storing
        self._maybe_store_detections(epoch, train, measures)
        summed_measures = Measures.calc_measures_sum(summed_measures, measures)
        n_seen += n_imgs

        # Per-step report only: skip the expensive measures, since this is not
        # the final result for the epoch.
        step_loss = loss_summed / n_imgs
        step_measures = Measures.calc_measures_avg(
            measures, n_imgs, ignore_classes, for_final_result=False)
        elapsed = time.time() - step_start
        # TODO: Print proper averages for the measures
        print(n_seen, '/', epoch_size, step_loss, step_measures,
              "elapsed", elapsed)

    loss_total /= max(n_seen, 1)
    epoch_measures = Measures.calc_measures_avg(
        summed_measures, n_seen, ignore_classes, for_final_result=True)
    self._maybe_finalize_detections(train)
    return loss_total, epoch_measures
def postprocess(self, logit, frame_id):
    """Turn ``logit`` into an annotation using the two thresholds.

    Entries above ``self.high_threshold`` become 1. If the video data for
    ``frame_id`` contains an ``'old_label'``, the annotation is restricted to
    the non-zero area of its first channel. Finally, entries strictly between
    ``self.low_threshold`` and ``self.high_threshold`` are set to
    ``self.void_label()``.
    """
    result = np.zeros_like(logit)
    result[logit > self.high_threshold] = 1

    if 'old_label' in self._get_video_data()[frame_id]:
        initial_box = self._get_video_data()[frame_id]['old_label'][:, :, 0]
        predicted_box = get_bounding_box(result, 1)
        # Only feeds the disabled "Postprocess 1" step (replace the annotation
        # by its bounding box when the IoU is below 50%); the value is unused.
        iou = Measures.compute_iou_for_binary_segmentation(
            predicted_box, initial_box)
        # Postprocess 2: reset all pixels outside the bounding box
        result = np.logical_and(initial_box, result).astype(int)

    # Entries inside the threshold band are marked as ignore pixels.
    in_band = np.logical_and(logit > self.low_threshold,
                             logit < self.high_threshold)
    result[in_band] = self.void_label()
    return result
def _finetune(self, video_idx, n_finetune_steps, start_step=0):
    """Fine-tune on frame 0 of a video, feeding a freshly damaged old mask each step.

    Runs ``n_finetune_steps`` training steps numbered from ``start_step`` and
    logs loss and IoU of every step to ``log.v5``.
    """
    frame_idx = 0
    tag = self.train_data.video_tag(video_idx)
    self.train_data.set_video_idx(video_idx)

    # Frame 0 has no preceding mask, so its own label serves as the "old
    # mask" and is optionally damaged below. The backward flow is zero here
    # as well, because there is no preceding frame; generating artificial
    # backward flow would be a possible extension.
    clean_mask = self.train_data.label_for_video_frame(frame_idx=0)

    end_step = start_step + n_finetune_steps
    for idx in range(start_step, end_step):
        damaged_mask = damage_mask(clean_mask,
                                   self.old_mask_scale_factor,
                                   self.old_mask_shift_absolute,
                                   self.old_mask_shift_factor)
        feed_dict = self.train_data.feed_dict_for_video_frame(
            frame_idx, with_annotations=True, old_mask=damaged_mask)
        loss, measures, n_imgs = self.trainer.train_step(
            epoch=idx, feed_dict=feed_dict)
        loss /= n_imgs
        # TODO: Replace [0] with ignore classes.
        iou = Measures.calc_iou(measures, n_imgs, [0])
        print("finetune on", tag, idx, "/", end_step,
              "loss:", loss, " iou:", iou, file=log.v5)
def _finetune(self, video_idx, n_finetune_steps, start_step=0):
    """Fine-tune on frame 0 of a video, periodically substituting a lucid example.

    Whenever ``self.lucid_interval`` is not -1 and the step index is a
    multiple of it, the feed dict comes from ``get_lucid_feed_dict`` and the
    loss is scaled by ``self.lucid_loss_scale``; all other steps train on
    frame 0 with loss scale 1.0. Loss and IoU of every step go to ``log.v5``.
    """
    frame_idx = 0
    tag = self.train_data.video_tag(video_idx)
    self.train_data.set_video_idx(video_idx)

    end_step = start_step + n_finetune_steps
    for idx in range(start_step, end_step):
        is_lucid_step = (self.lucid_interval != -1
                         and idx % self.lucid_interval == 0)
        if not is_lucid_step:
            feed_dict = self.train_data.feed_dict_for_video_frame(
                frame_idx, with_annotations=True)
            loss_scale = 1.0
        else:
            print("lucid example", file=log.v5)
            feed_dict = self.train_data.get_lucid_feed_dict()
            loss_scale = self.lucid_loss_scale

        loss, measures, n_imgs = self.trainer.train_step(
            epoch=idx, feed_dict=feed_dict, loss_scale=loss_scale)
        loss /= n_imgs
        iou = Measures.calc_iou(measures, n_imgs, [0])
        print("finetune on", tag, idx, "/", end_step,
              "loss:", loss, " iou:", iou, file=log.v5)
def _finetune(self, video_idx, n_finetune_steps, start_step=0):
    """Fine-tune on every annotated frame of a video for each step.

    The frames are ``self.train_data.annotated_frame_ids`` when the dataset
    exposes that attribute, otherwise just frame 0. One training step is run
    per (step, frame) pair and its loss and IoU are logged to ``log.v5``.
    """
    frame_ids = getattr(self.train_data, "annotated_frame_ids", [0])
    tag = self.train_data.video_tag(video_idx)
    self.train_data.set_video_idx(video_idx)

    end_step = start_step + n_finetune_steps
    for idx in range(start_step, end_step):
        for frame_id in frame_ids:
            feed_dict = self.train_data.feed_dict_for_video_frame(
                frame_id, with_annotations=True)
            loss, measures, n_imgs = self.trainer.train_step(
                epoch=idx, feed_dict=feed_dict, loss_scale=1.0)
            loss /= n_imgs
            iou = Measures.calc_iou(measures, n_imgs, [0])
            print("finetune on", tag, idx, "/", end_step,
                  "loss:", loss, " iou:", iou, file=log.v5)
def eval(self):
    """Run one validation pass and print its loss, measures and elapsed time.

    Uses ``do_cmc_validation`` when ``self.cmc_validation`` is set, otherwise
    a single ``run_epoch`` over ``self.valid_data`` with epoch 0.
    """
    started_at = time.time()
    if not self.cmc_validation:
        valid_loss, measures = self.run_epoch(
            self.trainer.validation_step, self.valid_data, 0, train=False)
    else:
        valid_loss, measures = do_cmc_validation(
            self, self.test_network, self.valid_data)
    elapsed = time.time() - started_at

    valid_error_string = Measures.get_error_string(measures, "valid")
    print("eval finished. elapsed:", elapsed,
          "valid_score:", valid_loss, valid_error_string)
def get_measures(pred, filename):
    """Compute binary-segmentation measures of ``pred`` against its ground truth.

    Values of ``pred`` above ``THRESH`` are set to 1 (others stay unchanged).
    The ground-truth image is read from ``VOC_PATH + "SegmentationObject/"``
    using ``filename`` with ``".jpg"`` removed and ``".png"`` appended; the
    target is whatever ``Util.get_best_overlap`` returns for it, with classes
    0 and 255 ignored.
    """
    thresholded = np.where(pred > THRESH, 1, pred)

    gt_name = filename.replace(".jpg", '') + ".png"
    gt = imread(VOC_PATH + "SegmentationObject/" + gt_name)

    target = Util.get_best_overlap(
        pred_mask=thresholded, gt=gt, ignore_classes=[0, 255])
    return Measures.compute_measures_for_binary_segmentation(
        thresholded, target)
def _finetune(self, video_idx, n_finetune_steps, start_step=0):
    """Offline fine-tuning that mixes frame 0 with sampled adaptation frames.

    On steps whose index is a multiple of ``self.offline_adaptation_interval``
    a frame from ``1 .. n_frames - 1`` is drawn without replacement (the pool
    is refilled once exhausted) until one with a usable label is found; that
    step uses ``self.adaptation_loss_scale``. All other steps train on frame 0
    with loss scale 1.0. A label is rejected when it is entirely 255 or equals
    one of the entries of ``bad_labels``.
    """
    print("offline finetuning...")
    tag = self.train_data.video_tag(video_idx)
    self.train_data.set_video_idx(video_idx)
    n_frames = self.train_data.num_examples_per_epoch()

    # sampling without replacement
    candidates = list(range(1, n_frames))
    end_step = start_step + n_finetune_steps
    for step_idx in range(start_step, end_step):
        if step_idx % self.offline_adaptation_interval != 0:
            feed_dict = self.train_data.feed_dict_for_video_frame(
                0, with_annotations=True)
            loss_scale = 1.0
        else:
            frame_idx = feed_dict = None
            while True:
                if not candidates:
                    candidates = list(range(1, n_frames))
                frame_idx = numpy.random.choice(candidates)
                feed_dict = self.train_data.feed_dict_for_video_frame(
                    frame_idx, with_annotations=True)
                label = feed_dict[self.train_data.get_label_placeholder()]

                # legacy adaptation images handling
                matches_legacy_bad = any(
                    label.shape == bad_label.shape
                    and (label == bad_label).all()
                    for bad_label in bad_labels)
                is_bad_label = (label == 255).all() or matches_legacy_bad

                if is_bad_label:
                    print("sequence", tag, "frame", frame_idx,
                          "has bad label, selecting new one...")
                # Drawn frames leave the pool whether or not they were usable.
                candidates.remove(frame_idx)
                if not is_bad_label:
                    break
            loss_scale = self.adaptation_loss_scale
            print("using adaptation sample: frame", frame_idx)

        loss, measures, n_imgs = self.trainer.train_step(
            epoch=step_idx, feed_dict=feed_dict, loss_scale=loss_scale)
        loss /= n_imgs
        iou = Measures.calc_iou(measures, n_imgs, [0])
        print("finetune on", tag, step_idx, "/", end_step,
              "loss:", loss, " err:", iou)
def _process_forward_result(self, y_argmax, logit, target, tag,
                            extraction_vals, main_folder, save_results):
    """Compute measures for one forwarded frame and optionally store its outputs.

    Args:
        y_argmax: predicted segmentation; saved as an image scaled by 255
            after squeezing axis 2.
        logit: logits to pickle next to the image, or ``None`` to skip.
        target: ground truth passed to the measure computation.
        tag: path-like identifier; its last two ``/``-separated components
            give the output sub-folder and file name. A
            ``_DO_NOT_STORE_LOGITS`` marker suppresses logit storage and is
            stripped from the tag.
        extraction_vals: extracted arrays, one per entry of
            ``self.extractions``; each must have a leading dimension of 1.
        main_folder: output root (concatenated directly with the sub-folder).
        save_results: when false, nothing is written besides creating the
            output folder.

    Returns:
        The result of ``Measures.compute_measures_for_binary_segmentation``.
    """
    # hack for avoiding storing logits for frames, which are not evaluated
    if "DO_NOT_STORE_LOGITS" in tag:
        logit = None
        tag = tag.replace("_DO_NOT_STORE_LOGITS", "")

    tag_parts = tag.split("/")
    folder = main_folder + tag_parts[-2] + "/"
    tf.gfile.MakeDirs(folder)

    # NOTE(review): the original branched on ``self.training_rounds > 1`` but
    # both branches built exactly this name, so the branch was redundant.
    # Confirm whether single-round runs were meant to omit the round suffix.
    out_fn = folder + tag_parts[-1].replace(
        ".jpg", "_" + repr(self.round) + ".png").replace(".bin", ".png")
    out_fn_logits = out_fn.replace(".png", ".pickle")

    measures = Measures.compute_measures_for_binary_segmentation(
        y_argmax, target)

    if save_results:
        y_scaled = y_argmax.astype("uint8")
        print(out_fn)
        imsave(out_fn, numpy.squeeze(y_scaled * 255, axis=2))

        if logit is not None:
            # pickle emits bytes, so the file must be opened in binary mode
            # (text mode raises TypeError on Python 3); ``with`` also closes
            # the handle, which the previous inline ``open`` never did.
            with open(out_fn_logits, "wb") as logits_file:
                pickle.dump(logit, logits_file, pickle.HIGHEST_PROTOCOL)

        for e in extraction_vals:
            assert e.shape[0] == 1  # batch size should be 1 here for now
        for name, val in zip(self.extractions, extraction_vals):
            val = val[0]  # remove batch dimension
            sp = out_fn.replace(".png", ".bin").split("/")
            sp[-1] = name + "_" + sp[-1]
            out_fn_extract = "/".join(sp)
            print(out_fn_extract)
            val.tofile(out_fn_extract)
    return measures
def _base_forward(self, network, data, save_results, save_logits):
    """Forward one epoch of ``data`` through ``network`` minibatch by minibatch.

    Collects the per-minibatch measures, argmax outputs and logits. Depending
    on ``self.ignore_first_and_last_results`` / ``self.ignore_first_result``
    the first (and last) measures are dropped before averaging. The averaged
    measures are logged to ``log.v1``, prefixed with the sequence index and
    tag when ``data`` has a ``video_tag`` attribute.

    Returns:
        ``(ys_argmax_values, logits)`` as two lists.
    """
    n_total = data.num_examples_per_epoch()
    targets = network.raw_labels
    # e.g. used for resizing
    ys = self._adjust_results_to_targets(network.y_softmax, targets)

    all_measures = []
    ys_argmax_values = []
    logits = []
    n_processed = 0
    while n_processed < n_total:
        n, batch_measures, batch_argmax, batch_logits, _ = \
            self._process_forward_minibatch(
                data, network, save_logits, save_results, targets, ys,
                n_processed)
        all_measures.extend(batch_measures)
        ys_argmax_values.extend(batch_argmax)
        logits.extend(batch_logits)
        n_processed += n
        print(n_processed, "/", n_total, file=log.v5)

    if self.ignore_first_and_last_results:
        all_measures = all_measures[1:-1]
    elif self.ignore_first_result:
        all_measures = all_measures[1:]
    averaged = Measures.average_measures(all_measures)

    if not hasattr(data, "video_tag"):
        print(averaged, file=log.v1)
    else:
        video_idx = data.get_video_idx()
        print("sequence", video_idx + 1, data.video_tag(video_idx), averaged,
              file=log.v1)
    return ys_argmax_values, logits
def build_network(self, config, x_image, y_ref, tags, void_label, n_classes,
                  is_training, freeze_batchnorm, use_weight_summaries):
    """Build one network tower per configured GPU and combine their outputs.

    The GPU list comes from ``config.int_list("gpus")``; only the first GPU is
    used when not training. With several GPUs the inputs (images, tags,
    labels and the optional original/resized sizes) are sliced along axis 0
    into equally sized per-tower chunks, and variables are placed on the CPU.
    Towers after the first reuse the variables of the first.

    Returns:
        A tuple ``(tower_losses, tower_regularizers, loss_summed,
        y_softmax_total, measures_accumulated, n_total, n_params, update_ops,
        tower_setups, tower_layers)``. ``tower_losses`` holds each tower's
        loss divided by its example count; ``y_softmax_total`` is the single
        tower's output or the per-tower outputs concatenated along axis 0
        (element-wise for tuples, keeping ``None`` entries as ``None``).
    """
    gpus = config.int_list("gpus")
    # only use one gpu for eval
    if not is_training:
        gpus = gpus[:1]
    if self.use_partialflow:
        # partialflow does not work with multigpu
        assert len(gpus) == 1, len(gpus)
    network_def = config.dict("network")

    # Floor division keeps the per-tower batch size an int. With true
    # division it becomes a float on Python 3, which breaks the slice bounds
    # below and lets the divisibility assert pass for any batch size.
    batch_size_tower = self.batch_size // len(gpus)
    assert batch_size_tower * len(gpus) == self.batch_size, (
        batch_size_tower, len(gpus), self.batch_size)

    loss_summed = measures_accumulated = y_softmax_total = None
    n_total = n_params = None
    tower_losses = []
    tower_regularizers = []
    update_ops = []
    tower_setups = []
    tower_layers = []
    first = True

    if x_image.get_shape().as_list()[0] is not None:
        if self.chunk_size != -1:
            assert x_image.get_shape().as_list()[0] == self.batch_size * self.chunk_size, \
                "dataset produced inputs with wrong shape"
        else:
            assert x_image.get_shape().as_list()[0] == self.batch_size, \
                "dataset produced inputs with wrong batch size"

    for idx, gpu in enumerate(gpus):
        original_sizes = self.inputs_tensors_dict.get(
            Constants.ORIGINAL_SIZES, None)
        resized_sizes = self.inputs_tensors_dict.get(
            Constants.RESIZED_SIZES, None)
        if len(gpus) == 1:
            x_image_tower = x_image
            y_ref_tower = y_ref
            tags_tower = tags
            variable_device = "/gpu:0"
        else:
            # Each tower receives a contiguous slice of the global batch.
            stride = batch_size_tower * (
                1 if self.chunk_size == -1 else self.chunk_size)
            x_image_tower = x_image[idx * stride:(idx + 1) * stride]
            tags_tower = tags[idx * stride:(idx + 1) * stride]
            if original_sizes is not None:
                original_sizes = original_sizes[idx * stride:(idx + 1) * stride]
            if resized_sizes is not None:
                resized_sizes = resized_sizes[idx * stride:(idx + 1) * stride]
            if isinstance(y_ref, tuple):
                y_ref_tower = tuple(
                    v[idx * stride:(idx + 1) * stride] for v in y_ref)
            else:
                y_ref_tower = y_ref[idx * stride:(idx + 1) * stride]
            variable_device = "/cpu:0"

        is_main_train_tower = is_training and first
        tower_setup = TowerSetup(
            dtype=config.dtype,
            gpu=gpu,
            is_main_train_tower=is_main_train_tower,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm,
            variable_device=variable_device,
            use_update_ops_collection=self.use_partialflow,
            batch_size=batch_size_tower,
            original_sizes=original_sizes,
            resized_sizes=resized_sizes,
            use_weight_summaries=is_main_train_tower and use_weight_summaries)
        tower_setups.append(tower_setup)

        # Every tower after the first reuses the first tower's variables.
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=True if not first else None):
            loss, measures, y_softmax, n, n_params_tower, regularizers, update_ops_tower, layers = self.build_tower(
                network_def, x_image_tower, y_ref_tower, tags_tower,
                void_label, n_classes, tower_setup)

        tower_layers.append(layers)
        tower_losses.append(loss / tf.cast(n, tower_setup.dtype))
        tower_regularizers.append(regularizers)
        if first:
            loss_summed = loss
            measures_accumulated = measures
            y_softmax_total = [y_softmax]
            n_total = n
            update_ops = update_ops_tower
            first = False
            n_params = n_params_tower
        else:
            loss_summed += loss
            measures_accumulated = Measures.calc_measures_sum(
                measures_accumulated, measures)
            y_softmax_total.append(y_softmax)
            n_total += n
            update_ops += update_ops_tower
            assert n_params_tower == n_params

    if len(gpus) == 1:
        y_softmax_total = y_softmax_total[0]
    else:
        if isinstance(y_softmax_total[0], tuple):
            y_softmax_out = []
            for out_idx in range(len(y_softmax_total[0])):
                if y_softmax_total[0][out_idx] is None:
                    # TODO: or just leave it out?
                    y_softmax_out.append(None)
                else:
                    v = tf.concat(
                        axis=0, values=[y[out_idx] for y in y_softmax_total])
                    y_softmax_out.append(v)
            y_softmax_total = tuple(y_softmax_out)
        else:
            y_softmax_total = tf.concat(
                axis=0, values=y_softmax_total, name='y_softmax_total')

    if self.current_graph_section is not None:
        self.current_graph_section.__exit__(None, None, None)
    return tower_losses, tower_regularizers, loss_summed, y_softmax_total, measures_accumulated, n_total, n_params, \
        update_ops, tower_setups, tower_layers
def eval(self):
    """Evaluate click-based segmentation and plot average IoU per click count.

    For every (image, annotation) entry of the input file lists the instance
    mask is extracted, and up to ``self.max_clicks`` rounds of
    ``self.mask_generation_fn`` are run, adding a click via
    ``self.add_clicks`` after each round. The measures of every round are
    grouped by click count, averaged, logged to ``log.v5`` and the IoU values
    are plotted to the file returned by ``self.get_file_path("eval_plot")``.
    """
    input_list = self.data.read_inputfile_lists()
    # Pair up the two parallel lists (image entries, annotation entries).
    input_list = list(zip(input_list[0], input_list[1]))
    # Maps click count -> list of measures collected at that click count.
    measures = {}
    count = 0
    for im, an in input_list:
        # Entries are colon-separated; the annotation entry carries the
        # instance id as its second field.
        im_path = im.split(":")[0]
        file_name = im_path.split("/")[-1]
        file_name_without_ext = file_name.split(".")[0]
        an_path = an.split(":")[0]
        inst = int(an.split(":")[1])
        # Entries whose annotation file does not exist are skipped silently.
        if os.path.exists(an_path):
            label_unmodified = imread(an_path)
            img_unmodified = imread(im_path)
            # Reset the per-instance click state.
            self.neg_row = []
            self.neg_col = []
            self.pos_row = []
            # NOTE(review): named col_pos while its siblings are neg_row /
            # neg_col / pos_row - confirm whether pos_col was intended.
            self.col_pos = []
            # Binary mask of the requested instance.
            label = np.where(label_unmodified == inst, 1, 0)
            # Skip instances covering fewer than 2500 pixels.
            if len(np.where(label_unmodified == inst)[0]) < 2500:
                continue
            count += 1
            img, label = self.create_inputs(img_unmodified, label)
            mask = None
            click_added = True
            clicks = 1
            # Add a positive click when there are no previous masks
            self.add_clicks(mask, label)
            u0, u1 = self.create_distance_transform(label)
            while clicks <= self.max_clicks:
                if self.save_plot:
                    file_name = file_name_without_ext + "_instance_" + repr(
                        inst) + "_clicks_" + repr(clicks)
                    self.save_image(file_name, img_unmodified, mask)
                # Break and continue with the next instance if a click could
                # not be added.
                if not click_added:
                    break
                print(repr(count) + "/" + repr(len(input_list)) + "-- Forwarding File:" + \
                      im + " Instance: " + repr(inst) + " Clicks:" + repr(clicks), file=log.v5)
                # Debug bookkeeping: tallies the objects returned by
                # get_objects() by type into `before` (both defined outside
                # this block). The matching "after" comparison is disabled.
                for i in get_objects():
                    before[type(i)] += 1
                mask, new_measures = self.mask_generation_fn(
                    img, tag=im, label=label[:, :, np.newaxis], old_label=None,
                    u0=u0, u1=u1)
                # Disabled leak check that compared per-type object counts
                # after the forward pass against `before`:
                # leaked_things = [[x] for x in range(10)]
                # for i in get_objects():
                #   after[type(i)] += 1
                # print [(k, after[k] - before[k]) for k in after if after[k] - before[k]]
                if clicks in measures:
                    measures[clicks] += [new_measures]
                else:
                    measures[clicks] = [new_measures]
                # Try to add the next click based on the new mask and refresh
                # the distance transforms for the next round.
                click_added = self.add_clicks(mask, label)
                u0, u1 = self.create_distance_transform(label)
                clicks += 1
    # Plot data: click counts (x) against average IoU (y).
    x_val = []
    y_val = []
    for click in measures:
        avg_measure = Measures.average_measures(measures[click])
        if Constants.IOU in avg_measure:
            x_val.append(click)
            y_val.append(float(avg_measure[Constants.IOU]))
        print("Average measure for " + repr(click) + " clicks: " + repr(avg_measure), file=log.v5)
    # Imported here, so matplotlib is only loaded when eval() runs.
    import matplotlib.pyplot as plt
    plt.clf()
    plt.plot(x_val, y_val)
    plt.savefig(self.get_file_path("eval_plot"))