Esempio n. 1
0
    def train(self):
        """Run the training loop from ``self.start_epoch`` to ``self.num_epochs``.

        Every epoch performs one training pass through ``self.run_epoch`` and
        then exactly one validation mode, chosen in this order:

        1. CMC validation when ``self.cmc_validation`` is set,
        2. three consecutive validation passes when ``self.recursive_training``
           is set (only the result of the last pass is kept),
        3. a single validation pass when ``self.valid_data`` is not None,
        4. no validation (loss ``0.0`` and empty measures) otherwise.

        The epoch summary is printed to stdout and appended to a per-model log
        file. When ``self.save`` is set the model is saved after every epoch.
        """
        assert self.need_train
        print("starting training")
        for epoch in range(self.start_epoch, self.num_epochs):
            start = time.time()
            train_loss, train_measures = self.run_epoch(
                self.trainer.train_step, self.train_data, epoch, train=True)

            if self.cmc_validation:
                valid_loss, valid_measures = do_cmc_validation(
                    self, self.test_network, self.valid_data)
            elif self.recursive_training:
                for valid_round in range(3):
                    valid_loss, valid_measures = self.run_epoch(
                        self.trainer.validation_step,
                        self.valid_data,
                        epoch,
                        train=False)
                    valid_error_string = Measures.get_error_string(
                        valid_measures, "valid")
                    print("Validation ", valid_round, ": ", valid_error_string)
                if hasattr(self.valid_data, "clear_data_dict"):
                    self.valid_data.clear_data_dict()
            elif self.valid_data is not None:
                valid_loss, valid_measures = self.run_epoch(
                    self.trainer.validation_step,
                    self.valid_data,
                    epoch,
                    train=False)
            else:
                valid_loss = 0.0
                valid_measures = {}

            elapsed = time.time() - start
            train_error_string = Measures.get_error_string(
                train_measures, "train")
            valid_error_string = Measures.get_error_string(
                valid_measures, "valid")

            # Build the summary once so stdout and the log file always agree.
            summary = ("epoch", epoch + 1, "finished. elapsed:",
                       "%.5f" % elapsed, "train_score:", "%.5f" % train_loss,
                       train_error_string, "valid_score:", valid_loss,
                       valid_error_string)
            print(*summary)

            # NOTE(review): the log directory is hard-coded to one user's home
            # directory; it probably belongs in the config.
            log_path = ("/home/luiten/vision/youtubevos/ReID_net/logs/" +
                        self.model + ".txt")
            # The context manager closes (and flushes) the log file every
            # epoch instead of leaking one open handle per epoch.
            with open(log_path, "a") as log_file:
                print(*summary, file=log_file)

            if self.save:
                self.save_model(epoch + 1)
                if hasattr(self.train_data, "save_masks"):
                    self.train_data.save_masks(epoch + 1)
Esempio n. 2
0
    def run_epoch(self, step_fn, data, epoch, train):
        """Call ``step_fn`` until one epoch worth of examples was processed.

        Args:
            step_fn: callable taking the epoch index. It returns either
                ``(loss_summed, measures, n_imgs)`` or a 6-tuple that
                additionally carries tags, predictions and targets.
            data: dataset object; must provide ``num_examples_per_epoch()``.
                ``ignore_classes`` and ``set_output_as_old_label`` are used
                when present.
            epoch: epoch index forwarded to ``step_fn`` and the hooks.
            train: flag forwarded to the detection hooks.

        Returns:
            ``(loss, measures)``: the summed loss divided by the number of
            processed examples (at least 1) and the final averaged measures.
        """
        loss_total = 0.0
        epoch_size = data.num_examples_per_epoch()
        summed_measures = {}
        seen = 0
        ignore_classes = getattr(data, "ignore_classes", None)

        while seen < epoch_size:
            tic = time.time()
            step_result = step_fn(epoch)
            if len(step_result) <= 3:
                batch_loss, batch_measures, batch_size = step_result
            else:
                (batch_loss, batch_measures, batch_size, tags, predictions,
                 targets) = step_result
                # Feed the predictions back to datasets that support it.
                if hasattr(data, "set_output_as_old_label"):
                    data.set_output_as_old_label(tags, predictions, epoch,
                                                 targets)

            loss_total += batch_loss

            # Special handling for detection storing.
            self._maybe_store_detections(epoch, train, batch_measures)

            summed_measures = Measures.calc_measures_sum(
                summed_measures, batch_measures)
            seen += batch_size

            batch_loss_avg = batch_loss / batch_size
            # Skip the expensive measures here: this is only a progress
            # report, not the final result for the epoch.
            batch_measures_avg = Measures.calc_measures_avg(
                batch_measures,
                batch_size,
                ignore_classes,
                for_final_result=False)
            elapsed = time.time() - tic

            # TODO: Print proper averages for the measures
            print(seen, '/', epoch_size, batch_loss_avg, batch_measures_avg,
                  "elapsed", elapsed)

        # Guard against dividing by zero when nothing was processed.
        loss_total /= max(seen, 1)
        final_measures = Measures.calc_measures_avg(
            summed_measures, seen, ignore_classes, for_final_result=True)
        self._maybe_finalize_detections(train)
        return loss_total, final_measures
Esempio n. 3
0
    def postprocess(self, logit, frame_id):
        """Threshold ``logit`` into a label map, restricted by the old label.

        Pixels whose logit exceeds ``self.high_threshold`` are set to 1, all
        others to 0. If the data of frame ``frame_id`` contains an
        ``'old_label'`` entry, the map is additionally AND-ed with channel 0
        of that label (result cast to ``int``) and every pixel whose logit
        lies strictly between ``self.low_threshold`` and
        ``self.high_threshold`` is set to ``self.void_label()``.

        Args:
            logit: array of scores.
            frame_id: key/index into ``self._get_video_data()``.

        Returns:
            The post-processed annotation array. Without an old label it has
            the dtype of ``logit``; with one it is an ``int`` array.
        """
        ann_postproc = np.zeros_like(logit)
        ann_postproc[logit > self.high_threshold] = 1

        frame_data = self._get_video_data()[frame_id]
        if 'old_label' not in frame_data:
            return ann_postproc

        bbox_initial = frame_data['old_label'][:, :, 0]

        # Postprocess 1 (replace the annotation by its bounding box when the
        # IoU with the old label is below 50%) is disabled, so neither the
        # bounding box nor the IoU is computed here.

        # Postprocess 2: reset all pixels outside the old label's region.
        ann_postproc = np.logical_and(bbox_initial, ann_postproc).astype(int)

        # Mark pixels inside the threshold range as void (ignore) pixels.
        uncertain = np.logical_and(logit > self.low_threshold,
                                   logit < self.high_threshold)
        ann_postproc[uncertain] = self.void_label()

        return ann_postproc
Esempio n. 4
0
    def _finetune(self, video_idx, n_finetune_steps, start_step=0):
        """Fine-tune on frame 0 of a video using a damaged mask as old mask.

        Args:
            video_idx: index of the video to fine-tune on.
            n_finetune_steps: number of training steps to run.
            start_step: index of the first step; it is passed to the trainer
                as the ``epoch`` argument.
        """
        first_frame = 0
        dataset = self.train_data
        tag = dataset.video_tag(video_idx)
        dataset.set_video_idx(video_idx)

        # Problem: frame 0 has no old mask.
        # For now the current mask is used as input and optionally damaged.
        # The backward flow is set to zero here, since there is no preceding
        # frame. Maybe some artificial backward flow should be generated.
        reference_mask = dataset.label_for_video_frame(frame_idx=0)
        last_step = start_step + n_finetune_steps
        for step in range(start_step, last_step):
            # A freshly damaged copy of the reference mask for every step.
            damaged_mask = damage_mask(reference_mask,
                                       self.old_mask_scale_factor,
                                       self.old_mask_shift_absolute,
                                       self.old_mask_shift_factor)
            feed_dict = dataset.feed_dict_for_video_frame(
                first_frame, with_annotations=True, old_mask=damaged_mask)
            loss, measures, n_imgs = self.trainer.train_step(
                epoch=step, feed_dict=feed_dict)
            loss /= n_imgs
            # TODO: Replace [0] with ignore classes.
            iou = Measures.calc_iou(measures, n_imgs, [0])
            print("finetune on", tag, step, "/", last_step, "loss:", loss,
                  " iou:", iou, file=log.v5)
Esempio n. 5
0
 def _finetune(self, video_idx, n_finetune_steps, start_step=0):
     """Fine-tune on frame 0 of a video, mixing in lucid examples.

     Every ``self.lucid_interval``-th step (unless the interval is -1) trains
     on the feed dict from ``get_lucid_feed_dict()`` with the loss scaled by
     ``self.lucid_loss_scale``; all other steps train on frame 0 with a loss
     scale of 1.0.

     Args:
         video_idx: index of the video to fine-tune on.
         n_finetune_steps: number of training steps to run.
         start_step: index of the first step; it is passed to the trainer
             as the ``epoch`` argument.
     """
     first_frame = 0
     tag = self.train_data.video_tag(video_idx)
     self.train_data.set_video_idx(video_idx)
     last_step = start_step + n_finetune_steps
     for step in range(start_step, last_step):
         use_lucid = (self.lucid_interval != -1
                      and step % self.lucid_interval == 0)
         if not use_lucid:
             feed_dict = self.train_data.feed_dict_for_video_frame(
                 first_frame, with_annotations=True)
             loss_scale = 1.0
         else:
             print("lucid example", file=log.v5)
             feed_dict = self.train_data.get_lucid_feed_dict()
             loss_scale = self.lucid_loss_scale
         loss, measures, n_imgs = self.trainer.train_step(
             epoch=step, feed_dict=feed_dict, loss_scale=loss_scale)
         loss /= n_imgs
         iou = Measures.calc_iou(measures, n_imgs, [0])
         print("finetune on", tag, step, "/", last_step, "loss:", loss,
               " iou:", iou, file=log.v5)
Esempio n. 6
0
 def _finetune(self, video_idx, n_finetune_steps, start_step=0):
     """Fine-tune on all annotated frames of a video.

     Each step trains once on every frame id in
     ``self.train_data.annotated_frame_ids``; when the dataset has no such
     attribute only frame 0 is used.

     Args:
         video_idx: index of the video to fine-tune on.
         n_finetune_steps: number of passes over the annotated frames.
         start_step: index of the first step; it is passed to the trainer
             as the ``epoch`` argument.
     """
     # NOTE(review): the frame ids are read before set_video_idx() is called;
     # confirm they do not depend on the currently selected video.
     frame_ids = getattr(self.train_data, "annotated_frame_ids", [0])
     tag = self.train_data.video_tag(video_idx)
     self.train_data.set_video_idx(video_idx)
     last_step = start_step + n_finetune_steps
     for step in range(start_step, last_step):
         for frame_id in frame_ids:
             feed_dict = self.train_data.feed_dict_for_video_frame(
                 frame_id, with_annotations=True)
             loss, measures, n_imgs = self.trainer.train_step(
                 epoch=step, feed_dict=feed_dict, loss_scale=1.0)
             loss /= n_imgs
             iou = Measures.calc_iou(measures, n_imgs, [0])
             print("finetune on", tag, step, "/", last_step, "loss:", loss,
                   " iou:", iou, file=log.v5)
Esempio n. 7
0
 def eval(self):
     """Run one validation pass and print the loss, measures and run time.

     Uses ``do_cmc_validation`` when ``self.cmc_validation`` is set and a
     regular validation epoch (epoch index 0) otherwise.
     """
     tic = time.time()
     if not self.cmc_validation:
         valid_loss, measures = self.run_epoch(self.trainer.validation_step,
                                               self.valid_data,
                                               0,
                                               train=False)
     else:
         valid_loss, measures = do_cmc_validation(self, self.test_network,
                                                  self.valid_data)
     elapsed = time.time() - tic
     error_summary = Measures.get_error_string(measures, "valid")
     print("eval finished. elapsed:", elapsed, "valid_score:", valid_loss,
           error_summary)
def get_measures(pred, filename):
  """Compute binary segmentation measures of ``pred`` against its ground truth.

  Values of ``pred`` above ``THRESH`` are set to 1; all other values are left
  unchanged. The ground truth is read from the ``SegmentationObject`` folder
  below ``VOC_PATH``, using ``filename`` with every ``".jpg"`` removed and
  ``".png"`` appended. The target is the result of ``Util.get_best_overlap``
  with classes 0 and 255 ignored.

  Args:
    pred: predicted mask array.
    filename: file name of the image the prediction belongs to.

  Returns:
    The result of ``Measures.compute_measures_for_binary_segmentation``.
  """
  thresholded = np.where(pred > THRESH, 1, pred)

  gt_name = filename.replace(".jpg", '') + ".png"
  gt = imread(VOC_PATH + "SegmentationObject/" + gt_name)

  target = Util.get_best_overlap(pred_mask=thresholded, gt=gt,
                                 ignore_classes=[0, 255])
  return Measures.compute_measures_for_binary_segmentation(thresholded, target)
    def _finetune(self, video_idx, n_finetune_steps, start_step=0):
        """Fine-tune on frame 0, periodically mixing in an adaptation frame.

        Every ``self.offline_adaptation_interval``-th step trains on a frame
        drawn at random (without replacement) from frames ``1..n_frames-1``
        with the loss scaled by ``self.adaptation_loss_scale``. Frames whose
        label is all 255 or equals one of ``bad_labels`` are rejected and
        another frame is drawn. All other steps train on frame 0 with a loss
        scale of 1.0.

        Args:
            video_idx: index of the video to fine-tune on.
            n_finetune_steps: number of training steps to run.
            start_step: index of the first step; it is passed to the trainer
                as the ``epoch`` argument.
        """
        print("offline finetuning...")
        tag = self.train_data.video_tag(video_idx)
        self.train_data.set_video_idx(video_idx)
        n_frames = self.train_data.num_examples_per_epoch()

        # Sampling without replacement: the pool is refilled once exhausted.
        candidates = list(range(1, n_frames))
        last_step = start_step + n_finetune_steps

        for step_idx in range(start_step, last_step):
            if step_idx % self.offline_adaptation_interval != 0:
                feed_dict = self.train_data.feed_dict_for_video_frame(
                    0, with_annotations=True)
                loss_scale = 1.0
            else:
                # NOTE(review): this loop never terminates if every frame has
                # a bad label, and sampling fails when the video has no frame
                # besides frame 0 - confirm callers rule both out.
                while True:
                    if not candidates:
                        candidates = list(range(1, n_frames))
                    frame_idx = numpy.random.choice(candidates)
                    feed_dict = self.train_data.feed_dict_for_video_frame(
                        frame_idx, with_annotations=True)
                    label = feed_dict[self.train_data.get_label_placeholder()]

                    all_void = (label == 255).all()
                    # Legacy adaptation images handling.
                    matches_legacy = any(
                        label.shape == bad_label.shape
                        and (label == bad_label).all()
                        for bad_label in bad_labels)

                    candidates.remove(frame_idx)
                    if not (all_void or matches_legacy):
                        break
                    print("sequence", tag, "frame", frame_idx,
                          "has bad label, selecting new one...")
                loss_scale = self.adaptation_loss_scale
                print("using adaptation sample: frame", frame_idx)

            loss, measures, n_imgs = self.trainer.train_step(
                epoch=step_idx, feed_dict=feed_dict, loss_scale=loss_scale)
            loss /= n_imgs
            iou = Measures.calc_iou(measures, n_imgs, [0])
            print("finetune on", tag, step_idx, "/", last_step, "loss:", loss,
                  " err:", iou)
    def _process_forward_result(self, y_argmax, logit, target, tag,
                                extraction_vals, main_folder, save_results):
        """Compute measures for one forwarded frame and store its outputs.

        Output files are written below ``main_folder`` into a sub-folder named
        after the second-to-last path component of ``tag``.

        Args:
            y_argmax: predicted labels for the frame.
            logit: logits for the frame, or None when they are not stored.
            target: ground-truth labels for the frame.
            tag: "/"-separated path of the frame. If it contains
                ``"DO_NOT_STORE_LOGITS"`` the logits are not stored.
            extraction_vals: values extracted from the network, in the same
                order as ``self.extractions``; each must have a leading batch
                dimension of size 1.
            main_folder: output root folder (expected to end with "/").
            save_results: whether to write the prediction as a PNG image.

        Returns:
            The result of ``Measures.compute_measures_for_binary_segmentation``
            for ``y_argmax`` and ``target``.
        """
        # Hack to avoid storing logits for frames which are not evaluated.
        if "DO_NOT_STORE_LOGITS" in tag:
            logit = None
            tag = tag.replace("_DO_NOT_STORE_LOGITS", "")

        tag_parts = tag.split("/")
        folder = main_folder + tag_parts[-2] + "/"
        tf.gfile.MakeDirs(folder)

        # NOTE(review): the original had an `if self.training_rounds > 1`
        # branch whose two arms were identical, so the round suffix is always
        # added. Confirm whether single-round runs were meant to omit it.
        out_fn = folder + tag_parts[-1].replace(
            ".jpg", "_" + repr(self.round) + ".png").replace(".bin", ".png")
        out_fn_logits = out_fn.replace(".png", ".pickle")

        measures = Measures.compute_measures_for_binary_segmentation(
            y_argmax, target)

        if save_results:
            y_scaled = y_argmax.astype("uint8")
            print(out_fn)
            imsave(out_fn, numpy.squeeze(y_scaled * 255, axis=2))

        if logit is not None:
            # Pickle produces bytes, so the file must be opened in binary
            # mode; the context manager also closes the handle.
            with open(out_fn_logits, "wb") as logits_file:
                pickle.dump(logit, logits_file, pickle.HIGHEST_PROTOCOL)

        for e in extraction_vals:
            assert e.shape[0] == 1  # batch size should be 1 here for now

        # All extractions share the output path; only the file name prefix
        # differs.
        path_parts = out_fn.replace(".png", ".bin").split("/")
        for name, val in zip(self.extractions, extraction_vals):
            val = val[0]  # remove batch dimension
            out_fn_extract = "/".join(
                path_parts[:-1] + [name + "_" + path_parts[-1]])
            print(out_fn_extract)
            val.tofile(out_fn_extract)
        return measures
    def _base_forward(self, network, data, save_results, save_logits):
        """Forward one epoch of ``data`` and log the averaged measures.

        Args:
            network: network providing ``raw_labels`` and ``y_softmax``.
            data: dataset; must provide ``num_examples_per_epoch()``.
            save_results: forwarded to ``self._process_forward_minibatch``.
            save_logits: forwarded to ``self._process_forward_minibatch``.

        Returns:
            ``(ys_argmax_values, logits)``: two lists with the predictions and
            logits of all processed minibatches, in processing order.
        """
        n_total = data.num_examples_per_epoch()
        targets = network.raw_labels
        # e.g. used for resizing
        ys = self._adjust_results_to_targets(network.y_softmax, targets)

        collected_measures = []
        predictions = []
        collected_logits = []
        n_done = 0
        while n_done < n_total:
            batch = self._process_forward_minibatch(
                data, network, save_logits, save_results, targets, ys, n_done)
            n, batch_measures, batch_argmax, batch_logits, _ = batch
            collected_measures += batch_measures
            predictions.extend(batch_argmax)
            collected_logits.extend(batch_logits)
            n_done += n
            print(n_done, "/", n_total, file=log.v5)

        # Optionally drop the results at the boundaries before averaging.
        if self.ignore_first_and_last_results:
            collected_measures = collected_measures[1:-1]
        elif self.ignore_first_result:
            collected_measures = collected_measures[1:]

        averaged = Measures.average_measures(collected_measures)
        if not hasattr(data, "video_tag"):
            print(averaged, file=log.v1)
        else:
            video_idx = data.get_video_idx()
            print("sequence", video_idx + 1, data.video_tag(video_idx),
                  averaged, file=log.v1)

        return predictions, collected_logits
Esempio n. 12
0
    def build_network(self, config, x_image, y_ref, tags, void_label,
                      n_classes, is_training, freeze_batchnorm,
                      use_weight_summaries):
        """Build one network tower per GPU and combine the tower outputs.

        The GPUs are read from the config entry ``"gpus"``; only the first one
        is used when ``is_training`` is false. With more than one GPU the
        inputs are sliced evenly along the first axis, one slice per tower.
        Every tower is built by ``self.build_tower``; towers after the first
        reuse the variables of the first one.

        Args:
            config: configuration providing ``int_list``, ``dict`` and
                ``dtype``.
            x_image: input image tensor.
            y_ref: reference label tensor, or a tuple of tensors.
            tags: tag tensor aligned with ``x_image``.
            void_label: forwarded to ``self.build_tower``.
            n_classes: forwarded to ``self.build_tower``.
            is_training: whether the network is built for training.
            freeze_batchnorm: forwarded to the tower setup.
            use_weight_summaries: enables weight summaries on the main
                training tower only.

        Returns:
            A 10-tuple ``(tower_losses, tower_regularizers, loss_summed,
            y_softmax_total, measures_accumulated, n_total, n_params,
            update_ops, tower_setups, tower_layers)``.
        """
        gpus = config.int_list("gpus")
        # only use one gpu for eval
        if not is_training:
            gpus = gpus[:1]
        if self.use_partialflow:
            assert len(gpus) == 1, len(
                gpus)  # partialflow does not work with multigpu
        network_def = config.dict("network")
        # Floor division keeps the per-tower batch size an integer, so it can
        # be used in slice bounds and the assert below really checks that the
        # batch is evenly divisible by the number of GPUs.
        batch_size_tower = self.batch_size // len(gpus)
        assert batch_size_tower * len(gpus) == self.batch_size, (
            batch_size_tower, len(gpus), self.batch_size)
        loss_summed = measures_accumulated = y_softmax_total = n_total = n_params = None
        tower_losses = []
        tower_regularizers = []
        update_ops = []
        tower_setups = []
        tower_layers = []
        first = True

        # Sanity-check the static batch dimension when it is known.
        static_batch_dim = x_image.get_shape().as_list()[0]
        if static_batch_dim is not None:
            if self.chunk_size != -1:
                assert static_batch_dim == self.batch_size * self.chunk_size, \
                  "dataset produced inputs with wrong shape"
            else:
                assert static_batch_dim == self.batch_size, \
                  "dataset produced inputs with wrong batch size"

        for idx, gpu in enumerate(gpus):
            original_sizes = self.inputs_tensors_dict.get(
                Constants.ORIGINAL_SIZES, None)
            resized_sizes = self.inputs_tensors_dict.get(
                Constants.RESIZED_SIZES, None)
            if len(gpus) == 1:
                x_image_tower = x_image
                y_ref_tower = y_ref
                tags_tower = tags
                variable_device = "/gpu:0"
            else:
                # Number of leading-axis entries that belong to one tower.
                stride = batch_size_tower * (1 if self.chunk_size == -1 else
                                             self.chunk_size)
                tower_slice = slice(idx * stride, (idx + 1) * stride)
                x_image_tower = x_image[tower_slice]
                tags_tower = tags[tower_slice]
                if original_sizes is not None:
                    original_sizes = original_sizes[tower_slice]
                if resized_sizes is not None:
                    resized_sizes = resized_sizes[tower_slice]
                if isinstance(y_ref, tuple):
                    y_ref_tower = tuple(v[tower_slice] for v in y_ref)
                else:
                    y_ref_tower = y_ref[tower_slice]
                variable_device = "/cpu:0"

            is_main_train_tower = is_training and first
            tower_setup = TowerSetup(
                dtype=config.dtype,
                gpu=gpu,
                is_main_train_tower=is_main_train_tower,
                is_training=is_training,
                freeze_batchnorm=freeze_batchnorm,
                variable_device=variable_device,
                use_update_ops_collection=self.use_partialflow,
                batch_size=batch_size_tower,
                original_sizes=original_sizes,
                resized_sizes=resized_sizes,
                use_weight_summaries=is_main_train_tower
                and use_weight_summaries)
            tower_setups.append(tower_setup)

            # Every tower after the first one reuses the existing variables.
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=True if not first else None):
                loss, measures, y_softmax, n, n_params_tower, regularizers, update_ops_tower, layers = self.build_tower(
                    network_def, x_image_tower, y_ref_tower, tags_tower,
                    void_label, n_classes, tower_setup)

            tower_layers.append(layers)
            tower_losses.append(loss / tf.cast(n, tower_setup.dtype))
            tower_regularizers.append(regularizers)
            if first:
                loss_summed = loss
                measures_accumulated = measures
                y_softmax_total = [y_softmax]
                n_total = n
                update_ops = update_ops_tower
                first = False
                n_params = n_params_tower
            else:
                loss_summed += loss
                measures_accumulated = Measures.calc_measures_sum(
                    measures_accumulated, measures)
                y_softmax_total.append(y_softmax)
                n_total += n
                update_ops += update_ops_tower
                assert n_params_tower == n_params

        # Merge the per-tower softmax outputs back into one batch.
        if len(gpus) == 1:
            y_softmax_total = y_softmax_total[0]
        else:
            if isinstance(y_softmax_total[0], tuple):
                y_softmax_out = []
                for out_idx in range(len(y_softmax_total[0])):
                    if y_softmax_total[0][out_idx] is None:
                        #TODO: or just leave it out?
                        y_softmax_out.append(None)
                    else:
                        v = tf.concat(
                            axis=0,
                            values=[y[out_idx] for y in y_softmax_total])
                        y_softmax_out.append(v)
                y_softmax_total = tuple(y_softmax_out)
            else:
                y_softmax_total = tf.concat(axis=0,
                                            values=y_softmax_total,
                                            name='y_softmax_total')
        if self.current_graph_section is not None:
            self.current_graph_section.__exit__(None, None, None)
        return tower_losses, tower_regularizers, loss_summed, y_softmax_total, measures_accumulated, n_total, n_params, \
            update_ops, tower_setups, tower_layers
Esempio n. 13
0
    def eval(self):
        """Evaluate click-based mask generation over all input instances.

        For every (image, annotation) entry whose annotation file exists and
        whose instance covers at least 2500 pixels, masks are generated with
        ``self.mask_generation_fn`` for up to ``self.max_clicks`` rounds; a
        click is added through ``self.add_clicks`` after every round and the
        instance is abandoned as soon as no click could be added. The measures
        are averaged per click count, logged, and the IoU over the click count
        is plotted to ``self.get_file_path("eval_plot")``.
        """
        file_lists = self.data.read_inputfile_lists()
        input_list = list(zip(file_lists[0], file_lists[1]))
        measures = {}
        count = 0

        for im, an in input_list:
            im_path = im.split(":")[0]
            file_name_without_ext = im_path.split("/")[-1].split(".")[0]
            an_fields = an.split(":")
            an_path = an_fields[0]
            inst = int(an_fields[1])

            if not os.path.exists(an_path):
                continue

            label_unmodified = imread(an_path)
            img_unmodified = imread(im_path)
            # Reset the click state for the new instance.
            self.neg_row = []
            self.neg_col = []
            self.pos_row = []
            self.col_pos = []
            instance_pixels = label_unmodified == inst
            label = np.where(instance_pixels, 1, 0)

            # Skip instances that are too small.
            if len(np.where(instance_pixels)[0]) < 2500:
                continue
            count += 1
            img, label = self.create_inputs(img_unmodified, label)
            mask = None
            click_added = True
            clicks = 1
            # Add a positive click when there are no previous masks
            self.add_clicks(mask, label)
            u0, u1 = self.create_distance_transform(label)
            while clicks <= self.max_clicks:
                if self.save_plot:
                    plot_name = (file_name_without_ext + "_instance_" +
                                 repr(inst) + "_clicks_" + repr(clicks))
                    self.save_image(plot_name, img_unmodified, mask)
                # Continue with the next instance if no click could be added.
                if not click_added:
                    break

                progress = (repr(count) + "/" + repr(len(input_list)) +
                            "-- Forwarding File:" + im + " Instance: " +
                            repr(inst) + " Clicks:" + repr(clicks))
                print(progress, file=log.v5)

                # NOTE(review): this looks like leftover memory-leak
                # instrumentation (`before` and `get_objects` are defined
                # outside this block) - confirm it is still needed.
                for obj in get_objects():
                    before[type(obj)] += 1

                mask, new_measures = self.mask_generation_fn(
                    img,
                    tag=im,
                    label=label[:, :, np.newaxis],
                    old_label=None,
                    u0=u0,
                    u1=u1)

                measures.setdefault(clicks, []).append(new_measures)

                click_added = self.add_clicks(mask, label)
                u0, u1 = self.create_distance_transform(label)
                clicks += 1

        x_val = []
        y_val = []
        for click, click_measures in measures.items():
            avg_measure = Measures.average_measures(click_measures)
            if Constants.IOU in avg_measure:
                x_val.append(click)
                y_val.append(float(avg_measure[Constants.IOU]))
            print("Average measure for " + repr(click) + " clicks: " +
                  repr(avg_measure),
                  file=log.v5)

        import matplotlib.pyplot as plt
        plt.clf()
        plt.plot(x_val, y_val)
        plt.savefig(self.get_file_path("eval_plot"))