Example No. 1
    def _validate_with_gt(self):
        batch_time = AverageMeter()

        if type(self.valid_loader) is not list:
            self.valid_loader = [self.valid_loader]

        # only use the first GPU to run validation; multiple GPUs might raise an error.
        # https://github.com/Eromera/erfnet_pytorch/issues/2#issuecomment-486142360
        self.model = self.model.module
        self.model.eval()

        end = time.time()

        all_error_names = []
        all_error_avgs = []

        n_step = 0
        for i_set, loader in enumerate(self.valid_loader):
            error_names = ['EPE']
            error_meters = AverageMeter(i=len(error_names))
            for i_step, data in enumerate(loader):
                img1, img2 = data['img1'], data['img2']
                img_pair = torch.cat([img1, img2], 1).to(self.device)
                gt_flows = data['target']['flow'].numpy().transpose([0, 2, 3, 1])

                # compute output
                flows = self.model(img_pair)['flows_fw']
                pred_flows = flows[0].detach().cpu().numpy().transpose([0, 2, 3, 1])

                es = evaluate_flow(gt_flows, pred_flows)
                error_meters.update([l.item() for l in es], img_pair.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i_step % self.cfg.print_freq == 0 or i_step == len(loader) - 1:
                    self._log.info('Test: {0}[{1}/{2}]\t Time {3}\t '.format(
                        i_set, i_step, self.cfg.valid_size, batch_time) + ' '.join(
                        map('{:.2f}'.format, error_meters.avg)))

                if i_step > self.cfg.valid_size:
                    break
            n_step += len(loader)

            # write error to tf board.
            for value, name in zip(error_meters.avg, error_names):
                self.summary_writer.add_scalar(
                    'Valid_{}_{}'.format(name, i_set), value, self.i_epoch)

            all_error_avgs.extend(error_meters.avg)
            all_error_names.extend(['{}_{}'.format(name, i_set) for name in error_names])

        self.model = torch.nn.DataParallel(self.model, device_ids=self.device_ids)
        # To save disk space during debugging, only save the model
        # once more than cfg.save_iter iterations have run.
        if self.i_iter > self.cfg.save_iter:
            self.save_model(all_error_avgs[0] + all_error_avgs[1], name='Sintel')

        return all_error_avgs, all_error_names
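
All of these trainer snippets rely on an AverageMeter helper from the project's utils. A minimal sketch of the multi-value variant they assume (assumed behaviour only; the real utils.misc_utils implementation may differ) could look like this:

class AverageMeter(object):
    """Track running averages of one or several scalar values."""

    def __init__(self, i=1, precision=3):
        self.meters = i
        self.precision = precision
        self.reset()

    def reset(self):
        self.val = [0.] * self.meters
        self.avg = [0.] * self.meters
        self.sum = [0.] * self.meters
        self.count = 0

    def update(self, val, n=1):
        if not isinstance(val, (list, tuple)):
            val = [val]
        assert len(val) == self.meters
        self.count += n
        for k, v in enumerate(val):
            self.val[k] = v
            self.sum[k] += v * n
            self.avg[k] = self.sum[k] / self.count

    def __repr__(self):
        return ' '.join('{:.{p}f}({:.{p}f})'.format(v, a, p=self.precision)
                        for v, a in zip(self.val, self.avg))

The YOLOv3-style examples further down use a simpler single-value variant that exposes an .average attribute instead of the .avg list.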
Example No. 2
    def _validate_with_gt(self):
        batch_time = AverageMeter()

        error_names = [
            'abs_rel', 'sq_rel', 'rmse', 'rmse_log', 'a1', 'a2', 'a3'
        ]
        error_meters = AverageMeter(i=len(error_names))

        self.model.eval()
        self.model = self.model.float()
        end = time.time()
        for i_step, data in enumerate(self.valid_loader):
            img_l, img_r = data['img1'], data['img1r']
            t_in = torch.cat([img_l, img_r], 1).to(self.device)

            # compute output
            disparities = self.model(t_in)[:4]

            disp_lr = disparities[0].detach().cpu().numpy()
            disp = disp_lr[:, 0, :, :]

            gt_disp_occ = list(map(load_disp, data['disp_occ']))
            fl_bl = [f.detach().cpu().numpy() for f in data['fl_bl']]
            gt_depth_occ = list(
                map(
                    lambda p, q: convert_disp_to_depth(
                        p, normed=False, fl_bl=q), gt_disp_occ, fl_bl))
            im_size = list(map(lambda p: p.shape[:2], gt_disp_occ))
            pred_depth = list(
                map(lambda p, q, r: convert_disp_to_depth(p, None, q, fl_bl=r),
                    disp, im_size, fl_bl))

            err_depth = compute_depth_errors(gt_depth_occ, pred_depth)
            error_meters.update(err_depth, img_l.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_step % self.cfg.print_freq == 0:
                self._log.info(
                    'Test: [{0}/{1}]\t Time {2}\t '.format(
                        i_step, self.cfg.valid_size, batch_time) +
                    ' '.join(map('{:.2f}'.format, error_meters.avg)))

            if i_step > self.cfg.valid_size:
                break

        # write error to tf board.
        for value, name in zip(error_meters.avg, error_names):
            self.summary_writer.add_scalar('Valid_' + name, value,
                                           self.i_epoch)

        # To save disk space during debugging, only save the model
        # once more than cfg.save_iter iterations have run.
        if self.i_iter > self.cfg.save_iter:
            self.save_model(error_meters.avg[0], 'KITTI_depth')

        return error_meters.avg, error_names
Example No. 3
    def _validate_with_gt(self):
        batch_time = AverageMeter()

        error_names = ['EPE', 'E_noc', 'E_occ', 'F1_all']
        error_meters = AverageMeter(i=len(error_names))

        self.model.eval()
        self.model = self.model.float()
        end = time.time()
        for i_step, data in enumerate(self.valid_loader):
            img1, img2 = data['img1'], data['img2']
            img_pair = torch.cat([img1, img2], 1).to(self.device)

            # compute output
            output = self.model(img_pair)

            res = list(map(load_flow, data['flow_occ']))
            gt_flows, occ_masks = [r[0] for r in res], [r[1] for r in res]
            res = list(map(load_flow, data['flow_noc']))
            _, noc_masks = [r[0] for r in res], [r[1] for r in res]

            gt_flows = [np.concatenate([flow, occ_mask, noc_mask], axis=2) for
                        flow, occ_mask, noc_mask in zip(gt_flows, occ_masks, noc_masks)]
            pred_flows = output[0].detach().cpu().numpy().transpose([0, 2, 3, 1])
            es = evaluate_kitti_flow(gt_flows, pred_flows)
            error_meters.update([l.item() for l in es], img_pair.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_step % self.cfg.print_freq == 0:
                self._log.info('Test: [{0}/{1}]\t Time {2}\t '.format(
                    i_step, self.cfg.valid_size, batch_time) + ' '.join(
                    map('{:.2f}'.format, error_meters.avg)))

            if i_step > self.cfg.valid_size:
                break

        # write error to tf board.
        for value, name in zip(error_meters.avg, error_names):
            self.summary_writer.add_scalar('Valid_' + name, value, self.i_epoch)

        # To save disk space during debugging, only save the model
        # once more than cfg.save_iter iterations have run.
        if self.i_iter > self.cfg.save_iter:
            self.save_model(error_meters.avg[0], 'KITTI_flow')

        return error_meters.avg, error_names
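
load_flow in the KITTI examples is assumed to read a ground-truth flow PNG and return a (flow, valid_mask) pair. KITTI stores flow as a 3-channel uint16 PNG with u and v encoded as (value - 2**15) / 64 and a third channel marking valid pixels; a sketch under that assumption (not necessarily the repository's own reader):

import cv2
import numpy as np

def load_flow(path):
    # IMREAD_UNCHANGED keeps the 16-bit depth; OpenCV returns channels in BGR
    # order, so index 2 is u, index 1 is v, and index 0 is the valid mask.
    raw = cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32)
    flow = (raw[:, :, 2:0:-1] - 2 ** 15) / 64.0  # H x W x 2 (u, v)
    valid = raw[:, :, 0:1]                       # H x W x 1, 1 where GT exists
    return flow, valid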
Example No. 4
with tf.Session(config=config) as sess:
    sess.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])
    saver_to_restore.restore(sess, args.restore_path)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(args.log_dir, sess.graph)

    print('\n----------- start to train -----------\n')

    best_mAP = -np.Inf

    for epoch in range(args.total_epoches):

        sess.run(train_init_op)
        loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(
        ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
        rloss_total, rloss_x, rloss_y, rloss_conf = AverageMeter(
        ), AverageMeter(), AverageMeter(), AverageMeter()

        # print(rloss)
        for i in trange(args.train_batch_num):
            _, summary, __y_pred, __y_true, __loss, __region_loss, __labels, __global_step, __lr = sess.run(
                [
                    train_op, merged, y_pred, y_true, loss, poseloss, slabels,
                    global_step, learning_rate
                ],
                feed_dict={is_training: True})

            writer.add_summary(summary, global_step=__global_step)

            rloss_total.update(__region_loss[0])
Example No. 5
            os.path.join(args.prune_checkpoint_dir,
                         'kmeans_prune_restore_model_all.ckpt'))
    print("[INFO] model params restored")

    # print("[INFO] not finetine model params saved ")

    merged_fintune = tf.summary.merge_all()
    writer_fintune = tf.summary.FileWriter(args.log_dir, sess.graph)

    print('\n----------- start to finetune -----------\n')

    best_mAP_finetune = -np.Inf
    for epoch in range(prune_model._nb_finetune_epochs):

        sess.run(train_init_op)
        loss_total_finetune, loss_xy_finetune, loss_wh_finetune, loss_conf_finetune, loss_class_finetune = AverageMeter(
        ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

        for i in trange(args.train_batch_num):
            _, summary_finetune, __y_pred_finetune, __y_true_finetune, __loss_finetune, __global_step_finetune, __lr_finetune = sess.run(
                [
                    train_op_finetune, merged_fintune, y_pred_fintune, y_true,
                    loss_fintune, global_step_finetune, learning_rate_finetune
                ],
                feed_dict={is_training: True})
            writer_fintune.add_summary(summary_finetune,
                                       global_step=__global_step_finetune)

            loss_total_finetune.update(__loss_finetune[0],
                                       len(__y_true_finetune[0]))
            loss_xy_finetune.update(__loss_finetune[1],
                                    len(__y_true_finetune[0]))
Example No. 6
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()
        key_meter_names = ['Loss', 'l_ph', 'l_sm', 'flow_mean']
        key_meters = AverageMeter(i=len(key_meter_names), precision=4)

        # Zero out shared
        self.shared[:] = 0

        # Model Train
        self.model.train()

        # Iterate Train Loader
        early_stop = False
        for items in self.train_loader.enumerate():
            if early_stop: break

            # ** Signal **
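            # Every worker still in the loop contributes 1 to this all-reduce;
            # a worker that has already finished contributes 0 (see the block
            # after the loop), so a sum below cfg.mp.workers means someone is
            # done and this worker should stop its loader and exit early too.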
            if self.cfg.mp.enabled:
                signal = torch.tensor([1]).to(self.device)
                dist.all_reduce(signal)
                if signal.item() < self.cfg.mp.workers:
                    #self._log.info(self.id, "EXIT: " + str(signal.item()))
                    self.train_loader.stop()
                    early_stop = True
                    continue

            # Get data
            local_info, batch_length, batch_idx, frame_count, frame_length, iepoch = items
            if local_info['is_valid'].sum() == 0:
                loader_str = 'Corrupted Data! batch %d / %d, iter: %d, frame_count: %d / %d; Epoch: %d / %d' \
                % (batch_idx + 1, batch_length, self.i_iter, frame_count + 1, frame_length, self.i_epoch + 1, self.cfg.train.epoch_num)
                self._log.info(self.id, loader_str)
                continue

            # Reset Stage
            if frame_count == 0:
                self._log.info(self.id, "Reset Previous Output")
                self.prev_output = {"left": None, "right": None}

            # Create inputs
            local_info["d_candi"] = self.d_candi
            local_info["d_candi_up"] = self.d_candi_up
            if "stereo" in self.cfg["var"]:
                model_input_left, gt_input_left = batch_scheduler.generate_stereo_input(self.id, local_info, self.cfg, "left")
                model_input_right, gt_input_right = batch_scheduler.generate_stereo_input(self.id, local_info, self.cfg, "right")
            else:
                model_input_left, gt_input_left = batch_scheduler.generate_model_input(self.id, local_info, self.cfg, "left")
                model_input_right, gt_input_right = batch_scheduler.generate_model_input(self.id, local_info, self.cfg, "right")
            model_input_left["prev_output"] = self.prev_output["left"]
            model_input_right["prev_output"] = self.prev_output["right"]
            model_input_left["epoch"] = self.i_epoch; model_input_right["epoch"] = self.i_epoch

            # Model
            output_left, output_right = self.model([model_input_left, model_input_right])

            # Set Prev from last one
            output_left_intp = F.interpolate(output_left["output_refined"][-1].detach(), scale_factor=0.25, mode='nearest')
            output_right_intp = F.interpolate(output_right["output_refined"][-1].detach(), scale_factor=0.25, mode='nearest')
            self.prev_output = {"left": output_left_intp, "right": output_right_intp}

            # Loss Function
            loss = self.loss_func([output_left, output_right], [gt_input_left, gt_input_right])

            # Opt
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # String
            loader_str = 'Train batch %d / %d, iter: %d, frame_count: %d / %d; Epoch: %d / %d, loss = %.5f' \
            % (batch_idx + 1, batch_length, self.i_iter, frame_count + 1, frame_length, self.i_epoch + 1, self.cfg.train.epoch_num, loss)
            self._log.info(self.id, loader_str)
            self.i_iter += 1

        # ** Signal **
        if self.cfg.mp.enabled:
            #self._log.info(self.id, "AR: " + str(signal.item()))
            if signal.item() >= self.cfg.mp.workers:
                dist.all_reduce(torch.tensor([0]).to(self.device))
            #dist.barrier()

        self.i_epoch += 1
Example No. 7
    def _validate_with_gt(self):
        batch_time = AverageMeter()
        if self.cfg.mp.enabled:
            dist.barrier()

        # Zero out shared
        self.shared[:] = 0

        # Eval Mode
        self.model.eval()

        # Variables
        errors = []
        errors_refined = []
        errors_uncfield_rmse = []

        # Iterate Train Loader
        for items in self.val_loader.enumerate():

            # Get data
            local_info, batch_length, batch_idx, frame_count, frame_length, iepoch = items
            if local_info['is_valid'].sum() == 0:
                loader_str = 'Corrupted Data! Val batch %d / %d, frame_count: %d / %d' \
                % (batch_idx + 1, batch_length, frame_count + 1, frame_length)
                self._log.info(self.id, loader_str)
                continue

            # Reset Stage
            if frame_count == 0:
                self._log.info(self.id, "Reset Previous Output")
                self.prev_output = {"left": None, "right": None}

            # Create inputs
            local_info["d_candi"] = self.d_candi
            local_info["d_candi_up"] = self.d_candi_up
            if "stereo" in self.cfg["var"]:
                model_input_left, gt_input_left = batch_scheduler.generate_stereo_input(self.id, local_info, self.cfg, "left")
            else:
                model_input_left, gt_input_left = batch_scheduler.generate_model_input(self.id, local_info, self.cfg, "left")
            model_input_left["prev_output"] = self.prev_output["left"]
            model_input_left["epoch"] = self.i_epoch # Not sure if this will work during runtime/eval

            # Model
            start = time.time()
            output_left = self.model([model_input_left])[0]
            #output_right = self.model(model_input_right)
            print("Forward: " + str(time.time() - start))

            # Set Prev
            output_left_intp = F.interpolate(output_left["output_refined"][-1].detach(), scale_factor=0.25, mode='nearest')
            self.prev_output = {"left": output_left_intp, "right": None}

            # Visualization
            if self.cfg.var.viz:
                self.visualize(model_input_left, gt_input_left, output_left)

            # Eval
            for b in range(0, output_left["output"][-1].shape[0]):
                dpv_predicted = output_left["output"][-1][b, :, :, :].unsqueeze(0)
                dpv_refined_predicted = output_left["output_refined"][-1][b, :, :, :].unsqueeze(0)
                depth_predicted = img_utils.dpv_to_depthmap(dpv_predicted, self.d_candi, BV_log=True)
                depth_refined_predicted = img_utils.dpv_to_depthmap(dpv_refined_predicted, self.d_candi, BV_log=True)
                mask = gt_input_left["masks"][b, :, :, :]
                mask_refined = gt_input_left["masks_imgsizes"][b, :, :, :]
                depth_truth = gt_input_left["dmaps"][b, :, :].unsqueeze(0)
                depth_refined_truth = gt_input_left["dmap_imgsizes"][b, :, :].unsqueeze(0)
                dpv_refined_truth = gt_input_left["soft_labels_imgsize"][b].unsqueeze(0)
                d_candi = model_input_left["d_candi"]
                intr_refined = model_input_left["intrinsics_up"][b, :, :]

                # Unc Field
                unc_field_truth, unc_field_predicted, debugmap = img_utils.compute_unc_field(dpv_refined_predicted, dpv_refined_truth, d_candi, intr_refined, mask_refined, self.cfg)
                unc_field_rmse = img_utils.compute_unc_rmse(unc_field_truth, unc_field_predicted, d_candi)
                
                # Eval
                depth_truth_eval = depth_truth.clone()
                depth_truth_eval[depth_truth_eval >= self.d_candi[-1]] = self.d_candi[-1]
                depth_refined_truth_eval = depth_refined_truth.clone()
                depth_refined_truth_eval[depth_refined_truth_eval >= self.d_candi[-1]] = self.d_candi[-1]
                depth_predicted_eval = depth_predicted.clone()
                depth_predicted_eval = depth_predicted_eval * mask
                depth_refined_predicted_eval = depth_refined_predicted.clone()
                depth_refined_predicted_eval = depth_refined_predicted_eval * mask_refined
                errors.append(img_utils.depth_error(depth_predicted_eval.squeeze(0).cpu().numpy(), depth_truth_eval.squeeze(0).cpu().numpy()))
                errors_refined.append(img_utils.depth_error(depth_refined_predicted_eval.squeeze(0).cpu().numpy(), depth_refined_truth_eval.squeeze(0).cpu().numpy()))
                errors_uncfield_rmse.append(unc_field_rmse.item())

            # String
            loader_str = 'Val batch %d / %d, frame_count: %d / %d' \
            % (batch_idx + 1, batch_length, frame_count + 1, frame_length)
            self._log.info(self.id, loader_str)
            self.i_iter += 1

        # Evaluate Errors
        results = img_utils.eval_errors(errors)
        results_refined = img_utils.eval_errors(errors_refined)
        sil = results["scale invariant log"][0]
        sil_refined = results_refined["scale invariant log"][0]
        rmse = results["rmse"][0]
        rmse_refined = results_refined["rmse"][0]
        rmse_unc = np.mean(np.array(errors_uncfield_rmse))
        error_keys = ["rmse", "rmse_refined", "sil", "sil_refined", "rmse_unc"]
        error_list = [rmse, rmse_refined, sil, sil_refined, rmse_unc]

        # Copy to Shared
        for i, e in enumerate(error_list):
            self.shared[self.id, i] = e

        # Mean Error Compute Only First Process
        if self.cfg.mp.enabled:
            dist.barrier()
        error_list = torch.mean(self.shared, dim=0)
        if self.cfg.eval:
            print(error_list)

        # Save Model (Only First ID)
        self.save_model(rmse_refined, self.cfg.data.exp_name)

        # Log
        if self.id == 0:
            json_loc = str(self.save_root) + "/" + self.cfg.data.exp_name + '.json'

            # First Run
            if self.first_run:
                # Remove Results JSON if first epoch
                if self.i_epoch == 1:
                    if os.path.exists(json_loc):
                        os.remove(json_loc)
                # If not first epoch, then load past results
                else:
                    if os.path.isfile(json_loc):
                        with open(json_loc) as f:
                            self.foutput = json.load(f)

            # Save
            for value, name in zip(error_list, error_keys):
                self.foutput[name].append(value.item())
            with open(json_loc, 'w') as f:
                json.dump(self.foutput, f)

            # Tensorboard
            if self.summary_writer is not None:
                for value, name in zip(error_list, error_keys):
                    self.summary_writer.add_scalar(name, value, self.i_epoch)
                self.summary_writer.flush()

        # Set first_run flag
        self.first_run = False

        return error_list, error_keys
Example No. 8
    def __evaluate_in_val(self, __global_step, __lr):
        """
        Evaluate on the validation set.
        :param __global_step:
        :param __lr:
        :return:
        """
        print('\033[32m -----Begin evaluating in val data-----------\033[0m')
        self.sess.run(self.val_init_op)
        val_loss_5 = Loss5()
        val_preds = []
        for _ in trange(train_args.val_img_cnt):  # validate over the entire validation set
            __image_ids, __y_pred, __loss = self.sess.run(
                [self.image_ids, self.y_pred, self.loss],
                feed_dict={self.is_training: False})
            pred_content = get_preds_gpu(self.sess, self.gpu_nms_op,
                                         self.pred_boxes_flag,
                                         self.pred_scores_flag, __image_ids,
                                         __y_pred)

            val_preds.extend(pred_content)
            # update validation loss
            val_loss_5.update(__loss)

        print("\nfinally--loss-->", __loss)

        # compute mAP on the validation set
        rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(
        ), AverageMeter()
        gt_dict = parse_gt_rec(train_args.val_file, train_args.img_size,
                               train_args.letterbox_resize)

        print('\n\033[32m -----Begin calculate mAP-------\033[0m')
        info = 'epoch:{}, global_step:{}, lr:{:.6g} \n'.format(
            self.epoch, __global_step, __lr)
        for j in range(train_args.class_num):
            npos, nd, rec, prec, ap = voc_eval(
                gt_dict,
                val_preds,
                j,
                iou_thres=train_args.eval_threshold,
                use_07_metric=train_args.use_voc_07_metric)
            info += 'eval in each class:\nclass{}: recall:{:.4f}, precision:{:.4f}, AP:{:.4f}\n'.format(
                j, rec, prec, ap)
            rec_total.update(rec, npos)
            prec_total.update(prec, nd)
            ap_total.update(ap, 1)

        mAP = ap_total.average
        info += 'eval: recall:{:.4f}, precision:{:.4f}, mAP:{:.4f}, ' \
            .format(rec_total.average, prec_total.average, mAP)
        info += 'loss: total:{:.2f}, xy:{:.2f}, wh:{:.2f}, conf:{:.2f}, class:{:.2f}\n'\
            .format(
                val_loss_5.loss_total.average,
                val_loss_5.loss_xy.average,
                val_loss_5.loss_wh.average,
                val_loss_5.loss_conf.average,
                val_loss_5.loss_class.average
            )
        print(info)
        print('\033[32m -----Finish calculate mAP-------\033[0m')

        if mAP > self.best_mAP:
            self.best_mAP = mAP
            self.saver_best.save(
                self.sess,
                train_args.save_dir +
                'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'.
                format(self.epoch, int(__global_step), self.best_mAP,
                       val_loss_5.loss_total.average, __lr)  # todo
            )
        self.writer.add_summary(make_summary('evaluation/val_mAP', mAP),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary('evaluation/val_recall',
                                             rec_total.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary('evaluation/val_precision',
                                             prec_total.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary(
            'validation_statistics/total_loss', val_loss_5.loss_total.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary('validation_statistics/loss_xy',
                                             val_loss_5.loss_xy.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary('validation_statistics/loss_wh',
                                             val_loss_5.loss_wh.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary('validation_statistics/loss_conf',
                                             val_loss_5.loss_conf.average),
                                global_step=self.epoch)
        self.writer.add_summary(make_summary(
            'validation_statistics/loss_class', val_loss_5.loss_class.average),
                                global_step=self.epoch)
        print('\033[32m -----Finish evaluating in val data-----------\033[0m')
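
The make_summary helper used above wraps a plain Python scalar so it can be written to TensorBoard with writer.add_summary(). A plausible sketch (assumed; the real utils.misc_utils version may differ):

import tensorflow as tf

def make_summary(name, value):
    """Wrap a scalar value in a tf.Summary proto for FileWriter.add_summary()."""
    return tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=value)])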
Example No. 9
with tf.Session() as sess:
    sess.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])
    saver_to_restore.restore(sess, args.restore_path)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(args.log_dir, sess.graph)

    print('\n----------- start to train -----------\n')

    best_mAP = -np.Inf

    for epoch in range(args.total_epoches):

        sess.run(train_init_op)
        loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(
        ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

        for i in trange(args.train_batch_num):
            _, summary, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
                [
                    train_op, merged, y_pred, y_true, loss, global_step,
                    learning_rate
                ],
                feed_dict={is_training: True})

            writer.add_summary(summary, global_step=__global_step)

            loss_total.update(__loss[0], len(__y_pred[0]))
            loss_xy.update(__loss[1], len(__y_pred[0]))
            loss_wh.update(__loss[2], len(__y_pred[0]))
            loss_conf.update(__loss[3], len(__y_pred[0]))
Example No. 10
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()

        key_meter_names = [
            'Loss', 'Loss_depth', 'Loss_flow', 'lf_1', 'lf_2', 'lf_3', 'lf_4',
            'lf_5', 'in_r'
        ]
        key_meters = AverageMeter(i=len(key_meter_names), precision=4)

        self.model[0].train()
        if self.cfg.train_depth:
            self.model[1].train()
        else:
            self.model[1].eval()
        end = time.time()

        for i_step, data in enumerate(self.train_loader):
            if i_step > self.cfg.epoch_size:
                break
            # read data to device
            img1, img2 = data['img1'], data['img2']
            img1r, img2r = data['img1r'], data['img2r']

            img_pair = torch.cat([img1, img2], 1).to(self.device)
            # randomly pick a stereo image pair (either img1/img1r or img2/img2r)
            if np.random.rand(1) > 0.5:
                img_l = img1.to(self.device)  # left
                img_r = img1r.to(self.device)  # right
            else:
                img_l = img2.to(self.device)  # left
                img_r = img2r.to(self.device)  # right

            fl_bl = data['fl_bl'].to(self.device).type_as(img_pair)
            pyramid_K = list(
                map(lambda p: p.to(self.device).type_as(img_pair),
                    data['pyramid_K']))
            pyramid_K_inv = list(
                map(lambda p: p.to(self.device).type_as(img_pair),
                    data['pyramid_K_inv']))
            raw_W = data['im_shape'][1].to(self.device).type_as(img_pair)

            # measure data loading time
            am_data_time.update(time.time() - end)

            # compute output
            flows = self.model[0](img_pair,
                                  with_bk=True)  # n * [B, 4, h / 4, w / 4]

            t_in = torch.cat([img_l, img_r], 1)
            disparities = self.model[1](t_in)[:4]  # n * [B, 2, h , w]
            disps = [d[:, 0] for d in disparities]

            # compute loss
            if self.cfg.train_depth:
                l_depth = self.loss_func[0](disparities, [img_l, img_r])
            else:
                l_depth = torch.tensor(0).type_as(img_l)

            flow_res = self.loss_func[1](disps, fl_bl, pyramid_K,
                                         pyramid_K_inv, raw_W, flows[:4],
                                         img_pair)

            loss = l_depth + flow_res[0]

            # update meters
            key_meters.update([
                loss.item(),
                l_depth.item(), flow_res[0].item(), flow_res[1].item(),
                flow_res[2].item(), flow_res[3].item(), flow_res[4].item(),
                flow_res[5].item(), flow_res[6].item()
            ], img_pair.size(0))

            # compute gradient and do optimization step
            self.optimizer.zero_grad()
            # loss.backward()
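            # Scale the loss up before backward and divide the gradients by the
            # same factor below, so the effective update is unchanged; the
            # scaling is a common guard against gradient underflow.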

            scaled_loss = 1024. * loss
            scaled_loss.backward()

            for param in [
                    p for p in self.model[0].parameters() if p.requires_grad
            ]:
                param.grad.data.mul_(1. / 1024)

            for param in [
                    p for p in self.model[1].parameters() if p.requires_grad
            ]:
                param.grad.data.mul_(1. / 1024)

            self.optimizer.step()

            # measure elapsed time
            am_batch_time.update(time.time() - end)
            end = time.time()

            if self.i_iter % self.cfg.record_freq == 0:
                for v, name in zip(key_meters.val, key_meter_names):
                    self.summary_writer.add_scalar('Train_' + name, v,
                                                   self.i_iter)

            if self.i_iter % self.cfg.print_freq == 0:
                istr = '{}:{:04d}/{:04d}'.format(
                    self.i_epoch, i_step, self.cfg.epoch_size) + \
                       ' Time {} Data {}'.format(am_batch_time, am_data_time) + \
                       ' Loss {}'.format(key_meters)
                self._log.info(istr)

            self.i_iter += 1
        self.i_epoch += 1
Example No. 11
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()

        key_meter_names = ['Loss']
        key_meters = AverageMeter(i=len(key_meter_names), precision=4)

        self.model.train()
        end = time.time()

        for i_step, data in enumerate(self.train_loader):
            if i_step > self.cfg.epoch_size:
                break
            # read data to device
            img1, img2 = data['img1'], data['img2']
            img1r, img2r = data['img1r'], data['img2r']

            if np.random.rand(1) > 0.5:
                img_l = img1.to(self.device)  # left
                img_r = img1r.to(self.device)  # right
            else:
                img_l = img2.to(self.device)  # left
                img_r = img2r.to(self.device)  # right

            t_in = torch.cat([img_l, img_r], 1)
            # measure data loading time
            am_data_time.update(time.time() - end)

            # compute output
            disparities = self.model(t_in)[:4]
            loss = self.loss_func(disparities, [img_l, img_r])

            # update meters
            key_meters.update([loss.item()], img_l.size(0))

            # compute gradient and do optimization step
            self.optimizer.zero_grad()
            # loss.backward()

            scaled_loss = 1024. * loss
            scaled_loss.backward()

            for param in [
                    p for p in self.model.parameters() if p.requires_grad
            ]:
                param.grad.data.mul_(1. / 1024)

            self.optimizer.step()

            # measure elapsed time
            am_batch_time.update(time.time() - end)
            end = time.time()

            if self.i_iter % self.cfg.record_freq == 0:
                for v, name in zip(key_meters.val, key_meter_names):
                    self.summary_writer.add_scalar('Train_' + name, v,
                                                   self.i_iter)

            if self.i_iter % self.cfg.print_freq == 0:
                istr = '{}:{:04d}/{:04d}'.format(
                    self.i_epoch, i_step, self.cfg.epoch_size) + \
                       ' Time {} Data {}'.format(am_batch_time, am_data_time) + \
                       ' Loss/EPE {}'.format(key_meters)
                self._log.info(istr)

            self.i_iter += 1
        self.i_epoch += 1
Example No. 12
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()
        key_meter_names = ['G_RGB_L1', 'G_Flow_L1', 'GP', 'G_GAN', 'D_real', 'D_fake']
        key_meters = AverageMeter(i=len(key_meter_names), precision=4, names=key_meter_names)
        self.mode(train=True)
        
        t1 = time.time()  # time -> base
        for i_step, data in enumerate(self.train_loader):
            self.set_input(data)
            t2 = time.time()  # time -> load data
            self.optimize_parameters()
            t3 = time.time()  # time -> forward and backward

            am_data_time.update(t2 - t1)  # update meters
            am_batch_time.update(t3 - t2)
            key_meters.update([self.loss_G_RGB_L1.item(), self.loss_G_Flow_L1.item(), self.loss_gp.item(),
                               self.loss_G_GAN.item(), self.loss_D_real.item(), self.loss_D_fake.item()
                               ], n=1)
            # key_meters.update([self.loss_G_L1.item(), self.loss_G_L1.item(), self.loss_G_L1.item(), self.loss_G_L1.item()], n=1)

            if self.i_iter % self.cfg_train.print_frep == 0:  # write to tensorboard
                for name, v in zip(key_meter_names, key_meters.val):
                    self.summary_writer.add_scalar('Train_' + name, v, self.i_iter)
            t4 = time.time()
            if self.i_iter % self.cfg_train.record_freq == 0:
                log_str = '{:>3d}({}):{:>3d}/{:<3d}\t'.format(self.i_epoch + 1, self.base_epoch, i_step, len(self.train_loader)) + \
                       'Time {} Data {}\t'.format(am_batch_time, am_data_time) + \
                       'Losses {}'.format(key_meters)
                self.logger.info(log_str)
            t1 = time.time()
            self.i_iter += 1
        self.i_epoch += 1
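
Example No. 12 logs a 'GP' term taken from self.loss_gp inside optimize_parameters(). Assuming that term is the usual WGAN-GP gradient penalty, the quantity is typically computed like the sketch below (placeholder names, not the repository's actual API):

import torch

def gradient_penalty(discriminator, real, fake, lambda_gp=10.0):
    """Penalize the discriminator's gradient norm on interpolated samples."""
    alpha = torch.rand(real.size(0), 1, 1, 1, device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interp = discriminator(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()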
Example No. 13
    def _validate_with_gt2(self):
        import cv2
        import torch.nn.functional as F
        from utils.warp_utils import flow_warp
        from utils.misc_utils import plot_imgs

        batch_time = AverageMeter()

        error_names = ['EPE', 'E_noc', 'E_occ', 'F1_all']
        error_meters = AverageMeter(i=len(error_names))

        self.model.eval()
        self.model = self.model.float()
        end = time.time()
        for i_step, data in enumerate(self.valid_loader):
            img1, img2 = data['img1'], data['img2']
            img_pair = torch.cat([img1, img2], 1).to(self.device)

            # compute output
            flow = self.model(img_pair, with_bk=True)[0]
            _, _, h, w = flow.size()

            im1_origin = img_pair[:, :3]
            _, occu_mask1 = flow_warp(im1_origin, flow[:, :2], flow[:, 2:])

            res = list(map(load_flow, data['flow_occ']))
            gt_flows, occ_masks = [r[0] for r in res], [r[1] for r in res]
            res = list(map(load_flow, data['flow_noc']))
            _, noc_masks = [r[0] for r in res], [r[1] for r in res]

            gt_flows = [np.concatenate([flow, occ_mask, noc_mask], axis=2) for
                        flow, occ_mask, noc_mask in zip(gt_flows, occ_masks, noc_masks)]
            pred_flows = flow[:, :2].detach().cpu().numpy().transpose([0, 2, 3, 1])
            es = evaluate_kitti_flow(gt_flows, pred_flows)
            error_meters.update([l.item() for l in es], img_pair.size(0))

            plot_list = []
            occu_mask1 = (occu_mask1 < 0.2).detach().cpu().numpy()[0, 0] * 255
            plot_list.append({'im': occu_mask1, 'title': 'occu mask 1'})

            gt_occu_mask1 = (noc_masks[0] - occ_masks[0])[:, :, 0].astype(
                np.float32) * 255
            plot_list.append({'im': gt_occu_mask1, 'title': 'gt occu mask 1'})
            plot_imgs(plot_list,
                      save_path='./tmp/occu_soft_hard/occu_hard_{:03d}.jpg'.format(
                          i_step))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_step % self.cfg.print_freq == 0:
                self._log.info('Test: [{0}/{1}]\t Time {2}\t '.format(
                    i_step, self.cfg.valid_size, batch_time) + ' '.join(
                    map('{:.2f}'.format, error_meters.avg)))

            if i_step > self.cfg.valid_size:
                break

        # write error to tf board.
        for value, name in zip(error_meters.avg, error_names):
            self.summary_writer.add_scalar('Valid_' + name, value, self.i_epoch)

        # To save disk space during debugging, only save the model
        # once more than cfg.save_iter iterations have run.
        if self.i_iter > self.cfg.save_iter:
            self.save_model(error_meters.avg[0], 'KITTI_flow')

        return error_meters.avg, error_names
Example No. 14
##################
yolo_model = yolov3(args.class_num, args.anchors)
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image, is_training=is_training)
loss = yolo_model.compute_loss(pred_feature_maps, y_true)
y_pred = yolo_model.predict(pred_feature_maps)

saver_to_restore = tf.train.Saver()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer()])
    saver_to_restore.restore(sess, args.restore_path)

    print('\n----------- start to eval -----------\n')

    val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    val_preds = []

    for j in trange(args.img_cnt):
        __image_ids, __y_pred, __loss = sess.run(
            [image_ids, y_pred, loss], feed_dict={is_training: False})
        pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag,
                                     pred_scores_flag, __image_ids, __y_pred)

        val_preds.extend(pred_content)
        val_loss_total.update(__loss[0])
        val_loss_xy.update(__loss[1])
        val_loss_wh.update(__loss[2])
        val_loss_conf.update(__loss[3])
        val_loss_class.update(__loss[4])
Example No. 15
    def _validate_with_gt(self):
        batch_time = AverageMeter()

        error_names = [
            'rmse', 'rmse_log', 'abs_rel', 'sq_rel', 'a1', 'a2', 'a3', 'EPE',
            'E_noc', 'E_occ', 'F1_all'
        ]
        error_meters = AverageMeter(i=len(error_names))

        [m.eval() for m in self.model]
        end = time.time()
        for i_step, data in enumerate(self.valid_loader):
            img1, img2, img_r = data['img1'], data['img2'], data['img1r']
            img_pair = torch.cat([img1, img2], 1).to(self.device)

            fl_bl = data['fl_bl'].to(self.device).type_as(img_pair)
            pyramid_K = list(
                map(lambda p: p.to(self.device).type_as(img_pair),
                    data['pyramid_K']))
            pyramid_K_inv = list(
                map(lambda p: p.to(self.device).type_as(img_pair),
                    data['pyramid_K_inv']))
            raw_W = data['im_shape'][1].to(self.device).type_as(img_pair)

            # compute output
            flows = self.model[0](img_pair, with_bk=True)
            disparities = self.model[1](torch.cat([img1, img_r],
                                                  1).to(self.device))

            disps = [d[:, 0] for d in disparities[:4]]
            self.loss_func[1](disps, fl_bl, pyramid_K, pyramid_K_inv, raw_W,
                              flows[:4], img_pair)

            disp_lr = disparities[0].detach().cpu().numpy()
            disp = disp_lr[:, 0, :, :]  # only the largest left disp is used

            gt_disp_occ = list(map(load_disp, data['disp_occ']))
            fl_bl_np = [f.detach().cpu().numpy() for f in fl_bl]
            gt_depth_occ = list(
                map(
                    lambda p, q: convert_disp_to_depth(
                        p, normed=False, fl_bl=q), gt_disp_occ, fl_bl_np))
            im_size = list(map(lambda p: p.shape[:2], gt_disp_occ))
            pred_depth = list(
                map(lambda p, q, r: convert_disp_to_depth(p, None, q, fl_bl=r),
                    disp, im_size, fl_bl_np))

            err_depth = compute_depth_errors(gt_depth_occ, pred_depth)

            flow = flows[0][:, :2].detach().cpu().numpy().transpose(
                [0, 2, 3, 1])
            res = list(map(load_flow, data['flow_occ']))
            gt_flows, occ_masks = [r[0] for r in res], [r[1] for r in res]
            res = list(map(load_flow, data['flow_noc']))
            _, noc_masks = [r[0] for r in res], [r[1] for r in res]

            gt_flows = [
                np.concatenate([f, o, no], axis=2)
                for f, o, no in zip(gt_flows, occ_masks, noc_masks)
            ]
            err_flow = evaluate_kitti_flow(gt_flows, flow)

            error_meters.update(err_depth + err_flow, img_pair.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_step % self.cfg.print_freq == 0:
                self._log.info(
                    'Test: [{0}/{1}]\t Time {2}\t '.format(
                        i_step, self.cfg.valid_size, batch_time) +
                    ' '.join(map('{:.2f}'.format, error_meters.avg)))

            if i_step > self.cfg.valid_size:
                break

        for value, name in zip(error_meters.avg, error_names):
            self.summary_writer.add_scalar('Valid_' + name, value,
                                           self.i_epoch)

        self.save_model([error_meters.avg[0], error_meters.avg[7]],
                        ['KITTI_rigid_depth', 'KITTI_rigid_flow'])

        return error_meters.avg, error_names
Example No. 16
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()

        key_meter_names = [
            'Loss', 'l_ph', 'l_sm', 'flow_mean', 'l_atst', 'l_ot'
        ]
        key_meters = AverageMeter(i=len(key_meter_names), precision=4)

        self.model.train()
        end = time.time()

        if 'stage1' in self.cfg:
            if self.i_epoch == self.cfg.stage1.epoch:
                self.loss_func.cfg.update(self.cfg.stage1.loss)

        for i_step, data in enumerate(self.train_loader):
            if i_step > self.cfg.epoch_size:
                break
            # read data to device
            img1, img2 = data['img1'].to(self.device), data['img2'].to(
                self.device)
            img_pair = torch.cat([img1, img2], 1)

            # measure data loading time
            am_data_time.update(time.time() - end)

            # run 1st pass
            res_dict = self.model(img_pair, with_bk=True)
            flows_12, flows_21 = res_dict['flows_fw'], res_dict['flows_bw']
            flows = [
                torch.cat([flo12, flo21], 1)
                for flo12, flo21 in zip(flows_12, flows_21)
            ]
            loss, l_ph, l_sm, flow_mean = self.loss_func(flows, img_pair)

            flow_ori = res_dict['flows_fw'][0].detach()

            if self.cfg.run_atst:
                img1, img2 = data['img1_ph'].to(
                    self.device), data['img2_ph'].to(self.device)

                # construct augment sample
                noc_ori = self.loss_func.pyramid_occu_mask1[
                    0]  # non-occluded region
                s = {
                    'imgs': [img1, img2],
                    'flows_f': [flow_ori],
                    'masks_f': [noc_ori]
                }
                st_res = self.sp_transform(
                    deepcopy(s)) if self.cfg.run_st else deepcopy(s)
                flow_t, noc_t = st_res['flows_f'][0], st_res['masks_f'][0]

                # run 2nd pass
                img_pair = torch.cat(st_res['imgs'], 1)
                flow_t_pred = self.model(img_pair,
                                         with_bk=False)['flows_fw'][0]

                if not self.cfg.mask_st:
                    noc_t = torch.ones_like(noc_t)
                l_atst = ((flow_t_pred - flow_t).abs() +
                          self.cfg.ar_eps)**self.cfg.ar_q
                l_atst = (l_atst * noc_t).mean() / (noc_t.mean() + 1e-7)

                loss += self.cfg.w_ar * l_atst
            else:
                l_atst = torch.zeros_like(loss)

            if self.cfg.run_ot:
                img1, img2 = data['img1_ph'].to(
                    self.device), data['img2_ph'].to(self.device)
                # run 3rd pass
                img_pair = torch.cat([img1, img2], 1)

                # random crop images
                img_pair, flow_t, occ_t = random_crop(img_pair, flow_ori,
                                                      1 - noc_ori,
                                                      self.cfg.ot_size)

                # SLIC with 200 segments; randomly select 8~16 of them
                if self.cfg.ot_slic:
                    img2 = img_pair[:, 3:]
                    seg_mask = run_slic_pt(img2,
                                           n_seg=200,
                                           compact=self.cfg.ot_compact,
                                           rd_select=[8, 16],
                                           fast=self.cfg.ot_fast).type_as(
                                               img2)  # Nx1xHxW
                    noise = torch.rand(img2.size()).type_as(img2)
                    img2 = img2 * (1 - seg_mask) + noise * seg_mask
                    img_pair[:, 3:] = img2

                flow_t_pred = self.model(img_pair,
                                         with_bk=False)['flows_fw'][0]
                noc_t = 1 - occ_t
                l_ot = ((flow_t_pred - flow_t).abs() +
                        self.cfg.ar_eps)**self.cfg.ar_q
                l_ot = (l_ot * noc_t).mean() / (noc_t.mean() + 1e-7)

                loss += self.cfg.w_ar * l_ot
            else:
                l_ot = torch.zeros_like(loss)

            # update meters
            key_meters.update([
                loss.item(),
                l_ph.item(),
                l_sm.item(),
                flow_mean.item(),
                l_atst.item(),
                l_ot.item()
            ], img_pair.size(0))

            # compute gradient and do optimization step
            self.optimizer.zero_grad()
            # loss.backward()

            scaled_loss = 1024. * loss
            scaled_loss.backward()

            for param in [
                    p for p in self.model.parameters() if p.requires_grad
            ]:
                param.grad.data.mul_(1. / 1024)

            self.optimizer.step()

            # measure elapsed time
            am_batch_time.update(time.time() - end)
            end = time.time()

            if self.i_iter % self.cfg.record_freq == 0:
                for v, name in zip(key_meters.val, key_meter_names):
                    self.summary_writer.add_scalar('Train_' + name, v,
                                                   self.i_iter)

            if self.i_iter % self.cfg.print_freq == 0:
                istr = '{}:{:04d}/{:04d}'.format(
                    self.i_epoch, i_step, self.cfg.epoch_size) + \
                       ' Time {} Data {}'.format(am_batch_time, am_data_time) + \
                       ' Info {}'.format(key_meters)
                self._log.info(istr)

            self.i_iter += 1
        self.i_epoch += 1
Example No. 17
def train():
    # dataset iterators
    train_init_op, val_init_op, image_ids, image, y_true = create_iterator()

    # is-training flag and prediction placeholders
    is_training = tf.placeholder(tf.bool, name="phase_train")
    pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
    pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])

    # GPU NMS op
    gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag,
                         train_args.class_num, train_args.nms_topk,
                         train_args.score_threshold, train_args.nms_threshold)

    # build the model
    yolo_model = yolov3(train_args.class_num,
                        train_args.anchors,
                        train_args.use_label_smooth,
                        train_args.use_focal_loss,
                        train_args.batch_norm_decay,
                        train_args.weight_decay,
                        use_static_shape=False)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(image, is_training=is_training)
    # predictions
    y_pred = yolo_model.predict(pred_feature_maps)
    # loss
    loss = yolo_model.compute_loss(pred_feature_maps, y_true)
    l2_loss = tf.losses.get_regularization_loss()

    tf.summary.scalar('train_batch_statistics/total_loss', loss[0])
    tf.summary.scalar('train_batch_statistics/loss_xy', loss[1])
    tf.summary.scalar('train_batch_statistics/loss_wh', loss[2])
    tf.summary.scalar('train_batch_statistics/loss_conf', loss[3])
    tf.summary.scalar('train_batch_statistics/loss_class', loss[4])
    tf.summary.scalar('train_batch_statistics/loss_l2', l2_loss)
    tf.summary.scalar('train_batch_statistics/loss_ratio', l2_loss / loss[0])

    # restore all variables except Conv_6, Conv_14 and Conv_22 under yolov3/yolov3_head
    saver_to_restore = tf.train.Saver(
        var_list=tf.contrib.framework.get_variables_to_restore(
            include=train_args.restore_include,
            exclude=train_args.restore_exclude))
    # variables to update
    update_vars = tf.contrib.framework.get_variables_to_restore(
        include=train_args.update_part)
    global_step = tf.Variable(float(train_args.global_step),
                              trainable=False,
                              collections=[tf.GraphKeys.LOCAL_VARIABLES])

    # learning rate
    learning_rate = get_learning_rate(global_step)
    tf.summary.scalar('learning_rate', learning_rate)

    # whether to also save the optimizer parameters
    if not train_args.save_optimizer:
        saver_to_save = tf.train.Saver()
        saver_best = tf.train.Saver()

    # optimizer / training op
    train_op = build_optimizer(learning_rate, loss, l2_loss, update_vars,
                               global_step)

    if train_args.save_optimizer:
        saver_to_save = tf.train.Saver()
        saver_best = tf.train.Saver()

    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        print('\033[32m----------- Begin restore weights  -----------')
        saver_to_restore.restore(sess, train_args.restore_path)
        print('\033[32m----------- Finish restore weights  -----------')
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(train_args.log_dir, sess.graph)

        print('\n\033[32m----------- start to train -----------\n')
        best_mAP = -np.Inf

        for epoch in range(train_args.total_epoches):  # epoch
            print('\033[32m---------epoch:{}---------'.format(epoch))
            sess.run(train_init_op)  # initialize the training dataset
            # initialize the five loss meters
            loss_total, loss_xy, loss_wh, loss_conf, loss_class\
                = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

            for _ in trange(train_args.train_batch_num):  # batch
                # train op, image ids, summary, predictions, ground truth, loss, l2 loss, global step, learning rate
                _, __image_ids, summary, __y_pred, __y_true, __loss, __l2_loss, __global_step, __lr = sess.run(
                    [
                        train_op, image_ids, merged, y_pred, y_true, loss,
                        l2_loss, global_step, learning_rate
                    ],
                    feed_dict={is_training: True})
                print(__l2_loss)
                writer.add_summary(summary, global_step=__global_step)

                # update the loss meters
                loss_total.update(__loss[0], len(__y_pred[0]))
                loss_xy.update(__loss[1], len(__y_pred[0]))
                loss_wh.update(__loss[2], len(__y_pred[0]))
                loss_conf.update(__loss[3], len(__y_pred[0]))
                loss_class.update(__loss[4], len(__y_pred[0]))

                # evaluation
                if __global_step % train_args.train_evaluation_step == 0 and __global_step > 0:
                    # recall, precision
                    recall, precision = evaluate_on_gpu(
                        sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag,
                        __y_pred, __y_true, train_args.class_num,
                        train_args.nms_threshold)

                    info = "epoch:{},global_step:{} | loss_total:{:.2f}, "\
                        .format(epoch, int(__global_step), loss_total.average)
                    info += "xy:{:.2f},wh:{:.2f},conf:{:.2f},class:{:.2f} | "\
                        .format(loss_xy.average, loss_wh.average, loss_conf.average, loss_class.average)
                    info += 'last batch:rec:{:.3f},prec:{:.3f} | lr:{:.5g}'\
                        .format(recall, precision, __lr)
                    print(info)

                    writer.add_summary(make_summary(
                        'evaluation/train_batch_recall', recall),
                                       global_step=__global_step)
                    writer.add_summary(make_summary(
                        'evaluation/train_batch_precision', precision),
                                       global_step=__global_step)

                    if np.isnan(loss_total.average):
                        raise ArithmeticError('Gradient explosion: adjust the hyperparameters and retrain')

            # save the model
            if epoch % train_args.save_epoch == 0 and epoch > 0:
                if loss_total.average <= 2.:
                    print(
                        '\033[32m ----------- Begin store weights -----------')
                    print('\033[32m loss_total.average: {}'.format(
                        loss_total.average))
                    saver_to_save.save(
                        sess, train_args.save_dir +
                        'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format(
                            epoch, int(__global_step), loss_total.average,
                            __lr))
                    print(
                        '\033[32m ----------- Finish store weights  -----------'
                    )

            # evaluation on the validation set
            if epoch % train_args.val_evaluation_epoch == 0 and epoch >= train_args.warm_up_epoch:  # must be past the warm-up epochs
                sess.run(val_init_op)

                val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
                    AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

                val_preds = []
                print(
                    '\033[32m -----Begin computing each pred in one epoch of val data-----------'
                )
                for i in trange(train_args.val_img_cnt):  # validate over the entire validation set
                    __image_ids, __y_pred, __loss = sess.run(
                        [image_ids, y_pred, loss],
                        feed_dict={is_training: False})
                    pred_content = get_preds_gpu(sess, gpu_nms_op,
                                                 pred_boxes_flag,
                                                 pred_scores_flag, __image_ids,
                                                 __y_pred)

                    val_preds.extend(pred_content)
                    # update validation loss
                    val_loss_total.update(__loss[0])
                    val_loss_xy.update(__loss[1])
                    val_loss_wh.update(__loss[2])
                    val_loss_conf.update(__loss[3])
                    val_loss_class.update(__loss[4])
                    if i % 300 == 0:
                        print(i, "--loss-->", __loss)
                print(
                    '\033[32m -----Finish computing each pred in one epoch of val data-----------'
                )
                # compute mAP on the validation set
                rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(
                ), AverageMeter()
                gt_dict = parse_gt_rec(train_args.val_file,
                                       train_args.img_size,
                                       train_args.letterbox_resize)

                print('\033[32m -----Begin calculate mAP-------\033[0m')
                info = 'Epoch: {}, global_step: {}, lr: {:.6g} \n'.format(
                    epoch, __global_step, __lr)  # todo
                for j in range(train_args.class_num):
                    npos, nd, rec, prec, ap = voc_eval(
                        gt_dict,
                        val_preds,
                        j,
                        iou_thres=train_args.eval_threshold,
                        use_07_metric=train_args.use_voc_07_metric)
                    info += 'eval: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(
                        j, rec, prec, ap)
                    rec_total.update(rec, npos)
                    prec_total.update(prec, nd)
                    ap_total.update(ap, 1)

                mAP = ap_total.average
                info += 'eval: Recall: {:.4f}, Precision: {:.4f}, mAP: {:.4f}\n'\
                    .format(rec_total.average, prec_total.average, mAP)
                info += 'eval: loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f}\n'\
                    .format(val_loss_total.average, val_loss_xy.average,
                            val_loss_wh.average, val_loss_conf.average, val_loss_class.average)
                print(info)
                logging.info(info)
                print('\033[32m -----Finish calculate mAP-------\033[0m')

                if mAP > best_mAP:
                    best_mAP = mAP
                    saver_best.save(
                        sess,
                        train_args.save_dir +
                        'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'
                        .format(epoch, int(__global_step), best_mAP,
                                val_loss_total.average, __lr)  # todo
                    )
                writer.add_summary(make_summary('evaluation/val_mAP', mAP),
                                   global_step=epoch)
                writer.add_summary(make_summary('evaluation/val_recall',
                                                rec_total.average),
                                   global_step=epoch)
                writer.add_summary(make_summary('evaluation/val_precision',
                                                prec_total.average),
                                   global_step=epoch)
                writer.add_summary(make_summary(
                    'validation_statistics/total_loss',
                    val_loss_total.average),
                                   global_step=epoch)
                writer.add_summary(make_summary(
                    'validation_statistics/loss_xy', val_loss_xy.average),
                                   global_step=epoch)
                writer.add_summary(make_summary(
                    'validation_statistics/loss_wh', val_loss_wh.average),
                                   global_step=epoch)
                writer.add_summary(make_summary(
                    'validation_statistics/loss_conf', val_loss_conf.average),
                                   global_step=epoch)
                writer.add_summary(make_summary(
                    'validation_statistics/loss_class',
                    val_loss_class.average),
                                   global_step=epoch)
Example No. 18
    def _run_one_epoch(self):
        am_batch_time = AverageMeter()
        am_data_time = AverageMeter()

        key_meter_names = ['Loss', 'l_ph', 'l_sm', 'flow_mean']
        key_meters = AverageMeter(i=len(key_meter_names), precision=4)

        self.model.train()
        end = time.time()

        if 'stage1' in self.cfg:
            if self.i_epoch == self.cfg.stage1.epoch:
                self.loss_func.cfg.update(self.cfg.stage1.loss)

        for i_step, data in enumerate(self.train_loader):
            if i_step > self.cfg.epoch_size:
                break
            # read data to device
            img1, img2 = data['img1'], data['img2']
            img_pair = torch.cat([img1, img2], 1).to(self.device)

            # measure data loading time
            am_data_time.update(time.time() - end)

            # compute output
            res_dict = self.model(img_pair, with_bk=True)
            flows_12, flows_21 = res_dict['flows_fw'], res_dict['flows_bw']
            flows = [
                torch.cat([flo12, flo21], 1)
                for flo12, flo21 in zip(flows_12, flows_21)
            ]
            loss, l_ph, l_sm, flow_mean = self.loss_func(flows, img_pair)

            # update meters
            key_meters.update(
                [loss.item(),
                 l_ph.item(),
                 l_sm.item(),
                 flow_mean.item()], img_pair.size(0))

            # compute gradient and do optimization step
            self.optimizer.zero_grad()
            # loss.backward()

            scaled_loss = 1024. * loss
            scaled_loss.backward()

            for param in [
                    p for p in self.model.parameters() if p.requires_grad
            ]:
                param.grad.data.mul_(1. / 1024)

            self.optimizer.step()

            # measure elapsed time
            am_batch_time.update(time.time() - end)
            end = time.time()

            if self.i_iter % self.cfg.record_freq == 0:
                for v, name in zip(key_meters.val, key_meter_names):
                    self.summary_writer.add_scalar('Train_' + name, v,
                                                   self.i_iter)

            if self.i_iter % self.cfg.print_freq == 0:
                istr = '{}:{:04d}/{:04d}'.format(
                    self.i_epoch, i_step, self.cfg.epoch_size) + \
                       ' Time {} Data {}'.format(am_batch_time, am_data_time) + \
                       ' Info {}'.format(key_meters)
                self._log.info(istr)

            self.i_iter += 1
        self.i_epoch += 1
Example No. 19
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 16 14:35:25 2019

@author: yuan
"""
import tensorflow as tf
from utils.misc_utils import shuffle_and_overwrite, make_summary, config_learning_rate, config_optimizer, AverageMeter
from utils.eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
import args

# calc mAP
rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
gt_dict = parse_gt_rec(args.val_file, args.img_size, args.letterbox_resize)
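
Example No. 19 stops right after the meters and the ground-truth dict are built. The per-class evaluation loop that typically follows mirrors Examples No. 8 and No. 17; the sketch below assumes args exposes class_num, eval_threshold and use_voc_07_metric (as train_args does in Example No. 17) and that val_preds has already been filled with detections:

for j in range(args.class_num):
    npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, j,
                                       iou_thres=args.eval_threshold,
                                       use_07_metric=args.use_voc_07_metric)
    rec_total.update(rec, npos)
    prec_total.update(prec, nd)
    ap_total.update(ap, 1)

mAP = ap_total.average
print('mAP: {:.4f}'.format(mAP))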