Code example #1
def test_single():
    with open('../test/depth_gt.npy', 'rb') as f:
        depth_gt = np.load(f)
    with open('../test/depth_res.npy', 'rb') as f:
        depth_res = np.load(f)
    vis = False
    params = sun3d.set_params()
    if not np.all(depth_gt.shape == depth_res.shape):
        depth_gt = cv2.resize(depth_gt,
                              (depth_res.shape[1], depth_res.shape[0]),
                              interpolation=cv2.INTER_NEAREST)

    sample_rate = [0.01, 0.05, 0.1, 0.2, 0.4, 0.8]
    acc = np.zeros(len(sample_rate), dtype=np.float32)
    uts.plot_images({'image': depth_gt})

    acc_o = eval_depth([depth_res], [depth_gt])

    for i, rate in enumerate(sample_rate):
        depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                                 method='uniform',
                                                 percent=rate,
                                                 K=params['intrinsic'])
        depth = uts_3d.xyz2depth(depth_gt_down, params['intrinsic'],
                                 depth_gt.shape)
        depth_up = upsampler.LaplacianDeform(depth_res, depth_gt_down,
                                             params['intrinsic'], False)

        acc[i] = eval_depth([depth_up], [depth_gt])

    if vis:
        plot_figure(np.append(0, sample_rate), np.append(acc_o, acc),
                    'depth_acc', 'sample rate', 'relative l1 error')
    else:
        print "rates: {}, thresholds {}".format(sample_rate, acc)
Code example #2
    def show_pose(self, in_case=None):
        """ show an image pose by render point to image
        """
        self._data_config = self.dataset.get_self_local_config(
            in_case.Road_id, in_case.split)
        cloud_name = '%s/%s/pc_sub.pcd' % (self._data_config['cloud_dir'],
                                           in_case.time_id)
        proj = pj.pyRenderPCD(cloud_name, self.shader['vertex'],
                              self.shader['geometry'], self.shader['fragment'],
                              self.image_size[0], self.image_size[1],
                              in_case.with_label)

        intr = self._to_proj_intr(
            self._data_config['intrinsic'][in_case.camera_name],
            self.image_size[0], self.image_size[1])
        ext = self._to_proj_mat(in_case.pose[:3], in_case.pose[3:])
        label, depth = proj.pyRenderToRGBDepth(intr, ext)

        image_path = '%s/%s/%s/Camera %s/%s.jpg' % (\
                self._data_config['image_dir'],
                in_case.time_id, in_case.record_id,
                in_case.camera_name[-1], in_case.image_name)

        image = cv2.imread(image_path)
        assert image is not None
        image = cv2.resize(image, (self.image_size[1], self.image_size[0]))

        uts.plot_images(
            {
                'image': np.uint8(image),
                'depth': depth,
                'mask': label
            },
            layout=[1, 3])
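self._to_proj_intr is defined elsewhere in the class; assuming the stored intrinsics are normalized [fx, fy, cx, cy] (as the intrinsic arrays used elsewhere in this listing suggest), a plausible sketch that scales them to a pixel-unit 3x3 matrix is:

import numpy as np

def to_proj_intr_sketch(intrinsic, height, width):
    # Normalized [fx, fy, cx, cy] -> 3x3 pixel-unit projection matrix (assumed layout).
    fx, fy, cx, cy = intrinsic
    return np.array([[fx * width, 0.0,         cx * width],
                     [0.0,        fy * height, cy * height],
                     [0.0,        0.0,         1.0]], dtype=np.float32)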
Code example #3
def gen_img_pair_data(scene, pair_num, id_img2depth):
    # for each scene, for each image, gen pair of images
    K = np.loadtxt(DATA_PATH + scene + '/intrinsics.txt')
    extrinsic_file = pd_util.preprocess_util.list_files(DATA_PATH + scene +
                                                        '/extrinsics/')
    extrinsic_file.sort()
    extrinsic = np.reshape(np.loadtxt(extrinsic_file[-1]), (-1, 3, 4))
    # keep the original
    id_img2depth = OrderedDict(sorted(id_img2depth.items(),
                                      key=lambda t: t[0]))
    image_names = id_img2depth.keys()

    for i in range(0, len(image_names) - 30, 10):
        pair_id = np.random.choice(range(10, 30), 10, replace=False)
        for j in pair_id:
            image_path1 = DATA_PATH + scene + '/image/' + image_names[
                i] + '.jpg'
            image_path2 = DATA_PATH + scene + '/image/' + image_names[
                i + j] + '.jpg'
            depth_path1 = DATA_PATH + scene + '/depth/' + id_img2depth[
                image_names[i]] + '.png'
            depth_path2 = DATA_PATH + scene + '/depth/' + id_img2depth[
                image_names[i + j]] + '.png'

            # try:
            image1 = np.array(uts.load_image(image_path1))
            image2 = np.array(uts.load_image(image_path2))
            depth1 = uts.read_depth(depth_path1)
            depth2 = uts.read_depth(depth_path2)
            # except:
            #     continue

            print "image1 name: {}, image2 name: {} \
                   depth1 name: {}, depth2 name: {}".format(
                image_names[i], image_names[i + j],
                id_img2depth[image_names[i]], id_img2depth[image_names[i + j]])

            flow, is_valid = get_opt_flow(depth1, depth2, K,
                                          extrinsic[i, :, :],
                                          extrinsic[i + j, :, :], True, image1,
                                          image2)
            is_valid = False  # debugging override left in the source: the save branch below never runs
            uts.plot_images(
                OrderedDict([('image1', image1), ('image2', image2),
                             ('flowu', flow[:, :, 0]), ('flowv', flow[:, :,
                                                                      1])]))

            # print is_valid
            if is_valid:
                flow_file = FLOW_PATH + scene + '/flow/' + \
                            image_names[i] + '_' + image_names[i + j] + '.pkl'
                print 'saving ' + flow_file
                with open(flow_file, 'wb') as f:
                    pkl.dump(flow, f, -1)
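get_opt_flow is not shown; conceptually it back-projects depth1 to 3D, applies the relative camera motion between the two extrinsics, reprojects into image 2, and takes the pixel displacement. A dense NumPy sketch under those assumptions (R, t are the relative rotation and translation; zero/invalid depths are not masked here):

import numpy as np

def flow_from_depth_sketch(depth1, K, R, t):
    # Rigid flow from depth and relative pose; a sketch of the idea, not the original.
    h, w = depth1.shape
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) / fx * depth1          # back-project to camera-1 coordinates
    y = (v - cy) / fy * depth1
    pts = np.stack([x, y, depth1], axis=-1) @ R.T + t
    u2 = fx * pts[..., 0] / pts[..., 2] + cx   # reproject into camera 2
    v2 = fy * pts[..., 1] / pts[..., 2] + cy
    return np.stack([u2 - u, v2 - v], axis=-1)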
Code example #4
    def showAnn(self, image_name):
        """Show the annotation of a pose file in an image
        Input:
            image_name: the name of image
        Output:
            depth: a rendered depth map of each car
            masks: an instance mask of the label
            image_vis: an image show the overlap of car model and image
        """

        car_pose_file = '%s/%s.json' % (self._data_config['pose_dir'],
                                        image_name)
        with open(car_pose_file) as f:
            car_poses = json.load(f)
        image_file = '%s/%s.jpg' % (self._data_config['image_dir'], image_name)
        image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)[:, :, ::-1]

        # the intrinsics are all from Camera 5
        intrinsic = self.dataset.get_intrinsic(image_name, 'Camera_5')
        image, self.intrinsic = self.rescale(image, intrinsic)

        self.depth = self.MAX_DEPTH * np.ones(self.image_size)
        self.mask = np.zeros(self.depth.shape)

        for i, car_pose in enumerate(car_poses):
            car_name = car_models.car_id2name[car_pose['car_id']].name
            depth, mask = self.render_car(car_pose['pose'], car_name)
            self.mask, self.depth = self.merge_inst(depth, i + 1, self.mask,
                                                    self.depth)

        self.depth[self.depth == self.MAX_DEPTH] = -1.0
        image = 0.5 * image
        for i in range(len(car_poses)):
            frame = np.float32(self.mask == i + 1)
            frame = np.tile(frame[:, :, None], (1, 1, 3))
            image = image + frame * 0.5 * self.colors[i, :]

        uts.plot_images(
            {
                'image_vis': np.uint8(image),
                'depth': self.depth,
                'mask': self.mask
            },
            layout=[1, 3])

        return image, self.mask, self.depth
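self.merge_inst is not defined in this snippet; a minimal z-buffer merge that keeps the nearest surface per pixel, matching the call signature above, might look like:

import numpy as np

def merge_inst_sketch(depth_new, inst_id, total_mask, total_depth):
    # Where the newly rendered car is closer, write its depth and instance id (a sketch).
    closer = depth_new < total_depth
    total_depth = np.where(closer, depth_new, total_depth)
    total_mask = np.where(closer, inst_id, total_mask)
    return total_mask, total_depth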
Code example #5
def test_extend_building():
    image = np.zeros((10, 10), dtype=np.int32)
    building_id = 1
    sky_id = 2
    image[0, 0] = 1
    image[2, 0] = 2
    image[9, 1] = 1
    image_o = cv2.imread(
        '/home/peng/Data/zpark/Label/Record001/Camera_1/170427_222949577_Camera_1.png',
        cv2.IMREAD_UNCHANGED)
    # image_o = cv2.resize(image, (100, 100), interpolation=cv2.INTER_NEAREST)
    print image
    building_id = 25
    sky_id = 1
    image = cut.extend_building(np.int32(image_o), building_id, sky_id)
    print image
    uts.plot_images({'image_o': image_o, 'image_1': image})
Code example #6
def test_img():
    # depth = np.load('/home/peng/Data/visualization.npy')
    depth = cv2.imread('/home/peng/Data/kitti/000000_10.png')
    print np.amax(depth)
    mask = depth[:, :, 0] == 0
    depth = np.float32(1.0 / depth[:, :, 0]) * 1000
    depth[mask] = 0.0

    # depth = np.float32(1. / depth[:, :, 0])
    height, width = depth.shape
    # depth = cv2.resize(depth, (width / 3, height / 3))
    # intrinsic = np.array([1, 1, width / 2, height / 2], dtype=np.float32)
    intrinsic = np.array([959.0 / width, 957.0 / height,
                          696.0 / width, 224.0 / height], dtype=np.float32)

    normal = cut.depth2normals_np(depth, intrinsic)
    normal = normal.transpose([1, 2, 0])
    normal[:, :, [1, 2]] *= -1

    # uts.plot_images(OrderedDict([('depth', depth),
    #                              ('normal', (normal + 1.0)/2.)]),
    #                 layout=[2,1])
    uts.plot_images(OrderedDict([('depth', depth), ('normal', normal)]),
                    layout=[2, 1])
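cut.depth2normals_np is a compiled helper; the underlying idea, normals as the cross product of back-projected surface tangents, can be sketched in NumPy as follows (normalized [fx, fy, cx, cy] intrinsics as above; note the real helper returns CHW while this sketch returns HWC):

import numpy as np

def depth2normals_sketch(depth, intrinsic):
    # Surface normals from depth via cross products of 3D gradients (a sketch).
    h, w = depth.shape
    fx, fy, cx, cy = intrinsic * np.array([w, h, w, h])
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    pts = np.stack([(u - cx) / fx * depth, (v - cy) / fy * depth, depth], axis=-1)
    du = np.gradient(pts, axis=1)   # tangent along image x
    dv = np.gradient(pts, axis=0)   # tangent along image y
    n = np.cross(du, dv)
    return n / (np.linalg.norm(n, axis=-1, keepdims=True) + 1e-12)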
Code example #7
def test_geowarp():
    image_path1 = '/home/peng/Data/sun3d/brown_bm_1/' + \
                  'brown_bm_1/image/0001761-000059310235.jpg'
    image1 = cv2.imread(image_path1)
    with open('../test/depth_gt.npy', 'rb') as f:
        depth_gt = np.load(f)
    with open('../test/depth_res.npy', 'rb') as f:
        depth_res = np.load(f)

    if not np.all(depth_gt.shape == depth_res.shape):
        depth_gt = cv2.resize(depth_gt,
                              (depth_res.shape[1], depth_res.shape[0]),
                              interpolation=cv2.INTER_NEAREST)

    params = sun3d.set_params()
    rate = 0.05
    height, width = depth_gt.shape[0], depth_gt.shape[1]
    depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                             method='uniform',
                                             percent=rate,
                                             K=params['intrinsic'])
    depth = uts_3d.xyz2depth(depth_gt_down, params['intrinsic'],
                             depth_gt.shape)

    depth_up = LaplacianDeform(depth_res, depth_gt_down, params['intrinsic'],
                               True)

    # NOTE: `inputs` and `mask` used below come from context not shown in this snippet
    outputs, out_field = d_net.get_demon_outputs(inputs,
                                                 params,
                                                 ext_inputs=None)
    parameters, topo = paddle.parameters.create(outputs[out_field])
    uts.plot_images(OrderedDict([('image', image1), ('depth_gt', depth_gt),
                                 ('depth_down', depth),
                                 ('depth_res', depth_res), ('mask', mask),
                                 ('depth_up', depth_up)]),
                    layout=[4, 2])
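uts_3d.down_sample_depth returns a sparse set of 3D points and uts_3d.xyz2depth scatters them back into a depth map; assuming normalized [fx, fy, cx, cy] intrinsics, the scatter step can be sketched as:

import numpy as np

def xyz2depth_sketch(xyz, intrinsic, shape):
    # Project Nx3 camera-frame points through K and scatter z into a depth map (a sketch).
    h, w = shape[:2]
    fx, fy, cx, cy = intrinsic * np.array([w, h, w, h])
    depth = np.zeros((h, w), dtype=np.float32)
    u = np.round(fx * xyz[:, 0] / xyz[:, 2] + cx).astype(np.int64)
    v = np.round(fy * xyz[:, 1] / xyz[:, 2] + cy).astype(np.int64)
    ok = (u >= 0) & (u < w) & (v >= 0) & (v < h) & (xyz[:, 2] > 0)
    depth[v[ok], u[ok]] = xyz[ok, 2]
    return depth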
Code example #8
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """test the metrics of the trained model

    :param str cfg: model cfg file
    :param str data: data dict
    :param str weights: weights path
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold
    :param float conf_thres: confidence threshold
    :param float nms_thres: nms threshold
    :param bool save_json: whether to dump detections to a COCO-format JSON for official evaluation
    :param dict hyp: training hyperparameters passed to the dataloader
    :param nn.Module model: an already-initialized model; when provided, cfg/weights loading is skipped
    :param bool single_cls: only one class
    :return: results
    """

    if model is None:
        device = select_device(opt.device)
        verbose = False
        # Initialize model
        model = Model(cfg, img_size).to(device)
        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(det[6])],
                        'bbox':
                        [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score':
                        float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and mask[
                            best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean(
        ), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            from pycocotools.coco import COCO  # imported here so a missing pycocotools hits the ImportError below
            from pycocotools.cocoeval import COCOeval
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
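A typical standalone call (the cfg/weights paths and the parsed data dict are illustrative; with model=None the function also expects a module-level opt for device selection):

results, maps = test('cfg/yolov4.cfg',
                     data,                       # dict parsed from a *.data file
                     weights='weights/best.pt',  # placeholder path
                     batch_size=16,
                     img_size=608,
                     save_json=False)
mean_pre, mean_rec, mean_ap, mf1 = results[:4]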
Code example #9
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1,
                          dtype=torch.float32)  # iou vector for mAP@0.5:0.95
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from the pytorch source
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):

            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            postprocessor_args = {
                "yolo_masks": [
                    (6, 7, 8), (3, 4, 5), (0, 1, 2)
                ],  # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_anchors": [
                    (10, 13),
                    (16, 30),
                    (33, 23),
                    (30, 61),
                    (
                        62, 45
                    ),  # A list of 9 two-dimensional tuples for the YOLO anchors
                    (59, 119),
                    (116, 90),
                    (156, 198),
                    (373, 326)
                ],
                "num_classes":
                37,
                "stride": [32, 16, 8]
            }

            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Do layers before yolo
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) /
                                    32), :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]

            output_list = postprocessor.process(trt_outputs)

            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(
                            -1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(
                            -1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                                1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[
                                        j] > iouv  # iou_thres is 1xn
                                    if len(
                                            detected
                                    ) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append(
                    (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs,
                            output_to_target(output, width, height),
                            paths=paths,
                            names=names,
                            fname=f)  # predictions

        # Compute statistics
        stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
        if len(stats):
            p, r, ap, f1, ap_class = ap_per_class(*stats)
            if niou > 1:
                p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(
                    1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
            mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
            nt = np.bincount(stats[3].astype(np.int64),
                             minlength=nc)  # number of targets per class
        else:
            nt = torch.zeros(1)

        # Print results
        pf = '%20s' + '%10.3g' * 6  # print format
        print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

        # Print results per class
        if verbose and nc > 1 and len(stats):
            for i, c in enumerate(ap_class):
                print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

        # Print speeds
        if verbose:
            t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
                img_size, img_size, batch_size)  # tuple
            print(
                'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
                % t)
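get_engine follows the standard TensorRT ONNX-sample pattern: deserialize a cached engine when one exists, otherwise build it from the ONNX file and cache it. A condensed sketch (API names per the TensorRT 7 Python bindings; treat them as an assumption for other versions):

import os
import tensorrt as trt

TRT_LOGGER = trt.Logger()

def get_engine(onnx_file_path, engine_file_path=""):
    # Load a cached engine, or build one from ONNX and cache it (a sketch).
    if os.path.exists(engine_file_path):
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(flag) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        with open(onnx_file_path, "rb") as model:
            parser.parse(model.read())
        engine = builder.build_cuda_engine(network)
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine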
Code example #10
def get_train_valid_loader(data_dir,
                           batch_size,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the MNIST dataset. A sample
    9x9 grid of the images can be optionally displayed.

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Args
    ----
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
      In the paper, this number is set to 0.1.
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    # define transforms
    normalize = transforms.Normalize((0.1307, ), (0.3081, ))
    trans = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # load dataset
    dataset = datasets.MNIST(data_dir,
                             train=True,
                             download=True,
                             transform=trans)

    num_train = len(dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    valid_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(dataset,
                                                    batch_size=9,
                                                    shuffle=shuffle,
                                                    num_workers=num_workers,
                                                    pin_memory=pin_memory)
        data_iter = iter(sample_loader)
        images, labels = next(data_iter)
        X = images.numpy()
        X = np.transpose(X, [0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)
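Example call, following the docstring's advice for CUDA (the data directory is illustrative):

train_loader, valid_loader = get_train_valid_loader(data_dir='./data',
                                                    batch_size=64,
                                                    random_seed=42,
                                                    valid_size=0.1,
                                                    shuffle=True,
                                                    num_workers=1,
                                                    pin_memory=True)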
Code example #11
def check_diff():

    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    # paddle.init(use_gpu=False)

    # setting parameters
    params = sun3d.set_params('sun3d')
    params['stage'] = 5
    layout = [2, 3]
    cur_level = 0
    inputs = d_net.get_demon_inputs(params)

    # define several external input here to avoid implementation difference
    inputs.update(
        d_net.get_cnn_input("image2_down", params['size_stage'][1], 3))
    inputs.update(d_net.get_cnn_input("image_warp", params['size_stage'][1],
                                      3))
    inputs.update(
        d_net.get_cnn_input("depth_trans", params['size_stage'][1], 1))
    inputs.update(d_net.get_cnn_input("flow", params['size_stage'][1], 2))

    # Add neural network config
    outputs, out_field = d_net.get_demon_outputs(inputs,
                                                 params,
                                                 ext_inputs=inputs)
    print('load parameters')
    with gzip.open('./output/' + FLAGS.model, 'r') as f:
        parameters_init = paddle.parameters.Parameters.from_tar(f)

    # print parameters_init.names()
    parameters = paddle.parameters.create(outputs[out_field])
    for name in parameters.names():
        # print "setting parameter {}".format(name)
        parameters.set(name, parameters_init.get(name))

    # load the input from saved example
    res_folder = 'output/example_output/'
    with open(res_folder + 'img_pair', 'rb') as f:
        tf_pair = np.load(f)
        tf_pair = tf_pair.squeeze()
    with open(res_folder + 'image2_down', 'rb') as f:
        image2_down = np.load(f)
        image2_down = image2_down.squeeze()
    intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

    # load some extra inputs
    names = ['flow', 'depth', 'normal', 'rotation', 'translation']
    tf_names = [
        'predict_flow2', 'predict_depth2', 'predict_normal2',
        'predict_rotation', 'predict_translation'
    ]
    start_id = range(4, 4 + len(names))
    input_name_match = dict(zip(names, tf_names))
    results_names = dict(zip(names, start_id))
    boost_results = load_tf_boost_results(res_folder, input_name_match,
                                          params['stage'])

    test_data = [
        tf_pair[:3, :, :].flatten(), tf_pair[3:, :, :].flatten(),
        image2_down.flatten(), intrinsic
    ]
    test_data = [tuple(test_data + boost_results)]
    feeding = {'image1': 0, 'image2': 1, 'image2_down': 2, 'intrinsic': 3}
    feeding.update(results_names)

    # img_diff1 = tf_pair[:3, :, :] - image1_new.reshape((3, params['size'][0], params['size'][1]))
    # img_diff1 = img_diff1.transpose((1, 2, 0))
    # uts.plot_images({'img_diff': img_diff1}, layout=[1, 2])

    # print np.sum(np.abs(tf_pair[:3, :, :].flatten() - image1_new))
    # print np.sum(np.abs(tf_pair[3:, :, :].flatten() - image2_new))

    # return
    outputs_list = [outputs[x] for x in outputs.keys()]

    # pdb.set_trace()
    print len(test_data)
    print feeding.keys()

    conv = paddle.infer(output_layer=outputs_list,
                        parameters=parameters,
                        input=test_data,
                        feeding=feeding)

    height_list = [cp.g_layer_map[outputs[x].name].height \
                    for x in outputs.keys()]
    width_list = [cp.g_layer_map[outputs[x].name].width \
                    for x in outputs.keys()]

    conv = vec2img(inputs=conv, height=height_list, width=width_list)

    blob_name_match = get_name_matching(params['stage'])

    folder = './output/example_output/'
    # for name in outputs.keys()[cur_level:]:
    ob_names = outputs.keys()[cur_level:]
    # ob_names = ['depth_trans','geo_out']
    # ob_names = ['depth_0']

    for name in ob_names:
        i = outputs.keys().index(name)

        print name, ' ', blob_name_match[name]
        tf_conv_file = folder + str(params['stage']) + '_' + \
                       blob_name_match[name] + '.pkl'
        with open(tf_conv_file, 'rb') as f:
            tf_conv = np.load(f)

        print conv[i].shape, ' ', tf_conv.shape
        diff = conv[i] - tf_conv

        if len(diff.shape) <= 1:
            print '{} and {}, {}'.format(conv[i], tf_conv, diff)
        else:
            if len(diff.shape) == 2:
                diff = diff[:, :, np.newaxis]
            vis_dict = []
            for j in range(min(diff.shape[2], layout[0] * layout[1])):
                vis_dict.append(('diff_' + str(j), diff[:, :, j]))
            vis_dict = OrderedDict(vis_dict)
            uts.plot_images(OrderedDict(vis_dict), layout=layout)
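vec2img reshapes the flat vectors returned by paddle.infer back into images using the height/width lists built above; a plausible sketch, assuming CHW layout:

import numpy as np

def vec2img_sketch(inputs, height, width):
    # Reshape flat CHW vectors to HWC arrays, one per layer (assumed layout).
    out = []
    for vec, h, w in zip(inputs, height, width):
        vec = np.asarray(vec).flatten()
        c = vec.size // (h * w)
        out.append(vec.reshape((c, h, w)).transpose((1, 2, 0)).squeeze())
    return out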
Code example #12
File: wip_fisheye_aug.py  Project: phnk/yolov3
                                                   camera_matrix, distortion)

    # calculate the new targets
    new_targets = get_new_targets(new_bbox_coords_matrix, width, height)

    return img, new_targets, new_bbox_coords_matrix


if __name__ == "__main__":
    dataset = LoadImagesAndLabels(
        "data/3 class ground and light/train/train_paths.txt",
        640,
        augment=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             1,
                                             collate_fn=dataset.collate_fn)
    for imgs, targets, img_path, res in dataloader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        plot_images(imgs=imgs, targets=targets)

    #for i in range(10):
    #    im = Image.open("data/inside/train/images/image{}.png".format(i))
    #    with open('data/inside/train/labels/image{}.txt'.format(i), 'r') as f:
    #        targets = [[float(num) for num in line.split(' ')] for line in f]

    #    im, targets, new_bbox_coords_matrix = fisheye_augmentation(im, targets)
    #    im = Image.fromarray(im)

    #    plot_image(im, targets, new_bbox_coords_matrix)
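get_new_targets converts the warped corner points back into normalized YOLO targets; its exact input layout is not shown, so the following sketch assumes one (class, corner-points) pair per box:

import numpy as np

def get_new_targets_sketch(bbox_coords, width, height):
    # Axis-aligned, normalized [cls, cx, cy, w, h] targets from warped corners (assumed layout).
    targets = []
    for cls, pts in bbox_coords:            # pts: (N, 2) warped corner points of one box
        x_min, y_min = pts.min(axis=0)
        x_max, y_max = pts.max(axis=0)
        targets.append([cls,
                        (x_min + x_max) / 2 / width,
                        (y_min + y_max) / 2 / height,
                        (x_max - x_min) / width,
                        (y_max - y_min) / height])
    return np.array(targets, dtype=np.float32)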
Code example #13
def train():

    # 0. Initialize parameters (set random seed, read cfg info)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print config file info, write logs, etc.
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1、加载模型
    model = Darknet(cfg).to(device)

    if weights.endswith('.pt'):

        ### model.load_state_dict(torch.load(weights)['model'])
        ### Cause of the error: this ignores the layer whose class count does not match (the layer
        ### right before the yolo_layer), raising: size mismatch for module_list.81.Conv2d.weight:
        ### copying a param with shape torch.Size([255, 1024, 1, 1]) from checkpoint, the shape in
        ### current model is torch.Size([75, 1024, 1, 1]).
        ### TODO: map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif weights.endswith('.pth'):    # for 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        #try:
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
                if i % 5 == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]


        #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)

        # model.load_state_dict(chkpt['model'])

        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2. Set up the optimizer and learning rate
    start_epoch = 0
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=momentum, weight_decay=weight_decay, nesterov=True)  # TODO:nesterov ?  weight_decay=0.0005 ?

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0

    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2


    ###### apex need ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)  # clw note: multi-GPU; call DistributedDataParallel after amp.initialize(), otherwise it errors
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level


    ######
    model.nc = nc

    #### Step learning-rate schedule
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1)
    ### Cosine learning-rate schedule
    #lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    #scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8, # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)


    # 4. Train
    print('')   # blank line before the training log
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)

    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()    # tensorboard --logdir=runs, view at http://localhost:6006/

    prebias = start_epoch == 0

    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        model.train()  # placed here because test.test(), called at the end of each epoch, switches the model to eval()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)
        #pbar = tqdm(dataloader, ncols=20)  # ncols sets the bar width; tune it so the line does not wrap yet still looks tidy
        #for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75  # index [2] for multi-GPU, [1] for single-GPU
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):

            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr

            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)
            ### The training step consists of the following sub-steps:
            # (1) forward pass
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) backpropagate the loss to compute gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters, then zero the gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            #s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)
            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  optimizer.state_dict()['param_groups'][0]['lr'], time.time()-batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)

            if i % 10 == 0:
                print(s)
                
            # Plot
            if epoch == start_epoch  and i == 0:
                fname = 'train_batch.jpg' # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))

        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path = log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # save model
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),  # clw note: multi-GPU
                 'optimizer': optimizer.state_dict()}

        torch.save(chkpt, last_model_path)

    print('end')
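train() reads its settings from a module-level opt; a minimal argparse block covering the fields used above (defaults are illustrative) would be:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg')
parser.add_argument('--data', type=str, default='cfg/voc.data')
parser.add_argument('--weights', type=str, default='weights/darknet53.conv.74')
parser.add_argument('--img-size', type=int, default=416)   # exposed as opt.img_size
parser.add_argument('--batch-size', type=int, default=16)  # exposed as opt.batch_size
parser.add_argument('--epochs', type=int, default=100)
opt = parser.parse_args()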
Code example #14
def test_demo():
    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    inputs = d_net.get_demon_inputs(params)

    params['stage'] = 5
    # Add neural network config
    outputs, out_field = d_net.get_demon_outputs(inputs, params, ext_inputs=None)
    parameters, topo = paddle.parameters.create(outputs[out_field])

    # Read image pair 1, 2 flow
    for scene_name in params['train_scene'][1:]:
        image_list = preprocess_util.list_files(
            params['flow_path'] + scene_name + '/flow/')
        image2depth = sun3d.get_image_depth_matching(scene_name)

        for pair_name in image_list[0:2]:
            image1, image2, flow_gt, depth1_gt, normal1_gt = \
                sun3d.load_image_pair(scene_name, pair_name, image2depth)

            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])

            image2_new = uts.transform(image2.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

            test_data = [(image1_new, image2_new, intrinsic)]
            depth_name = 'depth' if params['stage'] < 5 else 'depth_0'
            out_fields = ['flow', depth_name, 'normal', 'rotation',
                          'translation']

            # out_fields = ['flow']
            # height_list = [cp.g_layer_map[outputs[x].name].height \
            #                 for x in ['flow']]
            # width_list = [cp.g_layer_map[outputs[x].name].width \
            #                 for x in ['flow']]
            output_list = [outputs[x] for x in out_fields]
            flow, depth, normal, rotation, translation = paddle.infer(
                                    output=topo,
                                    parameters=parameters,
                                    input=test_data,
                                    feeding={'image1': 0,
                                             'image2': 1,
                                             'intrinsic': 2})
            height_list = [cp.g_layer_map[outputs[x].name].height \
                            for x in ['flow', depth_name,'normal']]
            width_list = [cp.g_layer_map[outputs[x].name].width \
                            for x in ['flow', depth_name,'normal']]

            # flow = paddle.infer(output=output_list,
            #                     parameters=parameters,
            #                     input=test_data,
            #                     feeding={'image1': 0,
            #                              'image2': 1,
            #                              'intrinsic': 2});
            # flow = vec2img(inputs=[flow],
            #                height=height_list,
            #                width=width_list)

            # uts.plot_images(OrderedDict([('image1',image1),
            #                              ('image2',image2),
            #                              ('flow',flow),
            #                              ('flow_gt',flow_gt)]),
            #                 layout=[4,2])
            flow, depth, normal = vec2img(inputs=[flow, depth, normal],
                           height=height_list,
                           width=width_list)

            # visualize depth in 3D
            # image1_down = cv2.resize(image1,
            #     (depth.shape[1], depth.shape[0]))
            # visualize_prediction(
            #     depth=depth,
            #     image=np.uint8(image1_down.transpose([2, 0, 1])),
            #     rotation=rotation,
            #     translation=translation)
            uts.plot_images(OrderedDict([('image1',image1),
                                         ('image2',image2),
                                         ('flow',flow),
                                         ('flow_gt',flow_gt),
                                         ('depth', depth),
                                         ('depth_gt', depth1_gt)]),
                                         # ('normal', (normal + 1.0)/2.),
                                         # ('normal_gt', (normal1_gt + 1.0)/2)]),
                            layout=[4,2])
Code example #15
box_visualizer.draw_normalized_box(decoded_positive_boxes, selected_key)

# drawing generator output
train_keys, validation_keys = split_data(ground_truth_data, training_ratio=.8)
image_generator = ImageGenerator(ground_truth_data,
                                 prior_box_manager,
                                 1,
                                 image_shape,
                                 train_keys,
                                 validation_keys,
                                 image_prefix,
                                 vertical_flip_probability=0,
                                 horizontal_flip_probability=0.5)

generated_data = next(image_generator.flow(mode='demo'))
generated_input = generated_data[0]['input_1']
generated_output = generated_data[1]['predictions']
transformed_image = np.squeeze(generated_input[0]).astype('uint8')
validation_image_name = image_prefix + validation_keys[0]
original_image = read_image(validation_image_name)
original_image = resize_image(original_image, image_shape)
plot_images(original_image, transformed_image)

# finally draw the assigned boxes given by the generator
generated_encoded_boxes = np.squeeze(generated_output)
generated_boxes = prior_box_manager.decode_boxes(generated_encoded_boxes)
positive_mask = generated_boxes[:, 4] != 1
generated_positive_boxes = generated_boxes[positive_mask]
box_visualizer.draw_normalized_box(generated_positive_boxes,
                                   validation_keys[0])
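prior_box_manager.decode_boxes inverts the SSD box encoding; a standard sketch (corner-form priors and variances of 0.1/0.2 are assumptions):

import numpy as np

def decode_boxes_sketch(encoded, priors, variances=(0.1, 0.2)):
    # Standard SSD decode of (dx, dy, dw, dh) offsets against prior boxes (a sketch).
    prior_wh = priors[:, 2:4] - priors[:, 0:2]
    prior_cxcy = priors[:, 0:2] + 0.5 * prior_wh
    cxcy = encoded[:, 0:2] * variances[0] * prior_wh + prior_cxcy
    wh = np.exp(encoded[:, 2:4] * variances[1]) * prior_wh
    corners = np.concatenate([cxcy - 0.5 * wh, cxcy + 0.5 * wh], axis=1)
    return np.concatenate([corners, encoded[:, 4:]], axis=1)  # keep class columns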
Code example #16
File: train.py  Project: clw5180/PyTorch_Practice
            #s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)

            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  optimizer.state_dict()['param_groups'][0]['lr'], time.time()-batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  optimizer.param_groups[0]['lr'], time.time()-batch_start)
            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)

            #pbar.set_description(s)
            ### for debug ###
            if i % 10 == 0:
                print(s)
                
            # Plot
            if epoch == start_epoch  and i == 0:
                fname = 'train_batch.jpg' # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        print('clw: time use per epoch: %.3fs' % (time.time() - start))

        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        ### Update scheduler per epoch
        # scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
Code example #17
def sequencial_upsampleing(dataset='sun3d',
                           split='train',
                           max_num=None,
                           vis=False):

    # Read image pair 1, 2, generate depth
    if dataset == 'sun3d':
        params = sun3d.set_params()
        params['demon_model'] = '../output/tf_model_full_5.tar.gz'
    else:
        print "dataset {} is not supported".format(dataset)
        return  # otherwise params below would be undefined

    deep_upsampler = DeepUpSampler(params)
    part, part_id = [int(x) for x in FLAGS.part.split(',')]
    test_ids = partition(len(params[split + '_scene']), part, part_id)
    rate = 0.05
    process_scene_names = [params[split + '_scene'][x] for x in test_ids]
    all_time = 0.
    all_count = 0.

    for scene_name in process_scene_names:
        image_list = preprocess_util.list_files(params['flow_path'] +
                                                scene_name + '/flow/')

        image2depth = sun3d.get_image_depth_matching(scene_name)
        image_num = len(image_list) if max_num is None \
                                    else min(len(image_list), max_num)
        image_id = range(0, len(image_list), len(image_list) / image_num)
        upsample_output_path = params['flow_path'] + scene_name + \
          '/pair_depth/' + str(rate) + '/'
        uts.mkdir_if_need(upsample_output_path)

        print "processing {} with images: {}".format(scene_name, len(image_id))

        image_name_list = [image_list[x] for x in image_id]
        for pair_name in image_name_list:
            pair_image_name = pair_name.split('/')[-1]
            outfile = upsample_output_path + pair_image_name[:-4] + '.npy'
            # if uts.exists(outfile):
            #   print "\t {} exists".format(pair_name)
            #   continue

            image1, image2, flow_gt, depth_gt = \
                sun3d.load_image_pair(scene_name, pair_name,
                  image2depth, False)

            print pair_name
            # NOTE: debug visualization; the `continue` below short-circuits
            # the loop, skipping the actual upsampling for every pair.
            uts.plot_images(OrderedDict([('image', image1),
                                         ('depth_gt', depth_gt)]),
                            layout=[4, 2])
            continue

            depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                                     method='uniform',
                                                     percent=rate,
                                                     K=params['intrinsic'])

            try:
                start_time = time.time()
                print "\t upsampling {}".format(pair_name)
                depth_up = deep_upsampler.UpSample(depth_gt_down,
                                                   [image1, image2])
                np.save(outfile, depth_up)
                print "\t  time: {}".format(time.time() - start_time)

                all_time += time.time() - start_time
                all_count += 1

            except Exception:
                # a bare except here would also swallow KeyboardInterrupt
                print "{} failed".format(pair_name)

            if vis:
                uts.plot_images(OrderedDict([('image', image1),
                                             ('depth_gt', depth_gt),
                                             ('depth_up', depth_up)]),
                                layout=[4, 2])
    print "average run time {}\n".format(all_time / all_count)
Code example #18
def test_refine_net(dataset='sun3d', split='train', vis=False):

    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    part, part_id = [int(x) for x in FLAGS.part.split(',')]
    test_ids = partition(len(params[split + '_scene']), part, part_id)
    rate = 0.05
    is_inverse = False
    depth_name = 'depth_inv' if is_inverse else 'depth'

    process_scene_names = [params[split + '_scene'][x] for x in test_ids]
    inputs = u_net.get_inputs(params)
    outputs = u_net.refine_net(inputs, params)
    # create() builds the topology with fresh parameters; the trained
    # weights are then loaded from the tar archive in their place.
    parameters, topo = paddle.parameters.create(outputs[depth_name])
    print('load parameters {}'.format(FLAGS.model))
    with gzip.open(FLAGS.model, 'r') as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    feeding = {'image1': 0, 'depth': 1}

    for scene_name in process_scene_names:
        id_img2depth = sun3d.get_image_depth_matching(scene_name)
        upsample_output_path = params['flow_path'] + scene_name + \
          '/pair_depth/' + str(rate) + '/'
        prefix_len = len(upsample_output_path)
        image_list = preprocess_util.list_files(upsample_output_path)

        for pair_name in image_list:
            print pair_name
            pair_image_name = pair_name.split('/')[-1]
            outfile = upsample_output_path + pair_image_name[:-4] + '.npy'
            depth_net = np.load(outfile)
            depth_net_in = depth_net.flatten()
            if is_inverse:
                depth_net_in = uts_3d.inverse_depth(depth_net)

            image_name1, _ = pair_image_name.split('_')
            image_path1 = params['data_path'] + scene_name + \
                          '/image/' + image_name1 + '.jpg'
            depth_path1 = params['data_path'] + scene_name + '/depth/' + \
                          id_img2depth[image_name1] + '.png'

            image1 = cv2.imread(image_path1)
            depth1 = uts.read_depth(depth_path1)

            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            test_data = [(
                image1_new,
                depth_net_in,
            )]

            print 'forward'
            depth_out = paddle.infer(output=topo,
                                     parameters=parameters,
                                     input=test_data,
                                     feeding=feeding)
            if is_inverse:
                depth_out = uts_3d.inverse_depth(depth_out)

            depth = uts.vec2img(inputs=depth_out,
                                height=params['size'][0],
                                width=params['size'][1])

            if vis:
                uts.plot_images(OrderedDict([('image', image1),
                                             ('depth1', depth1),
                                             ('depth_net', depth_net),
                                             ('depth', depth)]),
                                layout=[4, 2])
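
uts_3d.inverse_depth is applied symmetrically before and after inference, so it must be self-inverse on valid pixels. A minimal sketch, assuming invalid pixels are marked by depth <= 0 (the project's handling of invalid values may differ):

import numpy as np

def inverse_depth(depth, eps=1e-6):
    # regressing 1/d is better conditioned at long ranges; invalid stays 0
    inv = np.zeros_like(depth, dtype=np.float32)
    valid = depth > eps
    inv[valid] = 1.0 / depth[valid]
    return inv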
Code example #19
    def showAnn(self,
                image_name,
                if_result=False,
                if_visualize=False,
                if_save=False,
                plot_path='tmp',
                is_training=False):
        """Show the annotation of a pose file in an image
        Input:
            image_name: the name of image
        Output:
            depth: a rendered depth map of each car
            masks: an instance mask of the label
            image_vis: an image show the overlap of car model and image
        """

        image_file = '%s/%s.jpg' % (self._data_config['image_dir'], image_name)
        image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB
        # print 'Original and rescaled image size: ', image.shape, self.image_size
        intrinsic = self.dataset.get_intrinsic(image_name, 'Camera_5')
        image_rescaled, self.intrinsic = self.rescale(image, intrinsic)

        if is_training:
            car_pose_file = '%s/%s.json' % (
                self._data_config['pose_dir'] if not (if_result) else
                self._data_config['pose_dir_result'], image_name)

            with open(car_pose_file) as f:
                car_poses = json.load(f)

            self.depth = self.MAX_DEPTH * np.ones(self.image_size)
            self.mask = np.zeros(self.depth.shape)
            self.shape_id_map = np.zeros(self.depth.shape)
            self.pose_map = np.zeros(
                (self.depth.shape[0], self.depth.shape[1], 6)) + np.inf
            self.shape_map = np.zeros(
                (self.depth.shape[0], self.depth.shape[1], 10)) + np.inf

            self.pose_list = []
            self.rot_uvd_list = []
            self.bbox_list = []
            self.shape_id_list = []

            plt.figure(figsize=(20, 10))
            plt.imshow(image_rescaled)
            for i, car_pose in enumerate(car_poses):
                car_name = car_models.car_id2name[car_pose['car_id']].name
                # if if_result:
                #     car_pose['pose'][-1]  = 1./car_pose['pose'][-1]
                depth, mask, vert, K = self.render_car(car_pose['pose'],
                                                       car_name)
                self.mask, self.shape_id_map, self.depth, self.pose_map = self.merge_inst(
                    depth, i + 1, car_pose['car_id'] + 1, self.mask,
                    self.shape_id_map, self.depth, self.pose_map,
                    car_pose['pose'])
                self.pose_list.append(car_pose['pose'])
                self.shape_id_list.append(car_pose['car_id'])

                scale = np.ones((3, ))
                car = self.car_models[car_name]
                pose = np.array(car_pose['pose'])
                print 'GT pose: ', pose[3:]
                # the full mesh (car['vertices']) is deliberately replaced by
                # a single point so that only the model center is projected
                vert = np.zeros((1, 3))
                vert_transformed = uts.project(pose, scale, vert)  # [*, 3]
                print 'Center transformed: ', vert_transformed

                vert_hom = np.hstack(
                    (vert_transformed, np.ones((vert.shape[0], 1))))
                K_hom = np.hstack((K, np.zeros((3, 1))))
                proj_uv_hom = np.matmul(K_hom, vert_hom.T)
                proj_uv = np.vstack((proj_uv_hom[0, :] / proj_uv_hom[2, :],
                                     proj_uv_hom[1, :] / proj_uv_hom[2, :]))
                u = proj_uv[0:1, :]  # [1, 1]
                v = proj_uv[1:2, :]
                d = proj_uv_hom[2:3, :]

                rot_uvd = [
                    car_pose['pose'][0], car_pose['pose'][1],
                    car_pose['pose'][2], u[0, 0], v[0, 0], car_pose['pose'][5]
                ]
                self.rot_uvd_list.append(rot_uvd)

                plt.scatter(u, v, linewidths=20)

                F1 = K_hom[0, 0]
                W = K_hom[0, 2]
                F2 = K_hom[1, 1]
                H = K_hom[1, 2]
                # closed-form inverse of the zero-skew intrinsic matrix K
                K_T = np.array([[1. / F1, 0., -W / F1], [0, 1. / F2, -H / F2],
                                [0., 0., 1.]])
                # print K_T
                # print self.intrinsic
                # print F1, W, F2, H
                uvd = np.vstack((u * d, v * d, d))
                xyz = np.matmul(K_T, uvd)
                print 'xyz / pose recovered: ', xyz
                # print 'uvd:', rot_uvd

                # print car_pose['pose'].shape, vert_transformed.shape

                ## Get bbox from mask
                arr = np.expand_dims(np.int32(mask), -1)
                # highest label value present in the mask
                labmax = 1
                # first/last slice index at which each label occurs, per axis;
                # both start at int32 max, and b_last is overwritten as soon
                # as a label is seen
                b_first = np.iinfo('int32').max * np.ones(
                    (3, labmax + 1), dtype='int32')
                b_last = np.iinfo('int32').max * np.ones(
                    (3, labmax + 1), dtype='int32')
                # sweep 2D slices along each axis and record where labels
                # appear (equivalent to min/max of np.where(mask) indices)
                for dim in range(2):
                    # generic slice object used to cut the array
                    sl = [slice(None), slice(None), slice(None)]
                    bf = b_first[dim]
                    bl = b_last[dim]
                    for k in range(arr.shape[dim]):
                        sl[dim] = k
                        # record the most recent slice containing each label
                        bl[arr[tuple(sl)].flatten()] = k
                        # keep the minimum as the first occurrence
                        bf[:] = np.clip(bf, None, bl)
                bbox = [
                    b_first[1, 1], b_last[1, 1], b_first[0, 1], b_last[0, 1]
                ]  # [x_min, x_max, y_min, y_max]
                self.bbox_list.append(bbox)
                plt.imshow(mask)
                print mask.shape
                currentAxis = plt.gca()
                # print (bbox[0], bbox[2]), bbox[1]-bbox[0], bbox[3]-bbox[2]
                currentAxis.add_patch(
                    Rectangle((bbox[0], bbox[2]),
                              bbox[1] - bbox[0],
                              bbox[3] - bbox[2],
                              alpha=1,
                              edgecolor='r',
                              facecolor='none'))
                # plt.show()
                # break
            plt.show()

            self.depth[self.depth == self.MAX_DEPTH] = -1.0
            image = 0.5 * image_rescaled
            for i in range(len(car_poses)):
                frame = np.float32(self.mask == i + 1)
                frame = np.tile(frame[:, :, None], (1, 1, 3))
                image = image + frame * 0.5 * self.colors[i, :]

            if if_visualize:
                uts.plot_images(
                    {
                        'image_vis': np.uint8(image),
                        'shape_id': self.shape_id_map,
                        'mask': self.mask,
                        'depth': self.depth
                    },
                    np.asarray(self.rot_uvd_list),
                    self.bbox_list,
                    layout=[1, 4],
                    fig_size=10,
                    save_fig=if_save,
                    fig_name=plot_path)

            return image, self.mask, self.shape_id_map, self.depth, self.pose_map, image_rescaled, self.pose_list, self.shape_id_list, self.rot_uvd_list, self.bbox_list
        else:
            return None, None, None, None, None, image_rescaled, None, None, None, None
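
The hand-written K_T inside showAnn is the closed-form inverse of a zero-skew pinhole intrinsic matrix. A quick self-contained round-trip check (the fx, fy, cx, cy values are illustrative):

import numpy as np

F1, F2, W, H = 1000., 1000., 640., 360.
K = np.array([[F1, 0., W], [0., F2, H], [0., 0., 1.]])
K_T = np.array([[1. / F1, 0., -W / F1], [0., 1. / F2, -H / F2], [0., 0., 1.]])
assert np.allclose(K_T, np.linalg.inv(K))

# back-project pixel (u, v) at depth d to camera space, then re-project
u, v, d = 700., 400., 5.
xyz = np.matmul(K_T, np.array([u * d, v * d, d]))
uvw = np.matmul(K, xyz)
assert np.allclose([uvw[0] / uvw[2], uvw[1] / uvw[2]], [u, v])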
Code example #20
def test_demo():
    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    inputs = d_net.get_demon_inputs(params)

    # Add neural network config
    outputs_bs = d_net.bootstrap_net(inputs, params)
    outputs_it = d_net.iterative_net(inputs, params)
    outputs_re = d_net.refine_net(inputs, params)
    out_fields = ['flow', 'depth_inv', 'normal', 'rotation', 'translation']
    my_g_layer_map = {}
    parameters_bs, topo_bs = paddle.parameters.create(
        [outputs_bs[x] for x in out_fields])
    my_g_layer_map.update(cp.g_layer_map)
    parameters_it, topo_it = paddle.parameters.create(
        [outputs_it[x] for x in out_fields])
    my_g_layer_map.update(cp.g_layer_map)
    parameters_re, topo_re = paddle.parameters.create(outputs_re['depth_0'])
    my_g_layer_map.update(cp.g_layer_map)

    print('load parameters')
    with gzip.open(FLAGS.model, 'r') as f:
        parameters_init = paddle.parameters.Parameters.from_tar(f)

    for name in parameters_bs.names():
        parameters_bs.set(name, parameters_init.get(name))
    for name in parameters_it.names():
        parameters_it.set(name, parameters_init.get(name))
    for name in parameters_re.names():
        parameters_re.set(name, parameters_init.get(name))

    # Read image pair 1, 2 flow
    for scene_name in params['train_scene'][1:]:
        image_list = preprocess_util.list_files(params['flow_path'] +
                                                scene_name + '/flow/')
        image2depth = sun3d.get_image_depth_matching(scene_name)
        for pair_name in image_list[0:2]:
            image1, image2, flow_gt, depth1_gt, normal1_gt = \
                sun3d.load_image_pair(scene_name, pair_name, image2depth)

            # transform both images to the network input size
            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            image2_new = uts.transform(image2.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            # normalized intrinsics [fx/W, fy/H, cx/W, cy/H], DeMoN-style
            intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

            test_data_bs = [(image1_new, image2_new)]
            feeding_bs = {'image1': 0, 'image2': 1}
            flow, depth_inv, normal, rotation, translation = paddle.infer(
                output=topo_bs,
                parameters=parameters_bs,
                input=test_data_bs,
                feeding=feeding_bs)

            # three DeMoN-style iterative refinement passes
            for i in range(3):
                test_data_it = [(image1_new, image2_new, intrinsic, rotation,
                                 translation, depth_inv, normal)]
                feeding_it = {
                    'image1': 0,
                    'image2': 1,
                    'intrinsic': 2,
                    'rotation': 3,
                    'translation': 4,
                    'depth_inv': 5,
                    'normal': 6
                }
                flow, depth_inv, normal, rotation, translation = paddle.infer(
                    output=topo_it,
                    parameters=parameters_it,
                    input=test_data_it,
                    feeding=feeding_it)

            test_data_re = [(image1_new, image2_new, depth_inv)]
            feeding_re = {'image1': 0, 'image2': 1, 'depth_inv': 2}
            depth = paddle.infer(output=topo_re,
                                 parameters=parameters_re,
                                 input=test_data_re,
                                 feeding=feeding_re)

            layer_names = [
                outputs_it['flow'].name, outputs_it['normal'].name,
                outputs_re['depth_0'].name
            ]
            height_list = [my_g_layer_map[x].height for x in layer_names]
            width_list = [my_g_layer_map[x].width for x in layer_names]

            flow, normal, depth = vec2img(inputs=[flow, normal, depth],
                                          height=height_list,
                                          width=width_list)

            # visualize depth in 3D
            # image1_down = cv2.resize(image1,
            #     (depth.shape[1], depth.shape[0]))

            # visualize_prediction(
            #     depth=depth,
            #     image=np.uint8(image1_down.transpose([2, 0, 1])),
            #     rotation=rotation,
            #     translation=translation)
            with open('./test/depth_gt.npy', 'wb') as f:
                np.save(f, depth1_gt)

            with open('./test/depth_res.npy', 'wb') as f:
                np.save(f, depth)

            uts.plot_images(OrderedDict([
                ('image1', image1), ('image2', image2), ('flow', flow),
                ('flow_gt', flow_gt), ('depth', depth),
                ('depth_gt', depth1_gt), ('normal', (normal + 1.0) / 2.),
                ('normal_gt', (normal1_gt + 1.0) / 2)
            ]),
                            layout=[4, 2])
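
The hard-coded intrinsic vector in test_demo is in normalized image coordinates. Assuming the [fx/W, fy/H, cx/W, cy/H] layout, recovering a pixel-space K for a given resolution is straightforward (sketch; the 256x192 resolution is illustrative):

import numpy as np

def normalized_to_K(intrinsic, width, height):
    # expand normalized intrinsics [fx/W, fy/H, cx/W, cy/H] into a 3x3 K
    fx, fy, cx, cy = intrinsic
    return np.array([[fx * width, 0., cx * width],
                     [0., fy * height, cy * height],
                     [0., 0., 1.]])

K = normalized_to_K(np.array([0.89115971, 1.18821287, 0.5, 0.5]), 256, 192)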