Example #1
        def check(xshape, wshape, stride=1, padding=0, dilation=1):
            with jt.log_capture_scope(
                    use_cuda=1,
                    enable_tuner=1,
                    log_v=1,
                    log_vprefix="op.cc=100,exe=1000") as raw_log:
                x = jt.random(xshape)
                w = jt.random(wshape)
                y = conv(x, w, stride, padding)
                mask = jt.random(y.shape)
                loss = mask * y
                dx, dw = jt.grad(loss, [x, w])
                jt.sync([y, loss, dx, dw])

            # fails when enable_tuner=1; possibly something wrong with mkl_conv_backward_x
            with jt.flag_scope(use_cuda=0, enable_tuner=0):
                cy = conv(x, w, stride, padding)
                closs = mask * cy
                cdx, cdw = jt.grad(closs, [x, w])
                jt.sync([cy, closs, cdx, cdw])
            logs = find_log_with_re(raw_log,
                                    "(Jit op key (not )?found: cudnn_conv.*)")
            assert len(logs) == 3 and "oihw" in logs[0][0], logs
            assert np.allclose(y.data, cy.data)
            assert np.allclose(dx.data, cdx.data, 1e-2)
            assert np.allclose(dw.data, cdw.data, 1e-2)
Example #2
def check_backward(xshape, wshape, stride, padding, dilation, groups, use_cuda,
                   nhwc):
    assert nhwc == 0
    test_func = test_nchw

    # only check cudnn
    with jt.log_capture_scope(use_cuda=use_cuda,
                              enable_tuner=1,
                              log_v=10,
                              log_vprefix="conv_tuner.cc=1000") as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation, groups)
        dx, dw = jt.grad(y, [x, w])
        jt.sync([y, dx, dw])
    with jt.flag_scope(use_cuda=0,
                       enable_tuner=0,
                       compile_options={"test": 233}):
        cy = test_func(x, w, stride, padding, dilation, groups)
        cdx, cdw = jt.grad(cy, [x, w])
        jt.sync([cy, cdx, cdw])

    assert np.allclose(y.data, cy.data)
    assert np.allclose(dw.data,
                       cdw.data, 1e-3), (dw.data, cdw.data,
                                         np.abs(dw.data - cdw.data).max())
    assert np.allclose(dx.data,
                       cdx.data, 1e-3), (dx.data, cdx.data,
                                         np.abs(dx.data - cdx.data).max())
Example #3
def check_backward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
    if nhwc:
        test_func = test_nhwc
    else:
        test_func = test_nchw
    if use_cuda == 1:
        op_name = "cudnn_conv"
    else:
        op_name = "mkl_conv"

    with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1,
        log_v=1, log_vprefix="op.cc=1000,exe=1000,conv_t=1000", compile_options={"test":244}
    ) as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation)
        loss = y.mean()
        dx, dw = jt.grad(loss, [x, w])
        jt.sync([y, loss, dx, dw])
    with jt.flag_scope(use_cuda=0, enable_tuner=0, compile_options={"test":233}):
        cy = test_func(x, w, stride, padding, dilation)
        closs = cy.mean()
        cdx, cdw = jt.grad(closs, [x, w])
        jt.sync([cy, closs, cdx, cdw])
    logs = find_log_with_re(raw_log, "(Jit op key (not )?found: " + op_name + ".*)")
    assert len(logs) == 3 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data, 1e-3)
    assert np.allclose(dw.data, cdw.data, 1e-3), (dw.data, cdw.data)
    assert np.allclose(dx.data, cdx.data, 1e-3), (dx.data, cdx.data, np.abs(cdx.data).max(), np.abs(dx.data - cdx.data).max())
Example #4
 def test_scalar_fuse_unary(self):
     with jt.profile_scope() as rep:
         a = jt.array([1])
         b = -a
         a = a.clone()
         b = b.clone()
         jt.sync([a, b])
         assert a.data == 1
         assert b.data == -1
     assert len(rep) == 2
Example #5
 def step(self, loss):
     ps = self.parameters
     gs = jt.grad(loss, ps)
     self.adam_step += 1
     n, (b0, b1) = float(self.adam_step), self.betas
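     # Adam: m and v track the biased first and second moment estimates; the bias correction is folded into step_size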
     for p, g, v, m in zip(ps, gs, self.values, self.m):
         m.assign(b0 * m + (1-b0) * g)
         v.assign(b1 * v + (1-b1) * g * g)
         step_size = self.lr * jt.sqrt(1 - b1**n) / (1 - b0**n)
         p -= m * step_size / (jt.sqrt(v) + self.eps)
         p.detach_inplace()
     jt.sync(self.no_grad_parameters)
Example #6
 def test_wrong_fuse2(self):
     a = jt.array([1])
     b = jt.random([10])
     c = jt.random([100])
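     # a holds the scalar 1, so bb and cc should equal b and c despite the different shapes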
     bb = a * b
     cc = a * c
     jt.sync([bb, cc])
     np.testing.assert_allclose(b.data, bb.data)
     np.testing.assert_allclose(c.data, cc.data)
Example #7
 def test_longest_dis_fuse(self):
     x = jt.array(np.random.rand(1, 3, 224, 224).astype(np.float32))
     loss = jt.sum(resnet_fake(x))
     ps = jt.find_vars('resnet_fake')
     gs = jt.grad(loss, ps)
     jt.sync(gs)
     # assert that no big tensor is allocated
     g = jt.dump_all_graphs()
     for s in g.nodes_info:
         if not s.startswith("Var"):
             continue
         shape = s.split("[")[1].split("]")[0].split(",")
         ptr = s.split("(")[1].split(")")[0].split(",")[-1]
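         # a non-zero pointer means this Var's memory is actually allocated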
         if ptr != '0':
             assert len(shape) <= 5, s
Example #8
 def step(self, loss):
     ps = self.parameters
     gs = jt.grad(loss, ps)
     for p, g, v in zip(ps, gs, self.values):
         dp = p * self.weight_decay + g
         v.assign(self.momentum * v + dp * (1 - self.dampening))
         if self.nesterov:
             p -= (dp + self.momentum * v) * self.lr
         else:
             p -= v * self.lr
         # detach from the previous graph to reduce memory consumption
         p.detach_inplace()
     # sync all no-grad parameters (e.g. moving_mean and moving_var in batch_norm)
     # to reduce memory consumption
     jt.sync(self.no_grad_parameters)
Example #9
        def test():
            class MyFunc(Function):
                def execute(self, x, z, y):
                    self.x = x
                    self.y = y
                    return x*y, "test", x/y

                def grad(self, grad0, _, grad1):
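                    # the non-Var output "test" has no gradient, so its slot is None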
                    assert _ is None
                    res = (grad0 * self.y, None, grad1 * self.x)
                    return res
            a = jt.array(3.0)
            b = jt.array(4.0)
            c, _, d = MyFunc()(a, "a", b)
            g = jt.grad(c+d*3, [a, b])
            jt.sync(g)
Example #10
    def test_stop_fuse2(self):
        with jt.profile_scope() as report:
            a = jt.float32(0).stop_fuse()
            c = jt.float32(0).stop_fuse()
            bs = [c]
            for i in range(2000):
                b = jt.float32(i) * 2 * c
                bs.append(b)
                a += b

            a = a * 2

            dbs = jt.grad(a, bs)
            jt.sync(dbs + [a])

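        # with stop_fuse, each profiled kernel should contain only a few fused op keys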
        for a in report[1:]:
            assert len(a[0].split("opkey")) < 8
Example #11
 def step(self, loss):
     self.adam_step += 1
     ps = self.parameters
     gs = jt.grad(loss, ps)
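     # in MPI mode, average gradients across workers and periodically re-sync the parameters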
     if jt.mpi:
         for g in gs:
             g.assign(g.mpi_all_reduce("mean"))
         if self.adam_step % self.param_sync_iter == 0:
             for p in ps:
                 p.assign(p.mpi_all_reduce("mean"))
     n, (b0, b1) = float(self.adam_step), self.betas
     for p, g, v, m in zip(ps, gs, self.values, self.m):
         m.assign(b0 * m + (1 - b0) * g)
         v.assign(b1 * v + (1 - b1) * g * g)
         step_size = self.lr * jt.sqrt(1 - b1**n) / (1 - b0**n)
         p -= m * step_size / (jt.sqrt(v) + self.eps)
         p.detach_inplace()
     jt.sync(self.no_grad_parameters)
Example #12
    def pre_step(self, loss):
        """ something should be done before step, 
        such as calc gradients, mpi sync, and so on.
        Example:
        ```
        class MyOptimizer(Optimizer):
            def step(self, loss):
                self.post_step(loss)
                ...
        ```
        """
        # clean prev grads
        params = []
        params_has_grad = []
        for pg in self.param_groups:
            pg["grads"] = [None] * len(pg['params'])
            for p in pg['params']:
                params.append(p)
                if not p.is_stop_grad():
                    params_has_grad.append(p)

        # sync params, reduce computing graph size
        jt.sync(params)

        # get gradient
        grads = jt.grad(loss, params_has_grad)

        # sync grads and model if in mpi
        if jt.mpi:
            for g in grads:
                g.assign(g.mpi_all_reduce("mean"))
            if self.n_step % self.param_sync_iter == 0:
                for p in params:
                    p.assign(p.mpi_all_reduce("mean"))
        self.n_step += 1

        # set up grads in param_groups
        pid = 0
        for pg in self.param_groups:
            pg_grads = pg["grads"]
            for i, p in enumerate(pg['params']):
                if not p.is_stop_grad():
                    pg_grads[i] = grads[pid]
                    pid += 1
Example #13
    def test_stop_fuse(self):
        with jt.profile_scope() as report:
            a = jt.float32(0).stop_fuse()
            c = jt.float32(0)
            bs = [c]
            for i in range(2000):
                b = jt.float32(i) * 2 * c
                bs.append(b)
                a += b

            a = a * 2

            dbs = jt.grad(a, bs)
            jt.sync(dbs + [a])

        for a in report[1:]:
            # originally this was 50; after the update-queue change it increased to 102
            assert len(a[0].split("opkey")) < 110, len(a[0].split("opkey"))
Example #14
    def test_reduce_opt(self):
        a = jt.random((16, 512, 38, 38))
        b = jt.random((16, 512, 38, 38))
        jt.sync([a, b])
        with jt.profile_scope(rerun=10, warmup=10) as rep:
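            # L2-normalize a along the channel dimension and back-propagate through it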
            norm = a.sqr().sum(1, keepdims=True).sqrt()
            c = a / norm
            da = jt.grad(c * b, a)
            jt.sync([c, da])
        gpu_c = c.numpy()
        gpu_da = da.numpy()
        with jt.flag_scope(use_cuda=0):
            norm = a.sqr().sum(1, keepdims=True).sqrt()
            c = a / norm
            da = jt.grad(c * b, a)
            assert np.allclose(gpu_c, c.data, 1e-3)
            assert (np.abs(gpu_da - da.data).max() < 1e-6)

        assert float(rep[1][3]) < 15e6, float(rep[1][3])  # threshold 15ms (actual is about 8ms)
Example #15
 def check(data_shape, weights_shape, stride=1, dilation=1):
     N, C, H, W = data_shape
     i, o, h, w = weights_shape
     img = np.random.rand(N, C, H, W).astype("float32")
     weights = np.random.rand(i, o, h, w).astype("float32")
     m1 = jt.nn.ConvTranspose(i,
                              o,
                              h,
                              stride=stride,
                              dilation=dilation,
                              bias=False)
     m2 = torch.nn.ConvTranspose2d(i,
                                   o,
                                   h,
                                   stride=stride,
                                   dilation=dilation,
                                   bias=False)
     m1.weight.data = weights
     m2.weight.data = torch.Tensor(weights)
     x = jt.array(img)
     # out1 = m1(x)
     out1 = jt.nn.conv_transpose2d(x,
                                   m1.weight,
                                   stride=stride,
                                   dilation=dilation,
                                   bias=False)
     mask = jt.random(out1.shape)
     out1 = out1 * mask
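     # reference result with PyTorch, using the same weights and mask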
     tx = torch.Tensor(img)
     tx.requires_grad = True
     out2 = m2(tx) * torch.Tensor(mask.data)
     with jt.log_capture_scope(
             log_silent=1,
             log_vprefix="var_re=0,conv=0,op.cc=100") as logs:
         assert np.allclose(out1.data, out2.data)
         dx, dw = jt.grad(out1, [x, m1.weight])
         jt.sync([dx, dw])
         out2.sum().backward()
         assert np.allclose(dw.data, m2.weight.grad.numpy(), 1e-3)
         assert np.allclose(dx.data, tx.grad.numpy())
     assert len(find_log_with_re(logs, "conv")) == 3
Example #16
    def test_memcopy_overlap(self):
        import time
        from jittor.models import resnet

        im = np.random.rand(100, 3, 224, 224).astype(np.float32)
        net = resnet.Resnet34()
        net.eval()
        # warm up
        x = jt.array(im).stop_grad()

        for i in range(10):
            a = net(x)
            a.sync()
        jt.sync(device_sync=True)

        # pure compute
        time_start = time.time()
        x = jt.array(im).stop_grad()
        for i in range(10):
            a = net(x)
            a.sync()
        jt.sync(device_sync=True)
        t1 = time.time() - time_start

        # warm up
        for i in range(3):
            x = jt.array(im)
            b = net(x)
            b.fetch(lambda b: None)
            b.sync()
        jt.sync(device_sync=True)

        # overlap
        time_start = time.time()
        results = []
        for i in range(10):
            x = jt.array(im)
            b = net(x)
            b.fetch(lambda b: results.append(b))
            b.sync()
            # del c
        jt.sync(device_sync=True)
        t2 = time.time() - time_start

        assert t2 - t1 < 0.010, (t2, t1, t2 - t1)
        assert np.allclose(a.data, b.data)
        assert len(results) == 10
        for v in results:
            assert np.allclose(a.data, v), (v.shape, a.data.shape)
        jt.LOG.v(f"pure compute: {t1}, overlap: {t2}")
Example #17
def train():
    if args.cuda:
        jt.flags.use_cuda = 1

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    # dataset = COCODetection(image_path=cfg.dataset.train_images,
    #                         info_file=cfg.dataset.train_info,
    #                         transform=SSDAugmentation(MEANS))

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=BaseTransform(MEANS))
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = EvalCOCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}..'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights..')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = NetLoss(net, criterion)

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(jt.zeros((1, 3, cfg.max_size, cfg.max_size)))
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    dataset.set_attrs(batch_size=args.batch_size,
                      num_workers=args.num_workers,
                      shuffle=False)
    dataset.collate_batch = detection_collate
    data_loader = dataset

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}
    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        # jt.profiler.start(0, 0)
        i = 0
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # data_loader.display_worker_status()
                # Stop at the end of an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                #optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                splits = prepare_data(datum)
                losses = net(*splits)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])
                # loss.sync()
                # Backprop
                loss.sync()
                optimizer.step(loss)
                jt.sync(optimizer.param_groups[0]['params'])

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())
                # for k in losses:
                #     loss_avgs[k].add(0)
                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    # loss_info = {k: round(float(losses[k].item()), precision) for k in losses}
                    # loss_info['T'] = round(float(loss.item()), precision)
                    loss_info = {k: round(float(0), precision) for k in losses}
                    loss_info['T'] = round(float(0), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save..')
                            os.remove(latest)
                i += 1
                if i > 100: break
            if i > 100: break
            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        # compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network..')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
Example #18
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        # inds_all = (scores > self.score_thresh).int()
        inds_all = scores > self.score_thresh
        # print(self.score_thresh,num_classes)
        # print(inds_all.shape)
        # inds_all = inds_all.transpose(1,0)
        inds_nonzeros = [inds_all[:, j].nonzero() for j in range(1, num_classes)]
        jt.sync(inds_nonzeros)

        for j in range(1, num_classes):
            # with nvtx_scope("aa"):
            #     inds = inds_all[:,j].nonzero().squeeze(1)
                
            # with nvtx_scope("bb"):
            #     scores_j = scores[inds, j]
            #     boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            # with nvtx_scope("cc"):
            #     boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            # with nvtx_scope("cc2"):
            #     boxlist_for_class.add_field("scores", scores_j)
            # with nvtx_scope("cc3"):
            #     boxlist_for_class = boxlist_nms(
            #         boxlist_for_class, self.nms
            #     )
            # with nvtx_scope("dd"):
            #     num_labels = len(boxlist_for_class)
            # with nvtx_scope("dd2"):
            #     boxlist_for_class.add_field(
            #         "labels", jt.full((num_labels,), j).int32()
            #     )
            #     result.append(boxlist_for_class)

            # inds = inds_all[:,j].nonzero().squeeze(1)
            inds = inds_nonzeros[j-1]
            if inds.shape[0] == 0:
                continue
            inds = inds.squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms
                )
            num_labels = len(boxlist_for_class)
            # print(j,num_labels)

            boxlist_for_class.add_field(
                    "labels", jt.full((num_labels,), j).int32()
                )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        if not result.has_field('labels'):
            result.add_field('labels', jt.empty((0,)))
        if not result.has_field('scores'):
            result.add_field('scores', jt.empty((0,)))
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = jt.kthvalue(
                cls_scores, number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh
            keep = jt.nonzero(keep).squeeze(1)
            result = result[keep]
        # # Absolute limit detection imgs
        # if number_of_detections > self.detections_per_img > 0:
        #     cls_scores = result.get_field("scores")
        #     scores, indices = jt.topk(
        #         cls_scores, self.detections_per_img
        #     )
        #     result = result[indices]
        return result
Example #19
 def test_multioutput(self):
     a, b = jt.index([2, 2])
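     # jt.index([2, 2]) yields one Var per dimension: row indices and column indices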
     jt.sync([a, b])
     assert (a.data == [[0, 0], [1, 1]]).all()
     assert (b.data == [[0, 1], [0, 1]]).all(), b.data