Example #1
    def test_fetch(self):
        a = jt.array([1, 2, 3])
        a = a * 2
        v = []
        jt.fetch([a], lambda a: v.append(a))
        jt.sync_all(True)
        assert len(v) == 1 and (v[0] == [2, 4, 6]).all()
Example #2
    def test_fetch(self):
        a = jt.array([1, 2, 3])
        a = a * 2
        v = []
        jt.fetch(a, lambda a: v.append(a))
        jt.fetch(
            1, 2, 3, a, lambda x, y, z, a: self.assertTrue(
                x == 1 and y == 2 and z == 3 and isinstance(a, np.ndarray)))
        jt.sync_all(True)
        assert len(v) == 1 and (v[0] == [2, 4, 6]).all()
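Taken together, Example #1 and Example #2 show the basic contract of jt.fetch: the last argument is a callback, leading arguments that are not Jittor variables are forwarded to it unchanged, Jittor variables are delivered as numpy arrays, and the callback only fires once the lazy graph is flushed (for instance by jt.sync_all(True)). A minimal standalone sketch of that pattern, assuming jittor is importable and behaves as in the tests above:

import numpy as np
import jittor as jt

results = []
x = jt.array([1.0, 2.0, 3.0])
y = x * x  # lazily evaluated; nothing is computed yet

# The plain value 42 is passed through to the callback unchanged,
# while the Jittor variable y arrives as a numpy array.
jt.fetch(42, y, lambda tag, arr: results.append((tag, arr)))

jt.sync_all(True)  # flush the graph so the registered callback runs
tag, arr = results[0]
assert tag == 42 and isinstance(arr, np.ndarray)
assert (arr == [1.0, 4.0, 9.0]).all()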
Example #3
def train(model, dataloader, optimizer, epoch, iteration):
    # switch to train mode
    model.train()

    averMeters.clear()
    end = time.time()
    for i, inputs in enumerate(dataloader):
        # debug: inspect the type of each field in the input batch
        for k, v in inputs.items():
            print(type(v[0]))
        averMeters['data_time'].update(time.time() - end)
        iteration += 1

        lr = adjust_learning_rate(optimizer,
                                  iteration,
                                  BASE_LR=0.0002,
                                  WARM_UP_FACTOR=1.0 / 3,
                                  WARM_UP_ITERS=1000,
                                  STEPS=(0, 14150 * 15, 14150 * 20),
                                  GAMMA=0.1)

        # forward
        outputs = model(**inputs)

        # the model returns the loss directly
        loss = outputs
        jt.sync_all()

        # record the loss asynchronously, then backward + parameter update
        jt.fetch(averMeters['loss'], loss, lambda a, l: a.update(l.data[0]))
        # averMeters['loss'].update(loss.data.item())
        optimizer.step(loss)

        # measure elapsed time
        averMeters['batch_time'].update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            logger.info('Epoch: [{0}][{1}/{2}]\t'
                        'Lr: [{3:.8f}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                            epoch,
                            i,
                            len(dataloader),
                            lr,
                            batch_time=averMeters['batch_time'],
                            data_time=averMeters['data_time'],
                            loss=averMeters['loss']))

        if i % 10000 == 0:
            model.save(os.path.join(SNAPSHOTDIR, '%d_%d.pkl' % (epoch, i)))
            model.save(os.path.join(SNAPSHOTDIR, 'last.pkl'))

    return iteration
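In the training loop above, jt.fetch updates the running loss meter asynchronously once the loss value is available, instead of blocking the loop by reading loss.data directly (the commented-out line shows the synchronous alternative). A rough sketch of the same logging pattern with a plain list instead of the project's averMeters helper, just to make the idea explicit (model, dataloader and optimizer are assumed to exist):

loss_history = []  # hypothetical stand-in for averMeters['loss']

for step, batch in enumerate(dataloader):
    loss = model(**batch)
    # enqueue an asynchronous read of the loss; the callback receives a numpy array
    jt.fetch(loss, lambda l: loss_history.append(float(l)))
    optimizer.step(loss)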
Example #4
    def test_densenet(self):
        self.setup_seed(1)
        loss_list = []
        acc_list = []
        mnist_net = MnistNet()
        global prev
        prev = time.time()
        SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum,
                     self.weight_decay)
        # SGD = jt.optim.Adam(mnist_net.parameters(), lr=0.0001)

        for batch_idx, (data, target) in enumerate(self.train_loader):
            output = mnist_net(data)
            loss = nn.cross_entropy_loss(output, target)
            SGD.step(loss)

            def callback(batch_idx, loss, output, target):
                # print train info
                global prev
                pred = np.argmax(output, axis=1)
                acc = np.mean(target == pred)
                loss_list.append(loss[0])
                acc_list.append(acc)
                print(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                    .format(0, batch_idx, 600, 1. * batch_idx / 6.0, loss[0],
                            acc,
                            time.time() - prev))
                # prev = time.time()

            jt.fetch(batch_idx, loss, output, target, callback)
            # Train Epoch: 0 [0/600 (0%)]     Loss: 2.402650  Acc: 0.060000
            # Train Epoch: 0 [1/600 (0%)]     Loss: 2.770145  Acc: 0.100000
            # Train Epoch: 0 [2/600 (0%)]     Loss: 3.528072  Acc: 0.100000
            # Train Epoch: 0 [3/600 (0%)]     Loss: 2.992042  Acc: 0.100000
            # Train Epoch: 0 [4/600 (1%)]     Loss: 4.672772  Acc: 0.060000
            # Train Epoch: 0 [5/600 (1%)]     Loss: 5.003410  Acc: 0.080000
            # Train Epoch: 0 [6/600 (1%)]     Loss: 5.417546  Acc: 0.100000
            # Train Epoch: 0 [7/600 (1%)]     Loss: 5.137665  Acc: 0.100000
            # Train Epoch: 0 [8/600 (1%)]     Loss: 5.241075  Acc: 0.070000
            # Train Epoch: 0 [9/600 (2%)]     Loss: 4.515363  Acc: 0.100000
            # Train Epoch: 0 [10/600 (2%)]    Loss: 3.357187  Acc: 0.170000
            # Train Epoch: 0 [20/600 (3%)]    Loss: 2.265879  Acc: 0.100000
            # Train Epoch: 0 [30/600 (5%)]    Loss: 2.107000  Acc: 0.250000
            # Train Epoch: 0 [40/600 (7%)]    Loss: 1.918214  Acc: 0.290000
            # Train Epoch: 0 [50/600 (8%)]    Loss: 1.645694  Acc: 0.400000

        jt.sync_all(True)
        assert np.mean(loss_list[-50:]) < 0.3
        assert np.mean(acc_list[-50:]) > 0.9
Example #5
    def test_vgg(self):
        self.setup_seed(1)
        loss_list = []
        acc_list = []
        mnist_net = MnistNet()
        SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum,
                     self.weight_decay)

        for batch_idx, (data, target) in enumerate(self.train_loader):
            output = mnist_net(data)
            loss = nn.cross_entropy_loss(output, target)

            # train step
            with jt.log_capture_scope(
                    log_silent=1,
                    log_v=1,
                    log_vprefix="op.cc=100,exe=10",
            ) as logs:
                SGD.step(loss)

                def callback(loss, output, target, batch_idx):
                    # print train info
                    pred = np.argmax(output, axis=1)
                    acc = np.sum(target == pred) / self.batch_size
                    loss_list.append(loss[0])
                    acc_list.append(acc)
                    print(
                        'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f}'
                        .format(0, batch_idx, 100, 1. * batch_idx, loss[0],
                                acc))

                jt.fetch(batch_idx, loss, output, target, callback)

            log_conv = find_log_with_re(
                logs, "Jit op key (not )?found: ((mkl)|(cudnn))_conv.*")
            log_matmul = find_log_with_re(
                logs, "Jit op key (not )?found: ((mkl)|(cublas))_matmul.*")
            if batch_idx:
                assert len(log_conv) == 38 and len(log_matmul) == 12, (
                    len(log_conv), len(log_matmul))

            mem_used = jt.flags.stat_allocator_total_alloc_byte \
                - jt.flags.stat_allocator_total_free_byte
            assert mem_used < 11e9, mem_used
            assert jt.core.number_of_lived_vars() < 3500
            if (np.mean(loss_list[-50:]) < 0.2):
                break

        assert np.mean(loss_list[-50:]) < 0.2
Example #6
    def test_resnet(self):
        self.setup_seed(1)
        loss_list = []
        acc_list = []
        mnist_net = MnistNet()
        global prev
        prev = time.time()
        SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum, self.weight_decay)

        iters = 10
        for batch_idx, (data, target) in enumerate(self.train_loader):
            if (batch_idx > iters):
                break
            jt.display_memory_info()
            output = mnist_net(data)
            loss = nn.cross_entropy_loss(output, target)
            SGD.step(loss)
            def callback(batch_idx, loss, output, target):
                global prev
                pred = np.argmax(output, axis=1)
                acc = np.mean(target == pred)
                loss_list.append(loss[0])
                acc_list.append(acc)
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                      .format(0, batch_idx, iters, 100. * batch_idx / iters, loss[0], acc, time.time() - prev))
            jt.fetch(batch_idx, loss, output, target, callback)
        jt.sync_all(True)
        jt.display_max_memory_info()
        _, out = jt.get_max_memory_treemap()
        out_ = out.split('\n')
        assert(out_[0] == 'root()')
        assert(out_[3].endswith('(_run_module_as_main)'))
        assert(out_[7].endswith('(_run_code)'))
        _, out = jt.get_max_memory_treemap(build_by=1)
        out_ = out.split('\n')
        assert(out_[0] == 'root()')
        assert(out_[4].endswith('(_run_module_as_main)'))
        assert(out_[8].endswith('(_run_code)'))
Example #7
def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)

    result = {}
    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]
        if isinstance(scores, list):
            box_scores = scores[0]  #.numpy()
            mask_scores = scores[1]  #.numpy()
            jt.fetch(
                box_scores,
                lambda box_scores: result.update({'box_scores': box_scores}))
            jt.fetch(
                mask_scores, lambda mask_scores: result.update(
                    {'mask_scores': mask_scores}))

        else:
            # scores = scores#.numpy()
            jt.fetch(scores, lambda scores: result.update({'scores': scores}))

        # classes = classes#.numpy()
        # boxes = boxes#.numpy()
        # masks = masks#.numpy()
        jt.fetch(classes, lambda classes: result.update({'classes': classes}))
        jt.fetch(boxes, lambda boxes: result.update({'boxes': boxes}))
        jt.fetch(masks, lambda masks: result.update({'masks': masks}))

    with timer.env('Sync'):
        # Just in case
        jt.sync_all()
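Example #7 uses the same mechanism to copy several postprocessed tensors into a plain dict without blocking between copies; the jt.sync_all() under the 'Sync' timer then flushes the graph so the dict is populated before the caller reads it. A compressed, self-contained sketch of the idea (variable names are made up):

result = {}
tensors = {'a': jt.array([1, 2]), 'b': jt.array([3, 4])}
for name, var in tensors.items():
    # bind name as a default argument so each lambda keeps its own key
    jt.fetch(var, lambda arr, name=name: result.update({name: arr}))
jt.sync_all()  # flush the graph; result now maps each key to a numpy array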
Example #8
    def test_resnet(self):
        self.setup_seed(1)
        loss_list = []
        acc_list = []
        mnist_net = MnistNet()
        global prev
        prev = time.time()
        SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum,
                     self.weight_decay)
        self.train_loader.endless = True

        for data, target in self.train_loader:
            batch_id = self.train_loader.batch_id
            epoch_id = self.train_loader.epoch_id

            # train step
            # with jt.log_capture_scope(
            #     log_silent=1,
            #     log_v=1, log_vprefix="op.cc=100,exe=10",
            # ) as logs:
            output = mnist_net(data)
            loss = nn.cross_entropy_loss(output, target)
            SGD.step(loss)

            def callback(epoch_id, batch_id, loss, output, target):
                # print train info
                global prev
                pred = np.argmax(output, axis=1)
                acc = np.mean(target == pred)
                loss_list.append(loss[0])
                acc_list.append(acc)
                print(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                    .format(epoch_id, batch_id, 600, 1. * batch_id / 6.0,
                            loss[0], acc,
                            time.time() - prev))
                # prev = time.time()

            jt.fetch(epoch_id, batch_id, loss, output, target, callback)

            # log_conv = find_log_with_re(logs,
            #     "Jit op key (not )?found: ((mkl)|(cudnn))_conv.*")
            # log_matmul = find_log_with_re(logs,
            #     "Jit op key (not )?found: ((mkl)|(cublas))_matmul.*")
            # if batch_id > 2:
            #     assert len(log_conv)==59 and len(log_matmul)==6, (len(log_conv), len(log_matmul))

            mem_used = jt.flags.stat_allocator_total_alloc_byte \
                - jt.flags.stat_allocator_total_free_byte
            # assert mem_used < 4e9, mem_used
            # TODO: why bigger?
            assert mem_used < 5.6e9, mem_used
            # example log:
            # Train Epoch: 0 [0/100 (0%)]     Loss: 2.352903  Acc: 0.110000
            # Train Epoch: 0 [1/100 (1%)]     Loss: 2.840830  Acc: 0.080000
            # Train Epoch: 0 [2/100 (2%)]     Loss: 3.473594  Acc: 0.100000
            # Train Epoch: 0 [3/100 (3%)]     Loss: 3.131615  Acc: 0.200000
            # Train Epoch: 0 [4/100 (4%)]     Loss: 2.524094  Acc: 0.230000
            # Train Epoch: 0 [5/100 (5%)]     Loss: 7.780025  Acc: 0.080000
            # Train Epoch: 0 [6/100 (6%)]     Loss: 3.890721  Acc: 0.160000
            # Train Epoch: 0 [7/100 (7%)]     Loss: 6.370137  Acc: 0.140000
            # Train Epoch: 0 [8/100 (8%)]     Loss: 11.390827 Acc: 0.150000
            # Train Epoch: 0 [9/100 (9%)]     Loss: 21.598564 Acc: 0.080000
            # Train Epoch: 0 [10/100 (10%)]   Loss: 23.369165 Acc: 0.130000
            # Train Epoch: 0 [20/100 (20%)]   Loss: 4.804510  Acc: 0.100000
            # Train Epoch: 0 [30/100 (30%)]   Loss: 3.393924  Acc: 0.110000
            # Train Epoch: 0 [40/100 (40%)]   Loss: 2.286762  Acc: 0.130000
            # Train Epoch: 0 [50/100 (50%)]   Loss: 2.055014  Acc: 0.290000

            if jt.in_mpi:
                assert jt.core.number_of_lived_vars() < 8100, \
                    jt.core.number_of_lived_vars()
            else:
                assert jt.core.number_of_lived_vars() < 7000, \
                    jt.core.number_of_lived_vars()
            if self.train_loader.epoch_id >= 2:
                break

        jt.sync_all(True)
        assert np.mean(loss_list[-50:]) < 0.5
        assert np.mean(acc_list[-50:]) > 0.8