Example #1
0
def main():
    """Entry point for (multi-)GPU training.

    Parses CLI args, loads the config file, prepares a timestamped work
    directory and a logger, then either runs a single worker (CPU / one
    GPU) or spawns one sub-process per GPU.
    """
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    args = parse_args()
    cfg = Config.fromfile(args.config)
    cfg.gpus = args.gpus
    cfg.dynamic = args.dynamic
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    else:
        assert cfg.get(
            'work_dir', None
        ) is not None, 'if do not set work_dir in args, please set in config file'
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    # each run writes into its own timestamped sub-directory
    cfg.work_dir = os.path.join(cfg.work_dir, timestamp)
    mkdir_or_exist(os.path.abspath(cfg.work_dir))

    # init the logger
    log_file = os.path.join(cfg.work_dir, 'root.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # log some basic info
    logger.info('training gpus num: {}'.format(args.gpus))
    logger.info('Config:\n{}'.format(cfg.text))

    # get world_size (0 means CPU-only)
    world_size = args.gpus
    assert world_size <= mge.get_device_count("gpu")
    if world_size == 0:  # use cpu
        mge.set_default_device(device='cpux')
    else:
        gpuid = args.gpuid
        mge.set_default_device(device='gpu' + gpuid)

    if world_size > 1:
        # validate the optimizers layout once, then apply the
        # linear-scaling rule: multiply each lr by the number of gpus
        if not all(isinstance(cfg_, dict) for cfg_ in cfg.optimizers.values()):
            raise RuntimeError(
                "please use 'dict of dict' style for optimizers config")
        for cfg_ in cfg.optimizers.values():
            cfg_['lr'] = cfg_['lr'] * world_size

        # start distributed training, dispatch sub-processes
        mp.set_start_method("spawn")
        processes = []
        for rank in range(world_size):
            p = mp.Process(target=worker, args=(rank, world_size, cfg))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()
    else:
        worker(0, 1, cfg)
Example #2
0
def inference(model_file, device, records, result_queue):
    """Evaluate `records` on device 'xpu{device}' and push one result dict
    per image onto `result_queue`.

    Args:
        model_file: checkpoint path loaded into the detection network.
        device: device index used to pick 'xpu{device}'.
        records: iterable of dataset records to evaluate.
        result_queue: multiprocessing queue; receives dicts with keys
            ID, height, width, dtboxes, gtboxes.
    """

    def val_func():
        # forward pass over whatever values are currently set on net.inputs
        pred_boxes = net(net.inputs)
        return pred_boxes

    mge.set_default_device('xpu{}'.format(device))
    net = network.Network()
    net = load_model(net, model_file)
    net.eval()

    for record in records:

        np.set_printoptions(precision=2, suppress=True)
        image, gt_boxes, im_info, ID = get_data(record, device)

        net.inputs["image"].set_value(image.astype(np.float32))
        net.inputs["im_info"].set_value(im_info)
        del record, image  # drop the raw image early to reduce memory

        pred_boxes = val_func().numpy()

        # NOTE(review): selecting column/axis 1 then slicing [:, :4] below
        # only works if pred_boxes carries an extra leading axis (e.g.
        # (N, K, 5+)) — confirm against the network's output layout.
        pred_bbox = pred_boxes[:, 1]
        scale = im_info[0, 2]  # presumably the resize scale — verify get_data

        # map box coordinates back to the original image scale
        cls_dets = pred_bbox[:, :4] / scale

        if config.test_nms_version == 'set_nms':
            # boxes come in pairs; each pair shares one ident for set-NMS
            n = cls_dets.shape[0] // 2
            idents = np.tile(np.linspace(0, n-1, n).reshape(-1, 1),(1, 2)).reshape(-1, 1)
            pred_boxes = np.hstack([cls_dets, pred_bbox[:,4:5], idents])
            flag = pred_boxes[:, 4] >= config.test_cls_threshold
            cls_dets = pred_boxes[flag]
            keep = emd_cpu_nms(cls_dets, config.test_nms)
            cls_dets = cls_dets[keep, :5].astype(np.float64)

        elif config.test_nms_version == 'normal_nms':
            # standard per-box NMS on [x1, y1, x2, y2, score]
            pred_boxes = np.hstack([cls_dets, pred_bbox[:, 4:5]])
            flag = pred_boxes[:, 4] >= config.test_cls_threshold
            cls_dets = pred_boxes[flag]
            keep = nms(cls_dets.astype(np.float32), config.test_nms)
            cls_dets = cls_dets[keep, :5].astype(np.float64)
        else:
            raise NotImplementedError('the results should be post processed.')

        # single-class evaluation: tag every detection with class 1
        pred_tags = np.ones([cls_dets.shape[0],]).astype(np.float64)
        gt_boxes = gt_boxes.astype(np.float64)

        dtboxes = boxes_dump(cls_dets[:, :5], pred_tags, False)
        gtboxes = boxes_dump(gt_boxes, None, True)

        # im_info = im_info.astype(np.int32)
        # height, width = im_info[0, 3], im_info[0, 4]
        # original image height/width stored in im_info columns 3 and 4
        height, width = int(im_info[0, 3]), int(im_info[0, 4])
        result_dict = dict(ID=ID, height=height, width=width,
                dtboxes = dtboxes, gtboxes = gtboxes)
        result_queue.put_nowait(result_dict)
Example #3
0
def main():
    """Entry point for testing: parse args/config, select CPU or GPU
    devices from --gpuids, then run one worker per device (multi-GPU uses
    a distributed server plus spawned sub-processes)."""
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    args = parse_args()
    cfg = Config.fromfile(args.config)
    cfg.dynamic = args.dynamic
    cfg.ensemble = args.ensemble
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    else:
        assert cfg.get(
            'work_dir', None
        ) is not None, 'if do not set work_dir in args, please set in config file'

    # every run writes into its own timestamped sub-directory
    cfg.work_dir = os.path.join(cfg.work_dir, timestamp)
    mkdir_or_exist(os.path.abspath(cfg.work_dir))

    log_file = os.path.join(cfg.work_dir, 'root.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    logger.info('Config:\n{}'.format(cfg.text))

    # "-1" selects CPU-only; otherwise one worker per comma-separated id
    gpu_list = [item.strip() for item in args.gpuids.split(",")]
    if gpu_list[0] == "-1":
        world_size = 0  # use cpu
        logger.info('test use only cpu')
    else:
        world_size = len(gpu_list)
        logger.info('test gpus num: {}'.format(world_size))

    # assert world_size <= mge.get_device_count("gpu")

    if world_size == 0:  # use cpu
        mge.set_default_device(device='cpux')
    elif world_size == 1:
        mge.set_default_device(device='gpu' + gpu_list[0])
    else:
        pass  # multi-GPU: each spawned worker binds its own device

    if world_size > 1:
        # one free port for the distributed server all ranks connect to
        port = dist.util.get_free_ports(1)[0]
        server = dist.Server(port)
        processes = []
        for rank in range(world_size):
            logger.info("init distributed process group {} / {}".format(
                rank, world_size))
            p = mp.Process(target=worker,
                           args=(rank, world_size, cfg, gpu_list[rank], port))
            p.start()
            processes.append(p)

        # join in rank order and fail loudly on any non-zero exit
        for rank in range(world_size):
            processes[rank].join()
            code = processes[rank].exitcode
            assert code == 0, "subprocess {} exit with code {}".format(
                rank, code)
    else:
        # NOTE(review): this call passes 3 args while the distributed path
        # passes 5 — confirm worker() defaults its gpu/port parameters.
        worker(0, 1, cfg)
Example #4
0
def main():
    """Parse CLI options, configure the quantization mode, and fan out
    one training worker per GPU (or a single worker for <= 1 device)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", type=str, default="resnet18")
    parser.add_argument("-d", "--data", type=str, default=None)
    parser.add_argument("-s", "--save", type=str, default="/data/models")
    parser.add_argument("-c",
                        "--checkpoint",
                        type=str,
                        default=None,
                        help="pretrained model to finetune")

    parser.add_argument(
        "-m",
        "--mode",
        default="qat",
        type=str,
        choices=["normal", "qat", "quantized"],
        help="Quantization Mode\n"
        "normal: no quantization, using float32\n"
        "qat: quantization aware training, simulate int8\n"
        "quantized: convert mode to int8 quantized, inference only")

    parser.add_argument("-n", "--ngpus", type=int, default=None)
    parser.add_argument("-w", "--workers", type=int, default=4)
    parser.add_argument("--report-freq", type=int, default=50)
    args = parser.parse_args()

    # default to every visible GPU unless --ngpus overrides it
    if args.ngpus is None:
        world_size = mge.get_device_count("gpu")
    else:
        world_size = args.ngpus

    if args.mode == "quantized":
        # int8 inference runs single-process on CPU
        world_size = 1
        args.report_freq = 1  # test is slow on cpu
        mge.set_default_device("cpux")
        logger.warning("quantized mode use cpu only")

    if world_size <= 1:
        worker(0, 1, args)
        return

    # start distributed training, dispatch sub-processes
    mp.set_start_method("spawn")
    subprocs = []
    for rank in range(world_size):
        proc = mp.Process(target=worker, args=(rank, world_size, args))
        proc.start()
        subprocs.append(proc)

    for proc in subprocs:
        proc.join()
Example #5
0
def main():
    """Entry point for (multi-)GPU testing: load the config, prepare a
    timestamped work dir and logger, pick the device, then dispatch one
    test worker per GPU (or a single CPU/GPU worker)."""
    args = parse_args()
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

    cfg = Config.fromfile(args.config)
    cfg.gpus = args.gpus
    cfg.dynamic = args.dynamic
    cfg.ensemble = args.ensemble
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    else:
        assert cfg.get('work_dir', None) is not None, \
            'if do not set work_dir in args, please set in config file'

    # every run gets its own timestamped sub-directory
    cfg.work_dir = os.path.join(cfg.work_dir, timestamp)
    mkdir_or_exist(os.path.abspath(cfg.work_dir))

    # set up logging and record the basic run info
    log_file = os.path.join(cfg.work_dir, 'root.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    logger.info('test gpus num: {}'.format(args.gpus))
    logger.info('Config:\n{}'.format(cfg.text))

    # world_size == 0 means CPU-only testing
    world_size = args.gpus
    assert world_size <= mge.get_device_count("gpu")
    if world_size == 0:
        mge.set_default_device(device='cpux')
    else:
        mge.set_default_device(device='gpu' + args.gpuid)

    if world_size > 1:
        # distributed test: spawn one sub-process per rank
        mp.set_start_method("spawn")
        procs = [
            mp.Process(target=worker, args=(rank, world_size, cfg))
            for rank in range(world_size)
        ]
        for proc in procs:
            proc.start()
        for proc in procs:
            proc.join()
    else:
        worker(0, 1, cfg)
Example #6
0
def test_save_load():
    """Train one step, save net + optimizer state with mge.save, reload
    the checkpoint on cpu0, and run one more step on the restored model."""
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    # one forward/backward/update step so the optimizer has real state
    with gm:
        loss = net(data)
        gm.backward(loss)

    optim.step()

    model_name = "simple.pkl"
    print("save to {}".format(model_name))

    mge.save(
        {
            "name": "simple",
            "state_dict": net.state_dict(),
            "opt_state": optim.state_dict(),
        },
        model_name,
    )

    # Load param to cpu
    checkpoint = mge.load(model_name, map_location="cpu0")
    device_save = mge.get_default_device()
    mge.set_default_device("cpu0")
    net = Simple()
    net.load_state_dict(checkpoint["state_dict"])
    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.load_state_dict(checkpoint["opt_state"])
    print("load done")
    os.remove("simple.pkl")

    # NOTE(review): gm is still attached to the ORIGINAL net's parameters;
    # this backward records grads for those, not the reloaded net's —
    # confirm this is the intended coverage.
    with gm:
        loss = net([1.23])  # plain list input — presumably Simple converts it
        gm.backward(loss)

    optim.step()
    # Restore device
    mge.set_default_device(device_save)
Example #7
0
def inference(model_file, device, records, result_queue):
    """Evaluate `records` on device 'xpu{device}' with set-NMS
    post-processing and push one result dict per image onto
    `result_queue` (keys: ID, height, width, dtboxes, gtboxes)."""

    def val_func():
        # forward pass using the values currently set on net.inputs
        pred_boxes = net(net.inputs)
        return pred_boxes

    mge.set_default_device('xpu{}'.format(device))
    net = network.Network()
    net = load_model(net, model_file)
    net.eval()

    for record in records:

        np.set_printoptions(precision=2, suppress=True)
        image, gt_boxes, im_info, ID = get_data(record, device)

        net.inputs["image"].set_value(image.astype(np.float32))
        net.inputs["im_info"].set_value(im_info)
        del record, image  # drop the raw image early to reduce memory

        pred_boxes = val_func().numpy()

        scale = im_info[0, 2]  # presumably the resize scale — verify get_data
        # map boxes back to original-image coordinates
        pred_bbox = pred_boxes[:, :4] / scale
        # combined score: product of the two score columns (4 and 5)
        scores = np.prod(pred_boxes[:,4:6], axis=1)
        # boxes come in pairs; each pair shares one ident for set-NMS
        n = scores.shape[0] // 2
        idents = np.tile(np.linspace(0, n-1, n).reshape(-1, 1),(1, 2)).reshape(-1, 1)
        pred_boxes = np.hstack([pred_bbox, scores.reshape(-1, 1), idents])
        flag = pred_boxes[:, 4] >= 0.05  # score threshold before NMS
        cls_dets = pred_boxes[flag]
        keep = emd_cpu_nms(cls_dets, 0.5)  # IoU threshold 0.5

        cls_dets = cls_dets[keep].astype(np.float64)
        # single-class evaluation: tag every detection with class 1
        pred_tags = np.ones([cls_dets.shape[0],]).astype(np.float64)
        gt_boxes = gt_boxes.astype(np.float64)

        dtboxes = boxes_dump(cls_dets[:, :5], pred_tags, False)
        gtboxes = boxes_dump(gt_boxes, None, True)

        # original image height/width stored in im_info columns 3 and 4
        height, width = int(im_info[0, 3]), int(im_info[0, 4])
        result_dict = dict(ID=ID, height=height, width=width,
                dtboxes = dtboxes, gtboxes = gtboxes)
        result_queue.put_nowait(result_dict)
Example #8
0
def test_tensor_serialization():
    """Round-trip Tensor/Parameter objects through mge.save / mge.load and
    verify values, types, devices (incl. map_location) and qparams."""
    with TemporaryFile() as f:
        data = np.random.randint(low=0, high=7, size=[233])
        a = Tensor(data, device="cpu0", dtype=np.int32)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        np.testing.assert_equal(a.numpy(), data)
        # bug fix: the loaded tensor's values were never checked before
        np.testing.assert_equal(b.numpy(), data)
        assert b.device.logical_name == "cpu0:0"
        assert b.dtype == np.int32

    with TemporaryFile() as f:
        # Parameter subclass must survive the round trip
        a = Parameter(np.random.random(size=(233, 2)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b, Parameter)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert type(b) is Tensor
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a string remaps to the given device
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f, map_location="cpux")
        assert type(b) is Tensor
        assert "cpu" in str(b.device)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a dict remaps specific devices (gpu0 -> cpu0)
        if mge.is_cuda_available():
            device_org = mge.get_default_device()
            mge.set_default_device("gpu0")
            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
            mge.save(a, f)
            f.seek(0)
            mge.set_default_device("cpux")
            b = mge.load(f, map_location={"gpu0": "cpu0"})
            assert type(b) is Tensor
            assert "cpu0" in str(b.device)
            np.testing.assert_equal(a.numpy(), b.numpy())
            mge.set_default_device(device_org)

    with TemporaryFile() as f:
        # quantization params attached to a tensor must round-trip too
        a = Tensor(0)
        a.qparams.scale = Tensor(1.0)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b.qparams.scale, Tensor)
        np.testing.assert_equal(b.qparams.scale.numpy(), 1.0)
Example #9
0
def test_tensor_serialization():
    """Round-trip Tensor/Parameter objects through pickle and
    mge.save/load, verifying values, subclass type and device remapping."""
    # NOTE(review): this helper is never used below — dead code, or the
    # repeated assert pairs were meant to call it; confirm before removal.
    def tensor_eq(a, b):
        assert a.dtype == b.dtype
        assert a.device == b.device
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        data = np.random.randint(low=0, high=7, size=[233])
        a = Tensor(data, device="xpux", dtype=np.int32)
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # Parameter subclass must survive plain pickling
        a = Parameter(np.random.random(size=(233, 2)).astype(np.float32))
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        assert isinstance(b, Parameter)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        assert type(b) is Tensor
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a string remaps to the given device
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f, map_location="cpux")
        assert type(b) is Tensor
        assert "cpu" in str(b.device)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        # map_location as a dict remaps specific devices (gpu0 -> cpu0)
        if mge.is_cuda_available():
            device_org = mge.get_default_device()
            mge.set_default_device("gpu0")
            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
            mge.save(a, f)
            f.seek(0)
            mge.set_default_device("cpux")
            b = mge.load(f, map_location={"gpu0": "cpu0"})
            assert type(b) is Tensor
            assert "cpu0" in str(b.device)
            np.testing.assert_equal(a.numpy(), b.numpy())
            mge.set_default_device(device_org)
Example #10
0
def main():
    """Train a small XOR classifier, report train/val loss, then trace
    and dump the inference graph (with and without an embedded testcase)."""

    # fall back to CPU when no CUDA device is present
    if not mge.is_cuda_available():
        mge.set_default_device("cpux")

    net = XORNet()
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    batch_size = 64
    train_dataset = minibatch_generator(batch_size)
    val_dataset = minibatch_generator(batch_size)

    def train_fun(data, label):
        # one SGD step: clear grads, forward, backward, update
        opt.clear_grad()
        with gm:
            pred = net(data)
            loss = F.loss.cross_entropy(pred, label)
            gm.backward(loss)
        opt.step()
        return pred, loss

    def val_fun(data, label):
        # forward-only evaluation (no grad recording)
        pred = net(data)
        loss = F.loss.cross_entropy(pred, label)
        return pred, loss

    @trace(symbolic=True, capture_as_const=True)
    def pred_fun(data):
        # traced inference path used later for dumping the model
        pred = net(data)
        pred_normalized = F.softmax(pred)
        return pred_normalized

    data = np.random.random((batch_size, 2)).astype(np.float32)
    label = np.zeros((batch_size,)).astype(np.int32)
    train_loss = []
    val_loss = []
    for step, minibatch in enumerate(train_dataset):
        if step > 1000:
            break
        data = mge.tensor(minibatch["data"])
        label = mge.tensor(minibatch["label"])
        net.train()
        _, loss = train_fun(data, label)
        train_loss.append((step, loss.numpy()))
        if step % 50 == 0:
            # NOTE(review): a validation minibatch is fetched here but the
            # TRAINING data/label tensors are evaluated below — confirm
            # whether minibatch["data"]/["label"] should be used instead.
            minibatch = next(val_dataset)
            net.eval()
            _, loss = val_fun(data, label)
            loss = loss.numpy()
            val_loss.append((step, loss))
            print("Step: {} loss={}".format(step, loss))
        # NOTE(review): train_fun already called opt.step(); this second
        # step re-applies the update without fresh grads — confirm intended.
        opt.step()

    # fixed probe points covering all four XOR quadrants
    test_data = np.array(
        [
            (0.5, 0.5),
            (0.3, 0.7),
            (0.1, 0.9),
            (-0.5, -0.5),
            (-0.3, -0.7),
            (-0.9, -0.1),
            (0.5, -0.5),
            (0.3, -0.7),
            (0.9, -0.1),
            (-0.5, 0.5),
            (-0.3, 0.7),
            (-0.1, 0.9),
        ]
    )

    # tracing only accepts tensor as input
    data = mge.tensor(test_data, dtype=np.float32)
    net.eval()
    out = pred_fun(data)
    pred_output = out.numpy()
    pred_label = np.argmax(pred_output, 1)

    print("Test data")
    print(test_data)

    with np.printoptions(precision=4, suppress=True):
        print("Predicated probability:")
        print(pred_output)

    print("Predicated label")
    print(pred_label)

    model_name = "xornet_deploy.mge"

    print("Dump model as {}".format(model_name))
    pred_fun.dump(model_name, arg_names=["data"])

    model_with_testcase_name = "xornet_with_testcase.mge"

    # second dump embeds a random input testcase for standalone validation
    print("Dump model with testcase as {}".format(model_with_testcase_name))
    pred_fun.dump(model_with_testcase_name, arg_names=["data"], input_data=["#rand(0.1, 0.8, 4, 2)"])
Example #11
0
def main():
    """Train a XOR classifier using externally-defined train/val/pred
    functions, then dump the traced model when tracing is enabled.

    NOTE(review): train_fun, val_fun and pred_fun are not defined in this
    function — presumably module-level traced functions; verify.
    """

    # fall back to CPU when no CUDA device is present
    if not mge.is_cuda_available():
        mge.set_default_device("cpux")

    net = XORNet()
    opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
    batch_size = 64
    train_dataset = minibatch_generator(batch_size)
    val_dataset = minibatch_generator(batch_size)

    # placeholder tensors refilled in-place each iteration via set_value
    data = mge.tensor()
    label = mge.tensor(np.zeros((batch_size, )), dtype=np.int32)
    train_loss = []
    val_loss = []
    for step, minibatch in enumerate(train_dataset):
        if step > 1000:
            break
        data.set_value(minibatch["data"])
        label.set_value(minibatch["label"])
        opt.zero_grad()
        _, loss = train_fun(data, label, net=net, opt=opt)
        train_loss.append((step, loss.numpy()))
        if step % 50 == 0:
            # NOTE(review): a validation minibatch is fetched but data/label
            # still hold the TRAINING batch when val_fun runs — confirm.
            minibatch = next(val_dataset)
            _, loss = val_fun(data, label, net=net)
            loss = loss.numpy()[0]  # assumes loss arrives as a 1-element array
            val_loss.append((step, loss))
            print("Step: {} loss={}".format(step, loss))
        opt.step()

    # fixed probe points covering all four XOR quadrants
    test_data = np.array([
        (0.5, 0.5),
        (0.3, 0.7),
        (0.1, 0.9),
        (-0.5, -0.5),
        (-0.3, -0.7),
        (-0.9, -0.1),
        (0.5, -0.5),
        (0.3, -0.7),
        (0.9, -0.1),
        (-0.5, 0.5),
        (-0.3, 0.7),
        (-0.1, 0.9),
    ])

    data.set_value(test_data)
    out = pred_fun(data, net=net)
    pred_output = out.numpy()
    pred_label = np.argmax(pred_output, 1)

    print("Test data")
    print(test_data)

    with np.printoptions(precision=4, suppress=True):
        print("Predicated probability:")
        print(pred_output)

    print("Predicated label")
    print(pred_label)

    model_name = "xornet_deploy.mge"

    # dumping requires the traced function to actually be in trace mode
    if pred_fun.enabled:
        print("Dump model as {}".format(model_name))
        pred_fun.dump(model_name, arg_names=["data"])
    else:
        print("pred_fun must be run with trace enabled in order to dump model")
Example #12
0
def main():
    """Classify one image with an optionally quantized model and print the
    top-5 ImageNet predictions.

    Supports three modes (normal / qat / quantized); quantized mode
    converts the model to int8 and runs on CPU.  With --dump, the traced
    inference graph and state dict are written to disk.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", type=str, default="resnet18")
    parser.add_argument("-c", "--checkpoint", type=str, default=None)
    parser.add_argument("-i", "--image", type=str, default=None)

    parser.add_argument(
        "-m",
        "--mode",
        default="quantized",
        type=str,
        choices=["normal", "qat", "quantized"],
        help="Quantization Mode\n"
        "normal: no quantization, using float32\n"
        "qat: quantization aware training, simulate int8\n"
        "quantized: convert mode to int8 quantized, inference only")
    parser.add_argument("--dump",
                        action="store_true",
                        help="Dump quantized model")
    args = parser.parse_args()

    # int8 inference runs on CPU
    if args.mode == "quantized":
        mge.set_default_device("cpux")

    net = models.__dict__[args.arch]()

    # both qat and quantized modes need the fake-quant structure first
    if args.mode != "normal":
        Q.quantize_qat(net, Q.ema_fakequant_qconfig)

    if args.checkpoint:
        logger.info("Load pretrained weights from %s", args.checkpoint)
        state = mge.load(args.checkpoint)
        state = state["state_dict"] if "state_dict" in state else state
        net.load_state_dict(state, strict=False)

    if args.mode == "quantized":
        Q.quantize(net)

    # fall back to the bundled sample image when none is given
    path = args.image if args.image is not None else "../assets/cat.jpg"
    image = cv2.imread(path, cv2.IMREAD_COLOR)

    preprocess = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.Normalize(mean=128),
        T.ToMode("CHW"),
    ])

    @jit.trace(symbolic=True)
    def infer_func(processed_img):
        net.eval()
        return F.softmax(net(processed_img))

    batch = preprocess.apply(image)[np.newaxis, :]

    # input dtype must match the execution mode
    if args.mode == "normal":
        batch = batch.astype("float32")
    elif args.mode == "quantized":
        batch = batch.astype("int8")

    probs = infer_func(batch)

    top_probs, classes = F.top_k(probs, k=5, descending=True)

    if args.dump:
        output_file = ".".join([args.arch, args.mode, "megengine"])
        logger.info("Dump to {}".format(output_file))
        infer_func.dump(output_file, arg_names=["data"])
        mge.save(net.state_dict(), output_file.replace("megengine", "pkl"))

    with open("../assets/imagenet_class_info.json") as fp:
        imagenet_class_index = json.load(fp)

    # report the five best classes with their probabilities
    ranked = zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1))
    for rank, (prob, classid) in enumerate(ranked):
        name = imagenet_class_index[str(classid)][1]
        print("{}: class = {:20s} with probability = {:4.1f} %".format(
            rank, name, 100 * prob))