Code example #1
def worker(rank, world_size, ngpus_per_node, args):
    if world_size > 1:
        # init process group
        dist.init_process_group(
            master_ip=args.dist_addr,
            port=args.dist_port,
            world_size=world_size,
            rank=rank,
            device=rank % ngpus_per_node,
            backend="nccl",
        )
        logging.info("init process group rank %d / %d", dist.get_rank(),
                     dist.get_world_size())

    # build dataset
    _, valid_dataloader = build_dataset(args)

    # build model
    model = resnet_model.__dict__[args.arch](pretrained=args.model is None)
    if args.model is not None:
        logging.info("load from checkpoint %s", args.model)
        checkpoint = megengine.load(args.model)
        if "state_dict" in checkpoint:
            state_dict = checkpoint["state_dict"]
        model.load_state_dict(state_dict)

    def valid_step(image, label):
        logits = model(image)
        loss = F.nn.cross_entropy(logits, label)
        acc1, acc5 = F.topk_accuracy(logits, label, topk=(1, 5))
        # calculate mean values
        if world_size > 1:
            loss = F.distributed.all_reduce_sum(loss) / world_size
            acc1 = F.distributed.all_reduce_sum(acc1) / world_size
            acc5 = F.distributed.all_reduce_sum(acc5) / world_size
        return loss, acc1, acc5

    model.eval()
    _, valid_acc1, valid_acc5 = valid(valid_step, valid_dataloader, args)
    logging.info(
        "Test Acc@1 %.3f, Acc@5 %.3f",
        valid_acc1,
        valid_acc5,
    )
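This worker is meant to run once per process. Below is a minimal launch sketch (not part of the original file), assuming the usual one-process-per-GPU setup and that `args`, `world_size` and `ngpus_per_node` have already been prepared by the caller:

import multiprocessing as mp

def launch(args, world_size, ngpus_per_node):
    # spawn one worker process per rank; "spawn" avoids CUDA-after-fork issues
    ctx = mp.get_context("spawn")
    procs = [
        ctx.Process(target=worker, args=(rank, world_size, ngpus_per_node, args))
        for rank in range(world_size)
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()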
Code example #2
File: test_save_load.py Project: mozre/MegEngine
def test_save_load():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    with gm:
        loss = net(data)
        gm.backward(loss)

    optim.step()

    model_name = "simple.pkl"

    mge.save(
        {
            "name": "simple",
            "state_dict": net.state_dict(),
            "opt_state": optim.state_dict(),
        },
        model_name,
    )

    # Load param to cpu
    checkpoint = mge.load(model_name, map_location="cpu0")
    device_save = mge.get_default_device()
    mge.set_default_device("cpu0")
    net = Simple()
    net.load_state_dict(checkpoint["state_dict"])
    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.load_state_dict(checkpoint["opt_state"])
    os.remove("simple.pkl")

    # re-attach the grad manager to the newly constructed net before running again
    gm = ad.GradManager().attach(net.parameters())
    with gm:
        loss = net(tensor([1.23]))
        gm.backward(loss)

    optim.step()
    # Restore device
    mge.set_default_device(device_save)
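The API pattern the test exercises, distilled into a minimal sketch (not from the original file; `net` and `optim` stand for a Module and an Optimizer as above): `mge.save` accepts an arbitrary picklable dict, and `mge.load` can remap the saved tensors to another device via `map_location`.

mge.save({"state_dict": net.state_dict(), "opt_state": optim.state_dict()}, "ckpt.pkl")
checkpoint = mge.load("ckpt.pkl", map_location="cpu0")  # load parameters onto CPU
net.load_state_dict(checkpoint["state_dict"])
optim.load_state_dict(checkpoint["opt_state"])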
Code example #3
File: test.py Project: lizhiyuanUSTC/Models
def worker(
    net_file, model_file, data_dir, worker_id, total_worker, result_queue,
):
    """
    :param net_file: network description file
    :param model_file: file of dumped weights
    :param data_dir: the dataset directory
    :param worker_id: the index of the worker
    :param total_worker: number of GPUs used for evaluation
    :param result_queue: queue used to collect the results
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id)

    @jit.trace(symbolic=True, opt_level=2)
    def val_func():
        pred = model(model.inputs)
        return pred

    sys.path.insert(0, os.path.dirname(net_file))
    current_network = importlib.import_module(os.path.basename(net_file).split(".")[0])
    model = current_network.Net(current_network.Cfg(), batch_size=1)
    model.eval()
    evaluator = DetEvaluator(model)
    model.load_state_dict(mge.load(model_file)["state_dict"])

    loader = build_dataloader(worker_id, total_worker, data_dir)
    for data_dict in loader:
        data, im_info = DetEvaluator.process_inputs(
            data_dict[0][0],
            model.cfg.test_image_short_size,
            model.cfg.test_image_max_size,
        )
        model.inputs["im_info"].set_value(im_info)
        model.inputs["image"].set_value(data.astype(np.float32))

        pred_res = evaluator.predict(val_func)
        result_queue.put_nowait(
            {
                "det_res": pred_res,
                "image_id": int(data_dict[1][2][0].split(".")[0].split("_")[-1]),
            }
        )
Code example #4
def inference(args):
    @jit.trace(symbolic=False)
    def val_func():
        pred_boxes = net(net.inputs)
        return pred_boxes

    # model path
    saveDir = config.model_dir
    evalDir = config.eval_dir
    misc_utils.ensure_dir(evalDir)
    model_file = os.path.join(saveDir,
                              'epoch_{}.pkl'.format(args.resume_weights))
    assert os.path.exists(model_file)
    # load model
    net = network.Network()
    net.eval()
    check_point = mge.load(model_file)
    net.load_state_dict(check_point['state_dict'])
    ori_image, image, im_info = get_data(args.img_path)
    net.inputs["image"].set_value(image.astype(np.float32))
    net.inputs["im_info"].set_value(im_info)
    pred_boxes = val_func().numpy()
    num_tag = config.num_classes - 1
    target_shape = (pred_boxes.shape[0] // num_tag, 1)
    pred_tags = (np.arange(num_tag) + 1).reshape(-1, 1)
    pred_tags = np.tile(pred_tags, target_shape).reshape(-1, 1)
    # nms
    from set_nms_utils import cpu_nms
    keep = pred_boxes[:, -1] > args.thresh
    pred_boxes = pred_boxes[keep]
    pred_tags = pred_tags[keep]
    keep = cpu_nms(pred_boxes, 0.5)
    pred_boxes = pred_boxes[keep]
    pred_tags = pred_tags[keep]

    pred_tags = pred_tags.astype(np.int32).flatten()
    pred_tags_name = np.array(config.class_names)[pred_tags]
    visual_utils.draw_boxes(ori_image, pred_boxes[:, :-1], pred_boxes[:, -1],
                            pred_tags_name)
    name = args.img_path.split('/')[-1].split('.')[-2]
    fpath = '/data/jupyter/{}.png'.format(name)
    cv2.imwrite(fpath, ori_image)
Code example #5
    def initDetector(self):
        '''
        Build the custom object detector.
        :return: (detector, short_size, max_size)
        '''
        current_network = import_from_file(self.detector_model)
        cfg = current_network.Cfg()
        cfg.backbone_pretrained = False
        model = current_network.Net(cfg)
        model.eval()

        state_dict = mge.load(self.detector_weight)
        if "state_dict" in state_dict:
            state_dict = state_dict["state_dict"]
        model.load_state_dict(state_dict)

        detector = DetEvaluator(model)
        short_size = model.cfg.test_image_short_size
        max_size = model.cfg.test_image_max_size
        return detector, short_size, max_size
Code example #6
def test_snpe_model_8f():
    model = "8w16f_backbone.tm"
    net = mge.load(model)
    print(net.flatten().graph)
    inp_dtype = dtype.quint8(16.0 / 128.0, 128)
    inps = get_qat_inputs_quint8(inp_dtype, num_inp=2, shape=(1, 16, 384, 512))
    tm_result = dict(zip(net.graph.outputs, net(*inps)))
    _test_convert_result(
        inps,
        net,
        tm_result,
        max_err,
        input_data_type="quint8",
        input_scales=inps[0].qparams.scale,
        input_zero_points=inps[0].qparams.zero_point,
        require_quantize=False,
        param_fake_quant=True,
        split_conv_relu=True,
        input_name=["inp", "prev"],
    )
Code example #7
File: test.py Project: zzh7982/Models
def worker(
    current_network, weight_file, dataset_dir, result_list,
    master_ip=None, port=None, world_size=None, rank=None
):
    if world_size > 1:
        dist.init_process_group(
            master_ip=master_ip,
            port=port,
            world_size=world_size,
            rank=rank,
            device=rank,
        )

    cfg = current_network.Cfg()
    cfg.backbone_pretrained = False
    model = current_network.Net(cfg)
    model.eval()

    state_dict = mge.load(weight_file)
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]
    model.load_state_dict(state_dict)

    def pred_func(data):
        pred = model(data)
        return pred

    test_loader = build_dataloader(dataset_dir, model.cfg)
    if dist.get_world_size() == 1:
        test_loader = tqdm(test_loader)

    for data in test_loader:
        img = data[0].squeeze()
        label = data[1].squeeze()
        im_info = data[2]
        pred = evaluate(pred_func, img, model.cfg)
        result = {"pred": pred, "gt": label, "name": im_info[2]}
        if dist.get_world_size() > 1:
            result_list.put_nowait(result)
        else:
            result_list.append(result)
Code example #8
def main():
    parser = make_parser()
    args = parser.parse_args()

    logger.info("Load Model : %s completed", args.weight_file)

    @jit.trace(symbolic=True)
    def val_func():
        pred = model(model.inputs)
        return pred

    sys.path.insert(0, os.path.dirname(args.file))
    current_network = importlib.import_module(
        os.path.basename(args.file).split(".")[0])
    cfg = current_network.Cfg()
    cfg.backbone_pretrained = False
    model = current_network.Net(cfg, batch_size=1)
    model.eval()
    state_dict = mge.load(args.weight_file)
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]
    model.load_state_dict(state_dict)
    logger.info("Load Model : %s completed", args.weight_file)

    evaluator = DetEvaluator(model)

    ori_img = cv2.imread(args.image)
    data, im_info = DetEvaluator.process_inputs(
        ori_img.copy(),
        model.cfg.test_image_short_size,
        model.cfg.test_image_max_size,
    )
    model.inputs["im_info"].set_value(im_info)
    model.inputs["image"].set_value(data.astype(np.float32))
    pred_res = evaluator.predict(val_func)
    res_img = DetEvaluator.vis_det(
        ori_img,
        pred_res,
        is_show_label=True,
        classes=COCO.class_names,
    )
    cv2.imwrite("results.jpg", res_img)
Code example #9
File: test.py Project: klsdjft/Models-1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c",
                        "--config",
                        type=str,
                        required=True,
                        help="configuration file")
    parser.add_argument(
        "-d",
        "--dataset_dir",
        type=str,
        default="/data/datasets/VOC2012",
    )
    parser.add_argument("-m",
                        "--model_path",
                        type=str,
                        default=None,
                        help="eval model file")
    args = parser.parse_args()

    cfg = import_config_from_file(args.config)

    test_loader, test_size = build_dataloader(args.dataset_dir, cfg)
    print("number of test images: %d" % (test_size))
    net = DeepLabV3Plus(class_num=cfg.NUM_CLASSES)
    model_dict = mge.load(args.model_path)

    net.load_state_dict(model_dict["state_dict"])
    print("load model %s" % (args.model_path))
    net.eval()

    result_list = []
    for sample_batched in tqdm(test_loader):
        img = sample_batched[0].squeeze()
        label = sample_batched[1].squeeze()
        im_info = sample_batched[2]
        pred = evaluate(net, img, cfg)
        result_list.append({"pred": pred, "gt": label, "name": im_info[2]})
    if cfg.VAL_SAVE:
        save_results(result_list, cfg.VAL_SAVE, cfg)
    compute_metric(result_list, cfg)
Code example #10
def test_optimizer_serialization():
    data, data_shape, label, label_shape = get_input()
    mlp = MLP()
    opt = SGD(mlp.parameters(), lr=0.01, momentum=0.9)
    slots = TensorDict()
    for param in mlp.parameters():
        slots[param] = np.zeros(param.shape).astype(np.float32)

    pred = mlp(data)
    loss = F.square_loss(pred, label.reshape(-1, 1))
    opt.zero_grad()
    opt.backward(loss)
    opt.step()
    for param in mlp.parameters():
        slot = slots[param]
        slot *= 0.9
        slot -= param.grad.numpy() * 0.01

    with BytesIO() as fout:
        save(opt.state_dict(), fout)
        fout.seek(0)
        state_dict = load(fout)
        opt1 = SGD(mlp.parameters(), lr=0.02, momentum=0.8)
        opt1.load_state_dict(state_dict)

        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt1.zero_grad()
        opt1.backward(loss)
        orig_params = TensorDict()
        for param in mlp.parameters():
            orig_params[param] = np.copy(param.numpy())
        opt1.step()
        for param in mlp.parameters():
            orig_param = orig_params[param]
            slot = slots[param]
            slot *= 0.9
            slot -= param.grad.numpy() * 0.01
            assertTensorClose(param.numpy(), orig_param + slot)
Code example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a",
                        "--arch",
                        default="shufflenet_v1_x0_5_g3_int8",
                        type=str)
    parser.add_argument("-m", "--model", default=None, type=str)
    args = parser.parse_args()

    model = getattr(M, args.arch)(pretrained=(args.model is None))
    quantize_qat(model, qconfig=ema_fakequant_qconfig)

    if args.model:
        state_dict = mge.load(args.model)
        model.load_state_dict(state_dict, strict=False)

    quantize(model)

    data = mge.tensor(np.zeros((10, 3, 224, 224), dtype="float32"))
    infer_func(data, model=model)
    infer_func.dump(args.arch, arg_names=["data"], optimize_for_inference=True)
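`infer_func` is not shown in this snippet. A hedged sketch of what it presumably looks like, assuming the standard MegEngine trace-and-dump workflow (the exact definition in the original script may differ):

from megengine import jit

@jit.trace(capture_as_const=True)
def infer_func(data, model=None):
    # run the (quantized) model on the input; tracing with capture_as_const=True
    # is what allows infer_func.dump(...) afterwards
    model.eval()
    return model(data)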
Code example #12
File: test_module.py Project: ym593277523/MegEngine
def test_state_dict():
    data_shape = (2, 28)
    data = tensor(np.random.random(data_shape))
    mlp = MLP()
    pred0 = mlp(data)

    with BytesIO() as fout:
        mge.save(mlp.state_dict(), fout)
        fout.seek(0)
        state_dict = mge.load(fout)
        state_dict["extra"] = None
        mlp1 = MLP()
        mlp1.load_state_dict(state_dict, strict=False)
        pred1 = mlp1(data)
        np.testing.assert_allclose(pred0.numpy(), pred1.numpy(), atol=5e-6)
        with pytest.raises(KeyError):
            mlp1.load_state_dict(state_dict)
        del state_dict["extra"]
        del state_dict["dense0.bias"]
        with pytest.raises(KeyError):
            mlp1.load_state_dict(state_dict)
Code example #13
def update_model(model_path):
    """
    Update the dumped model with test cases for new reference values.

    The model with pre-trained weights is trained for one iteration with the test data attached.
    The loss and the updated net state dict are dumped.

    .. code-block:: python

        from test_dp_correctness import update_model
        update_model('mnist_model_with_test.mge') # for gpu
        update_model('mnist_model_with_test_cpu.mge') # for cpu

    """
    net = MnistNet(has_bn=True)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    gm = ad.GradManager().attach(
        net.parameters(),
        callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)])

    data = Tensor(checkpoint["data"], dtype=np.float32)
    label = Tensor(checkpoint["label"], dtype=np.int32)

    opt.clear_grad()
    loss = train(data, label, net=net, opt=opt)
    opt.step()

    xpu_name = get_xpu_name()

    checkpoint.update({
        "net_updated": net.state_dict(),
        "loss": loss.numpy(),
        "xpu": xpu_name
    })
    mge.serialization.save(checkpoint, model_path)
Code example #14
def inference(model_file, device, records, result_queue):
    @jit.trace(symbolic=False)
    def val_func():
        pred_boxes = net(net.inputs)
        return pred_boxes

    net = network.Network()
    net.eval()
    check_point = mge.load(model_file)
    net.load_state_dict(check_point['state_dict'])
    for record in records:
        np.set_printoptions(precision=2, suppress=True)
        net.eval()
        image, gt_boxes, im_info, ID = get_data(record, device)
        net.inputs["image"].set_value(image.astype(np.float32))
        net.inputs["im_info"].set_value(im_info)
        pred_boxes = val_func().numpy()
        # nms
        if if_set_nms:
            from set_nms_utils import set_cpu_nms
            n = pred_boxes.shape[0] // 2
            idents = np.tile(np.arange(n)[:, None], (1, 2)).reshape(-1, 1)
            pred_boxes = np.hstack((pred_boxes, idents))
            keep = pred_boxes[:, -2] > 0.05
            pred_boxes = pred_boxes[keep]
            keep = set_cpu_nms(pred_boxes, 0.5)
            pred_boxes = pred_boxes[keep][:, :-1]
        else:
            from set_nms_utils import cpu_nms
            keep = pred_boxes[:, -1] > 0.05
            pred_boxes = pred_boxes[keep]
            keep = cpu_nms(pred_boxes, 0.5)
            pred_boxes = pred_boxes[keep]
        result_dict = dict(ID=ID,
                           height=int(im_info[0, -2]),
                           width=int(im_info[0, -1]),
                           dtboxes=boxes_dump(pred_boxes, False),
                           gtboxes=boxes_dump(gt_boxes, True))
        result_queue.put_nowait(result_dict)
Code example #15
File: inference.py Project: klsdjft/Models-1
def main():

    parser = make_parser()
    args = parser.parse_args()

    detector = getattr(Det, args.detector)(pretrained=True)
    detector.eval()
    logger.info("Load Model : %s completed", args.detector)

    keypoint_model = getattr(M, args.arch)()
    keypoint_model.load_state_dict(mge.load(args.model)["state_dict"])
    keypoint_model.eval()
    logger.info("Load Model : %s completed", args.arch)

    @jit.trace(symbolic=True)
    def det_func():
        pred = detector(detector.inputs)
        return pred

    @jit.trace(symbolic=True)
    def keypoint_func():
        pred = keypoint_model.predict()
        return pred

    evaluator = KeypointEvaluator(detector, det_func, keypoint_model,
                                  keypoint_func)

    image = cv2.imread(args.image)

    logger.info("Detecting Humans")
    person_boxes = evaluator.detect_persons(image)

    logger.info("Detecting Keypoints")
    all_keypoints = evaluator.predict(image, person_boxes)

    logger.info("Visualizing")
    canvas = evaluator.vis_skeletons(image, all_keypoints)
    cv2.imwrite("vis_skeleton.jpg", canvas)
Code example #16
File: test.py Project: FateScript/Models-1
def worker(
    arch,
    model_file,
    data_root,
    ann_file,
    worker_id,
    total_worker,
    result_queue,
):
    """
    :param net_file: network description file
    :param model_file: file of dump weights
    :param data_dir: the dataset directory
    :param worker_id: the index of the worker
    :param total_worker: number of gpu for evaluation
    :param result_queue: processing queue
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id)

    @jit.trace(symbolic=True, opt_level=2)
    def val_func():
        pred = model.predict()
        return pred

    model = getattr(M, arch)()
    model.eval()
    model.load_state_dict(mge.load(model_file)["state_dict"])

    loader = build_dataloader(worker_id, total_worker, data_root, ann_file)
    for data_dict in loader:
        img, bbox, info = data_dict
        fliped_img = img[:, :, :, ::-1] - np.zeros_like(img)
        data = np.concatenate([img, fliped_img], 0)
        model.inputs["image"].set_value(
            np.ascontiguousarray(data).astype(np.float32))
        instance = find_results(val_func, img, bbox[0, 0], info)

        result_queue.put_nowait(instance)
Code example #17
def run_test(model_path, use_jit, use_symbolic):
    """
    Load the model with test cases and run the training for one iteration.
    The loss and updated weights are compared with reference values to verify correctness.

    Dump a new file with updated results by calling update_model
    if you think the test fails due to numerical rounding errors instead of bugs.
    Please think twice before you do so.

    """
    net = MnistNet(has_bn=True)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    data = tensor(dtype=np.float32)
    label = tensor(dtype=np.int32)
    data.set_value(checkpoint["data"])
    label.set_value(checkpoint["label"])

    max_err = 1e-5

    train_func = train
    if use_jit:
        train_func = jit.trace(train_func, symbolic=use_symbolic)

    opt.zero_grad()
    loss = train_func(data, label, net=net, opt=opt)
    opt.step()

    assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err)

    for param, param_ref in zip(net.state_dict().items(),
                                checkpoint["net_updated"].items()):
        assert param[0] == param_ref[0]
        assertTensorClose(param[1], param_ref[1], max_err=max_err)
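For reference, a hedged example invocation; the checkpoint name follows the `update_model` docstrings elsewhere in this listing and should be replaced with your own dump:

run_test("mnist_model_with_test.mge", use_jit=True, use_symbolic=True)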
Code example #18
def update_model(model_path):
    """
    Update the dumped model with test cases for new reference values.

    The model with pre-trained weights is trained for one iteration with the test data attached.
    The loss and the updated net state dict are dumped.

    .. code-block:: python

        from test_correctness import update_model
        update_model('mnist_model_with_test.mge') # for gpu
        update_model('mnist_model_with_test_cpu.mge') # for cpu

    """
    net = MnistNet(has_bn=True)
    checkpoint = mge.load(model_path)
    net.load_state_dict(checkpoint["net_init"])
    lr = checkpoint["sgd_lr"]
    opt = SGD(net.parameters(), lr=lr)

    data = tensor(dtype=np.float32)
    label = tensor(dtype=np.int32)
    data.set_value(checkpoint["data"])
    label.set_value(checkpoint["label"])

    opt.zero_grad()
    loss = train(data, label, net=net, opt=opt)
    opt.step()

    xpu_name = get_xpu_name()

    checkpoint.update({
        "net_updated": net.state_dict(),
        "loss": loss.numpy(),
        "xpu": xpu_name
    })
    mge.save(checkpoint, model_path)
Code example #19
def inference(args):
    @jit.trace(symbolic=False)
    def val_func():
        pred_boxes = net(net.inputs)
        return pred_boxes
    # model path
    saveDir = config.model_dir
    evalDir = config.eval_dir
    misc_utils.ensure_dir(evalDir)
    model_file = os.path.join(saveDir,
            'epoch_{}.pkl'.format(args.resume_weights))
    assert os.path.exists(model_file)
    # load model
    net = network.Network()
    net.eval()
    check_point = mge.load(model_file)
    net.load_state_dict(check_point['state_dict'])
    image, im_info = get_data(args.img_path)
    net.inputs["image"].set_value(image.astype(np.float32))
    net.inputs["im_info"].set_value(im_info)
    pred_boxes = val_func().numpy()
    num_tag = config.num_classes - 1
    target_shape = (pred_boxes.shape[0]//num_tag, 1)
    pred_tags = (np.arange(num_tag) + 1).reshape(-1,1)
    pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1)
    # nms
    from set_nms_utils import cpu_nms
    keep = pred_boxes[:, -1] > 0.05
    pred_boxes = pred_boxes[keep]
    pred_tags = pred_tags[keep]
    keep = cpu_nms(pred_boxes, 0.5)
    pred_boxes = pred_boxes[keep]
    pred_tags = pred_tags[keep].flatten()
    result_dict = dict(height=int(im_info[0, -2]), width=int(im_info[0, -1]),
        dtboxes=boxes_dump(pred_boxes, pred_tags))
    name = args.img_path.split('/')[-1].split('.')[-2]
    misc_utils.save_json_lines([result_dict], '{}.json'.format(name))
Code example #20
File: train.py Project: zzh7982/Models
def worker(args):
    current_network = import_from_file(args.file)

    model = current_network.Net(current_network.Cfg())
    model.train()

    if dist.get_rank() == 0:
        logger.info(get_config_info(model.cfg))
        logger.info(repr(model))

    backbone_params = []
    head_params = []
    for name, param in model.named_parameters():
        if "backbone" in name:
            backbone_params.append(param)
        else:
            head_params.append(param)

    opt = SGD(
        [
            {
                "params": backbone_params,
                "lr": model.cfg.learning_rate * 0.1
            },
            {
                "params": head_params
            },
        ],
        lr=model.cfg.learning_rate,
        momentum=model.cfg.momentum,
        weight_decay=model.cfg.weight_decay * dist.get_world_size(),
    )

    gm = GradManager()
    if dist.get_world_size() > 1:
        gm.attach(model.parameters(),
                  callbacks=[dist.make_allreduce_cb("SUM", dist.WORLD)])
    else:
        gm.attach(model.parameters())

    cur_epoch = 0
    if args.resume is not None:
        pretrained = mge.load(args.resume)
        cur_epoch = pretrained["epoch"] + 1
        model.load_state_dict(pretrained["state_dict"])
        opt.load_state_dict(pretrained["opt"])
        if dist.get_rank() == 0:
            logger.info("load success: epoch %d", cur_epoch)

    if dist.get_world_size() > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)  # sync parameters

    if dist.get_rank() == 0:
        logger.info("Prepare dataset")
    train_loader = iter(
        build_dataloader(model.cfg.batch_size, args.dataset_dir, model.cfg))

    for epoch in range(cur_epoch, model.cfg.max_epoch):
        train_one_epoch(model, train_loader, opt, gm, epoch)
        if dist.get_rank() == 0:
            save_path = "log-of-{}/epoch_{}.pkl".format(
                os.path.basename(args.file).split(".")[0], epoch)
            mge.save(
                {
                    "epoch": epoch,
                    "state_dict": model.state_dict(),
                    "opt": opt.state_dict()
                }, save_path)
            logger.info("dump weights to %s", save_path)
Code example #21
def worker(rank, world_size, args):
    if world_size > 1:
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )
        logger.info("Init process group for gpu%d done", rank)

    sys.path.insert(0, os.path.dirname(args.file))
    current_network = importlib.import_module(
        os.path.basename(args.file).split(".")[0])

    model = current_network.Net(current_network.Cfg(),
                                batch_size=args.batch_size)
    params = model.parameters(requires_grad=True)
    model.train()

    if rank == 0:
        logger.info(get_config_info(model.cfg))
    opt = optim.SGD(
        params,
        lr=model.cfg.basic_lr * world_size * model.batch_size,
        momentum=model.cfg.momentum,
        weight_decay=model.cfg.weight_decay,
    )

    if args.weight_file is not None:
        weights = mge.load(args.weight_file)
        model.backbone.bottom_up.load_state_dict(weights)

    if rank == 0:
        logger.info("Prepare dataset")
    train_loader = iter(
        build_dataloader(model.batch_size, args.dataset_dir, model.cfg))

    for epoch_id in range(model.cfg.max_epoch):
        for param_group in opt.param_groups:
            param_group["lr"] = (model.cfg.basic_lr * world_size *
                                 model.batch_size *
                                 (model.cfg.lr_decay_rate**bisect.bisect_right(
                                     model.cfg.lr_decay_stages, epoch_id)))

        tot_steps = model.cfg.nr_images_epoch // (model.batch_size *
                                                  world_size)
        train_one_epoch(
            model,
            train_loader,
            opt,
            tot_steps,
            rank,
            epoch_id,
            world_size,
            args.enable_sublinear,
        )
        if rank == 0:
            save_path = "log-of-{}/epoch_{}.pkl".format(
                os.path.basename(args.file).split(".")[0], epoch_id)
            mge.save(
                {
                    "epoch": epoch_id,
                    "state_dict": model.state_dict()
                },
                save_path,
            )
            logger.info("dump weights to %s", save_path)
Code example #22
def make_data_given_desc(args, inputs, shape0_multiply=1):
    if args.load_input_data:
        logger.info("load data from {}".format(args.load_input_data))
        data = mge.load(args.load_input_data)
        data_names = [inp.name for inp in inputs]

        if isinstance(data, np.ndarray):
            assert len(data_names) == 1, (
                "data is given as a single numpy array, so there should be "
                "exactly one input in the graph; got: {}".format(data_names)
            )
            data = {data_names[0]: data}

        assert isinstance(data, dict)
        for v in data.values():
            assert isinstance(
                v, np.ndarray
            ), "data should provide ndarray; got {} instead".format(v)

        if args.batchsize:
            for k, v in list(data.items()):
                assert (
                    args.batchsize % v.shape[0] == 0
                ), "batch size of provided data ({}) must divide the requested batch size ({})".format(
                    v.shape[0], args.batchsize
                )
                data[k] = np.repeat(v, args.batchsize // v.shape[0], axis=0)
        return data

    def iter_inpdesc(desc):
        if not desc:
            return
        for pair in desc.split(";"):
            name, value = pair.split(":")
            if name not in data_shapes:
                logger.warning("rng name {} not in data provider".format(name))
            yield name, value

    rng = np.random.RandomState(args.seed)

    data_shapes = OrderedDict((inp.name, list(inp.shape)) for inp in inputs)
    data_dtypes = OrderedDict((inp.name, inp.dtype) for inp in inputs)

    for name, shape in iter_inpdesc(args.input_desc):
        data_shapes[name] = list(map(int, shape.split(",")))

    if args.batchsize:
        for i in data_shapes.values():
            i[0] = args.batchsize

    data_rngs = dict(iter_inpdesc(args.rng))

    result = OrderedDict()
    for name, shape in data_shapes.items():
        shape[0] *= shape0_multiply
        rng_expr = data_rngs.get(name)
        if rng_expr:
            value = eval("rng.{}".format(rng_expr).format(shape), {"rng": rng})
        else:
            value = rng.uniform(size=shape)

        value = np.ascontiguousarray(value, dtype=data_dtypes[name])
        assert value.shape == tuple(shape)
        result[name] = value

    return result
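For reference, a hedged illustration of the descriptor strings this helper parses (the names and shapes below are made up): `args.input_desc` carries `name:shape` pairs separated by `;` with comma-separated dimensions, and `args.rng` carries `name:expr` pairs, where `expr` is formatted with the input shape and evaluated against a `numpy.random.RandomState`.

input_desc = "data:16,3,224,224;label:16"
rng_desc = "label:randint(0, 1000, size={})"  # evaluated as rng.randint(0, 1000, size=[16])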
Code example #23
def worker(rank, world_size, args):
    # pylint: disable=too-many-statements
    mge.set_log_file(os.path.join(args.save, args.arch, "log.txt"))

    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(
            rank, world_size))
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )

    save_dir = os.path.join(args.save, args.arch)

    model = getattr(M, args.arch)()
    step_start = 0
    if args.model:
        logger.info("load weights from %s", args.model)
        model.load_state_dict(mge.load(args.model))
        step_start = int(args.model.split("-")[1].split(".")[0])

    optimizer = optim.SGD(
        get_parameters(model),
        lr=args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # Define train and valid graph
    @jit.trace(symbolic=True)
    def train_func(image, label):
        model.train()
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        optimizer.backward(loss)  # compute gradients
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.all_reduce_sum(loss,
                                       "train_loss") / dist.get_world_size()
            acc1 = dist.all_reduce_sum(acc1,
                                       "train_acc1") / dist.get_world_size()
            acc5 = dist.all_reduce_sum(acc5,
                                       "train_acc5") / dist.get_world_size()
        return loss, acc1, acc5

    @jit.trace(symbolic=True)
    def valid_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.all_reduce_sum(loss,
                                       "valid_loss") / dist.get_world_size()
            acc1 = dist.all_reduce_sum(acc1,
                                       "valid_acc1") / dist.get_world_size()
            acc5 = dist.all_reduce_sum(acc5,
                                       "valid_acc5") / dist.get_world_size()
        return loss, acc1, acc5

    # Build train and valid datasets
    logger.info("preparing dataset..")
    train_dataset = data.dataset.ImageNet(args.data, train=True)
    train_sampler = data.Infinite(
        data.RandomSampler(train_dataset,
                           batch_size=args.batch_size,
                           drop_last=True))
    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose([
            T.RandomResizedCrop(224),
            T.RandomHorizontalFlip(),
            T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            T.ToMode("CHW"),
        ]),
        num_workers=args.workers,
    )

    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(valid_dataset,
                                           batch_size=100,
                                           drop_last=False)
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=T.Compose([
            T.Resize(256),
            T.CenterCrop(224),
            T.ToMode("CHW"),
        ]),
        num_workers=args.workers,
    )

    # Start training
    objs = AverageMeter("Loss")
    top1 = AverageMeter("Acc@1")
    top5 = AverageMeter("Acc@5")
    total_time = AverageMeter("Time")

    t = time.time()
    for step in range(step_start, args.steps + 1):
        # Linear learning rate decay
        decay = 1 - float(step) / args.steps if step < args.steps else 0
        for param_group in optimizer.param_groups:
            param_group["lr"] = args.learning_rate * decay

        image, label = next(train_queue)
        time_data = time.time() - t
        image = image.astype("float32")
        label = label.astype("int32")

        n = image.shape[0]

        optimizer.zero_grad()
        loss, acc1, acc5 = train_func(image, label)
        optimizer.step()

        top1.update(100 * acc1.numpy()[0], n)
        top5.update(100 * acc5.numpy()[0], n)
        objs.update(loss.numpy()[0], n)
        total_time.update(time.time() - t)
        time_iter = time.time() - t
        t = time.time()
        if step % args.report_freq == 0 and rank == 0:
            logger.info(
                "TRAIN Iter %06d: lr = %f,\tloss = %f,\twc_loss = 1,\tTop-1 err = %f,\tTop-5 err = %f,\tdata_time = %f,\ttrain_time = %f,\tremain_hours=%f",
                step,
                args.learning_rate * decay,
                float(objs.__str__().split()[1]),
                1 - float(top1.__str__().split()[1]) / 100,
                1 - float(top5.__str__().split()[1]) / 100,
                time_data,
                time_iter - time_data,
                time_iter * (args.steps - step) / 3600,
            )
            objs.reset()
            top1.reset()
            top5.reset()
            total_time.reset()
        if step % 10000 == 0 and rank == 0 and step != 0:
            logger.info("SAVING %06d", step)
            mge.save(
                model.state_dict(),
                os.path.join(save_dir, "checkpoint-{:06d}.pkl".format(step)),
            )
        if step % 50000 == 0 and step != 0:
            _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
            logger.info(
                "TEST Iter %06d: loss = %f,\tTop-1 err = %f,\tTop-5 err = %f",
                step, _, 1 - valid_acc / 100, 1 - valid_acc5 / 100)

    mge.save(model.state_dict(),
             os.path.join(save_dir, "checkpoint-{:06d}.pkl".format(step)))
    _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
    logger.info("TEST Iter %06d: loss=%f,\tTop-1 err = %f,\tTop-5 err = %f",
                step, _, 1 - valid_acc / 100, 1 - valid_acc5 / 100)
Code example #24
def eval(data_queue, net):
    # NOTE: the original snippet is truncated at the top; the function header and the
    # accumulator initialisation here are reconstructed from the code below.
    # `net_eval` is a traced evaluation function defined elsewhere in the original file.
    sum_loss, sum_accuracy, total_examples, total_steps = 0, 0, 0, 0
    for batch in data_queue:
        input_ids, segment_ids, input_mask, label_ids = tuple(
            mge.tensor(t) for t in batch
        )
        batch_size = input_ids.shape[0]
        loss, logits, label_ids = net_eval(
            input_ids, segment_ids, input_mask, label_ids, net=net
        )
        sum_loss += loss.mean().item()
        sum_accuracy += accuracy(logits, label_ids)
        total_examples += batch_size
        total_steps += 1

    result = {
        "eval_loss": sum_loss / total_steps,
        "eval_accuracy": sum_accuracy / total_examples,
    }

    logger.info("***** Eval results *****")
    for key in sorted(result.keys()):
        logger.info("%s = %s", key, str(result[key]))


if __name__ == "__main__":
    bert, config, vocab_file = create_hub_bert(args.pretrained_bert, pretrained=False)
    args.vocab_file = vocab_file
    model = BertForSequenceClassification(config, num_labels=2, bert=bert)
    mrpc_dataset = MRPCDataset(args)
    model.load_state_dict(mge.load(args.load_model_path))
    eval_dataloader, eval_size = mrpc_dataset.get_eval_dataloader()
    eval(eval_dataloader, model)
Code example #25
import megengine as mge
from model import ReverseString
from dataset import get_dataloader, make_string_from_tensor, MAXLEN

model = ReverseString()
model.load_state_dict(mge.load('transformer.60.mge'))

model.eval()

test_data = get_dataloader()
data = mge.tensor()
for idx, (batch_data, batch_label, batch_mask) in enumerate(test_data):
    data.set_value(batch_data)
    prob = model(data)
    prob = prob.reshape(-1, MAXLEN + 1, 28)
    predicted = prob.numpy().argmax(axis=2)
    inp_str = make_string_from_tensor(batch_data)
    pred_str = make_string_from_tensor(predicted)
    gt_str = make_string_from_tensor(batch_label)

    for i in range(len(inp_str)):
        print(inp_str[i], gt_str[i], pred_str[i], batch_mask[i])
Code example #26
File: train.py Project: zzh7982/Models
def worker(master_ip, port, rank, world_size, args):
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))
        dist.init_process_group(
            master_ip=master_ip,
            port=port,
            world_size=world_size,
            rank=rank,
            device=rank,
        )

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)

    model = getattr(kpm, args.arch)()
    model.train()
    start_epoch = 0
    if args.resume is not None:
        file = mge.load(args.resume)
        model.load_state_dict(file["state_dict"])
        start_epoch = file["epoch"]

    optimizer = optim.Adam(
        model.parameters(), lr=cfg.initial_lr, weight_decay=cfg.weight_decay
    )

    gm = GradManager()
    if dist.get_world_size() > 1:
        gm.attach(
            model.parameters(), callbacks=[dist.make_allreduce_cb("SUM", dist.WORLD)],
        )
    else:
        gm.attach(model.parameters())

    if dist.get_world_size() > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)  # sync parameters

    # Build train datasets
    logger.info("preparing dataset..")
    ann_file = os.path.join(
        cfg.data_root, "annotations", "person_keypoints_train2017.json"
    )
    train_dataset = COCOJoints(
        cfg.data_root,
        ann_file,
        image_set="train2017",
        order=("image", "keypoints", "boxes", "info"),
    )
    logger.info("Num of Samples: {}".format(len(train_dataset)))
    train_sampler = data.RandomSampler(
        train_dataset, batch_size=cfg.batch_size, drop_last=True
    )

    transforms = [
        T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
        RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order)
    ]

    if cfg.half_body_transform:
        transforms.append(
            HalfBodyTransform(
                cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body
            )
        )
    if cfg.extend_boxes:
        transforms.append(
            ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0])
        )

    transforms += [
        RandomBoxAffine(
            degrees=cfg.rotate_range,
            scale=cfg.scale_range,
            output_shape=cfg.input_shape,
            rotate_prob=cfg.rotation_prob,
            scale_prob=cfg.scale_prob,
        )
    ]
    transforms += [T.ToMode()]

    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        num_workers=args.workers,
        transform=T.Compose(transforms=transforms, order=train_dataset.order,),
        collator=HeatmapCollator(
            cfg.input_shape,
            cfg.output_shape,
            cfg.keypoint_num,
            cfg.heat_thr,
            cfg.heat_kernels if args.multi_scale_supervision else cfg.heat_kernels[-1:],
            cfg.heat_range,
        ),
    )

    # Start training
    for epoch in range(start_epoch, cfg.epochs):
        loss = train(model, train_queue, optimizer, gm, epoch=epoch)
        logger.info("Epoch %d Train %.6f ", epoch, loss)

        if rank == 0 and epoch % cfg.save_freq == 0:  # save checkpoint
            mge.save(
                {"epoch": epoch + 1, "state_dict": model.state_dict()},
                os.path.join(save_dir, "epoch_{}.pkl".format(epoch)),
            )
Code example #27
File: finetune.py Project: zwxlib/Models
def worker(rank, world_size, args):
    # pylint: disable=too-many-statements

    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(
            rank, world_size))
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )

    save_dir = os.path.join(args.save, args.arch + "." + args.mode)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    model = models.__dict__[args.arch]()
    cfg = config.get_finetune_config(args.arch)

    cfg.LEARNING_RATE *= world_size  # scale learning rate in distributed training
    total_batch_size = cfg.BATCH_SIZE * world_size
    steps_per_epoch = 1280000 // total_batch_size
    total_steps = steps_per_epoch * cfg.EPOCHS

    if args.mode != "normal":
        Q.quantize_qat(model, Q.ema_fakequant_qconfig)

    if args.checkpoint:
        logger.info("Load pretrained weights from %s", args.checkpoint)
        ckpt = mge.load(args.checkpoint)
        ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
        model.load_state_dict(ckpt, strict=False)

    if args.mode == "quantized":
        raise ValueError("mode = quantized only used during inference")
        Q.quantize(model)

    optimizer = optim.SGD(
        get_parameters(model, cfg),
        lr=cfg.LEARNING_RATE,
        momentum=cfg.MOMENTUM,
    )

    # Define train and valid graph
    @jit.trace(symbolic=True)
    def train_func(image, label):
        model.train()
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        optimizer.backward(loss)  # compute gradients
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.all_reduce_sum(loss,
                                       "train_loss") / dist.get_world_size()
            acc1 = dist.all_reduce_sum(acc1,
                                       "train_acc1") / dist.get_world_size()
            acc5 = dist.all_reduce_sum(acc5,
                                       "train_acc5") / dist.get_world_size()
        return loss, acc1, acc5

    @jit.trace(symbolic=True)
    def valid_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.all_reduce_sum(loss,
                                       "valid_loss") / dist.get_world_size()
            acc1 = dist.all_reduce_sum(acc1,
                                       "valid_acc1") / dist.get_world_size()
            acc5 = dist.all_reduce_sum(acc5,
                                       "valid_acc5") / dist.get_world_size()
        return loss, acc1, acc5

    # Build train and valid datasets
    logger.info("preparing dataset..")
    train_dataset = data.dataset.ImageNet(args.data, train=True)
    train_sampler = data.Infinite(
        data.RandomSampler(train_dataset,
                           batch_size=cfg.BATCH_SIZE,
                           drop_last=True))
    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose([
            T.RandomResizedCrop(224),
            T.RandomHorizontalFlip(),
            cfg.COLOR_JITTOR,
            T.Normalize(mean=128),
            T.ToMode("CHW"),
        ]),
        num_workers=args.workers,
    )
    train_queue = iter(train_queue)
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(valid_dataset,
                                           batch_size=100,
                                           drop_last=False)
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=T.Compose([
            T.Resize(256),
            T.CenterCrop(224),
            T.Normalize(mean=128),
            T.ToMode("CHW"),
        ]),
        num_workers=args.workers,
    )

    def adjust_learning_rate(step, epoch):
        learning_rate = cfg.LEARNING_RATE
        if cfg.SCHEDULER == "Linear":
            learning_rate *= 1 - float(step) / total_steps
        elif cfg.SCHEDULER == "Multistep":
            learning_rate *= cfg.SCHEDULER_GAMMA**bisect.bisect_right(
                cfg.SCHEDULER_STEPS, epoch)
        else:
            raise ValueError(cfg.SCHEDULER)
        for param_group in optimizer.param_groups:
            param_group["lr"] = learning_rate
        return learning_rate

    # Start training
    objs = AverageMeter("Loss")
    top1 = AverageMeter("Acc@1")
    top5 = AverageMeter("Acc@5")
    total_time = AverageMeter("Time")

    t = time.time()
    for step in range(0, total_steps):
        # Linear learning rate decay
        epoch = step // steps_per_epoch
        learning_rate = adjust_learning_rate(step, epoch)

        image, label = next(train_queue)
        image = image.astype("float32")
        label = label.astype("int32")

        n = image.shape[0]

        optimizer.zero_grad()
        loss, acc1, acc5 = train_func(image, label)
        optimizer.step()

        top1.update(100 * acc1.numpy()[0], n)
        top5.update(100 * acc5.numpy()[0], n)
        objs.update(loss.numpy()[0], n)
        total_time.update(time.time() - t)
        t = time.time()
        if step % args.report_freq == 0 and rank == 0:
            logger.info("TRAIN e%d %06d %f %s %s %s %s", epoch, step,
                        learning_rate, objs, top1, top5, total_time)
            objs.reset()
            top1.reset()
            top5.reset()
            total_time.reset()
        if step % 10000 == 0 and rank == 0:
            logger.info("SAVING %06d", step)
            mge.save(
                {
                    "step": step,
                    "state_dict": model.state_dict()
                },
                os.path.join(save_dir, "checkpoint.pkl"),
            )
        if step % 10000 == 0 and step != 0:
            _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
            logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)

    mge.save({
        "step": step,
        "state_dict": model.state_dict()
    }, os.path.join(save_dir, "checkpoint-final.pkl"))
    _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
    logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)
Code example #28
File: calibration.py Project: zihan987/Models
def worker(world_size, args):
    # pylint: disable=too-many-statements

    rank = dist.get_rank()
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))

    save_dir = os.path.join(args.save, args.arch + "." + "calibration")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    model = models.__dict__[args.arch]()

    # load calibration model
    assert args.checkpoint
    logger.info("Load pretrained weights from %s", args.checkpoint)
    ckpt = mge.load(args.checkpoint)
    ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
    model.load_state_dict(ckpt, strict=False)

    # Build valid datasets
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(
        valid_dataset, batch_size=100, drop_last=False
    )
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=T.Compose(
            [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
        ),
        num_workers=args.workers,
    )

    # calibration
    model.fc.disable_quantize()
    model = quantize_qat(model, qconfig=Q.calibration_qconfig)

    # calculate scale
    def calculate_scale(image, label):
        model.eval()
        enable_observer(model)
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
            acc1 = dist.functional.all_reduce_sum(acc1) / dist.get_world_size()
            acc5 = dist.functional.all_reduce_sum(acc5) / dist.get_world_size()
        return loss, acc1, acc5

    infer(calculate_scale, valid_queue, args)

    # quantized
    model = quantize(model)

    # eval quantized model
    def eval_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        if dist.is_distributed():  # all_reduce_mean
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
            acc1 = dist.functional.all_reduce_sum(acc1) / dist.get_world_size()
            acc5 = dist.functional.all_reduce_sum(acc5) / dist.get_world_size()
        return loss, acc1, acc5

    _, valid_acc, valid_acc5 = infer(eval_func, valid_queue, args)
    logger.info("TEST %f, %f", valid_acc, valid_acc5)

    # save quantized model
    mge.save(
        {"step": -1, "state_dict": model.state_dict()},
        os.path.join(save_dir, "checkpoint-calibration.pkl"),
    )
    logger.info(
        "save in {}".format(os.path.join(save_dir, "checkpoint-calibration.pkl"))
    )
Code example #29
def tracedmodule_to_caffe(
    traced_module,
    prototxt="out.prototxt",
    caffemodel="out.caffemodel",
    outspec=None,
    use_empty_blobs=False,
    input_data_type: str = None,
    input_scales: Union[float, List[float]] = None,
    input_zero_points: Union[int, List[int]] = None,
    require_quantize=False,
    param_fake_quant=False,
    split_conv_relu=False,
    fuse_bn=False,
    quantize_file_path="quant_params.json",
    convert_backend: BackEnd = BackEnd.CAFFE,
):
    """
    Convert a TracedModule model to Caffe and save the Caffe model to
    `prototxt` and `caffemodel`.

    :param traced_module: a TracedModule object, or the file path of a dumped TracedModule model.
    :type traced_module: TracedModule or str
    :param prototxt: the filename used for saved model definition.
    :type prototxt: str
    :param caffemodel: the filename used for saved model weights.
    :type caffemodel: str
    :param outspec: specify the end points of the model, expect the full names of nodes.
    :type outspec: list
    """
    if isinstance(traced_module, str):
        traced_module = mge.load(traced_module)
    assert isinstance(
        traced_module, TracedModule
    ), "Input should be a traced module or a path of traced module."
    assert not require_quantize, "Caffe do not support quantize model."

    _update_inputs_qparams(
        traced_module, input_data_type, input_scales, input_zero_points
    )
    tm_resolver = TM_FrontEnd(traced_module, outspec=outspec)
    irgraph = tm_resolver.resolve()

    transformer_options = [
        TransformerRule.REMOVE_DROPOUT,
        TransformerRule.REMOVE_RESHAPE_REALTED_OP,
        TransformerRule.REMOVE_UNRELATED_IROP,
        TransformerRule.ADD_FAKE_HSIGMOID_OUT,
        TransformerRule.EXPAND_CONVRELU,
    ]
    if fuse_bn:
        transformer_options += [
            TransformerRule.FUSE_LINEAR_BN,
            TransformerRule.FUSE_CONV_BN,
        ]

    if convert_backend == BackEnd.NNIE:
        transformer_options.extend(
            [TransformerRule.REMOVE_FLATTEN_BEFORE_LINEAR,]
        )

    if split_conv_relu:
        transformer_options += [TransformerRule.REMOVE_RELU]
    transformer = IRTransform(transformer_options)
    transformed_irgraph = transformer.transform(irgraph)

    quantizer = IRQuantizer(
        require_quantize=require_quantize, param_fake_quant=param_fake_quant
    )

    if tm_resolver.has_qat:
        quantizer.save_quantize_params(transformed_irgraph)

    converter = CaffeConverter(
        transformed_irgraph, quantizer, use_empty_blobs, convert_backend
    )
    converter.convert()

    if tm_resolver.has_qat:
        quantizer.dump_quant_param(path=quantize_file_path)

    assert isinstance(prototxt, str) and isinstance(
        caffemodel, str
    ), "'prototxt' and 'caffemodel' must be string"
    converter.dump(prototxt, caffemodel)
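A minimal usage sketch, assuming a TracedModule has already been dumped to disk (the file names below are placeholders):

tracedmodule_to_caffe(
    "model.tm",                     # path of the dumped TracedModule
    prototxt="model.prototxt",      # output network definition
    caffemodel="model.caffemodel",  # output weights
)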
Code example #30
import megengine as mge
from model import ReverseString
from dataset import get_dataloader, make_string_from_tensor, MAXLEN
import sys

model = ReverseString()
model.load_state_dict(mge.load(sys.argv[1]))

model.eval()


test_data = get_dataloader()
data = mge.tensor()
position = mge.tensor()
total = 0
correct = 0
for idx, (batch_data, batch_label, pos) in enumerate(test_data):
    data.set_value(batch_data)
    position.set_value(pos)
    prob = model(data, position)
    prob = prob.reshape(-1, MAXLEN, 26)
    predicted = prob.numpy().argmax(axis=2)
    inp_str = make_string_from_tensor(batch_data)
    pred_str = make_string_from_tensor(predicted)
    gt_str = make_string_from_tensor(batch_label)

    for i in range(len(inp_str)):
        total += 1
        correct += gt_str[i] == pred_str[i]

    print(correct, total)