Example #1
def _build_training_pipeline(config: TransformerConfig,
                             pre_training_dataset=None,
                             fine_tune_dataset=None,
                             test_dataset=None):
    """
    Build training pipeline.

    Args:
        config (TransformerConfig): Config of the MASS model.
        pre_training_dataset (Dataset): Pre-training dataset.
        fine_tune_dataset (Dataset): Fine-tune dataset.
        test_dataset (Dataset): Test dataset.
    """
    net_with_loss = TransformerNetworkWithLoss(config, is_training=True)
    net_with_loss.init_parameters_data()

    if config.existed_ckpt:
        if config.existed_ckpt.endswith(".npz"):
            weights = np.load(config.existed_ckpt)
        else:
            weights = load_checkpoint(config.existed_ckpt)
        for param in net_with_loss.trainable_params():
            weights_name = param.name
            if weights_name not in weights:
                raise ValueError(
                    f"Param {weights_name} is not found in ckpt file.")

            if isinstance(weights[weights_name], Parameter):
                param.default_input = weights[weights_name].default_input
            elif isinstance(weights[weights_name], Tensor):
                param.default_input = Tensor(weights[weights_name].asnumpy(),
                                             config.dtype)
            elif isinstance(weights[weights_name], np.ndarray):
                param.default_input = Tensor(weights[weights_name],
                                             config.dtype)
            else:
                param.default_input = weights[weights_name]
    else:
        for param in net_with_loss.trainable_params():
            name = param.name
            value = param.default_input
            if isinstance(value, Tensor):
                if name.endswith(".gamma"):
                    param.default_input = one_weight(value.asnumpy().shape)
                elif name.endswith(".beta") or name.endswith(".bias"):
                    param.default_input = zero_weight(value.asnumpy().shape)
                else:
                    param.default_input = weight_variable(
                        value.asnumpy().shape)

    dataset = pre_training_dataset if pre_training_dataset is not None \
        else fine_tune_dataset

    if dataset is None:
        raise ValueError(
            "Either a pre-training dataset or a fine-tuning dataset must be provided.")

    update_steps = dataset.get_repeat_count() * dataset.get_dataset_size()
    if config.lr_scheduler == "isr":
        lr = Tensor(square_root_schedule(
            lr=config.lr,
            update_num=update_steps,
            decay_start_step=config.decay_start_step,
            warmup_steps=config.warmup_steps,
            min_lr=config.min_lr),
                    dtype=mstype.float32)
    elif config.lr_scheduler == "poly":
        lr = Tensor(polynomial_decay_scheduler(
            lr=config.lr,
            min_lr=config.min_lr,
            decay_steps=config.decay_steps,
            total_update_num=update_steps,
            warmup_steps=config.warmup_steps,
            power=config.poly_lr_scheduler_power),
                    dtype=mstype.float32)
    else:
        lr = config.lr

    if config.optimizer.lower() == "adam":
        optimizer = Adam(net_with_loss.trainable_params(),
                         lr,
                         beta1=0.9,
                         beta2=0.98)
    elif config.optimizer.lower() == "lamb":
        lr = BertLearningRate(decay_steps=12000,
                              learning_rate=config.lr,
                              end_learning_rate=config.min_lr,
                              power=10.0,
                              warmup_steps=config.warmup_steps)
        decay_params = list(
            filter(lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(),
                   net_with_loss.trainable_params()))
        other_params = list(
            filter(lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.lower(),
                   net_with_loss.trainable_params()))
        group_params = [{'params': decay_params, 'weight_decay': 0.01},
                        {'params': other_params}]

        optimizer = Lamb(group_params, lr, eps=1e-6)
    elif config.optimizer.lower() == "momentum":
        optimizer = Momentum(net_with_loss.trainable_params(),
                             lr,
                             momentum=0.9)
    else:
        raise ValueError(
            "optimizer only supports `adam`, `lamb` and `momentum` now.")

    # Dynamic loss scale.
    scale_manager = DynamicLossScaleManager(
        init_loss_scale=config.init_loss_scale,
        scale_factor=config.loss_scale_factor,
        scale_window=config.scale_window)
    net_with_grads = TransformerTrainOneStepWithLossScaleCell(
        network=net_with_loss,
        optimizer=optimizer,
        scale_update_cell=scale_manager.get_update_cell())
    net_with_grads.set_train(True)
    model = Model(net_with_grads)
    loss_monitor = LossCallBack(config)
    ckpt_config = CheckpointConfig(
        save_checkpoint_steps=config.save_ckpt_steps,
        keep_checkpoint_max=config.keep_ckpt_max)

    rank_size = os.getenv('RANK_SIZE')
    callbacks = [loss_monitor]
    if rank_size is not None and int(
            rank_size) > 1 and MultiAscend.get_rank() % 8 == 0:
        ckpt_callback = ModelCheckpoint(
            prefix=config.ckpt_prefix,
            directory=os.path.join(config.ckpt_path,
                                   'ckpt_{}'.format(os.getenv('DEVICE_ID'))),
            config=ckpt_config)
        callbacks.append(ckpt_callback)

    if rank_size is None or int(rank_size) == 1:
        ckpt_callback = ModelCheckpoint(
            prefix=config.ckpt_prefix,
            directory=os.path.join(config.ckpt_path,
                                   'ckpt_{}'.format(os.getenv('DEVICE_ID'))),
            config=ckpt_config)
        callbacks.append(ckpt_callback)

    print(f" | ALL SET, PREPARE TO TRAIN.")
    _train(model=model,
           config=config,
           pre_training_dataset=pre_training_dataset,
           fine_tune_dataset=fine_tune_dataset,
           test_dataset=test_dataset,
           callbacks=callbacks)
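# NOTE: square_root_schedule() and polynomial_decay_scheduler() used above are
# defined elsewhere in the repository. Purely as a hypothetical sketch (not the
# actual implementation), an inverse-square-root ("isr") schedule with linear
# warmup and a minimum learning-rate floor could look like this in plain NumPy:
import numpy as np


def isr_schedule_sketch(lr, update_num, warmup_steps, decay_start_step, min_lr):
    """Hypothetical schedule: linear warmup, constant lr, then lr * sqrt(decay_start_step / step)."""
    lrs = []
    for step in range(1, update_num + 1):
        if step <= warmup_steps:
            cur = lr * step / max(warmup_steps, 1)
        elif step <= decay_start_step:
            cur = lr
        else:
            cur = lr * (decay_start_step ** 0.5) / (step ** 0.5)
        lrs.append(max(cur, min_lr))
    return np.array(lrs, dtype=np.float32)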
Example #2
    parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
                        help='device where the code will be implemented (default: Ascend)')
    parser.add_argument('--data_path', type=str, default="s3://hithcd/rgb/pic/",
                        help='path where the dataset is saved')

    parser.add_argument('--ckpt_path', type=str, default="obs://hithcd/MA-hw_project_resnet18-05-30-19/output/V0408/",
                        help='path where the trained ckpt file is saved (required when mode is test)')
    # NOTE: argparse's type=bool treats any non-empty string (including "False") as True;
    # parse booleans with ast.literal_eval (as in the other examples) to avoid surprises.
    parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    local_data_url = '/cache/data'
    local_output_url = '/cache/ckpt'
    mox.file.copy_parallel(args.data_path, local_data_url)
    mox.file.copy_parallel(args.ckpt_path, local_output_url)

    net = resnet18(class_num=config.class_num)

    print("============== Starting Testing ==============")
    param_dict = load_checkpoint(os.path.join(local_output_url, 'resnet-50_1759.ckpt'), net=net)
    load_param_into_net(net, param_dict)
    im = np.asarray(Image.open(os.path.join(local_data_url, '490.png')).convert('L'))
    im = 255-im
    im = im/255.0
    input_data = im.reshape((1, 1, 112, 112))  # avoid shadowing the built-in `input`
    input_tensor = Tensor(input_data, mindspore.float32)
    output = net(input_tensor).asnumpy()
    preds = np.argmax(output, axis=1)
    mox.file.copy_parallel(local_output_url, args.train_url)
    print("Predict label: {0}, score={1}".format(preds[0], output[0][preds[0]]))
Example #3
def yolov3_predict(instance, strategy):
    network = YOLOV3DarkNet53(is_training=False)
    pretrained_ckpt = '/dataset/ckpt-files/shanshui_full/yolov3.ckpt'
    if not os.path.exists(pretrained_ckpt):
        err_msg = "The yolov3.ckpt file does not exist!"
        return {"status": 1, "err_msg": err_msg}
    param_dict = load_checkpoint(pretrained_ckpt)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            continue
        elif key.startswith('yolo_network.'):
            param_dict_new[key[13:]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)

    config = ConfigYOLOV3DarkNet53()

    # init detection engine
    args = edict()
    args.ignore_threshold = 0.01
    args.nms_thresh = 0.5
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    print('Start inference....')
    network.set_train(False)
    ori_image = np.array(json.loads(instance['data']), dtype=instance['dtype'])
    image, image_shape = data_preprocess(ori_image, config)
    prediction = network(Tensor(image.reshape(1, 3, 416, 416), ms.float32),
                         input_shape)
    output_big, output_me, output_small = prediction
    output_big = output_big.asnumpy()
    output_me = output_me.asnumpy()
    output_small = output_small.asnumpy()

    per_batch_size = 1
    detection.detect([output_small, output_me, output_big], per_batch_size,
                     image_shape, config)
    detection.do_nms_for_results()
    out_img = detection.draw_boxes_in_image(ori_image)

    # for i in range(len(detection.det_boxes)):
    #     print("x: ", detection.det_boxes[i]['bbox'][0])
    #     print("y: ", detection.det_boxes[i]['bbox'][1])
    #     print("h: ", detection.det_boxes[i]['bbox'][2])
    #     print("w: ", detection.det_boxes[i]['bbox'][3])
    #     print("score: ", round(detection.det_boxes[i]['score'], 3))
    #     print("category: ", detection.det_boxes[i]['category_id'])

    det_boxes = detection.det_boxes
    if not len(det_boxes):
        err_msg = "抱歉!未检测到任何种类,无法标注。"
        return {"status": 1, "err_msg": err_msg}
    max_det = max(det_boxes, key=lambda k: k['score'])
    max_score = max_det['score']
    category = det_boxes[det_boxes.index(max_det)]['category_id']

    res = {
        "status": 0,
        "instance": {
            "boxes_num": len(det_boxes),
            "max_score": round(max_score, 3),
            "category": category,
            "data": numpy2base64(out_img)
        }
    }
    return res
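# The checkpoint-key remapping loop above (drop 'moments.*' optimizer state and
# strip the 'yolo_network.' prefix) recurs in several of these examples. A
# generic sketch of that pattern, written here only for illustration:
def strip_ckpt_prefix(param_dict, prefix, drop_prefix='moments.'):
    """Return a new dict with optimizer state removed and `prefix` stripped from keys."""
    new_dict = {}
    for key, value in param_dict.items():
        if key.startswith(drop_prefix):
            continue  # skip optimizer moments
        if key.startswith(prefix):
            new_dict[key[len(prefix):]] = value
        else:
            new_dict[key] = value
    return new_dict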
Example #4
def main():
    parser = argparse.ArgumentParser(description="YOLOv3 train")
    parser.add_argument("--only_create_dataset", type=ast.literal_eval, default=False,
                        help="If set it true, only create Mindrecord, default is False.")
    parser.add_argument("--distribute", type=ast.literal_eval, default=False, help="Run distribute, default is False.")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
    parser.add_argument("--lr", type=float, default=0.001, help="Learning rate, default is 0.001.")
    parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink")
    parser.add_argument("--epoch_size", type=int, default=50, help="Epoch size, default is 50")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.")
    parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained checkpoint file path")
    parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size")
    parser.add_argument("--save_checkpoint_epochs", type=int, default=5, help="Save checkpoint epochs, default is 5.")
    parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.")
    parser.add_argument("--mindrecord_dir", type=str, default="./Mindrecord_train",
                        help="Mindrecord directory. If the mindrecord_dir is empty, it wil generate mindrecord file by "
                             "image_dir and anno_path. Note if mindrecord_dir isn't empty, it will use mindrecord_dir "
                             "rather than image_dir and anno_path. Default is ./Mindrecord_train")
    parser.add_argument("--image_dir", type=str, default="", help="Dataset directory, "
                                                                  "the absolute image path is joined by the image_dir "
                                                                  "and the relative path in anno_path")
    parser.add_argument("--anno_path", type=str, default="", help="Annotation path.")
    args_opt = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    if args_opt.distribute:
        device_num = args_opt.device_num
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                          device_num=device_num)
        init()
        rank = args_opt.device_id % device_num
    else:
        rank = 0
        device_num = 1

    print("Start create dataset!")

    # MindRecord files will be generated in args_opt.mindrecord_dir,
    # with names yolo.mindrecord0, yolo.mindrecord1, ..., up to the file count.
    if not os.path.isdir(args_opt.mindrecord_dir):
        os.makedirs(args_opt.mindrecord_dir)

    prefix = "yolo.mindrecord"
    mindrecord_file = os.path.join(args_opt.mindrecord_dir, prefix + "0")
    if not os.path.exists(mindrecord_file):
        if os.path.isdir(args_opt.image_dir) and os.path.exists(args_opt.anno_path):
            print("Create Mindrecord.")
            data_to_mindrecord_byte_image(args_opt.image_dir,
                                          args_opt.anno_path,
                                          args_opt.mindrecord_dir,
                                          prefix,
                                          8)
            print("Create Mindrecord Done, at {}".format(args_opt.mindrecord_dir))
        else:
            raise ValueError('image_dir {} or anno_path {} does not exist'.format(\
                              args_opt.image_dir, args_opt.anno_path))

    if not args_opt.only_create_dataset:
        loss_scale = float(args_opt.loss_scale)

        # When creating MindDataset, use the first mindrecord file, such as yolo.mindrecord0.
        dataset = create_yolo_dataset(mindrecord_file,
                                      batch_size=args_opt.batch_size, device_num=device_num, rank=rank)
        dataset_size = dataset.get_dataset_size()
        print("Create dataset done!")

        net = yolov3_resnet18(ConfigYOLOV3ResNet18())
        net = YoloWithLossCell(net, ConfigYOLOV3ResNet18())
        init_net_param(net, "XavierUniform")

        # checkpoint
        ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs)
        ckpoint_cb = ModelCheckpoint(prefix="yolov3", directory='./ckpt_' + str(rank) + '/', config=ckpt_config)

        if args_opt.pre_trained:
            if args_opt.pre_trained_epoch_size <= 0:
                raise KeyError("pre_trained_epoch_size must be greater than 0.")
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        total_epoch_size = 60
        if args_opt.distribute:
            total_epoch_size = 160
        lr = Tensor(get_lr(learning_rate=args_opt.lr, start_step=args_opt.pre_trained_epoch_size * dataset_size,
                           global_step=total_epoch_size * dataset_size,
                           decay_step=1000, decay_rate=0.95, steps=True))
        opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()), lr, loss_scale=loss_scale)
        net = TrainingWrapper(net, opt, loss_scale)

        callback = [TimeMonitor(data_size=dataset_size), LossMonitor(), ckpoint_cb]

        model = Model(net)
        dataset_sink_mode = False
        if args_opt.mode == "sink":
            print("In sink mode, one epoch return a loss.")
            dataset_sink_mode = True
        print("Start train YOLOv3, the first epoch will be slower because of the graph compilation.")
        model.train(args_opt.epoch_size, dataset, callbacks=callback, dataset_sink_mode=dataset_sink_mode)
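# get_lr() above is defined elsewhere in the repository. Purely as an assumption
# about its behaviour (a staircase exponential decay by decay_rate every
# decay_step steps, resumed from start_step), a minimal NumPy sketch:
import numpy as np


def get_lr_sketch(learning_rate, start_step, global_step, decay_step=1000, decay_rate=0.95):
    """Hypothetical staircase exponential-decay schedule, returned from start_step onward."""
    steps = np.arange(global_step)
    lrs = learning_rate * np.power(decay_rate, steps // decay_step)
    return lrs[start_step:].astype(np.float32)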
Example #5
def test(cloud_args=None):
    """test"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        if args.platform == "Ascend":
            init()
        elif args.platform == "GPU":
            init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          parameter_broadcast=True,
                                          mirror_mean=True)
    else:
        args.rank = 0
        args.group_size = 1

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
        print(models)
        if args.graph_ckpt:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split(
                    '_')[0])
        else:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
        args.models = sorted(models, key=f)
    else:
        args.models = [
            args.pretrained,
        ]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir,
                                            image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1,
                                            rank=args.rank,
                                            group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator()
        network = get_network(args.backbone,
                              args.num_classes,
                              platform=args.platform)
        if network is None:
            raise NotImplementedError('not implement {}'.format(args.backbone))

        param_dict = load_checkpoint(model)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values

        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(model))

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        if args.platform == "Ascend":
            network.to_float(mstype.float16)
        else:
            auto_mixed_precision(network)
        network.set_train(False)
        t_end = time.time()
        it = 0
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

            if args.rank == 0 and it == 0:
                t_end = time.time()
                it = 1
        if args.rank == 0:
            time_used = time.time() - t_end
            fps = (img_tot - args.per_batch_size) * args.group_size / time_used
            args.logger.info(
                'Inference Performance: {:.2f} img/sec'.format(fps))
        results = [[top1_correct], [top5_correct], [img_tot]]
        args.logger.info('before results={}'.format(results))
        if args.is_distributed:
            model_md5 = model.replace('/', '')
            tmp_dir = '/cache'
            if not os.path.exists(tmp_dir):
                os.mkdir(tmp_dir)
            top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            np.save(top1_correct_npy, top1_correct)
            np.save(top5_correct_npy, top5_correct)
            np.save(img_tot_npy, img_tot)
            while True:
                rank_ok = True
                for other_rank in range(args.group_size):
                    top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    if not os.path.exists(top1_correct_npy) or not os.path.exists(top5_correct_npy) or \
                       not os.path.exists(img_tot_npy):
                        rank_ok = False
                if rank_ok:
                    break

            top1_correct_all = 0
            top5_correct_all = 0
            img_tot_all = 0
            for other_rank in range(args.group_size):
                top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                top1_correct_all += np.load(top1_correct_npy)
                top5_correct_all += np.load(top5_correct_npy)
                img_tot_all += np.load(img_tot_npy)
            results = [[top1_correct_all], [top5_correct_all], [img_tot_all]]
            results = np.array(results)
        else:
            results = np.array(results)

        args.logger.info('after results={}'.format(results))
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info('after allreduce eval: top1_correct={}, tot={}, '
                         'acc={:.2f}%(TOP1)'.format(top1_correct, img_tot,
                                                    acc1))
        args.logger.info('after allreduce eval: top5_correct={}, tot={}, '
                         'acc={:.2f}%(TOP5)'.format(top5_correct, img_tot,
                                                    acc5))
    if args.is_distributed:
        release()
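# get_top5_acc() used above is defined elsewhere in the repository. A minimal
# sketch of what it presumably computes (the number of samples whose ground-truth
# class appears among the top-5 predictions), stated here only as an assumption:
def get_top5_acc_sketch(top5_arg, gt_class):
    sub_count = 0
    for top5, gt in zip(top5_arg, gt_class):
        if gt in top5:
            sub_count += 1
    return sub_count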
Example #6
def test_lenet_mnist_fuzzing():
    # load the trained network
    ckpt_path = '../common/networks/lenet5/trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, load_dict)
    model = Model(net)
    mutate_config = [{
        'method': 'Blur',
        'params': {
            'radius': [0.1, 0.2, 0.3],
            'auto_param': [True, False]
        }
    }, {
        'method': 'Contrast',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'Translate',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'Brightness',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'Noise',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'Scale',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'Shear',
        'params': {
            'auto_param': [True]
        }
    }, {
        'method': 'FGSM',
        'params': {
            'eps': [0.3, 0.2, 0.4],
            'alpha': [0.1]
        }
    }]

    # get training data
    data_list = "../common/dataset/MNIST/train"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size, sparse=False)
    train_images = []
    for data in ds.create_tuple_iterator(output_numpy=True):
        images = data[0].astype(np.float32)
        train_images.append(images)
    train_images = np.concatenate(train_images, axis=0)

    # initialize fuzz test with training dataset
    model_coverage_test = ModelCoverageMetrics(model, 10, 1000, train_images)

    # fuzz test with original test data
    # get test data
    data_list = "../common/dataset/MNIST/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size, sparse=False)
    test_images = []
    test_labels = []
    for data in ds.create_tuple_iterator(output_numpy=True):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
    test_images = np.concatenate(test_images, axis=0)
    test_labels = np.concatenate(test_labels, axis=0)
    initial_seeds = []

    # make initial seeds
    for img, label in zip(test_images, test_labels):
        initial_seeds.append([img, label])

    initial_seeds = initial_seeds[:100]
    model_coverage_test.calculate_coverage(
        np.array(test_images[:100]).astype(np.float32))
    LOGGER.info(TAG, 'KMNC of this test is : %s',
                model_coverage_test.get_kmnc())

    model_fuzz_test = Fuzzer(model, train_images, 10, 1000)
    _, _, _, _, metrics = model_fuzz_test.fuzzing(mutate_config,
                                                  initial_seeds,
                                                  eval_metrics='auto')
    if metrics:
        for key in metrics:
            LOGGER.info(TAG, key + ': %s', metrics[key])
Example #7
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
##############export checkpoint file into air and onnx models#################
python export.py
"""
import numpy as np

import mindspore as ms
from mindspore import Tensor
from mindspore.train.serialization import load_checkpoint, load_param_into_net, export

from src.config import cifar_cfg as cfg
from src.googlenet import GoogleNet

if __name__ == '__main__':
    net = GoogleNet(num_classes=cfg.num_classes)
    param_dict = load_checkpoint(cfg.checkpoint_path)
    load_param_into_net(net, param_dict)

    input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]),
                       ms.float32)
    export(net, input_arr, file_name=cfg.onnx_filename, file_format="ONNX")
    export(net, input_arr, file_name=cfg.air_filename, file_format="AIR")
Example #8
def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
    """
    Do train.
    Args:
        dataset: the train dataset.
        network: the network with loss.
        load_checkpoint_path: the file path of the saved pretrained model checkpoint.
        save_checkpoint_path: the file path where the finetuned model checkpoint will be saved.
        epoch_num: the number of epochs.
    """
    if load_checkpoint_path == "":
        raise ValueError("Pretrain model missed, finetune task must load pretrain model!")

    steps_per_epoch = dataset.get_dataset_size()

    # optimizer
    if cfg.optimizer == 'AdamWeightDecay':
        lr_schedule = GPT2LearningRate(learning_rate=cfg.AdamWeightDecay.learning_rate,
                                       end_learning_rate=cfg.AdamWeightDecay.end_learning_rate,
                                       warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                                       decay_steps=steps_per_epoch * epoch_num,
                                       power=cfg.AdamWeightDecay.power)
        params = network.trainable_params()

        decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params))
        other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
        group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay},
                        {'params': other_params, 'weight_decay': 0.0}]
        optimizer = AdamWeightDecay(group_params, lr_schedule, eps=cfg.AdamWeightDecay.eps)
    elif cfg.optimizer == 'Lamb':
        lr_schedule = GPT2LearningRate(learning_rate=cfg.Lamb.learning_rate,
                                       end_learning_rate=cfg.Lamb.end_learning_rate,
                                       warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                                       decay_steps=steps_per_epoch * epoch_num,
                                       power=cfg.Lamb.power)
        optimizer = Lamb(network.trainable_params(), lr_schedule)
    elif cfg.optimizer == 'Momentum':
        optimizer = Momentum(network.trainable_params(), cfg.Momentum.learning_rate, cfg.Momentum.momentum)
    else:
        raise ValueError("Optimizer not supported. Supported optimizers: [AdamWeightDecay, Lamb, Momentum]")

    # load checkpoint into network
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1)
    prefix_name = "gpt2_translation_" + str(cfg.gpt2_network) + "_" + str(cfg.optimizer) + "_" \
                  + str(epoch_num) + "_bs" + str(gpt2_net_cfg.batch_size)
    ckpoint_cb = ModelCheckpoint(prefix=prefix_name,
                                 directory=None if save_checkpoint_path == "" else save_checkpoint_path,
                                 config=ckpt_config)
    param_dict = load_checkpoint(load_checkpoint_path)

    final_param_dict = {}
    for name, _ in param_dict.items():
        final_param_dict['gpt2.gpt2.' + name] = param_dict[name]
    final_param_dict['gpt2.dense1.weight'] = param_dict['gpt2_embedding_lookup.embedding_table']

    load_param_into_net(network, final_param_dict)
    print("Load the pretrained parameter successfully! \n")

    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2 ** 32, scale_factor=2, scale_window=1000)
    netwithgrads = GPT2FinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell)
    netwithgrads.set_train(True)
    loss_cb = LossMonitor(per_print_times=1)

    model = Model(netwithgrads)

    callbacks = [TimeMonitor(dataset.get_dataset_size()), loss_cb, ckpoint_cb]

    print("=================== Starting Training For Translation Task ====================")
    model.train(epoch_num, dataset, callbacks=callbacks, dataset_sink_mode=False)
    print("===================      Translation Training Success      ====================")
Example #9
                    default="./MNIST_Data",
                    help='path where the dataset is saved')
parser.add_argument(
    '--ckpt_path',
    type=str,
    default="",
    help='path where the trained ckpt file is saved (required when mode is test)')
args = parser.parse_args()

if __name__ == "__main__":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target)

    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # convert fusion network to quantization aware network
    network = quant.convert_quant_network(network,
                                          quant_delay=0,
                                          bn_fold=False,
                                          freeze_bn=10000,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    # load quantization aware network checkpoint
    param_dict = load_checkpoint(args.ckpt_path)
    load_param_into_net(network, param_dict)

    # export network
    inputs = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]),
                    mindspore.float32)
    quant.export(network, inputs, file_name="lenet_quant", file_format='AIR')
Example #10
    context.set_context(enable_task_sink=True)

    print("test lenet predict start")
    seed = 0
    np.random.seed(seed)
    batch = 1
    channel = 1
    input_h = 32
    input_w = 32
    origin_data = np.random.uniform(low=0,
                                    high=255,
                                    size=(batch, channel, input_h,
                                          input_w)).astype(np.float32)
    origin_data.tofile("lenet_input_data.bin")

    input_data = Tensor(origin_data)
    print(input_data.asnumpy())
    net = LeNet()
    ckpt_file_path = "./tests/ut/python/predict/checkpoint_lenet.ckpt"
    predict_args = parser.parse_args()
    model_path_name = predict_args.path

    is_ckpt_exist = os.path.exists(ckpt_file_path)
    if is_ckpt_exist:
        param_dict = load_checkpoint(ckpoint_file_name=ckpt_file_path)
        load_param_into_net(net, param_dict)
        export(net, input_data, file_name=model_path_name, file_format='LITE')
        print("test lenet predict success.")
    else:
        print("checkpoint file is not exist.")
Example #11
def do_eval(dataset=None, network=None, metric=None, load_checkpoint_path="", eval_type=None, tokenizer_file_path="",
            generate_length=1, top_k=1, top_p=1.0, temperature=1.0):
    """
    Do evaluation on Translation
    Args:
        dataset: the eval dataset.
        network: the network with loss.
        metric: the evaluation method.
        load_checkpoint_path: the file path of the saved finetuned model checkpoint.

    """
    if load_checkpoint_path == "":
        raise ValueError("Finetune model missed, evaluation task must load finetune model!")
    if metric.lower() == "bleu":
        print("Prepare to calculate the BLEU score ...")

        gpt2_translation = network(config=gpt2_net_cfg,
                                   is_training=False,
                                   use_one_hot_embeddings=False)
        gpt2_translation.set_train(False)
        param_dict = load_checkpoint(load_checkpoint_path)

        if eval_type == "zero-shot":
            final_param_dict = {}
            for name, _ in param_dict.items():
                final_param_dict['gpt2.' + name] = param_dict[name]
            final_param_dict['dense1.weight'] = param_dict['gpt2_embedding_lookup.embedding_table']
            load_param_into_net(gpt2_translation, final_param_dict)
            print("load pretrained parameter successfully!\n")
        elif eval_type == "finetuned":
            load_param_into_net(gpt2_translation, param_dict)
            print("load finetuned parameter successfully!\n")
        else:
            raise ValueError("Evaluation type missed, eval_type should be [zero-shot, finetuned]")

        model = Model(gpt2_translation)
        tokenizer = Tokenizer(vocab_file=tokenizer_file_path + 'gpt2-vocab.json',
                              merge_file=tokenizer_file_path + 'gpt2-merges.txt')
        callback = BLEU(tokenizer)
        translation_generator = GenerateForTranslation(decoder=model,
                                                       config=gpt2_net_cfg,
                                                       tokenizer=tokenizer,
                                                       generate_length=1,
                                                       use_hint=True,
                                                       select_first_sentence=True,
                                                       topk_num=top_k,
                                                       topp_prob=float(top_p),
                                                       temperature=float(temperature)
                                                       )

        columns_list = ["input_ids", "input_mask", "label_ids"]
        print("==================== [BLEU] Testing ====================")
        num_data = 1
        for data in dataset.create_dict_iterator():
            input_data = []
            for i in columns_list:
                input_data.append(data[i])
            input_ids, input_mask, label_ids = input_data

            print("| Data count: {}".format(num_data * gpt2_net_cfg.batch_size))
            print("input_ids shape: {}".format(input_ids.shape))
            print("input_mask shape: {}".format(input_mask.shape))
            print("label_ids shape: {}".format(label_ids.shape))

            ts_predict_list, ref_list = translation_generator.generate_for_translation(input_ids)
            print("| Batch Reference translation:\n{}\n".format(ref_list))
            if ref_list == '' or ref_list is None:
                print("ref_list is empty, skipping this batch!")
                continue
            else:
                print(" | Batch Predict translation:\n{}\n".format(ts_predict_list))
                callback.update(ref_list, ts_predict_list)
                num_data += 1
                print("\n\n")

        print("**************************************************************")
        eval_result_print(metric, callback)
        print("********************** Testing Finished **********************")
    else:
        raise ValueError("metric method not supported in translation, support: [BLEU]")
Example #12
def load_backbone(net, ckpt_path, args):
    """Load darknet53 backbone checkpoint."""
    param_dict = load_checkpoint(ckpt_path)
    yolo_backbone_prefix = 'feature_map.backbone'
    darknet_backbone_prefix = 'network.backbone'
    find_param = []
    not_found_param = []
    net.init_parameters_data()
    for name, cell in net.cells_and_names():
        if name.startswith(yolo_backbone_prefix):
            name = name.replace(yolo_backbone_prefix, darknet_backbone_prefix)
            if isinstance(cell, (nn.Conv2d, nn.Dense)):
                darknet_weight = '{}.weight'.format(name)
                darknet_bias = '{}.bias'.format(name)
                if darknet_weight in param_dict:
                    cell.weight.set_data(param_dict[darknet_weight].data)
                    find_param.append(darknet_weight)
                else:
                    not_found_param.append(darknet_weight)
                if darknet_bias in param_dict:
                    cell.bias.set_data(param_dict[darknet_bias].data)
                    find_param.append(darknet_bias)
                else:
                    not_found_param.append(darknet_bias)
            elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
                darknet_moving_mean = '{}.moving_mean'.format(name)
                darknet_moving_variance = '{}.moving_variance'.format(name)
                darknet_gamma = '{}.gamma'.format(name)
                darknet_beta = '{}.beta'.format(name)
                if darknet_moving_mean in param_dict:
                    cell.moving_mean.set_data(
                        param_dict[darknet_moving_mean].data)
                    find_param.append(darknet_moving_mean)
                else:
                    not_found_param.append(darknet_moving_mean)
                if darknet_moving_variance in param_dict:
                    cell.moving_variance.set_data(
                        param_dict[darknet_moving_variance].data)
                    find_param.append(darknet_moving_variance)
                else:
                    not_found_param.append(darknet_moving_variance)
                if darknet_gamma in param_dict:
                    cell.gamma.set_data(param_dict[darknet_gamma].data)
                    find_param.append(darknet_gamma)
                else:
                    not_found_param.append(darknet_gamma)
                if darknet_beta in param_dict:
                    cell.beta.set_data(param_dict[darknet_beta].data)
                    find_param.append(darknet_beta)
                else:
                    not_found_param.append(darknet_beta)

    args.logger.info('================found_param {}========='.format(
        len(find_param)))
    args.logger.info(find_param)
    args.logger.info('================not_found_param {}========='.format(
        len(not_found_param)))
    args.logger.info(not_found_param)
    args.logger.info('=====load {} successfully ====='.format(ckpt_path))

    return net
Example #13
def validation(net, model_path, data_dir, filename, num_consumer, batch):
    param_dict = load_checkpoint(model_path)
    load_param_into_net(net, param_dict)

    auc = val(net, data_dir, filename, num_consumer, batch)
    return auc
Example #14
def test_trains(args):
    '''test trains'''
    print('----eval----begin----')

    model_path = args.pretrained
    result_file = model_path.replace('.ckpt', '.txt')
    if os.path.exists(result_file):
        os.remove(result_file)
    epoch_result = open(result_file, 'a')
    epoch_result.write(model_path + '\n')

    network = FaceQABackbone()
    ckpt_path = model_path

    if os.path.isfile(ckpt_path):
        param_dict = load_checkpoint(ckpt_path)

        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)

    else:
        print('wrong model path')
        return 1

    path = args.eval_dir
    kp_error_all = [[], [], [], [], []]
    eulers_error_all = [[], [], []]
    kp_ipn = []

    file_list = os.listdir(path)
    for file_name in tqdm(file_list):
        if file_name.endswith('jpg'):
            img_path = os.path.join(path, file_name)
            img, img_ori = read_img(img_path)

            txt_path = img_path.replace('jpg', 'txt')

            if os.path.exists(txt_path):
                euler_kps_do = True
                x_length = img_ori.shape[1]
                y_length = img_ori.shape[0]
                eulers_gt, kp_list = read_gt(txt_path, x_length, y_length)
            else:
                euler_kps_do = False
                continue

            out = network(img)

            _, _, kp_coord_ori, eulers_ori, _ = get_md_output(out)

            if euler_kps_do:
                eulgt = list(eulers_gt)
                for euler_id, _ in enumerate(eulers_ori):
                    eulori = eulers_ori[euler_id]
                    eulers_error_all[euler_id].append(abs(eulori-float(eulgt[euler_id])))

                eye01 = kp_list[0]
                eye02 = kp_list[1]
                eye_dis = 1
                cur_flag = True
                if eye01[0] < 0 or eye01[1] < 0 or eye02[0] < 0 or eye02[1] < 0:
                    cur_flag = False
                else:
                    eye_dis = np.sqrt(np.square(abs(eye01[0]-eye02[0]))+np.square(abs(eye01[1]-eye02[1])))
                cur_error_list = []
                for i in range(5):
                    kp_coord_gt = kp_list[i]
                    kp_coord_model = kp_coord_ori[i]
                    if kp_coord_gt[0] != -1:
                        dis = np.sqrt(np.square(
                            kp_coord_gt[0] - kp_coord_model[0]) + np.square(kp_coord_gt[1] - kp_coord_model[1]))
                        kp_error_all[i].append(dis)
                        cur_error_list.append(dis)
                if cur_flag:
                    kp_ipn.append(sum(cur_error_list)/len(cur_error_list)/eye_dis)

    kp_ave_error = []
    for kps, _ in enumerate(kp_error_all):
        kp_ave_error.append("%.3f" % (sum(kp_error_all[kps])/len(kp_error_all[kps])))

    euler_ave_error = []
    elur_mae = []
    for eulers, _ in enumerate(eulers_error_all):
        euler_ave_error.append("%.3f" % (sum(eulers_error_all[eulers])/len(eulers_error_all[eulers])))
        elur_mae.append((sum(eulers_error_all[eulers])/len(eulers_error_all[eulers])))

    print(r'5 keypoints average err:'+str(kp_ave_error))
    print(r'3 eulers average err:'+str(euler_ave_error))
    print('IPN of 5 keypoints:'+str(sum(kp_ipn)/len(kp_ipn)*100))
    print('MAE of elur:'+str(sum(elur_mae)/len(elur_mae)))

    epoch_result.write(str(sum(kp_ipn)/len(kp_ipn)*100)+'\t'+str(sum(elur_mae)/len(elur_mae))+'\t'
                       + str(kp_ave_error)+'\t'+str(euler_ave_error)+'\n')
    epoch_result.close()

    print('----eval----end----')
    return 0
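# The IPN (inter-pupil normalized) keypoint error accumulated above can be
# restated as a standalone helper for clarity; this is only a rewrite of the
# formula already used in the loop (mean point-to-point distance divided by
# the eye distance), not new functionality.
def ipn_error_sketch(pred_points, gt_points, eye_distance):
    pred = np.asarray(pred_points, dtype=np.float32)
    gt = np.asarray(gt_points, dtype=np.float32)
    dists = np.sqrt(np.sum((pred - gt) ** 2, axis=1))
    return float(np.mean(dists) / eye_distance)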
Example #15
def test():
    """The function of eval."""
    start_time = time.time()
    args = parse_args()

    devid = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target,
                        save_graphs=True,
                        device_id=devid)

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    rank_id = int(
        os.environ.get('RANK_ID')) if os.environ.get('RANK_ID') else 0
    args.logger = get_logger(args.outputs_dir, rank_id)

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=1)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    args.logger.info(args.pretrained)
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('yolo_network.'):
                param_dict_new[key[13:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.pretrained))
    else:
        args.logger.info('{} not exists or not a pre-trained file'.format(
            args.pretrained))
        raise FileNotFoundError(
            '{} not exists or not a pre-trained file'.format(args.pretrained))

    data_root = args.data_root
    ann_file = args.annFile

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = conver_testing_shape(args)

    ds, data_size = create_yolo_dataset(data_root,
                                        ann_file,
                                        is_training=False,
                                        batch_size=args.per_batch_size,
                                        max_epoch=1,
                                        device_num=1,
                                        rank=rank_id,
                                        shuffle=False,
                                        config=config)

    args.logger.info('testing shape : {}'.format(config.test_img_shape))
    args.logger.info('total {} images to eval'.format(data_size))

    network.set_train(False)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
        image = data["image"]

        image_shape = data["image_shape"]
        image_id = data["img_id"]

        prediction = network(image, input_shape)
        output_big, output_me, output_small = prediction
        output_big = output_big.asnumpy()
        output_me = output_me.asnumpy()
        output_small = output_small.asnumpy()
        image_id = image_id.asnumpy()
        image_shape = image_shape.asnumpy()

        detection.detect([output_small, output_me, output_big],
                         args.per_batch_size, image_shape, image_id)
        if i % 1000 == 0:
            args.logger.info('Processing... {:.2f}% '.format(
                i * args.per_batch_size / data_size * 100))

    args.logger.info('Calculating mAP...')
    detection.do_nms_for_results()
    result_file_path = detection.write_result()
    args.logger.info('result file path: {}'.format(result_file_path))
    eval_result = detection.get_eval_result()

    cost_time = time.time() - start_time
    args.logger.info('\n=============coco eval result=========\n' +
                     eval_result)
    args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))
Example #16
                    type=bool,
                    default=True,
                    help='dataset_sink_mode is False or True')
args = parser.parse_args()

if __name__ == "__main__":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target)
    ds_train = create_dataset(os.path.join(args.data_path, "train"),
                              cfg.batch_size, cfg.epoch_size)
    step_size = ds_train.get_dataset_size()

    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # load quantization aware network checkpoint
    param_dict = load_checkpoint(args.ckpt_path, network.type)
    load_param_into_net(network, param_dict)
    # convert fusion network to quantization aware network
    network = quant.convert_quant_network(network,
                                          quant_delay=0,
                                          bn_fold=False,
                                          freeze_bn=10000)

    # define network loss
    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False,
                                                sparse=True,
                                                reduction="mean")
    # define network optimization
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # call back and monitor
Example #17
def test_fast_gradient_sign_method():
    """
    FGSM-Attack test
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    # load the trained network
    ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_name)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "./MNIST_unzip/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size, sparse=False)

    # prediction accuracy before attack
    model = Model(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    i = 0
    for data in ds.create_tuple_iterator():
        i += 1
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(),
                                axis=1)
        predict_labels.append(pred_labels)
        if i >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.argmax(np.concatenate(test_labels), axis=1)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)

    # attacking
    attack = FastGradientSignMethod(net, eps=0.3)
    start_time = time.time()  # time.clock() was removed in Python 3.8
    adv_data = attack.batch_generate(np.concatenate(test_images),
                                     np.concatenate(test_labels), batch_size=32)
    stop_time = time.time()
    np.save('./adv_data', adv_data)
    pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy()
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %s", accuracy_adv)
    attack_evaluate = AttackEvaluate(np.concatenate(test_images).transpose(0, 2, 3, 1),
                                     np.concatenate(test_labels),
                                     adv_data.transpose(0, 2, 3, 1),
                                     pred_logits_adv)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
                     'samples and adversarial samples are: %s',
                attack_evaluate.avg_lp_distance())
    LOGGER.info(TAG, 'The average structural similarity between original '
                     'samples and adversarial samples are: %s',
                attack_evaluate.avg_ssim())
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time)/(batch_num*batch_size))
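# FastGradientSignMethod above is presumably provided by MindArmour. The
# underlying FGSM update is x_adv = clip(x + eps * sign(grad_x loss)); a NumPy
# sketch of that single step, given a precomputed gradient (illustration only,
# not the library implementation):
def fgsm_step_sketch(x, grad, eps=0.3, clip_min=0.0, clip_max=1.0):
    x_adv = x + eps * np.sign(grad)
    return np.clip(x_adv, clip_min, clip_max)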
Example #18
def run_gru_eval():
    """
    GRU evaluation.
    """
    parser = argparse.ArgumentParser(description='GRU eval')
    parser.add_argument(
        "--device_target",
        type=str,
        default="Ascend",
        help="device where the code will be implemented, default is Ascend")
    parser.add_argument('--device_id',
                        type=int,
                        default=0,
                        help='device id of GPU or Ascend, default is 0')
    parser.add_argument('--device_num',
                        type=int,
                        default=1,
                        help='Use device nums, default is 1')
    parser.add_argument('--ckpt_file',
                        type=str,
                        default="",
                        help='ckpt file path')
    parser.add_argument("--dataset_path",
                        type=str,
                        default="",
                        help="Dataset path, default: f`sns.")
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, reserve_class_name_in_scope=False, \
        device_id=args.device_id, save_graphs=False)
    dataset = create_gru_dataset(epoch_count=config.num_epochs, batch_size=config.eval_batch_size, \
        dataset_path=args.dataset_path, rank_size=args.device_num, rank_id=0, do_shuffle=False, is_training=False)
    dataset_size = dataset.get_dataset_size()
    print("dataset size is {}".format(dataset_size))
    network = Seq2Seq(config, is_training=False)
    network = GRUInferCell(network)
    network.set_train(False)
    if args.ckpt_file != "":
        parameter_dict = load_checkpoint(args.ckpt_file)
        load_param_into_net(network, parameter_dict)
    model = Model(network)

    predictions = []
    source_sents = []
    target_sents = []
    eval_text_len = 0
    for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
        source_sents.append(batch["source_ids"])
        target_sents.append(batch["target_ids"])
        source_ids = Tensor(batch["source_ids"], mstype.int32)
        target_ids = Tensor(batch["target_ids"], mstype.int32)
        predicted_ids = model.predict(source_ids, target_ids)
        print("predicts is ", predicted_ids.asnumpy())
        print("target_ids is ", target_ids)
        predictions.append(predicted_ids.asnumpy())
        eval_text_len = eval_text_len + 1

    f_output = open(config.output_file, 'w')
    f_target = open(config.target_file, "w")
    for batch_out, true_sentence in zip(predictions, target_sents):
        for i in range(config.eval_batch_size):
            target_ids = [str(x) for x in true_sentence[i].tolist()]
            f_target.write(" ".join(target_ids) + "\n")
            token_ids = [str(x) for x in batch_out[i].tolist()]
            f_output.write(" ".join(token_ids) + "\n")
    f_output.close()
    f_target.close()
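run_gru_eval only writes token IDs to config.output_file and config.target_file; scoring still needs those IDs mapped back to text. The helper below is a hypothetical post-processing sketch: the vocabulary file, its one-token-per-line format, and the special-token IDs are assumptions, not part of the original script.

def ids_to_sentences(id_file, vocab_file, eos_id=2, pad_id=0):
    """Convert a file of space-separated token IDs back into sentences (sketch)."""
    with open(vocab_file, encoding='utf-8') as f:
        id2tok = [line.rstrip('\n') for line in f]
    sentences = []
    with open(id_file, encoding='utf-8') as f:
        for line in f:
            tokens = []
            for tok_id in (int(x) for x in line.split()):
                if tok_id == eos_id:
                    break
                if tok_id != pad_id:
                    tokens.append(id2tok[tok_id])
            sentences.append(' '.join(tokens))
    return sentences

# hypothetical usage (paths are placeholders):
# hypotheses = ids_to_sentences('output.txt', 'vocab.txt')
# references = ids_to_sentences('target.txt', 'vocab.txt')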
Example #19
0
parser.add_argument('--train_url', type=str, default=None, help='Train output path')
args_opt = parser.parse_args()

device_id = int(os.getenv('DEVICE_ID'))

context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
context.set_context(device_id=device_id)
context.set_context(enable_mem_reuse=True)

local_data_url = '/cache/data'
local_ckpt_url = '/cache/ckpt'


mox.file.copy_parallel(args_opt.data_url, local_data_url)

if not args_opt.checkpoint_path:
    raise ValueError("checkpoint_path must be provided for evaluation.")
checkpoint_file = os.path.join(local_ckpt_url, os.path.split(args_opt.checkpoint_path)[1])
mox.file.copy_parallel(args_opt.checkpoint_path, checkpoint_file)

net = vgg16(num_classes=cfg.num_classes)
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
               weight_decay=cfg.weight_decay)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

param_dict = load_checkpoint(checkpoint_file)
load_param_into_net(net, param_dict)
net.set_train(False)
dataset = dataset.create_dataset(local_data_url, 1, False)
res = model.eval(dataset)
print("result: ", res)
Example #20
0
def test_load_checkpoint_error_filename():
    ckpoint_file_name = 1
    with pytest.raises(ValueError):
        load_checkpoint(ckpoint_file_name)
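A natural companion to the error-path tests in this collection is a round-trip check. The sketch below assumes a MindSpore 1.x-style API where save_checkpoint accepts a Cell directly and load_checkpoint returns a name-to-Parameter dict.

import os
import tempfile

import mindspore.nn as nn
from mindspore.train.serialization import save_checkpoint, load_checkpoint


def test_load_checkpoint_round_trip():
    net = nn.Dense(4, 2)
    ckpt_path = os.path.join(tempfile.mkdtemp(), "dense.ckpt")
    save_checkpoint(net, ckpt_path)
    param_dict = load_checkpoint(ckpt_path)
    # A bare Dense layer exposes 'weight' and 'bias' parameters.
    assert set(param_dict) >= {"weight", "bias"}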
Example #21
0
        loss_scale = float(config.loss_scale)

        # When creating MindDataset, use the first mindrecord file, such as MaskRcnn.mindrecord0.
        dataset = create_maskrcnn_dataset(mindrecord_file, batch_size=config.batch_size,
                                          device_num=device_num, rank_id=rank)

        dataset_size = dataset.get_dataset_size()
        print("total images num: ", dataset_size)
        print("Create dataset done!")

        net = Mask_Rcnn_Resnet50(config=config)
        net = net.set_train()

        load_path = args_opt.pre_trained
        if load_path != "":
            param_dict = load_checkpoint(load_path)
            if config.pretrain_epoch_size == 0:
                for item in list(param_dict.keys()):
                    if not (item.startswith('backbone') or item.startswith('rcnn_mask')):
                        param_dict.pop(item)
            load_param_into_net(net, param_dict)

        loss = LossNet()
        lr = Tensor(dynamic_lr(config, rank_size=device_num, start_steps=config.pretrain_epoch_size * dataset_size),
                    mstype.float32)
        opt = Momentum(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum,
                       weight_decay=config.weight_decay, loss_scale=config.loss_scale)

        net_with_loss = WithLossCell(net, loss)
        if args_opt.run_distribute:
            net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
Example #22
0
def test_load_checkpoint_empty_file():
    os.mknod("empty.ckpt")
    with pytest.raises(ValueError):
        load_checkpoint("empty.ckpt")
Example #23
0
    single_scale_trans = SingleScaleTrans(resize=args.input_shape)

    ds = ds.batch(
        args.batch_size,
        per_batch_map=single_scale_trans,
        input_columns=["image", "annotation", "image_name", "image_size"],
        num_parallel_workers=8)

    args.steps_per_epoch = ds.get_dataset_size()

    # backbone
    network = backbone_HwYolov3(num_classes, num_anchors_list, args)

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        print('load model {} success'.format(args.pretrained))
    else:
        print(
            'load model {} failed, please check the model path; evaluation aborted'
            .format(args.pretrained))
        exit(1)
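The key cleanup above (skipping optimizer 'moments.' entries and stripping the 'network.' wrapper prefix) recurs in several of these examples. A small generic helper, sketched here with the same prefixes, keeps that logic in one place:

def clean_param_dict(param_dict, drop_prefixes=('moments.',), strip_prefix='network.'):
    """Drop optimizer state and strip a training-wrapper prefix from checkpoint keys (sketch)."""
    cleaned = {}
    for key, value in param_dict.items():
        if any(key.startswith(p) for p in drop_prefixes):
            continue
        if key.startswith(strip_prefix):
            key = key[len(strip_prefix):]
        cleaned[key] = value
    return cleaned

# usage mirroring the example above:
# param_dict_new = clean_param_dict(load_checkpoint(args.pretrained))
# load_param_into_net(network, param_dict_new)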
Example #24
0
 def _load_checkpoint(self):
     from mindspore.train.serialization import load_checkpoint
     param_dict = load_checkpoint(self.checkpoint_path)
     return {k: v.asnumpy() for k, v in param_dict.items()}
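Once the parameters are plain numpy arrays, they can be inspected or exported without MindSpore in the environment that consumes them. A minimal sketch; 'model.ckpt' and 'model.npz' are placeholder paths.

import numpy as np
from mindspore.train.serialization import load_checkpoint

# Dump a MindSpore checkpoint to a framework-agnostic .npz archive.
param_dict = load_checkpoint("model.ckpt")
np_weights = {name: param.asnumpy() for name, param in param_dict.items()}
for name, array in np_weights.items():
    print(name, array.shape, array.dtype)
np.savez("model.npz", **np_weights)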
Example #25
0
def do_train(dataset=None,
             network=None,
             load_checkpoint_path="",
             save_checkpoint_path="",
             epoch_num=1):
    """ do train """
    if load_checkpoint_path == "":
        raise ValueError(
            "Pretrained checkpoint is missing: the finetune task must load a pretrained model!")
    steps_per_epoch = dataset.get_dataset_size()
    # optimizer
    if optimizer_cfg.optimizer == 'AdamWeightDecay':
        lr_schedule = BertLearningRate(
            learning_rate=optimizer_cfg.AdamWeightDecay.learning_rate,
            end_learning_rate=optimizer_cfg.AdamWeightDecay.end_learning_rate,
            warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
            decay_steps=steps_per_epoch * epoch_num,
            power=optimizer_cfg.AdamWeightDecay.power)
        params = network.trainable_params()
        decay_params = list(
            filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
        other_params = list(
            filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x),
                   params))
        group_params = [{'params': decay_params,
                         'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
                        {'params': other_params, 'weight_decay': 0.0}]

        optimizer = AdamWeightDecay(group_params,
                                    lr_schedule,
                                    eps=optimizer_cfg.AdamWeightDecay.eps)
    elif optimizer_cfg.optimizer == 'Lamb':
        lr_schedule = BertLearningRate(
            learning_rate=optimizer_cfg.Lamb.learning_rate,
            end_learning_rate=optimizer_cfg.Lamb.end_learning_rate,
            warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
            decay_steps=steps_per_epoch * epoch_num,
            power=optimizer_cfg.Lamb.power)
        optimizer = Lamb(network.trainable_params(), learning_rate=lr_schedule)
    elif optimizer_cfg.optimizer == 'Momentum':
        optimizer = Momentum(
            network.trainable_params(),
            learning_rate=optimizer_cfg.Momentum.learning_rate,
            momentum=optimizer_cfg.Momentum.momentum)
    else:
        raise Exception(
            "Optimizer not supported. support: [AdamWeightDecay, Lamb, Momentum]"
        )

    # configure checkpoint saving
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                   keep_checkpoint_max=1)
    ckpoint_cb = ModelCheckpoint(
        prefix="squad",
        directory=None if save_checkpoint_path == "" else save_checkpoint_path,
        config=ckpt_config)

    # load pretrained checkpoint into the network
    param_dict = load_checkpoint(load_checkpoint_path)
    load_param_into_net(network, param_dict)

    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32,
                                             scale_factor=2,
                                             scale_window=1000)
    netwithgrads = BertSquadCell(network,
                                 optimizer=optimizer,
                                 scale_update_cell=update_cell)
    model = Model(netwithgrads)
    callbacks = [
        TimeMonitor(dataset.get_dataset_size()),
        LossCallBack(), ckpoint_cb
    ]
    model.train(epoch_num, dataset, callbacks=callbacks)
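The AdamWeightDecay branch above relies on MindSpore's grouped-parameter convention: a list of dicts, each carrying its own 'weight_decay'. Below is a stripped-down sketch of the same pattern on a toy network, assuming a MindSpore version whose nn.AdamWeightDecay accepts parameter groups.

import mindspore.nn as nn


def no_decay(param):
    # mirror the usual filter: no weight decay on biases and LayerNorm gamma/beta
    return 'bias' in param.name or 'gamma' in param.name or 'beta' in param.name


net = nn.Dense(8, 2)
params = net.trainable_params()
group_params = [{'params': [p for p in params if not no_decay(p)], 'weight_decay': 0.01},
                {'params': [p for p in params if no_decay(p)], 'weight_decay': 0.0}]
optimizer = nn.AdamWeightDecay(group_params, learning_rate=1e-4, eps=1e-8)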
Example #26
0
def do_eval(dataset=None,
            network=None,
            metric=None,
            load_checkpoint_path="",
            translate_direction="en-fr"):
    """
    Do evaluation on summarization
    Args:
        dataset: the eval dataset.
        network:  the network with loss.
        metric: the evaluation method.
        load_checkpoint_path: the file path which saved finetune model checkpoint.
    """
    if load_checkpoint_path == "":
        raise ValueError(
            "Finetune model missed, evaluation task must load finetune model!")
    if metric.lower() == "bleu":
        print("Prepare to calculate the BLEU score ...")

        gpt2_loss = network(config=gpt2_net_cfg,
                            is_training=False,
                            use_one_hot_embeddings=False)

        gpt2_loss.set_train(False)
        param_dict = load_checkpoint(load_checkpoint_path)
        reorganized_param_dict = dict()
        for netName in param_dict:
            reorganized_param_dict['gpt2.' + netName] = param_dict[netName]
        reorganized_param_dict['lm_head.weight'] = param_dict[
            'gpt2_embedding_lookup.embedding_table']
        load_param_into_net(gpt2_loss, reorganized_param_dict)

        # for item in gpt2_loss.get_parameters():
        #     print('name: ', item.data.name)
        model = Model(gpt2_loss)
        tokenizer = Tokenizer(
            vocab_file='./src/utils/pretrain-data/gpt2-vocab.json',
            merge_file='./src/utils/pretrain-data/gpt2-merges.txt')
        callback = BLEU(tokenizer)
        sample = Sample(model,
                        tokenizer=tokenizer,
                        model_config=gpt2_net_cfg,
                        topk_num=0,
                        topp_prob=0.92,
                        min_tokens_to_keep=1,
                        demo_mode=False,
                        early_stop=True)
        columns_list = ["input_ids", "input_mask", "label_ids"]
        for data in dataset.create_dict_iterator():
            input_data = []
            for i in columns_list:
                input_data.append(data[i])
            input_ids, input_mask, label_ids = input_data

            print("input_ids shape: {}".format(input_ids.shape))
            print("label_ids shape: {}".format(label_ids.shape))
            print("============= Translation Testing =============")

            #input_str,ref_str = sample.extract_string_from_tensor(input_ids,mode="pair")
            hypo, ref = sample.generate_for_Translation(
                input_ids, max_generate_length=150)
            print("REF str:\n ", ref, "\nHYPO str:\n", hypo, "\n")
            #print("LENGTH: ",len(ref[1]),"   and   ",len(hypo[1]),"\n")
            callback.update(ref, hypo)
        print("==============================================")
        eval_result_print(metric, callback)
        print("==============================================")
        print("************** Translation Testing Finished **************")

    else:
        raise ValueError(
            "metric method not supported in translation, support: [BLEU]")
Example #27
0
        init()

    epoch_size = args_opt.epoch_size
    net = resnet50(args_opt.batch_size, args_opt.num_classes)
    ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, 0.9)

    model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})

    # for training, users can call model.train
    if args_opt.do_train:
        dataset = create_dataset()
        batch_num = dataset.get_dataset_size()
        config_ck = CheckpointConfig(save_checkpoint_steps=batch_num,
                                     keep_checkpoint_max=35)
        ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10",
                                     directory="./",
                                     config=config_ck)
        loss_cb = LossMonitor()
        model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])

    # for evaluation, users can call model.eval
    if args_opt.do_eval:
        if args_opt.checkpoint_path:
            param_dict = load_checkpoint(args_opt.checkpoint_path)
            load_param_into_net(net, param_dict)
        eval_dataset = create_dataset(training=False)
        res = model.eval(eval_dataset)
        print("result: ", res)
Example #28
0
def do_train(dataset=None,
             network=None,
             load_checkpoint_path="",
             save_checkpoint_path="",
             epoch_num=1):
    """
    Do train
    Args:
        dataset: the train dataset.
        network:  the network with loss
        load_checkpoint_path: the file path which saved pretrain model checkpoint.
        save_checkpoint_path:  the file path which will save finetune model checkpoint.
        epoch_num: the number of epoch
    """
    if load_checkpoint_path == "":
        raise ValueError(
            "Pretrained checkpoint is missing: the finetune task must load a pretrained model!")

    steps_per_epoch = dataset.get_dataset_size()  # number of batches per epoch

    # select the optimizer
    if cfg.optimizer == 'AdamWeightDecay':
        lr_schedule = GPT2LearningRate(
            learning_rate=cfg.AdamWeightDecay.learning_rate,
            end_learning_rate=cfg.AdamWeightDecay.end_learning_rate,
            warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
            decay_steps=steps_per_epoch * epoch_num,
            power=cfg.AdamWeightDecay.power)
        params = network.trainable_params()  # list of all trainable parameters of the network

        # Use parameter groups and set different values
        decay_params = list(filter(cfg.AdamWeightDecay.decay_filter,
                                   params))  # without layernorm and bias
        other_params = list(
            filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x),
                   params))  # with layernorm and bias
        group_params = [{'params': decay_params,
                         'weight_decay': cfg.AdamWeightDecay.weight_decay},
                        {'params': other_params, 'weight_decay': 0.0}]
        optimizer = AdamWeightDecay(group_params,
                                    lr_schedule,
                                    eps=cfg.AdamWeightDecay.eps)
    elif cfg.optimizer == 'Lamb':
        lr_schedule = GPT2LearningRate(
            learning_rate=cfg.Lamb.learning_rate,
            end_learning_rate=cfg.Lamb.end_learning_rate,
            warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
            decay_steps=steps_per_epoch * epoch_num,
            power=cfg.Lamb.power)
        optimizer = Lamb(network.trainable_params(), lr_schedule)
    elif cfg.optimizer == 'Momentum':
        optimizer = Momentum(network.trainable_params(),
                             cfg.Momentum.learning_rate, cfg.Momentum.momentum)
    else:
        raise Exception(
            "Optimizer not supported. support: [AdamWeightDecay, Lamb, Momentum]"
        )

    # configure checkpoint saving
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                   keep_checkpoint_max=1)
    ckpoint_cb = ModelCheckpoint(
        prefix="gpt2_translation_en_fr_",
        directory=None if save_checkpoint_path == "" else save_checkpoint_path,
        config=ckpt_config)

    # load the pretrained checkpoint into the network, remapping parameter names
    param_dict = load_checkpoint(load_checkpoint_path)
    final_param_dict = {}
    for k, v in param_dict.items():
        final_param_dict['gpt2_loss.gpt2.gpt2.' + k] = v
    # set the weights of final linear weights to weights of gpt2 token embedding
    final_param_dict['gpt2_loss.gpt2.dense1.weight'] = param_dict[
        'gpt2_embedding_lookup.embedding_table']
    load_param_into_net(network, final_param_dict)
    print("| loading the pretrained weights | \n")

    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32,
                                             scale_factor=2,
                                             scale_window=1000)
    netwithgrads = GPT2FinetuneCell(network,
                                    optimizer=optimizer,
                                    scale_update_cell=update_cell)
    netwithgrads.set_train(True)
    loss_cb = LossMonitor()

    model = Model(netwithgrads, amp_level='O2')

    callbacks = [TimeMonitor(dataset.get_dataset_size()), loss_cb, ckpoint_cb]

    print(
        "============== Starting Training For Translation Task ==============")
    model.train(epoch_num, dataset, callbacks=callbacks)
    print(
        "==============      Translation Training Success      ==============")
Example #29
0
                auto_parallel_context().set_all_reduce_fusion_split_indices([85, 160])
            ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"

    # create dataset
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, repeat_num=1,
                             batch_size=config.batch_size, target=target)
    step_size = dataset.get_dataset_size()

    # define net
    net = resnet(class_num=config.class_num)
    if args_opt.parameter_server:
        net.set_param_ps()

    # init weight
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)
    else:
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
                                                                    cell.weight.shape,
                                                                    cell.weight.dtype)
            if isinstance(cell, nn.Dense):
                cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                    cell.weight.shape,
                                                                    cell.weight.dtype)

    # init lr
    if args_opt.net == "resnet50":
        if args_opt.dataset == "cifar10":
Example #30
0
    if args_opt.platform != 'GPU':
        raise ValueError("Only the GPU platform is supported.")

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.platform)

    net = efficientnet_b0(
        num_classes=cfg.num_classes,
        drop_rate=cfg.drop,
        drop_connect_rate=cfg.drop_connect,
        global_pool=cfg.gp,
        bn_tf=cfg.bn_tf,
    )

    ckpt = load_checkpoint(args_opt.checkpoint)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    val_data_url = args_opt.data_path
    dataset = create_dataset_val(cfg.batch_size,
                                 val_data_url,
                                 workers=cfg.workers,
                                 distributed=False)
    loss = LabelSmoothingCrossEntropy(smooth_factor=cfg.smoothing)
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top1-Acc': nn.Top1CategoricalAccuracy(),
        'Top5-Acc': nn.Top5CategoricalAccuracy()
    }
    model = Model(net, loss, optimizer=None, metrics=eval_metrics)