Example #1
    target = args_opt.device_target
    ckpt_save_dir = config.save_checkpoint_path

    # init context
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=target,
                        save_graphs=False)
    if args_opt.parameter_server:
        context.set_ps_context(enable_ps=True)
    if args_opt.run_distribute:
        if target == "Ascend":
            device_id = int(os.getenv('DEVICE_ID'))
            context.set_context(device_id=device_id,
                                enable_auto_mixed_precision=True)
            context.set_auto_parallel_context(
                device_num=args_opt.device_num,
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
            if args_opt.net == "resnet50" or args_opt.net == "se-resnet50":
                context.set_auto_parallel_context(
                    all_reduce_fusion_config=[85, 160])
            else:
                context.set_auto_parallel_context(
                    all_reduce_fusion_config=[180, 313])
            init()
        # GPU target
        else:
            init()
            context.set_auto_parallel_context(
                device_num=get_group_size(),
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
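For contrast, a minimal single-device sketch of the same context setup (hedged: it assumes the MindSpore 1.x `context` API used throughout these examples):

import os
from mindspore import context

# Single-device variant: same GRAPH_MODE context, no auto-parallel settings.
context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend",
                    device_id=int(os.getenv('DEVICE_ID', '0')),
                    save_graphs=False)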
Example #2
def test():
    """The function of eval."""
    start_time = time.time()
    args = parse_args()

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    rank_id = int(os.environ.get('RANK_ID', '0'))
    args.logger = get_logger(args.outputs_dir, rank_id)

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=1)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = conver_testing_shape(args)

    # convert fusion network to quantization aware network
    if config.quantization_aware:
        quantizer = QuantizationAwareTraining(bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
        network = quantizer.quantize(network)

    args.logger.info(args.pretrained)
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('yolo_network.'):
                param_dict_new[key[13:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.pretrained))
    else:
        args.logger.info('{} does not exist or is not a pre-trained file'.format(
            args.pretrained))
        raise FileNotFoundError(
            '{} does not exist or is not a pre-trained file'.format(args.pretrained))

    data_root = args.data_root
    ann_file = args.annFile

    ds, data_size = create_yolo_dataset(data_root,
                                        ann_file,
                                        is_training=False,
                                        batch_size=args.per_batch_size,
                                        max_epoch=1,
                                        device_num=1,
                                        rank=rank_id,
                                        shuffle=False,
                                        config=config)

    args.logger.info('testing shape : {}'.format(config.test_img_shape))
    args.logger.info('total {} images to eval'.format(data_size))

    network.set_train(False)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
        image = data["image"]

        image_shape = data["image_shape"]
        image_id = data["img_id"]

        prediction = network(image, input_shape)
        output_big, output_me, output_small = prediction
        output_big = output_big.asnumpy()
        output_me = output_me.asnumpy()
        output_small = output_small.asnumpy()
        image_id = image_id.asnumpy()
        image_shape = image_shape.asnumpy()

        detection.detect([output_small, output_me, output_big],
                         args.per_batch_size, image_shape, image_id)
        if i % 1000 == 0:
            args.logger.info('Processing... {:.2f}% '.format(
                i * args.per_batch_size / data_size * 100))

    args.logger.info('Calculating mAP...')
    detection.do_nms_for_results()
    result_file_path = detection.write_result()
    args.logger.info('result file path: {}'.format(result_file_path))
    eval_result = detection.get_eval_result()

    cost_time = time.time() - start_time
    args.logger.info('\n=============coco eval result=========\n' +
                     eval_result)
    args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))
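The checkpoint-loading loop in `test()` (dropping `moments.` optimizer slots and stripping the `yolo_network.` prefix, whose length is the 13 in `key[13:]`) is a recurring pattern; a hedged, reusable sketch of the same idea (`load_backbone` and `strip_prefix` are illustrative names, not part of the original script):

from mindspore.train.serialization import load_checkpoint, load_param_into_net

def load_backbone(network, ckpt_path, strip_prefix='yolo_network.'):
    """Load a checkpoint, skipping optimizer state and removing an outer cell prefix."""
    param_dict = load_checkpoint(ckpt_path)
    filtered = {}
    for key, value in param_dict.items():
        if key.startswith('moments.'):  # Momentum optimizer slots, not model weights
            continue
        new_key = key[len(strip_prefix):] if key.startswith(strip_prefix) else key
        filtered[new_key] = value
    load_param_into_net(network, filtered)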
Example #3
def test_yolov3_darknet53():
    devid = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target="Ascend",
                        device_id=devid)

    rank = 0
    device_num = 1
    lr_init = 0.001
    epoch_size = 3
    batch_size = 32
    loss_scale = 1024
    mindrecord_dir = DATA_DIR
    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is yolo.mindrecord0, 1, ... file_num.
    if not os.path.isdir(mindrecord_dir):
        raise FileNotFoundError("mindrecord path does not exist.")
    data_root = os.path.join(mindrecord_dir, 'train2014')
    annFile = os.path.join(mindrecord_dir,
                           'annotations/instances_train2014.json')
    # print("yolov3 mindrecord is ", mindrecord_file)
    if not os.path.exists(annFile):
        print("instances_train2014 file is not exist.")
        assert False
    loss_meter = AverageMeter('loss')
    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=1)
    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    network = YoloWithLossCell(network)
    print('finish get network')

    config = ConfigYOLOV3DarkNet53()
    label_smooth = 0
    label_smooth_factor = 0.1
    config.label_smooth = label_smooth
    config.label_smooth_factor = label_smooth_factor
    # When creating the MindDataset, use the first mindrecord file, such as yolo.mindrecord0.
    print("Create dataset begin!")
    training_shape = [416, 416]
    config.multi_scale = [training_shape]
    num_samples = 256
    ds, data_size = create_yolo_dataset(image_dir=data_root,
                                        anno_path=annFile,
                                        is_training=True,
                                        batch_size=batch_size,
                                        max_epoch=epoch_size,
                                        device_num=device_num,
                                        rank=rank,
                                        config=config,
                                        num_samples=num_samples)
    print("Create dataset done!")
    per_batch_size = batch_size
    group_size = 1
    print("data_size:", data_size)
    steps_per_epoch = int(data_size / per_batch_size / group_size)
    print("steps_per_epoch:", steps_per_epoch)

    warmup_epochs = 0.
    max_epoch = epoch_size
    T_max = 1
    eta_min = 0
    lr = warmup_cosine_annealing_lr(lr_init, steps_per_epoch, warmup_epochs,
                                    max_epoch, T_max, eta_min)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=0.9,
                   weight_decay=0.0005,
                   loss_scale=loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()
    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True)
    train_starttime = time.time()
    time_used_per_epoch = 0
    print("time:", time.time())
    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        print('iter[{}], shape {}'.format(i, input_shape))
        images = Tensor.from_numpy(images)
        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2,
                       batch_gt_box0, batch_gt_box1, batch_gt_box2,
                       input_shape)
        loss_meter.update(loss.asnumpy())
        if (i + 1) % steps_per_epoch == 0:
            time_used = time.time() - t_end
            epoch = int(i / steps_per_epoch)
            fps = per_batch_size * (i - old_progress) * group_size / time_used
            if rank == 0:
                print(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}, time_used:{}'
                    .format(epoch, i, loss_meter, fps, lr[i], time_used))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i
            time_used_per_epoch = time_used

    train_endtime = time.time() - train_starttime
    print('train_time_used:{}'.format(train_endtime))
    expect_loss_value = 3210.0
    loss_value = re.findall(r"\d+\.?\d*", str(loss_meter))
    print('loss_value:{}'.format(loss_value[0]))
    assert float(loss_value[0]) < expect_loss_value
    export_time_used = 20.0
    print('time_used_per_epoch:{}'.format(time_used_per_epoch))
    assert time_used_per_epoch < export_time_used
    print('==========test case passed===========')
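`warmup_cosine_annealing_lr` is a project helper; a hedged sketch of what such a warmup-plus-cosine-annealing schedule typically computes (the exact formula is an assumption, not the project's verified implementation):

import math
import numpy as np

def warmup_cosine_lr(lr_init, steps_per_epoch, warmup_epochs, max_epoch, t_max, eta_min=0.0):
    """Per-step learning rates: linear warmup, then cosine annealing over t_max epochs."""
    total_steps = int(steps_per_epoch * max_epoch)
    warmup_steps = int(steps_per_epoch * warmup_epochs)
    lr_each_step = []
    for step in range(total_steps):
        if warmup_steps and step < warmup_steps:
            lr_each_step.append(lr_init * (step + 1) / warmup_steps)
        else:
            epoch = step // steps_per_epoch
            cos_factor = (1 + math.cos(math.pi * (epoch % t_max) / t_max)) / 2
            lr_each_step.append(eta_min + (lr_init - eta_min) * cos_factor)
    return np.array(lr_each_step, dtype=np.float32)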
Example #4
                    type=int,
                    default=0,
                    help="Rank id, default: 0.")
args_opt = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE,
                    device_target=args_opt.device_target,
                    device_id=args_opt.device_id)

if __name__ == '__main__':
    if args_opt.run_distribute:
        if args_opt.device_target == "Ascend":
            rank = args_opt.rank_id
            device_num = args_opt.device_num
            context.set_auto_parallel_context(
                device_num=device_num,
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
            init()
        else:
            init("nccl")
            context.reset_auto_parallel_context()
            rank = get_rank()
            device_num = get_group_size()
            context.set_auto_parallel_context(
                device_num=device_num,
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
    else:
        rank = 0
        device_num = 1
Example #5
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mindspore import Model, context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor
from mindspore.train import ParallelMode
from mindspore.communication.management import get_rank, get_group_size, init

from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
from src.callbacks import LossCallBack, EvalCallBack
from src.datasets import create_dataset
from src.metrics import AUCMetric
from src.config import WideDeepConfig

context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend",
                    save_graphs=True)
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                  mirror_mean=True)
init()


def get_WideDeep_net(config):
    WideDeep_net = WideDeepModel(config)
    loss_net = NetWithLossClass(WideDeep_net, config)
    train_net = TrainStepWrap(loss_net)
    eval_net = PredictWithSigmoid(WideDeep_net)
    return train_net, eval_net


class ModelBuilder:
    """
    ModelBuilder
    """
Example #6
def main():
    parser = argparse.ArgumentParser(description="YOLOv3 train")
    parser.add_argument("--only_create_dataset",
                        type=bool,
                        default=False,
                        help="If set it true, only create "
                        "Mindrecord, default is false.")
    parser.add_argument("--distribute",
                        type=bool,
                        default=False,
                        help="Run distribute, default is false.")
    parser.add_argument("--device_id",
                        type=int,
                        default=0,
                        help="Device id, default is 0.")
    parser.add_argument("--device_num",
                        type=int,
                        default=1,
                        help="Use device nums, default is 1.")
    parser.add_argument("--lr",
                        type=float,
                        default=0.001,
                        help="Learning rate, default is 0.001.")
    parser.add_argument("--mode",
                        type=str,
                        default="sink",
                        help="Run sink mode or not, default is sink")
    parser.add_argument("--epoch_size",
                        type=int,
                        default=10,
                        help="Epoch size, default is 10")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size, default is 32.")
    parser.add_argument("--pre_trained",
                        type=str,
                        default=None,
                        help="Pretrained checkpoint file path")
    parser.add_argument("--pre_trained_epoch_size",
                        type=int,
                        default=0,
                        help="Pretrained epoch size")
    parser.add_argument("--save_checkpoint_epochs",
                        type=int,
                        default=5,
                        help="Save checkpoint epochs, default is 5.")
    parser.add_argument("--loss_scale",
                        type=int,
                        default=1024,
                        help="Loss scale, default is 1024.")
    parser.add_argument(
        "--mindrecord_dir",
        type=str,
        default="./Mindrecord",
        help=
        "Mindrecord directory. If mindrecord_dir is empty, mindrecord files will be generated "
        "from image_dir and anno_path. Note that if mindrecord_dir is not empty, it will be "
        "used rather than image_dir and anno_path. Default is ./Mindrecord.")
    parser.add_argument('--data_url',
                        type=str,
                        default=None,
                        help='Dataset path')
    parser.add_argument('--train_url',
                        type=str,
                        default=None,
                        help='Train output path')
    parser.add_argument("--anno_path",
                        type=str,
                        default="",
                        help="Annotation path.")
    args_opt = parser.parse_args()

    device_id = int(os.getenv('DEVICE_ID'))
    device_num = int(os.getenv('RANK_SIZE'))
    rankid = int(os.getenv('RANK_ID'))

    local_data_url = '/cache/data'
    local_train_url = '/cache/ckpt'
    local_anno_url = '/cache/anno'
    local_mindrecord_url = '/cache/mindrecord'
    mox.file.copy_parallel(args_opt.mindrecord_dir, local_mindrecord_url)

    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id)
    if args_opt.distribute:
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            device_num=device_num)
        init()
        rank = rankid
        local_train_url = os.path.join(local_train_url, str(device_id))
    else:
        rank = 0
        device_num = 1

    print("Start create dataset!")

    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is yolo.mindrecord0, 1, ... file_num.
    if not os.path.isdir(local_mindrecord_url):
        os.makedirs(local_mindrecord_url)

    prefix = "train.mindrecord"
    mindrecord_file = os.path.join(local_mindrecord_url, prefix + "0")
    if not os.path.exists(mindrecord_file):
        mox.file.copy_parallel(args_opt.data_url, local_data_url)
        anno_file = ""
        if args_opt.anno_path:
            anno_file = os.path.join(local_anno_url,
                                     os.path.split(args_opt.anno_path)[1])
            mox.file.copy_parallel(args_opt.anno_path, anno_file)
        if os.path.isdir(local_data_url) or os.path.exists(anno_file):
            print("Create Mindrecord.")
            data_to_mindrecord_byte_image(local_data_url,
                                          anno_file,
                                          local_mindrecord_url,
                                          prefix=prefix,
                                          file_num=8)
            print("Create Mindrecord Done, at {}".format(
                args_opt.mindrecord_dir))
            mox.file.copy_parallel(local_mindrecord_url,
                                   args_opt.mindrecord_dir)
        else:
            print("image_dir or anno_path not exits.")

    if not args_opt.only_create_dataset:
        loss_scale = float(args_opt.loss_scale)

        # When creating the MindDataset, use the first mindrecord file, such as yolo.mindrecord0.
        dataset = create_yolo_dataset(mindrecord_file,
                                      repeat_num=args_opt.epoch_size,
                                      batch_size=args_opt.batch_size,
                                      device_num=device_num,
                                      rank=rank)
        dataset_size = dataset.get_dataset_size()
        print("Create dataset done!")

        net = yolov3_resnet18(ConfigYOLOV3ResNet18())
        net = YoloWithLossCell(net, ConfigYOLOV3ResNet18())
        init_net_param(net, "XavierUniform")

        # checkpoint
        ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size *
                                       args_opt.save_checkpoint_epochs)
        ckpoint_cb = ModelCheckpoint(prefix="yolov3",
                                     directory=local_train_url,
                                     config=ckpt_config)

        if args_opt.pre_trained:
            if args_opt.pre_trained_epoch_size <= 0:
                raise KeyError(
                    "pre_trained_epoch_size must be greater than 0.")
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        total_epoch_size = 60
        if args_opt.distribute:
            total_epoch_size = 160
        lr = Tensor(
            get_lr(learning_rate=args_opt.lr,
                   start_step=args_opt.pre_trained_epoch_size * dataset_size,
                   global_step=total_epoch_size * dataset_size,
                   decay_step=1000,
                   decay_rate=0.95,
                   steps=True))
        opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()),
                      lr,
                      loss_scale=loss_scale)
        net = TrainingWrapper(net, opt, loss_scale)

        callback = [
            TimeMonitor(data_size=dataset_size),
            LossMonitor(), ckpoint_cb
        ]

        model = Model(net)
        dataset_sink_mode = False
        if args_opt.mode == "sink":
            print("In sink mode, one epoch return a loss.")
            dataset_sink_mode = True
        print(
            "Start train YOLOv3, the first epoch will be slower because of the graph compilation."
        )
        model.train(args_opt.epoch_size,
                    dataset,
                    callbacks=callback,
                    dataset_sink_mode=dataset_sink_mode)
        if device_id == 1:
            mox.file.copy_parallel(local_train_url, args_opt.train_url)
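One caveat in the parser above: argparse's `type=bool` treats any non-empty string, including "False", as true. A hedged workaround many scripts use (not part of this one) is a `str2bool` converter:

import argparse

def str2bool(value):
    """Parse common true/false spellings for argparse flags."""
    if isinstance(value, bool):
        return value
    if value.lower() in ('true', 't', 'yes', '1'):
        return True
    if value.lower() in ('false', 'f', 'no', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))

# Usage sketch: parser.add_argument("--distribute", type=str2bool, default=False, ...)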
Example #7
def test_range3():
    context.set_auto_parallel_context(parallel_mode="auto_parallel",
                                      device_num=dev_num,
                                      global_rank=2)
    net = Net(_w1, 0.0, 4.0, 0.5)
    compile_net(net)
Example #8
def train(args):
    '''train'''
    print('=============yolov3 start training==================')

    # init distributed
    if args.world_size != 1:
        init()
        args.local_rank = get_rank()
        args.world_size = get_group_size()

    args.batch_size = config.batch_size
    args.warmup_lr = config.warmup_lr
    args.lr_rates = config.lr_rates
    args.lr_steps = config.lr_steps
    args.gamma = config.gamma
    args.weight_decay = config.weight_decay
    args.momentum = config.momentum
    args.max_epoch = config.max_epoch
    args.log_interval = config.log_interval
    args.ckpt_path = config.ckpt_path
    args.ckpt_interval = config.ckpt_interval

    args.outputs_dir = os.path.join(args.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    print('args.outputs_dir', args.outputs_dir)

    args.logger = get_logger(args.outputs_dir, args.local_rank)

    if args.world_size != 8:
        args.lr_steps = [i * 8 // args.world_size for i in args.lr_steps]

    if args.world_size == 1:
        args.weight_decay = 0.

    if args.world_size != 1:
        parallel_mode = ParallelMode.DATA_PARALLEL
    else:
        parallel_mode = ParallelMode.STAND_ALONE

    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.world_size, gradients_mean=True)
    mindrecord_path = args.mindrecord_path

    num_classes = config.num_classes
    anchors = config.anchors
    anchors_mask = config.anchors_mask
    num_anchors_list = [len(x) for x in anchors_mask]

    momentum = args.momentum
    args.logger.info('train opt momentum:{}'.format(momentum))

    weight_decay = args.weight_decay * float(args.batch_size)
    args.logger.info('real weight_decay:{}'.format(weight_decay))
    lr_scale = args.world_size / 8
    args.logger.info('lr_scale:{}'.format(lr_scale))

    # dataloader
    args.logger.info('start create dataloader')
    epoch = args.max_epoch
    ds = de.MindDataset(mindrecord_path + "0", columns_list=["image", "annotation"], num_shards=args.world_size,
                        shard_id=args.local_rank)

    ds = ds.map(input_columns=["image", "annotation"],
                output_columns=["image", "annotation", 'coord_mask_0', 'conf_pos_mask_0', 'conf_neg_mask_0',
                                'cls_mask_0', 't_coord_0', 't_conf_0', 't_cls_0', 'gt_list_0', 'coord_mask_1',
                                'conf_pos_mask_1', 'conf_neg_mask_1', 'cls_mask_1', 't_coord_1', 't_conf_1',
                                't_cls_1', 'gt_list_1', 'coord_mask_2', 'conf_pos_mask_2', 'conf_neg_mask_2',
                                'cls_mask_2', 't_coord_2', 't_conf_2', 't_cls_2', 'gt_list_2'],
                column_order=["image", "annotation", 'coord_mask_0', 'conf_pos_mask_0', 'conf_neg_mask_0',
                              'cls_mask_0', 't_coord_0', 't_conf_0', 't_cls_0', 'gt_list_0', 'coord_mask_1',
                              'conf_pos_mask_1', 'conf_neg_mask_1', 'cls_mask_1', 't_coord_1', 't_conf_1',
                              't_cls_1', 'gt_list_1', 'coord_mask_2', 'conf_pos_mask_2', 'conf_neg_mask_2',
                              'cls_mask_2', 't_coord_2', 't_conf_2', 't_cls_2', 'gt_list_2'],
                operations=compose_map_func, num_parallel_workers=16, python_multiprocessing=True)

    ds = ds.batch(args.batch_size, drop_remainder=True, num_parallel_workers=8)

    args.steps_per_epoch = ds.get_dataset_size()
    lr = warmup_step_new(args, lr_scale=lr_scale)

    ds = ds.repeat(epoch)
    args.logger.info('args.steps_per_epoch:{}'.format(args.steps_per_epoch))
    args.logger.info('args.world_size:{}'.format(args.world_size))
    args.logger.info('args.local_rank:{}'.format(args.local_rank))
    args.logger.info('end create dataloader')
    args.logger.save_args(args)
    args.logger.important_info('start create network')
    create_network_start = time.time()

    # backbone and loss
    network = backbone_HwYolov3(num_classes, num_anchors_list, args)

    criterion0 = YoloLoss(num_classes, anchors, anchors_mask[0], 64, 0, head_idx=0.0)
    criterion1 = YoloLoss(num_classes, anchors, anchors_mask[1], 32, 0, head_idx=1.0)
    criterion2 = YoloLoss(num_classes, anchors, anchors_mask[2], 16, 0, head_idx=2.0)

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(args.pretrained))

    train_net = BuildTrainNetworkV2(network, criterion0, criterion1, criterion2, args)

    # optimizer
    opt = Momentum(params=train_net.trainable_params(), learning_rate=Tensor(lr), momentum=momentum,
                   weight_decay=weight_decay)

    # package training process
    train_net = TrainOneStepWithLossScaleCell(train_net, opt)
    train_net.set_broadcast_flag()

    # checkpoint
    ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
    train_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=train_config, directory=args.outputs_dir, prefix='{}'.format(args.local_rank))
    cb_params = _InternalCallbackParam()
    cb_params.train_network = train_net
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    train_net.set_train()
    t_end = time.time()
    t_epoch = time.time()
    old_progress = -1
    i = 0
    scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)

    for data in ds.create_tuple_iterator(output_numpy=True):

        batch_images = data[0]
        batch_labels = data[1]
        coord_mask_0 = data[2]
        conf_pos_mask_0 = data[3]
        conf_neg_mask_0 = data[4]
        cls_mask_0 = data[5]
        t_coord_0 = data[6]
        t_conf_0 = data[7]
        t_cls_0 = data[8]
        gt_list_0 = data[9]
        coord_mask_1 = data[10]
        conf_pos_mask_1 = data[11]
        conf_neg_mask_1 = data[12]
        cls_mask_1 = data[13]
        t_coord_1 = data[14]
        t_conf_1 = data[15]
        t_cls_1 = data[16]
        gt_list_1 = data[17]
        coord_mask_2 = data[18]
        conf_pos_mask_2 = data[19]
        conf_neg_mask_2 = data[20]
        cls_mask_2 = data[21]
        t_coord_2 = data[22]
        t_conf_2 = data[23]
        t_cls_2 = data[24]
        gt_list_2 = data[25]

        img_tensor = Tensor(batch_images, mstype.float32)
        coord_mask_tensor_0 = Tensor(coord_mask_0.astype(np.float32))
        conf_pos_mask_tensor_0 = Tensor(conf_pos_mask_0.astype(np.float32))
        conf_neg_mask_tensor_0 = Tensor(conf_neg_mask_0.astype(np.float32))
        cls_mask_tensor_0 = Tensor(cls_mask_0.astype(np.float32))
        t_coord_tensor_0 = Tensor(t_coord_0.astype(np.float32))
        t_conf_tensor_0 = Tensor(t_conf_0.astype(np.float32))
        t_cls_tensor_0 = Tensor(t_cls_0.astype(np.float32))
        gt_list_tensor_0 = Tensor(gt_list_0.astype(np.float32))

        coord_mask_tensor_1 = Tensor(coord_mask_1.astype(np.float32))
        conf_pos_mask_tensor_1 = Tensor(conf_pos_mask_1.astype(np.float32))
        conf_neg_mask_tensor_1 = Tensor(conf_neg_mask_1.astype(np.float32))
        cls_mask_tensor_1 = Tensor(cls_mask_1.astype(np.float32))
        t_coord_tensor_1 = Tensor(t_coord_1.astype(np.float32))
        t_conf_tensor_1 = Tensor(t_conf_1.astype(np.float32))
        t_cls_tensor_1 = Tensor(t_cls_1.astype(np.float32))
        gt_list_tensor_1 = Tensor(gt_list_1.astype(np.float32))

        coord_mask_tensor_2 = Tensor(coord_mask_2.astype(np.float32))
        conf_pos_mask_tensor_2 = Tensor(conf_pos_mask_2.astype(np.float32))
        conf_neg_mask_tensor_2 = Tensor(conf_neg_mask_2.astype(np.float32))
        cls_mask_tensor_2 = Tensor(cls_mask_2.astype(np.float32))
        t_coord_tensor_2 = Tensor(t_coord_2.astype(np.float32))
        t_conf_tensor_2 = Tensor(t_conf_2.astype(np.float32))
        t_cls_tensor_2 = Tensor(t_cls_2.astype(np.float32))
        gt_list_tensor_2 = Tensor(gt_list_2.astype(np.float32))

        scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)

        loss0, overflow, _ = train_net(img_tensor, coord_mask_tensor_0, conf_pos_mask_tensor_0,
                                       conf_neg_mask_tensor_0, cls_mask_tensor_0, t_coord_tensor_0,
                                       t_conf_tensor_0, t_cls_tensor_0, gt_list_tensor_0,
                                       coord_mask_tensor_1, conf_pos_mask_tensor_1, conf_neg_mask_tensor_1,
                                       cls_mask_tensor_1, t_coord_tensor_1, t_conf_tensor_1,
                                       t_cls_tensor_1, gt_list_tensor_1, coord_mask_tensor_2,
                                       conf_pos_mask_tensor_2, conf_neg_mask_tensor_2,
                                       cls_mask_tensor_2, t_coord_tensor_2, t_conf_tensor_2,
                                       t_cls_tensor_2, gt_list_tensor_2, scaling_sens)

        overflow = np.all(overflow.asnumpy())
        scale_manager.update_loss_scale(overflow)
        args.logger.info('rank[{}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
                         'batch_labels:{}'.format(args.local_rank, i, loss0, overflow, scaling_sens, lr[i],
                                                  batch_images.shape, batch_labels.shape))

        # save ckpt
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        if args.local_rank == 0:
            ckpt_cb.step_end(run_context)

        # save Log
        if i == 0:
            time_for_graph_compile = time.time() - create_network_start
            args.logger.important_info('Yolov3, graph compile time={:.2f}s'.format(time_for_graph_compile))

        if i % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

        if i % args.log_interval == 0 and args.local_rank == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.batch_size * (i - old_progress) * args.world_size / time_used
            args.logger.info('epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(epoch, i, loss0, fps))
            t_end = time.time()
            old_progress = i

        if i % args.steps_per_epoch == 0 and args.local_rank == 0:
            epoch_time_used = time.time() - t_epoch
            epoch = int(i / args.steps_per_epoch)
            fps = args.batch_size * args.world_size * args.steps_per_epoch / epoch_time_used
            args.logger.info('=================================================')
            args.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(epoch, i, fps))
            args.logger.info('=================================================')
            t_epoch = time.time()

        i = i + 1

    args.logger.info('=============yolov3 training finished==================')
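The manual mixed-precision loop above follows the standard dynamic-loss-scale pattern; condensed into a hedged sketch (`inputs` stands for the 26 tensors assembled per batch):

import numpy as np
from mindspore import Tensor
import mindspore.common.dtype as mstype

def loss_scaled_step(train_net, scale_manager, inputs):
    """One training step with dynamic loss scaling (sketch, not the original code)."""
    scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
    loss, overflow, _ = train_net(*inputs, scaling_sens)
    # On overflow the manager shrinks the scale; after scale_window clean
    # steps it grows the scale back.
    scale_manager.update_loss_scale(bool(np.all(overflow.asnumpy())))
    return loss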
Example #9
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)
    devid, args.rank_id, args.rank_size = 0, 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            init()
            devid = int(os.getenv('DEVICE_ID'))
            context.set_context(device_target='Ascend',
                                device_id=devid,
                                reserve_class_name_in_scope=False)
        context.reset_auto_parallel_context()
        args.rank_id = get_rank()
        args.rank_size = get_group_size()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True,
                                          device_num=args.rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')

    is_master = not args.distributed or (args.rank_id == 0)

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=args.drop,
                  drop_connect_rate=args.drop_connect,
                  global_pool="avg",
                  bn_tf=args.bn_tf,
                  bn_momentum=args.bn_momentum,
                  bn_eps=args.bn_eps)

    if is_master:
        print("Total number of parameters:", count_params(net))
    # input image size of the network
    input_size = net.default_cfg['input_size'][1]

    train_dataset = val_dataset = None
    train_data_url = os.path.join(args.data_path, 'train')
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    if args.train:
        train_dataset = create_dataset(args.batch_size,
                                       train_data_url,
                                       workers=args.workers,
                                       distributed=args.distributed,
                                       input_size=input_size)
        batches_per_epoch = train_dataset.get_dataset_size()

    loss = LabelSmoothingCrossEntropy(
        smooth_factor=args.smoothing, num_classes=args.num_classes)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(
        args.loss_scale, drop_overflow_update=False)

    lr_array = get_lr(base_lr=args.lr,
                      total_epochs=args.epochs,
                      steps_per_epoch=batches_per_epoch,
                      decay_epochs=args.decay_epochs,
                      decay_rate=args.decay_rate,
                      warmup_epochs=args.warmup_epochs,
                      warmup_lr_init=args.warmup_lr,
                      global_epoch=0)
    lr = Tensor(lr_array)

    loss_cb = LossMonitor(lr_array,
                          args.epochs,
                          per_print_times=args.per_print_times,
                          start_epoch=0)

    param_group = add_weight_decay(net, weight_decay=args.weight_decay)

    if args.opt == 'sgd':
        if is_master:
            print('Using SGD optimizer')
        optimizer = SGD(param_group,
                        learning_rate=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)

    elif args.opt == 'rmsprop':
        if is_master:
            print('Using rmsprop optimizer')
        optimizer = RMSProp(param_group,
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            epsilon=args.opt_eps,
                            loss_scale=args.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    if args.ckpt:
        ckpt = load_checkpoint(args.ckpt)
        load_param_into_net(net, ckpt)
        net.set_train(False)

    model = Model(net, loss, optimizer, metrics=eval_metrics,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=args.amp_level)

    net_ema = copy.deepcopy(net)
    net_ema.set_train(False)
    assert args.ema_decay > 0, "EMA should be used in tinynet training."

    ema_cb = EmaEvalCallBack(model=model,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model
              + " with " + str(args.num_classes) + " classes")

    model.train(args.epochs, train_dataset, callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
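`add_weight_decay` builds optimizer parameter groups; a hedged sketch of the rule such helpers usually implement (exclude biases and BatchNorm scales from decay; the project's exact filter may differ):

def add_weight_decay(net, weight_decay):
    """Split trainable parameters into decay / no-decay groups (sketch)."""
    decay_params, no_decay_params = [], []
    for param in net.trainable_params():
        # 1-D parameters are biases or BatchNorm gamma/beta: no weight decay.
        if len(param.shape) <= 1:
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    return [{'params': no_decay_params, 'weight_decay': 0.0},
            {'params': decay_params, 'weight_decay': weight_decay}]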
Example #10
def test_set_auto_parallel_context():
    context.set_auto_parallel_context(device_num=4,
                                      global_rank=3,
                                      gradients_mean=True,
                                      gradient_fp32_sync=False,
                                      parallel_mode="auto_parallel",
                                      parameter_broadcast=False)
    device_num = context.get_auto_parallel_context("device_num")
    global_rank = context.get_auto_parallel_context("global_rank")
    gradients_mean = context.get_auto_parallel_context("gradients_mean")
    gradient_fp32_sync = context.get_auto_parallel_context(
        "gradient_fp32_sync")
    parallel_mode = context.get_auto_parallel_context("parallel_mode")
    parameter_broadcast = context.get_auto_parallel_context(
        "parameter_broadcast")
    assert device_num == 4
    assert global_rank == 3
    assert gradients_mean
    assert not gradient_fp32_sync
    assert parallel_mode == "auto_parallel"
    assert not parameter_broadcast

    auto_parallel_context().set_device_num(4)
    device_num = auto_parallel_context().get_device_num()
    device_num_is_set = auto_parallel_context().get_device_num_is_set()
    assert device_num == 4
    assert device_num_is_set

    auto_parallel_context().set_global_rank(4)
    global_rank = auto_parallel_context().get_global_rank()
    assert global_rank == 4

    auto_parallel_context().set_gradients_mean(True)
    gradients_mean = auto_parallel_context().get_gradients_mean()
    assert gradients_mean

    auto_parallel_context().set_gradient_fp32_sync(False)
    gradient_fp32_sync = auto_parallel_context().get_gradient_fp32_sync()
    assert not gradient_fp32_sync

    parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set()
    assert parameter_broadcast_is_set

    with pytest.raises(ValueError):
        context.set_auto_parallel_context(device_num=0)

    with pytest.raises(ValueError):
        context.set_auto_parallel_context(device_num=4097)

    with pytest.raises(ValueError):
        context.set_auto_parallel_context(global_rank=-1)

    with pytest.raises(ValueError):
        context.set_auto_parallel_context(parallel_mode="wrong_mode")

    with pytest.raises(ValueError):
        context.set_auto_parallel_context(global_rank=4096)

    with pytest.raises(ValueError):
        set_algo_parameters(tensor_slice_align_size=0)

    with pytest.raises(ValueError):
        set_algo_parameters(tensor_slice_align_size=1025)

    context.set_auto_parallel_context(enable_parallel_optimizer=True)
    assert context.get_auto_parallel_context("enable_parallel_optimizer")
    assert not auto_parallel_context().get_all_reduce_fusion_split_indices()
Example #11
if args_opt.device_target == "Ascend":
    device_id = int(os.getenv('DEVICE_ID', '0'))
    rank_id = int(os.getenv('RANK_ID', '0'))
    rank_size = int(os.getenv('RANK_SIZE', '1'))
    run_distribute = rank_size > 1
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id,
                        save_graphs=False)
elif args_opt.device_target == "GPU":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="GPU",
                        save_graphs=False)
    init()
    context.set_auto_parallel_context(device_num=get_group_size(),
                                      parallel_mode=ParallelMode.DATA_PARALLEL,
                                      mirror_mean=True)
else:
    raise ValueError("Unsupported device target.")


class CrossEntropyWithLabelSmooth(_Loss):
    """
    CrossEntropyWithLabelSmooth.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes

    Returns:
        None.
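The snippet cuts off before `construct`; a hedged sketch of how a label-smoothing cross entropy is commonly assembled in MindSpore (standard ops, but not the original class body):

import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops import functional as F

class LabelSmoothCrossEntropySketch(nn.Cell):
    """Cross entropy over one-hot labels smoothed by smooth_factor (sketch)."""

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(LabelSmoothCrossEntropySketch, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction="mean")

    def construct(self, logit, label):
        one_hot_label = self.onehot(label, F.shape(logit)[1],
                                    self.on_value, self.off_value)
        return self.ce(logit, one_hot_label)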
Example #12
def run_pretrain():
    """pre-train bert_clue"""
    parser = argparse.ArgumentParser(description='bert pre_training')
    parser.add_argument(
        '--device_target',
        type=str,
        default='Ascend',
        choices=['Ascend', 'GPU'],
        help='device where the code will be implemented. (Default: Ascend)')
    parser.add_argument("--distribute",
                        type=str,
                        default="false",
                        choices=["true", "false"],
                        help="Run distribute, default is false.")
    parser.add_argument("--epoch_size",
                        type=int,
                        default="1",
                        help="Epoch size, default is 1.")
    parser.add_argument("--device_id",
                        type=int,
                        default=0,
                        help="Device id, default is 0.")
    parser.add_argument("--device_num",
                        type=int,
                        default=1,
                        help="Use device nums, default is 1.")
    parser.add_argument("--enable_save_ckpt",
                        type=str,
                        default="true",
                        choices=["true", "false"],
                        help="Enable save checkpoint, default is true.")
    parser.add_argument("--enable_lossscale",
                        type=str,
                        default="true",
                        choices=["true", "false"],
                        help="Use lossscale or not, default is not.")
    parser.add_argument("--do_shuffle",
                        type=str,
                        default="true",
                        choices=["true", "false"],
                        help="Enable shuffle for dataset, default is true.")
    parser.add_argument("--enable_data_sink",
                        type=str,
                        default="true",
                        choices=["true", "false"],
                        help="Enable data sink, default is true.")
    parser.add_argument("--data_sink_steps",
                        type=int,
                        default="1",
                        help="Sink steps for each epoch, default is 1.")
    parser.add_argument(
        "--accumulation_steps",
        type=int,
        default="1",
        help=
        "Accumulating gradients N times before weight update, default is 1.")
    parser.add_argument("--save_checkpoint_path",
                        type=str,
                        default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_checkpoint_path",
                        type=str,
                        default="",
                        help="Load checkpoint file path")
    parser.add_argument("--save_checkpoint_steps",
                        type=int,
                        default=1000,
                        help="Save checkpoint steps, "
                        "default is 1000.")
    parser.add_argument("--train_steps",
                        type=int,
                        default=-1,
                        help="Training Steps, default is -1, "
                        "meaning run all steps according to epoch number.")
    parser.add_argument("--save_checkpoint_num",
                        type=int,
                        default=1,
                        help="Save checkpoint numbers, default is 1.")
    parser.add_argument("--data_dir",
                        type=str,
                        default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_dir",
                        type=str,
                        default="",
                        help="Schema path, it is better to use absolute path")

    args_opt = parser.parse_args()
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.device_target,
                        device_id=args_opt.device_id)
    context.set_context(reserve_class_name_in_scope=False)
    ckpt_save_dir = args_opt.save_checkpoint_path
    if args_opt.distribute == "true":
        if args_opt.device_target == 'Ascend':
            D.init()
            device_num = args_opt.device_num
            rank = args_opt.device_id % device_num
        else:
            D.init()
            device_num = D.get_group_size()
            rank = D.get_rank()
        ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(
            get_rank()) + '/'

        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.AUTO_PARALLEL,
            gradients_mean=True,
            device_num=device_num,
            auto_parallel_search_mode="recursive_programming")
        _set_bert_all_reduce_split()
    else:
        rank = 0
        device_num = 1

    if args_opt.device_target == 'GPU' and bert_net_cfg.compute_type != mstype.float32:
        logger.warning('Gpu only support fp32 temporarily, run with fp32.')
        bert_net_cfg.compute_type = mstype.float32

    if args_opt.accumulation_steps > 1:
        logger.info("accumulation steps: {}".format(
            args_opt.accumulation_steps))
        logger.info("global batch size: {}".format(
            cfg.batch_size * args_opt.accumulation_steps))
        if args_opt.enable_data_sink == "true":
            args_opt.data_sink_steps *= args_opt.accumulation_steps
            logger.info("data sink steps: {}".format(args_opt.data_sink_steps))
        if args_opt.enable_save_ckpt == "true":
            args_opt.save_checkpoint_steps *= args_opt.accumulation_steps
            logger.info("save checkpoint steps: {}".format(
                args_opt.save_checkpoint_steps))

    ds = create_bert_dataset(device_num, rank, args_opt.do_shuffle,
                             args_opt.data_dir, args_opt.schema_dir)
    net_with_loss = BertNetworkWithLoss(bert_net_cfg, True)

    new_repeat_count = (args_opt.epoch_size * ds.get_dataset_size()
                        // args_opt.data_sink_steps)
    if args_opt.train_steps > 0:
        train_steps = args_opt.train_steps * args_opt.accumulation_steps
        new_repeat_count = min(new_repeat_count,
                               train_steps // args_opt.data_sink_steps)
    else:
        args_opt.train_steps = (args_opt.epoch_size * ds.get_dataset_size()
                                // args_opt.accumulation_steps)
        logger.info("train steps: {}".format(args_opt.train_steps))

    optimizer = _get_optimizer(args_opt, net_with_loss)
    callback = [
        TimeMonitor(args_opt.data_sink_steps),
        LossCallBack(ds.get_dataset_size())
    ]
    if args_opt.enable_save_ckpt == "true" and args_opt.device_id % min(
            8, device_num) == 0:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=args_opt.save_checkpoint_steps,
            keep_checkpoint_max=args_opt.save_checkpoint_num)
        ckpoint_cb = ModelCheckpoint(
            prefix='checkpoint_bert',
            directory=None if ckpt_save_dir == "" else ckpt_save_dir,
            config=config_ck)
        callback.append(ckpoint_cb)

    if args_opt.load_checkpoint_path:
        param_dict = load_checkpoint(args_opt.load_checkpoint_path)
        load_param_into_net(net_with_loss, param_dict)

    if args_opt.enable_lossscale == "true":
        update_cell = DynamicLossScaleUpdateCell(
            loss_scale_value=cfg.loss_scale_value,
            scale_factor=cfg.scale_factor,
            scale_window=cfg.scale_window)

        if args_opt.accumulation_steps <= 1:
            net_with_grads = BertTrainOneStepWithLossScaleCell(
                net_with_loss,
                optimizer=optimizer,
                scale_update_cell=update_cell)
        else:
            accumulation_steps = args_opt.accumulation_steps
            net_with_grads = BertTrainAccumulateStepsWithLossScaleCell(
                net_with_loss,
                optimizer=optimizer,
                scale_update_cell=update_cell,
                accumulation_steps=accumulation_steps,
                enable_global_norm=cfg.enable_global_norm)
    else:
        net_with_grads = BertTrainOneStepCell(net_with_loss,
                                              optimizer=optimizer)

    model = Model(net_with_grads)
    model.train(new_repeat_count,
                ds,
                callbacks=callback,
                dataset_sink_mode=(args_opt.enable_data_sink == "true"),
                sink_size=args_opt.data_sink_steps)

    open("bert_4gpu.txt",
         "w").write(str(_executor._get_shard_strategy(model._train_network)))
Example #13
def test_auto_parallel_activation():
    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
    strategy1 = ((4, 4), (4, 4))
    strategy2 = None
    net = Net(_w1, strategy1, strategy2)
    compile_net(net)
Example #14
def test_train():
    """train entry method"""
    if args.is_distributed:
        if args.device_target == "Ascend":
            init()
            context.set_context(device_id=args.device_id)
        elif args.device_target == "GPU":
            init()

        args.rank = get_rank()
        args.group_size = get_group_size()
        device_num = args.group_size
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            parameter_broadcast=True,
            gradients_mean=True)
    else:
        context.set_context(device_id=args.device_id)
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target)

    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    layers = cfg.layers
    num_factors = cfg.num_factors
    epochs = args.train_epochs

    ds_train, num_train_users, num_train_items = create_dataset(
        test_train=True,
        data_dir=args.data_path,
        dataset=args.dataset,
        train_epochs=1,
        batch_size=args.batch_size,
        num_neg=args.num_neg)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))

    ncf_net = NCFModel(num_users=num_train_users,
                       num_items=num_train_items,
                       num_factors=num_factors,
                       model_layers=layers,
                       mf_regularization=0,
                       mlp_reg_layers=[0.0, 0.0, 0.0, 0.0],
                       mf_dim=16)
    loss_net = NetWithLossClass(ncf_net)
    train_net = TrainStepWrap(loss_net,
                              ds_train.get_dataset_size() * (epochs + 1))

    train_net.set_train()

    model = Model(train_net)
    callback = LossMonitor(per_print_times=ds_train.get_dataset_size())
    ckpt_config = CheckpointConfig(
        save_checkpoint_steps=(4970845 + args.batch_size - 1) // args.batch_size,
        keep_checkpoint_max=100)
    ckpoint_cb = ModelCheckpoint(prefix='NCF',
                                 directory=args.checkpoint_path,
                                 config=ckpt_config)
    model.train(epochs,
                ds_train,
                callbacks=[
                    TimeMonitor(ds_train.get_dataset_size()), callback,
                    ckpoint_cb
                ],
                dataset_sink_mode=True)
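The `save_checkpoint_steps` expression above is the integer ceiling-division idiom, with 4,970,845 being the hardcoded count of training instances; a quick hedged illustration:

def ceil_div(numerator, denominator):
    """Smallest integer >= numerator / denominator."""
    return (numerator + denominator - 1) // denominator

# e.g. one checkpoint per pass over 4,970,845 samples at batch size 256:
# ceil_div(4970845, 256) == 19418 steps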
Example #15
def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH_2
    os.environ['RANK_ID'] = str(device_id - 4)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=[107])
        init()

    # network
    damping = get_model_damping(0, 0.03, 0.87, 50, 5004)
    net = resnet50_thor(class_num=thor_config.class_num,
                        damping=damping,
                        loss_scale=thor_config.loss_scale,
                        frequency=thor_config.frequency)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)

    if not thor_config.label_smooth:
        thor_config.label_smooth_factor = 0.0

    # loss
    loss = CrossEntropySmooth(sparse=True,
                              reduction="mean",
                              smooth_factor=thor_config.label_smooth_factor,
                              num_classes=thor_config.class_num)

    # train dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             do_train=True,
                             repeat_num=1,
                             batch_size=thor_config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = thor_config.eval_interval

    # evaluation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=1,
                                  batch_size=thor_config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(thor_config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = Tensor(get_model_lr(0, 0.045, 6, 70, 5004))

    # optimizer
    opt = THOR(filter(lambda x: x.requires_grad,
                      net.get_parameters()), lr, thor_config.momentum,
               filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
               filter(lambda x: 'matrix_G' in x.name, net.get_parameters()),
               filter(lambda x: 'A_inv_max' in x.name, net.get_parameters()),
               filter(lambda x: 'G_inv_max' in x.name, net.get_parameters()),
               thor_config.weight_decay, thor_config.loss_scale)

    # model
    model = THOR_Model(net,
                       loss_fn=loss,
                       optimizer=opt,
                       loss_scale_manager=loss_scale,
                       amp_level="O2",
                       keep_batchnorm_fp32=False,
                       metrics={
                           'acc':
                           DistAccuracy(batch_size=thor_config.eval_batch_size,
                                        device_num=device_num)
                       },
                       eval_network=dist_eval_network,
                       frequency=thor_config.frequency)

    # model init
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    acc = 0.0
    time_cost = 0.0
    print("run_start", device_id)
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(eval_interval, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "the {} epoch's resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
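# LossGet above is a custom callback defined in the surrounding test harness.
# A minimal sketch of what it plausibly looks like (attribute names and the
# timing details are assumptions): it keeps the last loss value and the mean
# per-step time of the previous epoch.
import time
from mindspore.train.callback import Callback

class LossGet(Callback):
    """Record the last loss value and the average per-step time."""

    def __init__(self, per_print_times, data_size):
        super(LossGet, self).__init__()
        self._per_print_times = per_print_times
        self._data_size = data_size
        self._loss = 0.0
        self._per_step_time = 0.0
        self._epoch_start = 0.0

    def epoch_begin(self, run_context):
        self._epoch_start = time.time()

    def epoch_end(self, run_context):
        # average time per step over the finished epoch, in milliseconds
        self._per_step_time = (time.time() - self._epoch_start) * 1000 / self._data_size

    def step_end(self, run_context):
        loss = run_context.original_args().net_outputs
        if isinstance(loss, (tuple, list)):  # TrainOneStep cells may return tuples
            loss = loss[0]
        self._loss = float(loss.asnumpy())

    def get_loss(self):
        return self._loss

    def get_per_step_time(self):
        return self._per_step_time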
Exemple #16
0
def test_expand_dims_auto_parallel():
    context.set_auto_parallel_context(parallel_mode="auto_parallel",
                                      device_num=16,
                                      global_rank=0)
    net = Net(_w1)
    compile(net)
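# The test above relies on a Net cell, a _w1 weight, and a compile() helper
# defined elsewhere in the original test file. A minimal sketch under assumed
# shapes, following the _executor.compile pattern used in Exemple #21 below:
import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.common.api import _executor
from mindspore.ops import operations as P

_w1 = Tensor(np.ones([8, 8]), dtype=ms.float32)    # assumed weight shape
_x = Tensor(np.ones([8, 8, 1]), dtype=ms.float32)  # assumed input shape

class Net(nn.Cell):
    """Expand the weight along a trailing axis and add it to the input."""

    def __init__(self, weight):
        super(Net, self).__init__()
        self.weight = Parameter(weight, "w1")
        self.expand_dims = P.ExpandDims()

    def construct(self, x):
        return x + self.expand_dims(self.weight, -1)

def compile(net):  # shadows the builtin, as in the original test
    net.set_auto_parallel()
    net.set_train()
    _executor.compile(net, _x)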
Exemple #17
0
def train():
    args = parse_args()

    # init multicards training
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          gradients_mean=True,
                                          device_num=args.group_size)

    # dataset
    dataset = data_generator.SegDataset(image_mean=args.image_mean,
                                        image_std=args.image_std,
                                        data_file=args.data_file,
                                        batch_size=args.batch_size,
                                        crop_size=args.crop_size,
                                        max_scale=args.max_scale,
                                        min_scale=args.min_scale,
                                        ignore_label=args.ignore_label,
                                        num_classes=args.num_classes,
                                        num_readers=2,
                                        num_parallel_calls=4,
                                        shard_id=args.rank,
                                        shard_num=args.group_size)
    dataset = dataset.get_dataset(repeat=1)

    # network
    if args.model == 'deeplab_v3_s16':
        network = net_factory.nets_map[args.model]('train', args.num_classes,
                                                   16, args.freeze_bn)
    elif args.model == 'deeplab_v3_s8':
        network = net_factory.nets_map[args.model]('train', args.num_classes,
                                                   8, args.freeze_bn)
    else:
        raise NotImplementedError('model [{:s}] not recognized'.format(
            args.model))

    # loss
    loss_ = loss.SoftmaxCrossEntropyLoss(args.num_classes, args.ignore_label)
    loss_.add_flags_recursive(fp32=True)
    train_net = BuildTrainNetwork(network, loss_)

    # load pretrained model
    if args.ckpt_pre_trained:
        param_dict = load_checkpoint(args.ckpt_pre_trained)
        load_param_into_net(train_net, param_dict)

    # optimizer
    iters_per_epoch = dataset.get_dataset_size()
    total_train_steps = iters_per_epoch * args.train_epochs
    if args.lr_type == 'cos':
        lr_iter = learning_rates.cosine_lr(args.base_lr, total_train_steps,
                                           total_train_steps)
    elif args.lr_type == 'poly':
        lr_iter = learning_rates.poly_lr(args.base_lr,
                                         total_train_steps,
                                         total_train_steps,
                                         end_lr=0.0,
                                         power=0.9)
    elif args.lr_type == 'exp':
        lr_iter = learning_rates.exponential_lr(args.base_lr,
                                                args.lr_decay_step,
                                                args.lr_decay_rate,
                                                total_train_steps,
                                                staircase=True)
    else:
        raise ValueError('unknown learning rate type')
    opt = nn.Momentum(params=train_net.trainable_params(),
                      learning_rate=lr_iter,
                      momentum=0.9,
                      weight_decay=0.0001,
                      loss_scale=args.loss_scale)

    # loss scale
    manager_loss_scale = FixedLossScaleManager(args.loss_scale,
                                               drop_overflow_update=False)
    model = Model(train_net,
                  optimizer=opt,
                  amp_level="O3",
                  loss_scale_manager=manager_loss_scale)

    # callback for saving ckpts
    time_cb = TimeMonitor(data_size=iters_per_epoch)
    loss_cb = LossMonitor()
    cbs = [time_cb, loss_cb]

    if args.rank == 0:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=args.save_steps,
            keep_checkpoint_max=args.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=args.model,
                                     directory=args.train_dir,
                                     config=config_ck)
        cbs.append(ckpoint_cb)

    model.train(args.train_epochs, dataset, callbacks=cbs)
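# learning_rates.poly_lr above comes from the DeepLab scripts; it is
# essentially a polynomial decay over the whole schedule. A sketch consistent
# with how it is called above (the exact helper in the original repo may
# differ slightly):
import numpy as np

def poly_lr(lr_init, decay_steps, total_steps, end_lr=0.0, power=0.9):
    """Polynomial decay from lr_init to end_lr, one value per step."""
    steps = np.arange(total_steps)
    frac = np.minimum(steps, decay_steps) / float(decay_steps)
    lrs = (lr_init - end_lr) * (1.0 - frac) ** power + end_lr
    return lrs.astype(np.float32)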
Exemple #18
0
def train(args):
    '''train'''
    print('=============yolov3 start training==================')
    devid = int(os.getenv('DEVICE_ID',
                          '0')) if args.run_platform != 'CPU' else 0
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.run_platform,
                        save_graphs=False,
                        device_id=devid)
    # init distributed
    if args.world_size != 1:
        init()
        args.local_rank = get_rank()
        args.world_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            device_num=args.world_size,
            gradients_mean=True)
    args.logger = get_logger(args.outputs_dir, args.local_rank)

    # dataloader
    ds = create_dataset(args)

    args.logger.important_info('start create network')
    create_network_start = time.time()

    train_net = define_network(args)

    # checkpoint
    ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
    train_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                    keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=train_config,
                              directory=args.outputs_dir,
                              prefix='{}'.format(args.local_rank))
    cb_params = _InternalCallbackParam()
    cb_params.train_network = train_net
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    train_net.set_train()
    t_end = time.time()
    t_epoch = time.time()
    old_progress = -1
    i = 0
    if args.use_loss_scale:
        scale_manager = DynamicLossScaleManager(init_loss_scale=2**10,
                                                scale_factor=2,
                                                scale_window=2000)
    for data in ds.create_tuple_iterator(output_numpy=True):
        batch_images = data[0]
        batch_labels = data[1]
        input_list = [Tensor(batch_images, mstype.float32)]
        for idx in range(2, 26):
            input_list.append(Tensor(data[idx], mstype.float32))
        if args.use_loss_scale:
            scaling_sens = Tensor(scale_manager.get_loss_scale(),
                                  dtype=mstype.float32)
            loss0, overflow, _ = train_net(*input_list, scaling_sens)
            overflow = np.all(overflow.asnumpy())
            # update_loss_scale takes the overflow flag directly
            scale_manager.update_loss_scale(overflow)
            args.logger.info(
                'rank[{}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
                'batch_labels:{}'.format(args.local_rank, i, loss0, overflow,
                                         scaling_sens, args.lr[i],
                                         batch_images.shape,
                                         batch_labels.shape))
        else:
            loss0 = train_net(*input_list)
            args.logger.info(
                'rank[{}], iter[{}], loss[{}], lr:{}, batch_images:{}, '
                'batch_labels:{}'.format(args.local_rank, i, loss0, args.lr[i],
                                         batch_images.shape,
                                         batch_labels.shape))
        # save ckpt
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        if args.local_rank == 0:
            ckpt_cb.step_end(run_context)

        # save Log
        if i == 0:
            time_for_graph_compile = time.time() - create_network_start
            args.logger.important_info(
                'Yolov3, graph compile time={:.2f}s'.format(
                    time_for_graph_compile))

        if i % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

        if i % args.log_interval == 0 and args.local_rank == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.batch_size * (
                i - old_progress) * args.world_size / time_used
            args.logger.info(
                'epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(
                    epoch, i, loss0, fps))
            t_end = time.time()
            old_progress = i

        if i % args.steps_per_epoch == 0 and args.local_rank == 0:
            epoch_time_used = time.time() - t_epoch
            epoch = int(i / args.steps_per_epoch)
            fps = args.batch_size * args.world_size * args.steps_per_epoch / epoch_time_used
            args.logger.info(
                '=================================================')
            args.logger.info(
                'epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(
                    epoch, i, fps))
            args.logger.info(
                '=================================================')
            t_epoch = time.time()

        i = i + 1

    args.logger.info('=============yolov3 training finished==================')
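# The overflow branch above follows the contract of DynamicLossScaleManager:
# shrink the scale on overflow, and grow it again after scale_window
# consecutive clean steps. A toy version of that policy for illustration:
class SimpleDynamicLossScale:
    """Illustrative stand-in for DynamicLossScaleManager's update rule."""

    def __init__(self, init_loss_scale=2**10, scale_factor=2, scale_window=2000):
        self.scale = init_loss_scale
        self.factor = scale_factor
        self.window = scale_window
        self.good_steps = 0

    def get_loss_scale(self):
        return self.scale

    def update_loss_scale(self, overflow):
        if overflow:
            # halve the scale and restart the clean-step counter
            self.scale = max(self.scale / self.factor, 1)
            self.good_steps = 0
        else:
            self.good_steps += 1
            if self.good_steps >= self.window:
                self.scale *= self.factor
                self.good_steps = 0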
Exemple #19
0
def main():
    parser = argparse.ArgumentParser(description="retinanet training")
    parser.add_argument("--only_create_dataset", type=ast.literal_eval, default=False,
                        help="If set it true, only create Mindrecord, default is False.")
    parser.add_argument("--distribute", type=ast.literal_eval, default=False,
                        help="Run distribute, default is False.")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
    parser.add_argument("--lr", type=float, default=0.1, help="Learning rate, default is 0.1.")
    parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink.")
    parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.")
    parser.add_argument("--epoch_size", type=int, default=500, help="Epoch size, default is 500.")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.")
    parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained Checkpoint file path.")
    parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size.")
    parser.add_argument("--save_checkpoint_epochs", type=int, default=1, help="Save checkpoint epochs, default is 1.")
    parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.")
    parser.add_argument("--filter_weight", type=ast.literal_eval, default=False,
                        help="Filter weight parameters, default is False.")
    parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend"),
                        help="run platform, only support Ascend.")
    args_opt = parser.parse_args()

    if args_opt.run_platform == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
        if args_opt.distribute:
            if os.getenv("DEVICE_ID", "not_set").isdigit():
                context.set_context(device_id=int(os.getenv("DEVICE_ID")))
            init()
            device_num = args_opt.device_num
            rank = get_rank()
            context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                              device_num=device_num)
        else:
            rank = 0
            device_num = 1
            context.set_context(device_id=args_opt.device_id)

    else:
        raise ValueError("Unsupported platform.")

    mindrecord_file = create_mindrecord(args_opt.dataset, "retinanet.mindrecord", True)

    if not args_opt.only_create_dataset:
        loss_scale = float(args_opt.loss_scale)

        # When creating the MindDataset, use the first mindrecord file, such as retinanet.mindrecord0.
        dataset = create_retinanet_dataset(mindrecord_file, repeat_num=1,
                                           batch_size=args_opt.batch_size, device_num=device_num, rank=rank)

        dataset_size = dataset.get_dataset_size()
        print("Create dataset done!")


        backbone = resnet50(config.num_classes)
        retinanet = retinanet50(backbone, config)
        net = retinanetWithLossCell(retinanet, config)
        net.to_float(mindspore.float16)
        init_net_param(net)

        if args_opt.pre_trained:
            if args_opt.pre_trained_epoch_size <= 0:
                raise KeyError("pre_trained_epoch_size must be greater than 0.")
            param_dict = load_checkpoint(args_opt.pre_trained)
            if args_opt.filter_weight:
                filter_checkpoint_parameter(param_dict)
            load_param_into_net(net, param_dict)

        lr = Tensor(get_lr(global_step=config.global_step,
                           lr_init=config.lr_init, lr_end=config.lr_end_rate * args_opt.lr, lr_max=args_opt.lr,
                           warmup_epochs1=config.warmup_epochs1, warmup_epochs2=config.warmup_epochs2,
                           warmup_epochs3=config.warmup_epochs3, warmup_epochs4=config.warmup_epochs4,
                           warmup_epochs5=config.warmup_epochs5, total_epochs=args_opt.epoch_size,
                           steps_per_epoch=dataset_size))
        opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                          config.momentum, config.weight_decay, loss_scale)
        net = TrainingWrapper(net, opt, loss_scale)
        model = Model(net)
        print("Start train retinanet, the first epoch will be slower because of the graph compilation.")
        cb = [TimeMonitor(), LossMonitor()]
        cb += [Monitor(lr_init=lr.asnumpy())]
        config_ck = CheckpointConfig(save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="retinanet", directory=config.save_checkpoint_path, config=config_ck)
        if args_opt.distribute:
            if rank == 0:
                cb += [ckpt_cb]
            model.train(args_opt.epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)
        else:
            cb += [ckpt_cb]
            model.train(args_opt.epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)
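# filter_checkpoint_parameter is defined in the training script itself; its
# usual role is to drop the class-dependent head weights so a pretrained
# checkpoint can be reused with a different num_classes. A hedged sketch
# (the filtered key substrings are assumptions):
def filter_checkpoint_parameter(param_dict):
    """Remove head parameters whose shapes depend on the class count."""
    for key in list(param_dict.keys()):
        if 'multi_loc_layers' in key or 'multi_cls_layers' in key:
            del param_dict[key]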
Exemple #20
0
def main():
    parser = argparse.ArgumentParser(description="SSD training")
    parser.add_argument(
        "--only_create_dataset",
        type=ast.literal_eval,
        default=False,
        help="If set it true, only create Mindrecord, default is False.")
    parser.add_argument("--distribute",
                        type=ast.literal_eval,
                        default=False,
                        help="Run distribute, default is False.")
    parser.add_argument("--device_id",
                        type=int,
                        default=0,
                        help="Device id, default is 0.")
    parser.add_argument("--device_num",
                        type=int,
                        default=1,
                        help="Use device nums, default is 1.")
    parser.add_argument("--lr",
                        type=float,
                        default=0.05,
                        help="Learning rate, default is 0.05.")
    parser.add_argument("--mode",
                        type=str,
                        default="sink",
                        help="Run sink mode or not, default is sink.")
    parser.add_argument("--dataset",
                        type=str,
                        default="coco",
                        help="Dataset, defalut is coco.")
    parser.add_argument("--epoch_size",
                        type=int,
                        default=500,
                        help="Epoch size, default is 500.")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size, default is 32.")
    parser.add_argument("--pre_trained",
                        type=str,
                        default=None,
                        help="Pretrained Checkpoint file path.")
    parser.add_argument("--pre_trained_epoch_size",
                        type=int,
                        default=0,
                        help="Pretrained epoch size.")
    parser.add_argument("--save_checkpoint_epochs",
                        type=int,
                        default=10,
                        help="Save checkpoint epochs, default is 10.")
    parser.add_argument("--loss_scale",
                        type=int,
                        default=1024,
                        help="Loss scale, default is 1024.")
    parser.add_argument("--filter_weight",
                        type=ast.literal_eval,
                        default=False,
                        help="Filter weight parameters, default is False.")
    parser.add_argument("--run_platform",
                        type=str,
                        default="Ascend",
                        choices=("Ascend", "GPU"),
                        help="run platform, only support Ascend and GPU.")
    args_opt = parser.parse_args()

    if args_opt.run_platform == "Ascend":
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="Ascend",
                            device_id=args_opt.device_id)
        if args_opt.distribute:
            device_num = args_opt.device_num
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True,
                device_num=device_num)
            init()
            rank = args_opt.device_id % device_num
        else:
            rank = 0
            device_num = 1
    elif args_opt.run_platform == "GPU":
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU",
                            device_id=args_opt.device_id)
        init()
        if args_opt.distribute:
            device_num = args_opt.device_num
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True,
                device_num=device_num)
            rank = get_rank()
        else:
            rank = 0
            device_num = 1
    else:
        raise ValueError("Unsupported platform.")

    print("Start create dataset!")

    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is ssd.mindrecord0, 1, ... file_num.

    prefix = "ssd.mindrecord"
    mindrecord_dir = config.mindrecord_dir
    mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
    if not os.path.exists(mindrecord_file):
        if not os.path.isdir(mindrecord_dir):
            os.makedirs(mindrecord_dir)
        if args_opt.dataset == "coco":
            if os.path.isdir(config.coco_root):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("coco", True, prefix)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("coco_root not exits.")
        elif args_opt.dataset == "voc":
            if os.path.isdir(config.voc_dir):
                print("Create Mindrecord.")
                voc_data_to_mindrecord(mindrecord_dir, True, prefix)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("voc_dir not exits.")
        else:
            if os.path.isdir(config.image_dir) and os.path.exists(
                    config.anno_path):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("other", True, prefix)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("image_dir or anno_path not exits.")

    if not args_opt.only_create_dataset:
        loss_scale = float(args_opt.loss_scale)

        # When creating the MindDataset, use the first mindrecord file, such as ssd.mindrecord0.
        dataset = create_ssd_dataset(mindrecord_file,
                                     repeat_num=1,
                                     batch_size=args_opt.batch_size,
                                     device_num=device_num,
                                     rank=rank)

        dataset_size = dataset.get_dataset_size()
        print("Create dataset done!")

        backbone = ssd_mobilenet_v2()
        ssd = SSD300(backbone=backbone, config=config)
        if args_opt.run_platform == "GPU":
            ssd.to_float(dtype.float16)
        net = SSDWithLossCell(ssd, config)
        init_net_param(net)

        # checkpoint
        ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size *
                                       args_opt.save_checkpoint_epochs)
        save_ckpt_path = './ckpt_' + str(rank) + '/'
        ckpoint_cb = ModelCheckpoint(prefix="ssd",
                                     directory=save_ckpt_path,
                                     config=ckpt_config)

        if args_opt.pre_trained:
            if args_opt.pre_trained_epoch_size <= 0:
                raise KeyError(
                    "pre_trained_epoch_size must be greater than 0.")
            param_dict = load_checkpoint(args_opt.pre_trained)
            if args_opt.filter_weight:
                filter_checkpoint_parameter(param_dict)
            load_param_into_net(net, param_dict)

        lr = Tensor(
            get_lr(global_step=config.global_step,
                   lr_init=config.lr_init,
                   lr_end=config.lr_end_rate * args_opt.lr,
                   lr_max=args_opt.lr,
                   warmup_epochs=config.warmup_epochs,
                   total_epochs=args_opt.epoch_size,
                   steps_per_epoch=dataset_size))
        opt = nn.Momentum(
            filter(lambda x: x.requires_grad, net.get_parameters()), lr,
            config.momentum, config.weight_decay, loss_scale)
        net = TrainingWrapper(net, opt, loss_scale)

        callback = [
            TimeMonitor(data_size=dataset_size),
            LossMonitor(), ckpoint_cb
        ]

        model = Model(net)
        dataset_sink_mode = False
        if args_opt.mode == "sink":
            print("In sink mode, one epoch return a loss.")
            dataset_sink_mode = True
        print(
            "Start train SSD, the first epoch will be slower because of the graph compilation."
        )
        model.train(args_opt.epoch_size,
                    dataset,
                    callbacks=callback,
                    dataset_sink_mode=dataset_sink_mode)
Exemple #21
0
def test_two_matmul():
    class Net(nn.Cell):
        def __init__(self):
            super().__init__()
            self.matmul1 = P.MatMul()
            self.matmul2 = P.MatMul()

        def construct(self, x, y, b):
            out = self.matmul1(x, y)
            out = self.matmul2(out, b)
            return out

    size = 16
    context.set_auto_parallel_context(device_num=size, global_rank=0)
    cost_model_context.set_cost_model_context(device_memory_capacity=32.0 * 1024.0 * 1024.0 * 1024.0,
                                              costmodel_alpha=1.0,
                                              costmodel_beta=60.0,
                                              costmodel_gamma=0.1,
                                              costmodel_communi_threshold=1024.0,
                                              costmodel_communi_const=2222.0,
                                              costmodel_communi_bias=1111.0)
    dev_mem_cap = cost_model_context.get_cost_model_context("device_memory_capacity")
    assert dev_mem_cap == 32.0 * 1024.0 * 1024.0 * 1024.0
    costmodel_alpha = cost_model_context.get_cost_model_context("costmodel_alpha")
    assert costmodel_alpha == 1.0
    costmodel_beta = cost_model_context.get_cost_model_context("costmodel_beta")
    assert costmodel_beta == 60.0
    costmodel_gamma = cost_model_context.get_cost_model_context("costmodel_gamma")
    assert costmodel_gamma == 0.1
    costmodel_communi_threshold = cost_model_context.get_cost_model_context("costmodel_communi_threshold")
    assert costmodel_communi_threshold == 1024.0
    costmodel_communi_const = cost_model_context.get_cost_model_context("costmodel_communi_const")
    assert costmodel_communi_const == 2222.0
    costmodel_communi_bias = cost_model_context.get_cost_model_context("costmodel_communi_bias")
    assert costmodel_communi_bias == 1111.0

    cost_model_context.reset_cost_model_context()
    dev_mem_cap = cost_model_context.get_cost_model_context("device_memory_capacity")
    assert dev_mem_cap == 16.0 * 1024.0 * 1024.0 * 1024.0
    costmodel_alpha = cost_model_context.get_cost_model_context("costmodel_alpha")
    assert costmodel_alpha == 1.0
    costmodel_beta = cost_model_context.get_cost_model_context("costmodel_beta")
    assert costmodel_beta == 65.0
    costmodel_gamma = cost_model_context.get_cost_model_context("costmodel_gamma")
    assert costmodel_gamma == 0.02
    costmodel_communi_threshold = cost_model_context.get_cost_model_context("costmodel_communi_threshold")
    assert costmodel_communi_threshold == 2048.0
    costmodel_communi_const = cost_model_context.get_cost_model_context("costmodel_communi_const")
    assert costmodel_communi_const == 3072.0
    costmodel_communi_bias = cost_model_context.get_cost_model_context("costmodel_communi_bias")
    assert costmodel_communi_bias == 1024.0


    set_algo_parameters(simplify_cal=True,
                        tensor_slice_align_enable=False,
                        tensor_slice_align_size=32,
                        not_fully_use_devices=True,
                        elementwise_op_strategy_follow=False)
    para_simplify_cal = get_algo_parameters("simplify_cal")
    assert para_simplify_cal == True
    para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
    assert para_slice_align_enable == False
    para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
    assert para_slice_align_size == 32
    not_fully_use_devices = get_algo_parameters("not_fully_use_devices")
    assert not_fully_use_devices == True
    elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
    assert elementwise_op_strategy_follow == False

    reset_algo_parameters()
    para_simplify_cal = get_algo_parameters("simplify_cal")
    assert para_simplify_cal == True
    para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
    assert para_slice_align_enable == False
    para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
    assert para_slice_align_size == 16
    not_fully_use_devices = get_algo_parameters("not_fully_use_devices")
    assert not_fully_use_devices == False
    elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
    assert elementwise_op_strategy_follow == False

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    reset_op_id()

    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
    expected_strategies = {'Default/network-Net/MatMul-op2': [[16, 1], [1, 1]],
                           'Default/network-Net/MatMul-op3': [[16, 1], [1, 1]]}
    assert strategies == expected_strategies
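# NetWithLoss and reset_op_id are helpers from the parallel test utilities.
# The original file defines its own loss wrapper; for illustration, a plain
# scalar reduction is enough to give the compiler a complete training graph:
import mindspore.nn as nn
from mindspore.ops import operations as P

class NetWithLoss(nn.Cell):
    """Wrap a network with a scalar output so the whole graph compiles."""

    def __init__(self, network):
        super(NetWithLoss, self).__init__()
        self.network = network
        self.mean = P.ReduceMean()

    def construct(self, x, y, b):
        return self.mean(self.network(x, y, b))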
Exemple #22
0
                    type=str,
                    default=None,
                    help='Pretrained checkpoint path')
parser.add_argument('--device_target',
                    type=str,
                    default="GPU",
                    help='run device_target')
args_opt = parser.parse_args()

if args_opt.device_target == "GPU":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="GPU",
                        save_graphs=False)
    init()
    context.set_auto_parallel_context(device_num=get_group_size(),
                                      parallel_mode=ParallelMode.DATA_PARALLEL,
                                      mirror_mean=True)
else:
    raise ValueError("Unsupported device_target.")


class CrossEntropyWithLabelSmooth(_Loss):
    """
    CrossEntropyWithLabelSmooth.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes

    Returns:
        None.
Exemple #23
0
def train(cloud_args=None):
    """training process"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          gradients_mean=True)
    # dataloader
    de_dataset = classification_dataset(args.data_dir,
                                        args.image_size,
                                        args.per_batch_size,
                                        1,
                                        args.rank,
                                        args.group_size,
                                        num_parallel_workers=8)
    de_dataset.map_model = 4  # !!!important
    args.steps_per_epoch = de_dataset.get_dataset_size()

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    # get network and init
    network = get_network(args.backbone,
                          num_classes=args.num_classes,
                          platform=args.platform)
    if network is None:
        raise NotImplementedError('not implement {}'.format(args.backbone))

    load_pretrain_model(args.pretrained, network, args)

    # lr scheduler
    lr = get_lr(args)

    # optimizer
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    # loss
    if not args.label_smooth:
        args.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                        num_classes=args.num_classes)

    if args.is_dynamic_loss_scale == 1:
        loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                     scale_factor=2,
                                                     scale_window=2000)
    else:
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)

    if args.platform == "Ascend":
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'},
                      amp_level="O3")
    else:
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'},
                      amp_level="O2")

    # checkpoint save
    progress_cb = ProgressMonitor(args)
    callbacks = [
        progress_cb,
    ]
    if args.rank_save_ckpt_flag:
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval * args.steps_per_epoch,
            keep_checkpoint_max=args.ckpt_save_max)
        save_ckpt_path = os.path.join(args.outputs_dir,
                                      'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch,
                de_dataset,
                callbacks=callbacks,
                dataset_sink_mode=True)
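# get_param_groups above typically splits parameters into weight-decay and
# no-decay groups, excluding bias and BatchNorm parameters from decay. A
# sketch of the usual model-zoo pattern (the exact name filters are
# assumptions):
def get_param_groups(network):
    """Return Momentum-style param groups: no decay for bias/gamma/beta."""
    decay_params, no_decay_params = [], []
    for p in network.trainable_params():
        if p.name.endswith('.bias') or p.name.endswith('.gamma') or p.name.endswith('.beta'):
            no_decay_params.append(p)
        else:
            decay_params.append(p)
    return [{'params': no_decay_params, 'weight_decay': 0.0},
            {'params': decay_params}]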
Exemple #24
0
                    type=int,
                    default=0,
                    help="Rank id, default is 0.")
args_opt = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend",
                    device_id=args_opt.device_id)

if __name__ == '__main__':
    if not args_opt.do_eval and args_opt.run_distribute:
        rank = args_opt.rank_id
        device_num = args_opt.device_num
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        init()
    else:
        rank = 0
        device_num = 1

    print("Start create dataset!")

    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is FasterRcnn.mindrecord0, 1, ... file_num.
    prefix = "FasterRcnn.mindrecord"
    mindrecord_dir = config.mindrecord_dir
    mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
    if not os.path.exists(mindrecord_file):
Exemple #25
0
def run_pretrain():
    """pre-train bert_clue"""
    parser = argparse.ArgumentParser(description='bert pre_training')
    parser.add_argument(
        '--device_target',
        type=str,
        default='Ascend',
        choices=['Ascend', 'GPU'],
        help='device where the code will be implemented. (Default: Ascend)')
    parser.add_argument("--distribute",
                        type=str,
                        default="false",
                        help="Run distribute, default is false.")
    parser.add_argument("--epoch_size",
                        type=int,
                        default="1",
                        help="Epoch size, default is 1.")
    parser.add_argument("--device_id",
                        type=int,
                        default=4,
                        help="Device id, default is 0.")
    parser.add_argument("--device_num",
                        type=int,
                        default=1,
                        help="Use device nums, default is 1.")
    parser.add_argument("--enable_save_ckpt",
                        type=str,
                        default="true",
                        help="Enable save checkpoint, default is true.")
    parser.add_argument("--enable_lossscale",
                        type=str,
                        default="false",
                        help="Use lossscale or not, default is not.")
    parser.add_argument("--do_shuffle",
                        type=str,
                        default="false",
                        help="Enable shuffle for dataset, default is true.")
    parser.add_argument("--enable_data_sink",
                        type=str,
                        default="true",
                        help="Enable data sink, default is true.")
    parser.add_argument("--data_sink_steps",
                        type=int,
                        default="100",
                        help="Sink steps for each epoch, default is 1.")
    parser.add_argument("--save_checkpoint_path",
                        type=str,
                        default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_checkpoint_path",
                        type=str,
                        default="",
                        help="Load checkpoint file path")
    parser.add_argument("--save_checkpoint_steps",
                        type=int,
                        default=1000,
                        help="Save checkpoint steps, "
                        "default is 1000.")
    parser.add_argument("--train_steps",
                        type=int,
                        default=-1,
                        help="Training Steps, default is -1, "
                        "meaning run all steps according to epoch number.")
    parser.add_argument("--save_checkpoint_num",
                        type=int,
                        default=1,
                        help="Save checkpoint numbers, default is 1.")
    parser.add_argument("--data_dir",
                        type=str,
                        default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_dir",
                        type=str,
                        default="",
                        help="Schema path, it is better to use absolute path")

    args_opt = parser.parse_args()
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.device_target,
                        device_id=args_opt.device_id,
                        save_graphs=False)
    context.set_context(reserve_class_name_in_scope=False)
    context.set_context(variable_memory_max_size="30GB")
    ckpt_save_dir = args_opt.save_checkpoint_path
    if args_opt.distribute == "true":
        if args_opt.device_target == 'Ascend':
            D.init()
            device_num = args_opt.device_num
            rank = args_opt.device_id % device_num
        else:
            D.init()
            device_num = D.get_group_size()
            rank = D.get_rank()
            ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(
                rank) + '/'

        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            device_num=device_num)
        from mindspore.parallel._auto_parallel_context import auto_parallel_context
        if bert_net_cfg.num_hidden_layers == 12:
            if bert_net_cfg.use_relative_positions:
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [29, 58, 87, 116, 145, 174, 203, 217],
                    "hccl_world_groupsum1")
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [29, 58, 87, 116, 145, 174, 203, 217],
                    "hccl_world_groupsum3")
            else:
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [28, 55, 82, 109, 136, 163, 190, 205],
                    "hccl_world_groupsum1")
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [28, 55, 82, 109, 136, 163, 190, 205],
                    "hccl_world_groupsum3")
        elif bert_net_cfg.num_hidden_layers == 24:
            if bert_net_cfg.use_relative_positions:
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [30, 90, 150, 210, 270, 330, 390, 421],
                    "hccl_world_groupsum1")
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [30, 90, 150, 210, 270, 330, 390, 421],
                    "hccl_world_groupsum3")
            else:
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [38, 93, 148, 203, 258, 313, 368, 397],
                    "hccl_world_groupsum1")
                auto_parallel_context().set_all_reduce_fusion_split_indices(
                    [38, 93, 148, 203, 258, 313, 368, 397],
                    "hccl_world_groupsum3")
    else:
        rank = 0
        device_num = 1

    if args_opt.device_target == 'GPU' and bert_net_cfg.compute_type != mstype.float32:
        logger.warning('GPU only supports fp32 temporarily, running with fp32.')
        bert_net_cfg.compute_type = mstype.float32

    ds = create_bert_dataset(device_num, rank, args_opt.do_shuffle,
                             args_opt.data_dir, args_opt.schema_dir)
    net_with_loss = BertNetworkWithLoss(bert_net_cfg, True)

    new_repeat_count = (args_opt.epoch_size * ds.get_dataset_size()
                        // args_opt.data_sink_steps)
    if args_opt.train_steps > 0:
        new_repeat_count = min(
            new_repeat_count, args_opt.train_steps // args_opt.data_sink_steps)
    else:
        args_opt.train_steps = args_opt.epoch_size * ds.get_dataset_size()
        logger.info("train steps: {}".format(args_opt.train_steps))

    if cfg.optimizer == 'Lamb':
        lr_schedule = BertLearningRate(
            learning_rate=cfg.Lamb.learning_rate,
            end_learning_rate=cfg.Lamb.end_learning_rate,
            warmup_steps=cfg.Lamb.warmup_steps,
            decay_steps=args_opt.train_steps,
            power=cfg.Lamb.power)
        params = net_with_loss.trainable_params()
        decay_params = list(filter(cfg.Lamb.decay_filter, params))
        other_params = list(
            filter(lambda x: not cfg.Lamb.decay_filter(x), params))
        group_params = [{
            'params': decay_params,
            'weight_decay': cfg.Lamb.weight_decay
        }, {
            'params': other_params
        }, {
            'order_params': params
        }]
        optimizer = Lamb(group_params,
                         learning_rate=lr_schedule,
                         eps=cfg.Lamb.eps)
    elif cfg.optimizer == 'Momentum':
        optimizer = Momentum(net_with_loss.trainable_params(),
                             learning_rate=cfg.Momentum.learning_rate,
                             momentum=cfg.Momentum.momentum)
    elif cfg.optimizer == 'AdamWeightDecay':
        lr_schedule = BertLearningRate(
            learning_rate=cfg.AdamWeightDecay.learning_rate,
            end_learning_rate=cfg.AdamWeightDecay.end_learning_rate,
            warmup_steps=cfg.AdamWeightDecay.warmup_steps,
            decay_steps=args_opt.train_steps,
            power=cfg.AdamWeightDecay.power)
        params = net_with_loss.trainable_params()
        decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params))
        other_params = list(
            filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
        group_params = [{
            'params': decay_params,
            'weight_decay': cfg.AdamWeightDecay.weight_decay
        }, {
            'params': other_params,
            'weight_decay': 0.0
        }, {
            'order_params': params
        }]

        optimizer = AdamWeightDecay(group_params,
                                    learning_rate=lr_schedule,
                                    eps=cfg.AdamWeightDecay.eps)
    elif cfg.optimizer == "Thor":
        lr = get_bert_lr()
        damping = get_bert_damping()
        optimizer = THOR(
            filter(lambda x: x.requires_grad,
                   net_with_loss.get_parameters()), lr, cfg.Thor.momentum,
            filter(lambda x: 'matrix_A' in x.name,
                   net_with_loss.get_parameters()),
            filter(lambda x: 'matrix_G' in x.name,
                   net_with_loss.get_parameters()), cfg.Thor.weight_decay,
            cfg.Thor.loss_scale, bert_net_cfg.num_hidden_layers,
            bert_net_cfg.batch_size, damping)
    else:
        raise ValueError(
            "Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecay, Thor]"
            .format(cfg.optimizer))
    callback = [TimeMonitor(args_opt.data_sink_steps), LossCallBack()]
    if args_opt.enable_save_ckpt == "true" and rank == 0:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=args_opt.save_checkpoint_steps,
            keep_checkpoint_max=args_opt.save_checkpoint_num)
        ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert',
                                     directory=ckpt_save_dir,
                                     config=config_ck)
        callback.append(ckpoint_cb)

    if args_opt.load_checkpoint_path:
        param_dict = load_checkpoint(args_opt.load_checkpoint_path)
        load_param_into_net(net_with_loss, param_dict)

    if args_opt.enable_lossscale == "true":
        update_cell = DynamicLossScaleUpdateCell(
            loss_scale_value=cfg.loss_scale_value,
            scale_factor=cfg.scale_factor,
            scale_window=cfg.scale_window)
        net_with_grads = BertTrainOneStepWithLossScaleCell(
            net_with_loss, optimizer=optimizer, scale_update_cell=update_cell)
    else:
        net_with_grads = BertTrainOneStepCell(net_with_loss,
                                              optimizer=optimizer)

    model = Model(net_with_grads, frequency=cfg.Thor.frequency)
    model.train(new_repeat_count,
                ds,
                callbacks=callback,
                dataset_sink_mode=(args_opt.enable_data_sink == "true"),
                sink_size=args_opt.data_sink_steps)
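# BertLearningRate above is defined in the BERT scripts: linear warmup
# followed by polynomial decay, blended per step. A close sketch built from
# the schedules that ship with mindspore.nn (details are assumptions):
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.nn.learning_rate_schedule import LearningRateSchedule, PolynomialDecayLR, WarmUpLR

class BertLearningRate(LearningRateSchedule):
    """Warmup LR while step < warmup_steps, polynomial decay afterwards."""

    def __init__(self, learning_rate, end_learning_rate, warmup_steps, decay_steps, power):
        super(BertLearningRate, self).__init__()
        self.warmup_flag = warmup_steps > 0
        if self.warmup_flag:
            self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)
        self.decay_lr = PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32))
        self.greater = P.Greater()
        self.one = Tensor(np.array([1.0]).astype(np.float32))
        self.cast = P.Cast()

    def construct(self, global_step):
        decay_lr = self.decay_lr(global_step)
        if not self.warmup_flag:
            return decay_lr
        # select the warmup value while still inside the warmup window
        is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32)
        warmup_lr = self.warmup_lr(global_step)
        return (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr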
Exemple #26
0
def val(args):
    '''eval'''
    print('=============yolov3 start evaluating==================')

    # logger
    args.batch_size = config.batch_size
    args.input_shape = config.input_shape
    args.result_path = config.result_path
    args.conf_thresh = config.conf_thresh
    args.nms_thresh = config.nms_thresh

    context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE,
                                      device_num=args.world_size,
                                      gradients_mean=True)
    mindrecord_path = args.mindrecord_path
    print('Loading data from {}'.format(mindrecord_path))

    num_classes = config.num_classes
    if num_classes > 1:
        raise NotImplementedError(
            'num_classes > 1: Yolov3 postprocess not implemented!')

    anchors = config.anchors
    anchors_mask = config.anchors_mask
    num_anchors_list = [len(x) for x in anchors_mask]

    reduction_0 = 64.0
    reduction_1 = 32.0
    reduction_2 = 16.0
    labels = ['face']
    classes = {0: 'face'}

    # dataloader
    ds = de.MindDataset(
        mindrecord_path + "0",
        columns_list=["image", "annotation", "image_name", "image_size"])

    single_scale_trans = SingleScaleTrans(resize=args.input_shape)

    ds = ds.batch(
        args.batch_size,
        per_batch_map=single_scale_trans,
        input_columns=["image", "annotation", "image_name", "image_size"],
        num_parallel_workers=8)

    args.steps_per_epoch = ds.get_dataset_size()

    # backbone
    network = backbone_HwYolov3(num_classes, num_anchors_list, args)

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        print('load model {} success'.format(args.pretrained))
    else:
        print(
            'load model {} failed, please check the model path; evaluation aborted'
            .format(args.pretrained))
        exit(1)

    ds = ds.repeat(1)

    det = {}
    img_size = {}
    img_anno = {}

    model_name = args.pretrained.split('/')[-1].replace('.ckpt', '')
    result_path = os.path.join(args.result_path, model_name)
    os.makedirs(result_path, exist_ok=True)

    # result file
    ret_files_set = {
        'face': os.path.join(result_path, 'comp4_det_test_face_rm5050.txt'),
    }

    test_net = BuildTestNetwork(network, reduction_0, reduction_1, reduction_2,
                                anchors, anchors_mask, num_classes, args)

    print('conf_thresh:', args.conf_thresh)

    eval_times = 0

    for data in ds.create_tuple_iterator(output_numpy=True):
        batch_images = data[0]
        batch_labels = data[1]
        batch_image_name = data[2]
        batch_image_size = data[3]
        eval_times += 1

        img_tensor = Tensor(batch_images, mstype.float32)

        dets = []
        tdets = []

        coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2, cls_scores_2 = test_net(
            img_tensor)

        boxes_0, boxes_1, boxes_2 = get_bounding_boxes(
            coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2,
            cls_scores_2, args.conf_thresh, args.input_shape, num_classes)

        converted_boxes_0, converted_boxes_1, converted_boxes_2 = tensor_to_brambox(
            boxes_0, boxes_1, boxes_2, args.input_shape, labels)

        tdets.append(converted_boxes_0)
        tdets.append(converted_boxes_1)
        tdets.append(converted_boxes_2)

        batch = len(tdets[0])
        for b in range(batch):
            single_dets = []
            for op in range(3):
                single_dets.extend(tdets[op][b])
            dets.append(single_dets)

        det.update({
            batch_image_name[k].decode('UTF-8'): v
            for k, v in enumerate(dets)
        })
        img_size.update({
            batch_image_name[k].decode('UTF-8'): v
            for k, v in enumerate(batch_image_size)
        })
        img_anno.update({
            batch_image_name[k].decode('UTF-8'): v
            for k, v in enumerate(batch_labels)
        })

    print('eval times:', eval_times)
    print('batch size: ', args.batch_size)

    netw, neth = args.input_shape
    reorg_dets = voc_wrapper.reorg_detection(det, netw, neth, img_size)
    voc_wrapper.gen_results(reorg_dets, result_path, img_size, args.nms_thresh)

    # compute mAP
    ground_truth = parse_gt_from_anno(img_anno, classes)

    ret_list = parse_rets(ret_files_set)
    iou_thr = 0.5
    evaluate = calc_recall_presicion_ap(ground_truth, ret_list, iou_thr)

    aps_str = ''
    for cls in evaluate:
        per_line, = plt.plot(evaluate[cls]['recall'],
                             evaluate[cls]['presicion'], 'b-')
        per_line.set_label('%s:AP=%.3f' % (cls, evaluate[cls]['ap']))
        aps_str += '_%s_AP_%.3f' % (cls, evaluate[cls]['ap'])
        plt.plot([i / 1000.0 for i in range(1, 1001)],
                 [i / 1000.0 for i in range(1, 1001)], 'y--')
        plt.axis([0, 1.2, 0, 1.2])
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.grid()

        plt.legend()
        plt.title('PR')

    # save mAP
    ap_save_path = os.path.join(
        result_path,
        result_path.replace('/', '_') + aps_str + '.png')
    print('Saving {}'.format(ap_save_path))
    plt.savefig(ap_save_path)

    print('=============yolov3 evaluating finished==================')
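# calc_recall_presicion_ap is defined in the accompanying utils; the AP it
# reports is normally the VOC-style area under the monotonized PR curve. A
# reference implementation of that final step, for context:
import numpy as np

def voc_ap(recall, precision):
    """Area under the precision envelope, sampled at recall change points."""
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size - 1, 0, -1):  # make precision non-increasing
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))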
Exemple #27
0
        raise ValueError("Only supported GPU training.")

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args_opt.is_distributed:
        init("nccl")
        cfg.rank = get_rank()
        cfg.group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=cfg.group_size,
                                          gradients_mean=True)
    else:
        cfg.rank = 0
        cfg.group_size = 1

    # dataloader
    dataset = create_dataset(args_opt.dataset_path, True, cfg.rank,
                             cfg.group_size)
    batches_per_epoch = dataset.get_dataset_size()
    print("Batches Per Epoch: ", batches_per_epoch)
    # network
    net = ShuffleNetV2(n_class=cfg.num_classes, model_size=args_opt.model_size)

    # loss
    loss = CrossEntropySmooth(sparse=True,
Exemple #28
0
def train_process(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
    os.environ['RANK_ID'] = str(device_id)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=[107, 160])
        init()

    # network
    net = resnet50(class_num=config.class_num)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)

    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0

    # loss
    loss = CrossEntropySmooth(sparse=True,
                              reduction="mean",
                              smooth_factor=config.label_smooth_factor,
                              num_classes=config.class_num)

    # train dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             do_train=True,
                             repeat_num=1,
                             batch_size=config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = config.eval_interval
    dataset.__loop_size__ = step_size * eval_interval

    # evaluation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=1,
                                  batch_size=config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = Tensor(
        get_learning_rate(lr_init=config.lr_init,
                          lr_end=0.0,
                          lr_max=config.lr_max,
                          warmup_epochs=config.warmup_epochs,
                          total_epochs=config.epoch_size,
                          steps_per_epoch=step_size,
                          lr_decay_mode=config.lr_decay_mode))

    # optimizer
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    group_params = [{
        'params': decayed_params,
        'weight_decay': config.weight_decay
    }, {
        'params': no_decayed_params,
        'weight_decay': 0.0
    }, {
        'order_params': net.trainable_params()
    }]

    if config.use_lars:
        momentum = nn.Momentum(group_params,
                               lr,
                               config.momentum,
                               loss_scale=config.loss_scale,
                               use_nesterov=config.use_nesterov)
        opt = nn.LARS(momentum,
                      epsilon=config.lars_epsilon,
                      coefficient=config.lars_coefficient,
                      lars_filter=lambda x: 'beta' not in x.name and 'gamma'
                      not in x.name and 'bias' not in x.name)

    else:
        opt = nn.Momentum(group_params,
                          lr,
                          config.momentum,
                          loss_scale=config.loss_scale,
                          use_nesterov=config.use_nesterov)

    # model
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  metrics={
                      'acc':
                      DistAccuracy(batch_size=config.eval_batch_size,
                                   device_num=device_num)
                  },
                  eval_network=dist_eval_network)
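    # amp_level="O2" runs the network in float16; keep_batchnorm_fp32=False
    # overrides O2's default of leaving BatchNorm in float32, so the fixed
    # loss scale above guards the reduced numeric range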

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    print("run_start", device_id)
    acc = 0.0
    time_cost = 0.0
    for epoch_idx in range(epoch_size // eval_interval):
        model.train(1, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "epoch {} resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
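train_process is written to be launched once per device. A minimal, hypothetical driver sketch (the device count and epoch size below are assumptions for illustration):

from multiprocessing import Process, Queue

if __name__ == '__main__':
    device_num = 8    # assumption: one 8-device Ascend host
    epoch_size = 2    # assumption: a short smoke-test run
    q = Queue()
    workers = [Process(target=train_process,
                       args=(q, device_id, epoch_size, device_num, True))
               for device_id in range(device_num)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    # one {'acc': ..., 'cost': ...} dict per device, as put by train_process
    print([q.get() for _ in range(device_num)])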
Exemple #29
0
def run_Readcomprehension():
    '''
    Run the reading comprehension task.
    '''
    parser = argparse.ArgumentParser(description="Finetune and evaluate reading comprehension")
    parser.add_argument("--device_target", type=str, default="Ascend",
                        help="Device type. Default: Ascend.")
    parser.add_argument("--device_id", type=int, default=0,
                        help="ID of target device. ")
    parser.add_argument("--metric_method", type=str, default="F1",
                        help="The eval method including [F1]. Default: F1.")
    parser.add_argument("--do_train", type=str, default="false",
                        help="Enable train. Default: false.")
    parser.add_argument("--do_eval", type=str, default="true",
                        help="Enable evaluation. Default: false.")
    parser.add_argument("--eval_type", type=str, default="zero-shot",
                        help="The type of evaluation including [zero-shot, finetuned]. Default: zero-shot.")
    parser.add_argument("--epoch_num", type=int, default=1,
                        help="Epoch number. Default: 1.")
    parser.add_argument("--train_data_shuffle", type=str, default="true",
                        help="Enable train data shuffle. Default: true.")
    parser.add_argument("--eval_data_shuffle", type=str, default="false",
                        help="Enable eval data shuffle. Default: false.")
    parser.add_argument("--save_finetune_ckpt_path", type=str, default="",
                        help="Save the checkpoint path.")
    parser.add_argument("--load_pretrain_ckpt_path", type=str, default="",
                        help="Load the checkpoint file path.")
    parser.add_argument("--load_finetune_ckpt_path", type=str, default="",
                        help="Load the checkpoint file path.")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--eval_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--tokenizer_file_path", type=str, default="",
                        help="pretrained vocab and merge file path.")

    parser.add_argument("--generate_length", type=int, default=55,
                        help="The generation length of translation sentence.")
    parser.add_argument("--top_k", type=int, default=1,
                        help="Parameter for Top-K sampling.")
    parser.add_argument("--top_p", type=str, default="1.0",
                        help="parameter for Top-P sampling.")
    parser.add_argument("--temperature", type=str, default="1.0",
                        help="Parameter for generation, greater if generation more diverse. ")

    args_opt = parser.parse_args()

    epoch_num = args_opt.epoch_num
    metric = args_opt.metric_method
    save_finetune_ckpt_path = args_opt.save_finetune_ckpt_path
    load_finetune_ckpt_path = args_opt.load_finetune_ckpt_path
    load_pretrain_ckpt_path = args_opt.load_pretrain_ckpt_path

    if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when running the finetune task")
    if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "":
        raise ValueError("'eval_data_file_path' must be set when running the evaluation task")

    device_target = args_opt.device_target

    if device_target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=device_target,
                            device_id=args_opt.device_id,
                            max_call_depth=3000)
        context.set_auto_parallel_context(parallel_mode="stand_alone")
        print(" | Device: {}  | Device id: {}".format(device_target, args_opt.device_id))
    else:
        raise Exception("Device target error, Ascend is supported.")

    gpt2_loss = GPT2CoQA(config=gpt2_net_cfg,
                         is_training=True,
                         use_one_hot_embeddings=False)

    if args_opt.do_train.lower() == "true":
        get_train_setting(cfg)
        get_model_setting(cfg, gpt2_net_cfg)
        print("==============   Start Loading Translation Train Dataset   ==============")
        print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
        print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
        train_dataset = create_language_model_dataset(do_shuffle=(args_opt.train_data_shuffle.lower() == "true"),
                                                      dataset_path=args_opt.train_data_file_path)
        do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)

    if args_opt.do_eval.lower() == "true":
        get_model_setting(cfg, gpt2_net_cfg)
        print("============   Start Loading Translation Evaluation Dataset  ============")
        print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
        print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
        eval_dataset = create_language_model_dataset(do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"),
                                                     dataset_path=args_opt.eval_data_file_path)
        do_eval(eval_dataset, GPT2CoQAModel, metric, load_finetune_ckpt_path, args_opt.eval_type,
                args_opt.tokenizer_file_path, args_opt.generate_length, args_opt.top_k, args_opt.top_p,
                args_opt.temperature)
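As background for the --top_k/--top_p flags above, here is a minimal numpy sketch of the logit filtering those parameters conventionally control; it is illustrative only, not GPT2CoQAModel's actual sampling code:

import numpy as np

def top_k_top_p_filter(logits, top_k=0, top_p=1.0):
    """Mask logits outside the top_k best, then outside the smallest
    set whose softmax mass reaches top_p (nucleus sampling)."""
    logits = np.array(logits, dtype=np.float64)
    if top_k > 0:
        kth_best = np.sort(logits)[-top_k]
        logits[logits < kth_best] = -np.inf
    if top_p < 1.0:
        order = np.argsort(logits)[::-1]            # indices, best first
        probs = np.exp(logits[order] - logits[order][0])
        probs /= probs.sum()
        keep = np.searchsorted(np.cumsum(probs), top_p) + 1
        logits[order[keep:]] = -np.inf              # mask the tail
    return logits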
Exemple #30
0
# NOTE: this snippet starts mid-way through its argument parser; the opening
# below is reconstructed from the attributes used later (run_distribute,
# device_num, dataset_path) and should be treated as an assumption.
parser = argparse.ArgumentParser(description='training')
parser.add_argument('--run_distribute', type=bool, default=False,
                    help='Run distributed training, default is False')
parser.add_argument('--device_num', type=int, default=1,
                    help='Device number, default is 1')
parser.add_argument('--dataset_path',
                    type=str,
                    default=None,
                    help='Dataset path, default is None')
args_opt = parser.parse_args()

device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend",
                    save_graphs=False,
                    device_id=device_id)

if __name__ == '__main__':
    if args_opt.run_distribute:
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            device_num=args_opt.device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True)
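        # mirror_mean is the older MindSpore spelling of gradients_mean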
        init()
    max_captcha_digits = cf.max_captcha_digits
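    # per-step feature size: captcha height rounded up to a multiple of 64,
    # times 3 RGB channels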
    input_size = m.ceil(cf.captcha_height / 64) * 64 * 3
    # create dataset
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             repeat_num=1,
                             batch_size=cf.batch_size)
    step_size = dataset.get_dataset_size()
    # define lr (scaled linearly with the device count when distributed)
    lr_init = cf.learning_rate if not args_opt.run_distribute else cf.learning_rate * args_opt.device_num
    lr = get_lr(cf.epoch_size, step_size, lr_init)
    # define loss
    loss = CTCLoss(max_sequence_length=cf.captcha_width,
                   max_label_length=max_captcha_digits,