Ejemplo n.º 1
0
def asfarray(a, dtype=DEFAULT_FLOAT_DTYPE):
    """
    Convert the input to a tensor with a floating-point dtype.

    Behaves like asarray, except that the result always has a float dtype:
    if the requested dtype is not one of mindspore.float16, mindspore.float32
    or mindspore.float64, DEFAULT_FLOAT_DTYPE is used instead.

    Args:
        a (Union[int, float, bool, list, tuple, numpy.ndarray]): Input data, in
            any form that can be converted to an array. This includes lists,
            lists of tuples, tuples, tuples of tuples, tuples of lists and
            ndarrays.
        dtype (Union[mindspore.dtype, str], optional): Designated array dtype, can
            be in format of np.float32, or `float32`. Default is
            DEFAULT_FLOAT_DTYPE.

    Returns:
        Tensor, generated tensor with the specified float dtype.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> print(np.asfarray([1,2,3]))
        [1. 2. 3.]
    """
    dtype = _check_dtype(dtype)
    _check_input_for_asarray(a)

    float_dtypes = (mindspore.float16, mindspore.float32, mindspore.float64)
    if dtype not in float_dtypes:
        dtype = DEFAULT_FLOAT_DTYPE

    # Python sequences are turned into a numpy array first; any numpy array
    # (including the one just built) is then wrapped with Tensor.from_numpy
    # before the final Tensor construction applies the float dtype.
    if isinstance(a, (list, tuple)):
        a = onp.asarray(a)
    if isinstance(a, onp.ndarray):
        a = Tensor.from_numpy(a)

    return Tensor(a, dtype)
Ejemplo n.º 2
0
        cb_params.train_network = network
        cb_params.epoch_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2,
                       batch_gt_box0, batch_gt_box1, batch_gt_box2,
                       input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
Ejemplo n.º 3
0
def train():
    """Train function.

    Runs a YOLOv4 (CSPDarkNet53) training job whose inputs and outputs are
    staged through local ``/cache`` paths with ``mox.file.copy_parallel``: the
    optional pretrained backbone and the dataset at ``args.data_url`` are
    copied in before training, and the checkpoint directory is copied out to
    ``args.train_url`` after the data iterator is exhausted.

    All settings come from ``parse_args()``; the function takes no arguments
    and returns nothing.
    """
    args = parse_args()
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False,
                        device_id=device_id)
    loss_meter = AverageMeter('loss')

    network = YOLOV4CspDarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        # Download the backbone checkpoint into /cache under its own file name
        # and point args at the local copy before the weights are loaded.
        remote_backbone = args.pretrained_backbone
        local_backbone = '/cache/' + remote_backbone.split('/')[-1]
        mox.file.copy_parallel(src_url=remote_backbone,
                               dst_url=local_backbone)
        args.pretrained_backbone = local_backbone
    load_yolov4_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV4CspDarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download
    data_dir = '/cache/data'
    ckpt_dir = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=data_dir)

    dataset, data_size = create_yolo_dataset(
        image_dir=os.path.join(data_dir, 'images'),
        anno_path=os.path.join(data_dir, 'annotation.json'),
        is_training=True,
        batch_size=args.per_batch_size,
        max_epoch=args.max_epoch,
        device_num=args.group_size,
        rank=args.rank,
        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size /
                               args.group_size)

    # Without an explicit interval, checkpoint every 10 epochs' worth of steps.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    lr_schedule = get_lr(args)

    optimizer = Momentum(params=get_param_groups(network),
                         learning_rate=Tensor(lr_schedule),
                         momentum=args.momentum,
                         weight_decay=args.weight_decay,
                         loss_scale=args.loss_scale)
    if context.get_context("device_target") == "GPU":
        # GPU: build the train network through amp at level O2 with a fixed
        # loss scale of 1.0.
        loss_scale_manager = FixedLossScaleManager(1.0,
                                                   drop_overflow_update=False)
        network = amp.build_train_network(
            network,
            optimizer=optimizer,
            loss_scale_manager=loss_scale_manager,
            level="O2",
            keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, optimizer)
        network.set_train()

    # checkpoint save
    max_ckpts = 10
    ckpt_cb = ModelCheckpoint(
        config=CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                keep_checkpoint_max=max_ckpts),
        directory=ckpt_dir,
        prefix='yolov4')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    cb_params.epoch_num = max_ckpts
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    label_keys = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')
    last_logged_step = -1
    tick = time.time()
    data_loader = dataset.create_dict_iterator(output_numpy=True, num_epochs=1)

    for step, batch in enumerate(data_loader):
        image_array = batch["image"]
        # Dimensions 2 and 3 of the image batch; they are reversed below before
        # being passed to the network (presumably height/width of an NCHW
        # batch -- confirm against the dataset pipeline).
        spatial_dims = image_array.shape[2:4]
        images = Tensor.from_numpy(image_array)
        labels = [Tensor.from_numpy(batch[key]) for key in label_keys]

        input_shape = Tensor(tuple(spatial_dims[::-1]), ms.float32)
        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress: drive the checkpoint callback by hand after each step.
        cb_params.cur_step_num = step + 1  # current step number
        cb_params.batch_num = step + 2
        ckpt_cb.step_end(run_context)

        if step % args.log_interval == 0:
            elapsed = time.time() - tick
            epoch = int(step / args.steps_per_epoch)
            fps = args.per_batch_size * (
                step - last_logged_step) * args.group_size / elapsed
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, step, loss_meter, fps, lr_schedule[step]))
            # Start a fresh measurement window for the next log line.
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step

        if (step + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=ckpt_dir, dst_url=args.train_url)
Ejemplo n.º 4
0
def train():
    """Train function.

    Builds a YOLOv3 (DarkNet53) network with its loss cell, creates the
    dataset from ``args.data_root`` / ``args.annFile``, and runs a manual
    training loop over the dataset iterator. On ranks where
    ``args.rank_save_ckpt_flag`` is set, a ``ModelCheckpoint`` callback is
    driven by hand after every step. When ``args.need_profiler`` is set, the
    loop stops after step index 10 and the profiler results are analysed.

    All settings come from ``parse_args()``; the function takes no arguments
    and returns nothing.
    """
    args = parse_args()
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=True,
                        device_id=device_id)
    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir,
                            is_detail=True,
                            is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    dataset, data_size = create_yolo_dataset(image_dir=args.data_root,
                                             anno_path=args.annFile,
                                             is_training=True,
                                             batch_size=args.per_batch_size,
                                             max_epoch=args.max_epoch,
                                             device_num=args.group_size,
                                             rank=args.rank,
                                             config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    # Without an explicit interval, checkpoint once per epoch.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr_schedule = get_lr(args)

    optimizer = Momentum(params=get_param_groups(network),
                         learning_rate=Tensor(lr_schedule),
                         momentum=args.momentum,
                         weight_decay=args.weight_decay,
                         loss_scale=args.loss_scale)
    if context.get_context("device_target") == "GPU":
        # GPU: build the train network through amp at level O2 with a fixed
        # loss scale of 1.0.
        loss_scale_manager = FixedLossScaleManager(1.0, drop_overflow_update=False)
        network = amp.build_train_network(network,
                                          optimizer=optimizer,
                                          loss_scale_manager=loss_scale_manager,
                                          level="O2",
                                          keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, optimizer)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save: keep one checkpoint per interval over the whole run.
        max_ckpts = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=max_ckpts)
        ckpt_dir = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=ckpt_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = max_ckpts
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    label_keys = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')
    last_logged_step = -1
    tick = time.time()
    data_loader = dataset.create_dict_iterator(output_numpy=True)

    for step, batch in enumerate(data_loader):
        image_array = batch["image"]
        # Dimensions 2 and 3 of the image batch; reversed below before being
        # passed to the network (presumably height/width -- confirm).
        spatial_dims = image_array.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(step, spatial_dims[0]))

        images = Tensor.from_numpy(image_array)
        labels = [Tensor.from_numpy(batch[key]) for key in label_keys]

        input_shape = Tensor(tuple(spatial_dims[::-1]), ms.float32)
        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress: drive the checkpoint callback by hand.
            cb_params.cur_step_num = step + 1  # current step number
            cb_params.batch_num = step + 2
            ckpt_cb.step_end(run_context)

        if step % args.log_interval == 0:
            elapsed = time.time() - tick
            epoch = int(step / args.steps_per_epoch)
            fps = args.per_batch_size * (step - last_logged_step) * args.group_size / elapsed
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, step, loss_meter, fps, lr_schedule[step]))
            # Start a fresh measurement window for the next log line.
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step

        if (step + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        # Profiling runs only cover the first 11 steps.
        if args.need_profiler and step == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
Ejemplo n.º 5
0
def test_yolov3_darknet53():
    """Train YOLOv3 (DarkNet53) briefly on Ascend and check loss and speed.

    The test builds the dataset from ``DATA_DIR`` (``train2014`` images and
    ``annotations/instances_train2014.json``) limited to 256 samples, trains
    with a batch size of 32 for the epochs the dataset was created with, and
    then asserts that

    * the first number found in ``str(loss_meter)`` is below 3210.0, and
    * the wall-clock time measured for the last completed epoch is below
      20.0 seconds.

    Raises:
        KeyError: if ``DATA_DIR`` is not a directory.
        AssertionError: if the annotation file is missing or one of the two
            thresholds above is exceeded.
    """
    device_id_env = os.getenv('DEVICE_ID')
    device_id = int(device_id_env) if device_id_env else 0
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target="Ascend",
                        device_id=device_id)

    # Fixed single-device hyper-parameters for this test.
    rank = 0
    device_num = 1
    lr_init = 0.001
    epoch_size = 3
    batch_size = 32
    loss_scale = 1024
    dataset_root = DATA_DIR
    if not os.path.isdir(dataset_root):
        raise KeyError("mindrecord path is not exist.")
    image_dir = os.path.join(dataset_root, 'train2014')
    anno_file = os.path.join(dataset_root,
                             'annotations/instances_train2014.json')
    if not os.path.exists(anno_file):
        print("instances_train2014 file is not exist.")
        assert False

    loss_meter = AverageMeter('loss')
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE,
                                      gradients_mean=True,
                                      device_num=1)
    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    network = YoloWithLossCell(network)
    print('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = 0
    config.label_smooth_factor = 0.1
    print("Create dataset begin!")
    # Train at a single fixed 416x416 shape.
    config.multi_scale = [[416, 416]]
    dataset, data_size = create_yolo_dataset(image_dir=image_dir,
                                             anno_path=anno_file,
                                             is_training=True,
                                             batch_size=batch_size,
                                             max_epoch=epoch_size,
                                             device_num=device_num,
                                             rank=rank,
                                             config=config,
                                             num_samples=256)
    print("Create dataset done!")
    per_batch_size = batch_size
    group_size = 1
    print("data_size:", data_size)
    steps_per_epoch = int(data_size / per_batch_size / group_size)
    print("steps_per_epoch:", steps_per_epoch)

    # Cosine-annealing schedule without warm-up.
    warmup_epochs = 0.
    t_max = 1
    eta_min = 0
    lr_schedule = warmup_cosine_annealing_lr(lr_init, steps_per_epoch,
                                             warmup_epochs, epoch_size,
                                             t_max, eta_min)

    optimizer = Momentum(params=get_param_groups(network),
                         learning_rate=Tensor(lr_schedule),
                         momentum=0.9,
                         weight_decay=0.0005,
                         loss_scale=loss_scale)

    network = TrainingWrapper(network, optimizer)
    network.set_train()

    label_keys = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')
    last_logged_step = -1
    tick = time.time()
    data_loader = dataset.create_dict_iterator(output_numpy=True)
    train_start = time.time()
    last_epoch_seconds = 0
    print("time:", time.time())
    for step, batch in enumerate(data_loader):
        image_array = batch["image"]
        # Dimensions 2 and 3 of the image batch; reversed below before being
        # passed to the network (presumably height/width -- confirm).
        spatial_dims = image_array.shape[2:4]
        print('iter[{}], shape{}'.format(step, spatial_dims[0]))
        images = Tensor.from_numpy(image_array)
        labels = [Tensor.from_numpy(batch[key]) for key in label_keys]
        input_shape = Tensor(tuple(spatial_dims[::-1]), ms.float32)
        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        # Report (and reset the meter) once per completed epoch.
        if (step + 1) % steps_per_epoch == 0:
            elapsed = time.time() - tick
            epoch = int(step / steps_per_epoch)
            fps = per_batch_size * (step - last_logged_step) * group_size / elapsed
            if rank == 0:
                print(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}, time_used:{}'
                    .format(epoch, step, loss_meter, fps, lr_schedule[step],
                            elapsed))
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step
            last_epoch_seconds = elapsed

    total_seconds = time.time() - train_start
    print('train_time_used:{}'.format(total_seconds))

    # The loss is recovered from the meter's string form: the first number in
    # it is compared against the threshold.
    loss_threshold = 3210.0
    numbers_in_meter = re.findall(r"\d+\.?\d*", str(loss_meter))
    print('loss_value:{}'.format(numbers_in_meter[0]))
    assert float(numbers_in_meter[0]) < loss_threshold

    epoch_seconds_threshold = 20.0
    print('time_used_per_epoch:{}'.format(last_epoch_seconds))
    assert last_epoch_seconds < epoch_seconds_threshold
    print('==========test case passed===========')
Ejemplo n.º 6
0
def train():
    """Train function.

    Trains YOLOv3 (DarkNet53), optionally converted to a quantization-aware
    network when ``config.quantization_aware`` is set. Ground-truth tensors
    are produced inside the loop from each batch's ``"annotation"`` entry by
    ``batch_preprocess_true_box`` (when ``args.group_size == 1``) or
    ``batch_preprocess_true_box_single`` (otherwise). On ranks where
    ``args.rank_save_ckpt_flag`` is set, a ``ModelCheckpoint`` callback is
    driven by hand after every step. When ``args.need_profiler`` is set, the
    loop stops after step index 10 and the profiler results are analysed.

    All settings come from ``parse_args()``; the function takes no arguments
    and returns nothing.
    """
    args = parse_args()
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir,
                            is_detail=True,
                            is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_quant_params(args, network)

    config = ConfigYOLOV3DarkNet53()
    # convert fusion network to quantization aware network
    if config.quantization_aware:
        network = quant.convert_quant_network(network,
                                              bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    dataset, data_size = create_yolo_dataset(image_dir=args.data_root,
                                             anno_path=args.annFile,
                                             is_training=True,
                                             batch_size=args.per_batch_size,
                                             max_epoch=args.max_epoch,
                                             device_num=args.group_size,
                                             rank=args.rank,
                                             config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size /
                               args.group_size)

    # Without an explicit interval, checkpoint once per epoch.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr_schedule = get_lr(args)

    optimizer = Momentum(params=get_param_groups(network),
                         learning_rate=Tensor(lr_schedule),
                         momentum=args.momentum,
                         weight_decay=args.weight_decay,
                         loss_scale=args.loss_scale)

    network = TrainingWrapper(network, optimizer)
    network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save: keep one checkpoint per interval over the whole run.
        max_ckpts = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval,
            keep_checkpoint_max=max_ckpts)
        ckpt_dir = os.path.join(args.outputs_dir,
                                'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=ckpt_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = max_ckpts
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    last_logged_step = -1
    tick = time.time()
    data_loader = dataset.create_dict_iterator(output_numpy=True, num_epochs=1)

    shape_record = ShapeRecord()
    for step, batch in enumerate(data_loader):
        image_array = batch["image"]
        # Dimensions 2 and 3 of the image batch; reversed below before being
        # passed to the network (presumably height/width -- confirm).
        spatial_dims = image_array.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(step, spatial_dims[0]))
        shape_record.set(spatial_dims)

        images = Tensor.from_numpy(image_array)
        annos = batch["annotation"]
        # Pick the ground-truth preprocessing routine by group size.
        if args.group_size == 1:
            preprocess = batch_preprocess_true_box
        else:
            preprocess = batch_preprocess_true_box_single
        # Explicit 6-way unpack so an unexpected number of outputs still
        # fails here rather than inside the network call.
        y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2 = \
            preprocess(annos, config, spatial_dims)
        labels = [
            Tensor.from_numpy(array)
            for array in (y_true_0, y_true_1, y_true_2,
                          gt_box0, gt_box1, gt_box2)
        ]

        input_shape = Tensor(tuple(spatial_dims[::-1]), ms.float32)
        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress: drive the checkpoint callback by hand.
            cb_params.cur_step_num = step + 1  # current step number
            cb_params.batch_num = step + 2
            ckpt_cb.step_end(run_context)

        if step % args.log_interval == 0:
            elapsed = time.time() - tick
            epoch = int(step / args.steps_per_epoch)
            fps = args.per_batch_size * (
                step - last_logged_step) * args.group_size / elapsed
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, step, loss_meter, fps, lr_schedule[step]))
            # Start a fresh measurement window for the next log line.
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step

        if (step + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        # Profiling runs only cover the first 11 steps.
        if args.need_profiler and step == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
Ejemplo n.º 7
0
def train():
    """Train function.

    End-to-end YOLOv3 (DarkNet53) training driver. In order, it:

    1. configures the MindSpore context and, when ``args.is_distributed`` is
       set, initialises communication and records rank / group size on args;
    2. decides whether this rank saves checkpoints and creates a timestamped
       output directory and logger under ``args.ckpt_path``;
    3. builds the network, optionally loading a pretrained backbone and/or
       resuming from ``args.resume_yolov3``;
    4. creates the dataset, the learning-rate schedule selected by
       ``args.lr_scheduler`` and a Momentum optimizer;
    5. runs a manual training loop, driving a ``ModelCheckpoint`` callback by
       hand on checkpoint-saving ranks.

    When ``args.need_profiler`` is set, the loop stops after step index 10 and
    the profiler results are analysed.

    Raises:
        NotImplementedError: if ``args.lr_scheduler`` is not one of
            'exponential', 'cosine_annealing', 'cosine_annealing_V2' or
            'cosine_annealing_sample'.
    """
    args = parse_args()

    device_id_env = os.getenv('DEVICE_ID')
    device_id = int(device_id_env) if device_id_env else 0
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=True,
                        device_id=device_id)

    # init distributed
    if args.is_distributed:
        if args.device_target == "Ascend":
            init()
        else:
            init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()

    # Either only rank 0 saves checkpoints (is_save_on_master) or every rank
    # does; kept compatible with model parallel.
    saves_ckpt = (not args.is_save_on_master) or args.rank == 0
    args.rank_save_ckpt_flag = 1 if saves_ckpt else 0

    # logger
    run_stamp = datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')
    args.outputs_dir = os.path.join(args.ckpt_path, run_stamp)
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir,
                            is_detail=True,
                            is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True,
                                      device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)

    if args.pretrained_backbone:
        network = load_backbone(network, args.pretrained_backbone, args)
        args.logger.info('load pre-trained backbone {} into network'.format(
            args.pretrained_backbone))
    else:
        args.logger.info('Not load pre-trained backbone, please be careful')

    if args.resume_yolov3:
        # Rebuild the parameter dict for the bare network: skip entries whose
        # name starts with 'moments.' and strip the 'yolo_network.' prefix
        # from the names that carry it.
        wrapper_prefix = 'yolo_network.'
        resumed_params = {}
        for name, value in load_checkpoint(args.resume_yolov3).items():
            if name.startswith('moments.'):
                continue
            if name.startswith(wrapper_prefix):
                target_name = name[len(wrapper_prefix):]
            else:
                target_name = name
            resumed_params[target_name] = value
            args.logger.info('in resume {}'.format(name))

        args.logger.info('resume finished')
        load_param_into_net(network, resumed_params)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    dataset, data_size = create_yolo_dataset(image_dir=args.data_root,
                                             anno_path=args.annFile,
                                             is_training=True,
                                             batch_size=args.per_batch_size,
                                             max_epoch=args.max_epoch,
                                             device_num=args.group_size,
                                             rank=args.rank,
                                             config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size /
                               args.group_size)

    # Without an explicit interval, checkpoint once per epoch.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    # lr scheduler
    scheduler_name = args.lr_scheduler
    if scheduler_name == 'exponential':
        lr_schedule = warmup_step_lr(
            args.lr,
            args.lr_epochs,
            args.steps_per_epoch,
            args.warmup_epochs,
            args.max_epoch,
            gamma=args.lr_gamma,
        )
    elif scheduler_name == 'cosine_annealing':
        lr_schedule = warmup_cosine_annealing_lr(
            args.lr, args.steps_per_epoch, args.warmup_epochs,
            args.max_epoch, args.T_max, args.eta_min)
    elif scheduler_name == 'cosine_annealing_V2':
        lr_schedule = warmup_cosine_annealing_lr_V2(
            args.lr, args.steps_per_epoch, args.warmup_epochs,
            args.max_epoch, args.T_max, args.eta_min)
    elif scheduler_name == 'cosine_annealing_sample':
        lr_schedule = warmup_cosine_annealing_lr_sample(
            args.lr, args.steps_per_epoch, args.warmup_epochs,
            args.max_epoch, args.T_max, args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    optimizer = Momentum(params=get_param_groups(network),
                         learning_rate=Tensor(lr_schedule),
                         momentum=args.momentum,
                         weight_decay=args.weight_decay,
                         loss_scale=args.loss_scale)
    # amp is used on GPU only; other targets use the TrainingWrapper.
    if context.get_context("device_target") == "GPU":
        loss_scale_manager = FixedLossScaleManager(1.0,
                                                   drop_overflow_update=False)
        network = amp.build_train_network(
            network,
            optimizer=optimizer,
            loss_scale_manager=loss_scale_manager,
            level="O2",
            keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, optimizer)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save: keep one checkpoint per interval over the whole run.
        max_ckpts = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval,
            keep_checkpoint_max=max_ckpts)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = max_ckpts
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    label_keys = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')
    last_logged_step = -1
    tick = time.time()
    data_loader = dataset.create_dict_iterator(output_numpy=True)

    for step, batch in enumerate(data_loader):
        image_array = batch["image"]
        # Dimensions 2 and 3 of the image batch; reversed below before being
        # passed to the network (presumably height/width -- confirm).
        spatial_dims = image_array.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(step, spatial_dims[0]))

        images = Tensor.from_numpy(image_array)
        labels = [Tensor.from_numpy(batch[key]) for key in label_keys]

        input_shape = Tensor(tuple(spatial_dims[::-1]), ms.float32)
        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress: drive the checkpoint callback by hand.
            cb_params.cur_step_num = step + 1  # current step number
            cb_params.batch_num = step + 2
            ckpt_cb.step_end(run_context)

        if step % args.log_interval == 0:
            elapsed = time.time() - tick
            epoch = int(step / args.steps_per_epoch)
            fps = args.per_batch_size * (
                step - last_logged_step) * args.group_size / elapsed
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, step, loss_meter, fps, lr_schedule[step]))
            # Start a fresh measurement window for the next log line.
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step

        if (step + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        # Profiling runs only cover the first 11 steps.
        if args.need_profiler and step == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
Ejemplo n.º 8
0
def asarray(a, dtype=None):
    """
    Convert the input to tensor.

    This function converts an array-like object to a tensor.

    When `dtype` is None the data type is inferred from `a`, with these
    defaults applied: Python floats and float64 data use DEFAULT_FLOAT_DTYPE,
    Python ints and int64 data use DEFAULT_INT_DTYPE, and bool data uses
    mindspore.bool_. A Tensor input is returned unchanged when no cast is
    required.

    Args:
        a (Union[int, float, bool, list, tuple, numpy.ndarray]): Input data, in
            any form that can be converted to an array. This includes lists, lists of
            tuples, tuples, tuples of tuples, tuples of lists and ndarrays.
        dtype (Union[mindspore.dtype, str], optional): Designated array dtype, can
            be in format of np.int32, or `int32`. If dtype is None, the data type
            of the new tensor will be inferred from a. Default is None.

    Returns:
        Tensor, generated tensor with the specified dtype.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> print(np.asarray([1,2,3]))
        [1 2 3]
    """

    if dtype is not None:
        dtype = _check_dtype(dtype)

    _ = _check_input_for_asarray(a)

    if dtype is None:
        # bool is tested first because bool is a subclass of int in Python.
        if isinstance(a, bool):
            dtype = mindspore.bool_
        elif isinstance(a, int):
            dtype = DEFAULT_INT_DTYPE
        elif isinstance(a, float):
            dtype = DEFAULT_FLOAT_DTYPE

    if isinstance(a, (list, tuple)):
        a = onp.asarray(a)
        # If dtype is not specified, we keep consistent with numpy decision
        # only exceptions are: we use int/float32
        if dtype is None:
            # Compare dtypes by value: equivalent dtypes are not guaranteed to
            # be the same object, so an identity test can miss them.
            if a.dtype == onp.dtype('int64'):
                dtype = DEFAULT_INT_DTYPE
            elif a.dtype == onp.dtype('float64'):
                dtype = DEFAULT_FLOAT_DTYPE

    if isinstance(a, onp.ndarray) and dtype is None:
        if a.dtype == onp.dtype('bool'):
            dtype = mindspore.bool_
        elif a.dtype == onp.dtype('int'):
            dtype = DEFAULT_INT_DTYPE
        elif a.dtype == onp.dtype('float'):
            dtype = DEFAULT_FLOAT_DTYPE
        a = Tensor.from_numpy(a)

    # If a is already a tensor and we don't need to cast dtype, return a
    if isinstance(a, Tensor):
        if dtype is None:
            return a
        dtype = _check_dtype(dtype)
        if dtype == a.dtype:
            return a

    return Tensor(a, dtype=dtype)
Ejemplo n.º 9
0
def logspace(start,
             stop,
             num=50,
             endpoint=True,
             base=10.0,
             dtype=None,
             axis=0):
    """
    Return numbers spaced evenly on a log scale.

    In linear space, the sequence starts at base ** start (base to the power of
    start) and ends with base ** stop (see endpoint below).
    The current implementation is a direct wrapper on top of numpy.logspace, except
    that the default dtype is float32, compared to float64 for numpy.

    Args:
        start (Union[int, list(int), tuple(int), tensor]): The starting value of the sequence.
        stop (Union[int, list(int), tuple(int), tensor]): The end value of the sequence,
            unless `endpoint` is set to False. In that case, the sequence consists
            of all but the last of `num + 1` evenly spaced samples, so that `stop`
            is excluded. Note that the step size changes when `endpoint` is False.
        num (int, optional): Number of samples to generate. Default is 50.
        endpoint (bool, optional): If True, `stop` is the last sample. Otherwise, it is
            not included. Default is True.
        base (Union[int, float], optional): The base of the log space. The step size
            between the elements in ln(samples) / ln(base) (or log_base(samples))
            is uniform. Default is 10.0.
        dtype (Union[mindspore.dtype, str], optional): Designated array dtype, can
            be in format of np.float32, or `float32`. If `dtype` is None, float32
            is used. Default is None.
        axis (int, optional): The axis in the result to store the samples. Relevant
            only if start or stop are array-like. By default (0), the samples will
            be along a new axis inserted at the beginning. Use -1 to get an axis at the end.
            Default is 0.

    Returns:
        samples (Tensor): num samples, equally spaced on a log scale.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> print(np.logspace(0, 5, 6, base=2.0))
        [ 1.  2.  4.  8. 16. 32.]
    """
    # Tensor bounds are turned into numpy arrays before being handed to numpy.
    if isinstance(start, Tensor):
        start = start.asnumpy()
    if isinstance(stop, Tensor):
        stop = stop.asnumpy()

    # Resolve the numpy dtype of the result; float32 unless the caller chose one.
    if dtype is None:
        np_dtype = onp.float32
    else:
        np_dtype = mindspore.dtype_to_nptype(_check_dtype(dtype))

    samples = onp.logspace(start, stop, num, endpoint, base, np_dtype, axis)
    return Tensor.from_numpy(samples)
Ejemplo n.º 10
0
def linspace(start,
             stop,
             num=50,
             endpoint=True,
             retstep=False,
             dtype=None,
             axis=0):
    """
    Return evenly spaced values within a given interval.

    The current implementation is a direct wrapper on top of numpy.linspace, except
    that the default dtype is float32, compared to float64 for numpy.

    Args:
        start (Union[int, list(int), tuple(int), tensor]): The starting value of the sequence.
        stop (Union[int, list(int), tuple(int), tensor]): The end value of the sequence,
            unless `endpoint` is set to False. In that case, the sequence consists
            of all but the last of `num + 1` evenly spaced samples, so that `stop`
            is excluded. Note that the step size changes when `endpoint` is False.
        num (int, optional): Number of samples to generate. Default is 50.
        endpoint (bool, optional): If True, `stop` is the last sample. Otherwise, it is
            not included. Default is True.
        retstep (bool, optional): If True, return (`samples`, `step`), where `step` is
            the spacing between samples.
        dtype (Union[mindspore.dtype, str], optional): Designated array dtype, can
            be in format of np.float32, or `float32`. If `dtype` is None, float32
            is used. Default is None.
        axis (int, optional): The axis in the result to store the samples. Relevant
            only if start or stop are array-like. By default (0), the samples will
            be along a new axis inserted at the beginning. Use -1 to get an axis at the end.
            Default is 0.

    Returns:
        samples (Tensor): There are `num` equally spaced samples in the closed interval
            ``[start, stop]`` or the half-open interval ``[start, stop)``
            (depending on whether `endpoint` is True or False).

        step (optional): Only returned if `retstep` is True.
            Size of spacing between samples, passed through as returned by
            numpy.linspace (it is not converted to a Tensor).

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> print(np.linspace(0, 5, 6))
        [0. 1. 2. 3. 4. 5.]
    """
    # Tensor bounds are turned into numpy arrays before being handed to numpy.
    if isinstance(start, Tensor):
        start = start.asnumpy()
    if isinstance(stop, Tensor):
        stop = stop.asnumpy()

    # Resolve the numpy dtype of the result; float32 unless the caller chose one.
    if dtype is None:
        np_dtype = onp.float32
    else:
        np_dtype = mindspore.dtype_to_nptype(_check_dtype(dtype))

    result = onp.linspace(start, stop, num, endpoint, retstep, np_dtype, axis)

    if not retstep:
        return Tensor.from_numpy(result)

    # With retstep set, numpy hands back a (samples, step) pair.
    samples, step = result[0], result[1]
    return Tensor.from_numpy(samples), step
Ejemplo n.º 11
0
def arange(*args, **kwargs):
    """
    Return evenly spaced values within a given interval.

    Values are generated by numpy.arange from `start`, `stop` and `step`.
    The current implementation is a direct wrapper on top of numpy.arange, except
    that the default dtype is float32 and int32, compared to float64 and int64 for
    the numpy implementation.

    Args:
        start(Union[int, float], optional): Start of interval. The interval includes
            this value. Default is 0.
        stop(Union[int, float], optional): End of interval. The interval does not
            include this value, except in some cases where step is not an integer
            and floating point round-off affects the length of out.
        step(Union[int, float], optional): Spacing between values. For any output
            out, this is the distance between two adjacent values, out[i+1] - out[i].
            The default step size is 1. If step is specified as a position argument,
            start must also be given.
        dtype (Union[mindspore.dtype, str], optional): Designated array dtype, can
            be in format of np.float32, or `float32`. If dtype is None, the result
            is float32 when any of start, stop or step is a Python float and int32
            otherwise. May be given by keyword or as the fourth positional
            argument. Default is None.

    Returns:
        Tensor, array of evenly spaced values.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> print(np.arange(0, 5, 1))
        [0 1 2 3 4]
    """
    # numpy.arange accepts dtype as a fourth positional argument. Move it into
    # kwargs so it is validated like the keyword form and does not clash with
    # the dtype keyword that is always set below. When dtype is supplied both
    # ways the arguments are left alone and numpy reports the conflict.
    if len(args) == 4 and 'dtype' not in kwargs:
        kwargs['dtype'] = args[3]
        args = args[:3]

    # infer the dtype, if either of start, end, step is float, default dtype is
    # float32, else int32.
    range_values = list(args)
    range_values.extend(kwargs[key] for key in ('start', 'stop', 'step') if key in kwargs)
    has_float = any(isinstance(value, float) for value in range_values)
    final_dtype = onp.float32 if has_float else onp.int32

    # An explicit dtype overrides the inferred one.
    if kwargs.get('dtype') is not None:
        final_dtype = mindspore.dtype_to_nptype(_check_dtype(kwargs['dtype']))
    kwargs['dtype'] = final_dtype

    out = Tensor.from_numpy(onp.arange(*args, **kwargs))
    # NOTE(review): the extra Tensor(...) wrap is kept from the original code;
    # whether it is required is not visible from this function -- confirm.
    return Tensor(out)
def train():
    """
    Train function.

    Parses the arguments, copies the pretrained backbone (when one is given)
    and the dataset into local cache directories with `mox.file.copy_parallel`,
    builds the YOLOV3DarkNet53 network with its loss cell and a Momentum
    optimizer, runs the training loop with step-based checkpointing, and
    finally copies the local checkpoint directory to `args.train_url`.
    """
    args = parse_args()

    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=False)

    # logger
    args.outputs_dir = os.path.join(args.ckpt_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    loss_meter = AverageMeter('loss')

    # download backbone checkpoint
    # Only attempted when a backbone was requested, so that running without a
    # pretrained backbone reaches the "Not load" branch below instead of
    # failing while building the path or copying from an empty URL.
    local_backbone_ckpt_path = None
    if args.pretrained_backbone:
        backbone_ckpt_file = args.pretrained_backbone.split('/')[-1]
        local_backbone_ckpt_path = '/cache/' + backbone_ckpt_file
        mox.file.copy_parallel(src_url=args.pretrained_backbone, dst_url=local_backbone_ckpt_path)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        network = load_backbone(network, local_backbone_ckpt_path, args)
        args.logger.info('load pre-trained backbone {} into network'.format(args.pretrained_backbone))
    else:
        args.logger.info('Not load pre-trained backbone, please be careful')

    if args.resume_yolov3:
        param_dict = load_checkpoint(args.resume_yolov3)
        param_dict_new = {}
        for key, values in param_dict.items():
            # Entries prefixed with 'moments.' are not restored.
            if key.startswith('moments.'):
                continue
            elif key.startswith('yolo_network.'):
                # Drop the 13-character 'yolo_network.' prefix from the key.
                param_dict_new[key[13:]] = values
                args.logger.info('in resume {}'.format(key))
            else:
                param_dict_new[key] = values
                args.logger.info('in resume {}'.format(key))

        args.logger.info('resume finished')
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    ds, data_size = create_yolo_dataset(image_dir=os.path.join(local_data_path, 'images'),
                                        anno_path=os.path.join(local_data_path, 'annotation.json'),
                                        is_training=True,
                                        batch_size=args.per_batch_size, max_epoch=args.max_epoch,
                                        device_num=args.group_size, rank=args.rank, config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    # Default checkpoint interval: every 10 epochs' worth of steps.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    # lr scheduler
    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    # checkpoint save
    ckpt_max_num = 10
    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                   keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=ckpt_config,
                              directory=local_ckpt_path,
                              prefix='yolov3')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    # NOTE(review): epoch_num is set to the checkpoint retention count rather
    # than an epoch count -- confirm this is intended.
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    # old_progress / t_end track the last logged step and time for throughput.
    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        # The two image dimensions are passed to the network in reversed order.
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0, batch_gt_box1,
                       batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            # Images processed across all devices since the previous log line.
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args.train_url)