예제 #1
0
def get_rec_data_iterators(train_db_prefix, val_db_prefix, input_size,
                           batch_size, devices):
    """Build DALI classification iterators for training and validation.

    One ``HybridRecPipe`` is created per entry in ``devices``. Pipelines are
    numbered ``0 .. len(devices) - 1``; that index and the total count are
    passed as the last two constructor arguments.

    Args:
        train_db_prefix: First argument of every training pipeline
            (presumably the RecordIO path prefix -- confirm against
            ``HybridRecPipe``).
        val_db_prefix: Same for validation. A falsy value disables the
            validation iterator.
        input_size: Forwarded unchanged to every pipeline.
        batch_size: Forwarded unchanged to every pipeline.
        devices: Sized collection; only its length is used.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter)``. The second element is
        ``None`` when ``val_db_prefix`` is falsy.
    """
    shard_count = len(devices)
    reader_threads = 2

    def make_pipes(db_prefix, flag):
        # ``flag`` is True for the training set and False for validation.
        pipes = []
        for shard in range(shard_count):
            pipes.append(
                HybridRecPipe(db_prefix, flag, input_size, batch_size,
                              reader_threads, shard, shard_count))
        return pipes

    train_pipes = make_pipes(train_db_prefix, True)
    # Build the first pipeline so its reader can report the epoch size.
    first_train = train_pipes[0]
    first_train.build()
    print("Training pipeline epoch size: {}".format(
        first_train.epoch_size("Reader")))
    # Wrap the training pipelines in an MXNet iterator.
    dali_train_iter = DALIClassificationIterator(
        train_pipes, first_train.epoch_size("Reader"))

    if not val_db_prefix:
        return dali_train_iter, None

    val_pipes = make_pipes(val_db_prefix, False)
    # Same procedure for the validation pipelines.
    first_val = val_pipes[0]
    first_val.build()
    print("Validation pipeline epoch size: {}".format(
        first_val.epoch_size("Reader")))
    dali_val_iter = DALIClassificationIterator(
        val_pipes, first_val.epoch_size("Reader"))
    return dali_train_iter, dali_val_iter
예제 #2
0
def get_rec_iter(args, trainpipes, valpipes, data_paths, kv=None):
    """Wrap already-constructed DALI pipelines in MXNet classification iterators.

    Args:
        args: Namespace read for ``dali_cache_size``, ``lazy_init_sanity``,
            ``data_train``, ``data_train_idx``, ``data_val``,
            ``data_val_idx``, ``num_examples`` and ``separ_val``.
        trainpipes: Sequence of training pipelines. The first one is queried
            for the epoch size of its ``"Reader"``.
        valpipes: Sequence of validation pipelines. Only accessed when
            ``args.data_val`` is truthy, so it may be ``None`` or empty for
            training-only runs.
        data_paths: Mapping with the keys ``"train_data_tmp"``,
            ``"train_idx_tmp"``, ``"val_data_tmp"`` and ``"val_idx_tmp"``.
        kv: Forwarded to ``_get_rank_and_worker_count`` together with
            ``args`` to obtain ``(rank, num_workers)``.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter)``. The validation iterator
        is ``None`` when ``args.data_val`` is falsy.
    """
    (rank, num_workers) = _get_rank_and_worker_count(args, kv)

    # The iterator constructor below initialises the pipelines, so the real
    # data has to be reachable through the temporary paths before that point.
    # NOTE(review): per the original comment these are symlinks under /tmp
    # that the user has to clean up afterwards -- confirm against
    # ``link_to_tmp_file``.
    if args.dali_cache_size > 0 and args.lazy_init_sanity:
        link_to_tmp_file(args.data_train, data_paths["train_data_tmp"])
        link_to_tmp_file(args.data_train_idx, data_paths["train_idx_tmp"])
        link_to_tmp_file(args.data_val, data_paths["val_data_tmp"])
        link_to_tmp_file(args.data_val_idx, data_paths["val_idx_tmp"])

    dali_train_iter = DALIClassificationIterator(
        trainpipes, args.num_examples // num_workers)

    train_epoch_size = trainpipes[0].epoch_size("Reader")
    if args.num_examples < train_epoch_size:
        warnings.warn(
            "{} training examples will be used, although full training set contains {} examples"
            .format(args.num_examples, train_epoch_size))

    # Without validation data there is nothing to size. Returning here avoids
    # dereferencing ``valpipes``, which may be None or empty in that case.
    if not args.data_val:
        return dali_train_iter, None

    val_epoch_size = valpipes[0].epoch_size("Reader")
    worker_val_examples = val_epoch_size
    if not args.separ_val:
        # Split the validation set across workers; the first
        # ``val_epoch_size % num_workers`` ranks take one extra example.
        worker_val_examples = val_epoch_size // num_workers
        if rank < val_epoch_size % num_workers:
            worker_val_examples += 1

    dali_val_iter = DALIClassificationIterator(
        valpipes, worker_val_examples, fill_last_batch=False)

    return dali_train_iter, dali_val_iter
예제 #3
0
def get_dali_iter(data_dir, batch_size, kv, image_shape, num_gpus):
    """Create DALI train/validation iterators with one pipeline per GPU.

    Args:
        data_dir: Passed to every pipeline as ``db_folder``.
        batch_size: Total batch size; each pipeline gets
            ``batch_size // num_gpus``.
        kv: Accepted but not used by this function.
        image_shape: Accepted but not used by this function.
        num_gpus: Number of pipelines to create; device ids are
            ``0 .. num_gpus - 1``.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter, num_examples)`` where
        ``num_examples`` is the fixed constant 1281167 (presumably the
        ImageNet-1k training set size -- confirm with the caller).
    """
    def make_pipes(pipe_cls):
        # Both pipeline kinds take exactly the same keyword arguments.
        pipes = []
        for gpu in range(num_gpus):
            pipes.append(
                pipe_cls(batch_size=batch_size // num_gpus,
                         num_threads=2,
                         device_id=gpu,
                         num_gpus=num_gpus,
                         db_folder=data_dir))
        return pipes

    trainpipes = make_pipes(HybridTrainPipe)
    valpipes = make_pipes(HybridValPipe)

    # Build only the first pipeline of each kind so the reader can be
    # queried for its epoch size.
    first_train, first_val = trainpipes[0], valpipes[0]
    first_train.build()
    first_val.build()

    print("Training pipeline epoch size: {}".format(
        first_train.epoch_size("Reader")))
    print("Validation pipeline epoch size: {}".format(
        first_val.epoch_size("Reader")))

    dali_train_iter = DALIClassificationIterator(
        trainpipes, first_train.epoch_size("Reader"))
    dali_val_iter = DALIClassificationIterator(
        valpipes, first_val.epoch_size("Reader"))

    num_examples = 1281167
    return dali_train_iter, dali_val_iter, num_examples
예제 #4
0
def get_rec_iter(args, kv=None):
    """Build DALI train/validation iterators from command-line arguments.

    One ``HybridTrainPipe`` (and, when ``args.data_val`` is set, one
    ``HybridValPipe``) is created per GPU listed in ``args.gpus``.

    Args:
        args: Namespace read for ``resize``, ``image_shape``, ``gpus``,
            ``batch_size``, ``dali_threads``, ``validation_dali_threads``,
            ``input_layout``, ``data_train``, ``data_train_idx``,
            ``data_val``, ``data_val_idx``, ``separ_val``, ``dtype``,
            ``dali_nvjpeg_memory_padding``, ``dali_prefetch_queue`` and
            ``num_examples``.
        kv: Optional object exposing ``rank`` and ``num_workers``. When
            falsy, rank 0 of 1 worker is assumed.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter)``. The validation iterator
        is ``None`` when ``args.data_val`` is falsy.
    """
    # Passed to the validation pipelines as ``resize_shp``; per the original
    # comment this is the base length of the shorter image edge.
    resize = int(args.resize)
    # Final shape of the images fed to the network, parsed from a
    # comma-separated string. The first entry is the channel count.
    target_shape = tuple(int(dim) for dim in args.image_shape.split(','))
    pad_output = target_shape[0] == 4
    # Skip empty tokens so stray commas in ``args.gpus`` are harmless.
    gpus = [int(tok) for tok in args.gpus.split(',') if tok]
    gpu_count = len(gpus)
    batch_size = args.batch_size // gpu_count
    num_threads = args.dali_threads
    num_validation_threads = args.validation_dali_threads

    # The model's input layout is the output layout of the image pipeline.
    if args.input_layout == 'NHWC':
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    if kv:
        rank, nWrk = kv.rank, kv.num_workers
    else:
        rank, nWrk = 0, 1

    # Shards are numbered globally: this worker's GPUs start at
    # ``gpu_count * rank``.
    first_shard = gpu_count * rank
    total_shards = gpu_count * nWrk

    trainpipes = []
    for gpu_id in gpus:
        trainpipes.append(
            HybridTrainPipe(
                batch_size=batch_size,
                num_threads=num_threads,
                device_id=gpu_id,
                rec_path=args.data_train,
                idx_path=args.data_train_idx,
                shard_id=gpus.index(gpu_id) + first_shard,
                num_shards=total_shards,
                crop_shape=target_shape[1:],
                output_layout=output_layout,
                pad_output=pad_output,
                dtype=args.dtype,
                nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                prefetch_queue=args.dali_prefetch_queue))

    if args.data_val:
        valpipes = []
        for gpu_id in gpus:
            # With ``separ_val`` every pipeline reads the whole validation
            # set as a single shard; otherwise it is sharded like training.
            if args.separ_val:
                val_shard = 0
            else:
                val_shard = gpus.index(gpu_id) + first_shard
            val_shard_count = 1 if args.separ_val else total_shards
            valpipes.append(
                HybridValPipe(
                    batch_size=batch_size,
                    num_threads=num_validation_threads,
                    device_id=gpu_id,
                    rec_path=args.data_val,
                    idx_path=args.data_val_idx,
                    shard_id=val_shard,
                    num_shards=val_shard_count,
                    crop_shape=target_shape[1:],
                    resize_shp=resize,
                    output_layout=output_layout,
                    pad_output=pad_output,
                    dtype=args.dtype,
                    nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                    prefetch_queue=args.dali_prefetch_queue))
    else:
        valpipes = None

    # Build the first pipeline of each kind so its reader can report the
    # epoch size.
    trainpipes[0].build()
    if args.data_val:
        valpipes[0].build()

    if args.num_examples < trainpipes[0].epoch_size("Reader"):
        warnings.warn("{} training examples will be used, although full training set contains {} examples".format(args.num_examples, trainpipes[0].epoch_size("Reader")))
    dali_train_iter = DALIClassificationIterator(trainpipes, args.num_examples // nWrk)

    if args.data_val:
        val_total = valpipes[0].epoch_size("Reader")
        val_divisor = 1 if args.separ_val else nWrk
        dali_val_iter = DALIClassificationIterator(
            valpipes, val_total // val_divisor, fill_last_batch=False)
    else:
        dali_val_iter = None
    return dali_train_iter, dali_val_iter
예제 #5
0
def get_rec_iter(args, trainpipes, valpipes, cvalpipes, kv=None):
    """Wrap already-constructed DALI pipelines in MXNet classification iterators.

    Args:
        args: Namespace read for ``num_examples``, ``no_augument_epoch``,
            ``num_epochs``, ``data_val`` and ``separ_val``.
        trainpipes: Sequence of training pipelines. The first one is queried
            for the epoch size of its ``"Reader"``.
        valpipes: Sequence of validation pipelines. Only accessed when
            ``args.data_val`` is truthy, so it may be ``None`` or empty for
            training-only runs.
        cvalpipes: Pipelines for the extra iterator that is created only
            when ``args.no_augument_epoch < args.num_epochs``.
        kv: Optional object exposing ``rank`` and ``num_workers``. When
            falsy, rank 0 of 1 worker is assumed.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter, dali_cval_iter)``. The
        validation iterator is ``None`` when ``args.data_val`` is falsy, and
        the third iterator is ``None`` when the epoch condition above is not
        met.
    """
    rank = kv.rank if kv else 0
    nWrk = kv.num_workers if kv else 1

    dali_train_iter = DALIClassificationIterator(trainpipes,
                                                 args.num_examples // nWrk)
    if args.no_augument_epoch < args.num_epochs:
        # Sized like the training iterator.
        dali_cval_iter = DALIClassificationIterator(cvalpipes,
                                                    args.num_examples // nWrk)
    else:
        dali_cval_iter = None

    mx_resnet_print(key=mlperf_log.INPUT_SIZE,
                    val=trainpipes[0].epoch_size("Reader"))

    mx_resnet_print(key=mlperf_log.PREPROC_NUM_TRAIN_EXAMPLES,
                    val=trainpipes[0].epoch_size("Reader"))

    if args.data_val:
        mx_resnet_print(key=mlperf_log.EVAL_SIZE,
                        val=valpipes[0].epoch_size("Reader"))

        mx_resnet_print(key=mlperf_log.PREPROC_NUM_EVAL_EXAMPLES,
                        val=valpipes[0].epoch_size("Reader"))

    if args.num_examples < trainpipes[0].epoch_size("Reader"):
        warnings.warn(
            "{} training examples will be used, although full training set contains {} examples"
            .format(args.num_examples, trainpipes[0].epoch_size("Reader")))

    # Size the validation iterator only when validation data exists. The
    # logging above already guards ``valpipes`` the same way; doing it here
    # too keeps a training-only run from dereferencing a missing ``valpipes``.
    dali_val_iter = None
    if args.data_val:
        val_epoch_size = valpipes[0].epoch_size("Reader")
        worker_val_examples = val_epoch_size
        if not args.separ_val:
            # Split across workers; the first ``val_epoch_size % nWrk`` ranks
            # take one extra example.
            worker_val_examples = val_epoch_size // nWrk
            if rank < val_epoch_size % nWrk:
                worker_val_examples += 1

        dali_val_iter = DALIClassificationIterator(
            valpipes, worker_val_examples, fill_last_batch=False)
    return dali_train_iter, dali_val_iter, dali_cval_iter
예제 #6
0
def get_dali_dataloder(batch_size, ctx, opt):
    """Create DALI train/validation loaders, one pipeline per context.

    Args:
        batch_size: Forwarded unchanged to every pipeline.
        ctx: Sized collection of contexts; only its length is used, and
            device ids are ``0 .. len(ctx) - 1``.
        opt: Options object read for ``rec_train``, ``rec_train_idx``,
            ``rec_val``, ``rec_val_idx`` (each passed through
            ``os.path.expanduser``) and ``input_size``.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    from nvidia.dali.plugin.mxnet import DALIClassificationIterator
    from lib.data.loader import HybridTrainPipe, HybridValPipe

    train_rec_path = os.path.expanduser(opt.rec_train)
    train_idx_path = os.path.expanduser(opt.rec_train_idx)
    val_rec_path = os.path.expanduser(opt.rec_val)
    val_idx_path = os.path.expanduser(opt.rec_val_idx)
    image_size = opt.input_size
    device_count = len(ctx)

    def make_pipes(pipe_cls, rec_file, idx_file):
        # Both pipeline kinds share the same keyword arguments.
        pipes = []
        for dev in range(device_count):
            pipes.append(
                pipe_cls(rec_path=rec_file,
                         index_path=idx_file,
                         batch_size=batch_size,
                         input_size=image_size,
                         num_gpus=device_count,
                         num_threads=32,
                         device_id=dev))
        return pipes

    trainpipes = make_pipes(HybridTrainPipe, train_rec_path, train_idx_path)
    valpipes = make_pipes(HybridValPipe, val_rec_path, val_idx_path)

    # Build the first pipeline of each kind so its reader can report the
    # epoch size.
    head_train, head_val = trainpipes[0], valpipes[0]
    head_train.build()
    head_val.build()

    train_loader = DALIClassificationIterator(
        trainpipes, head_train.epoch_size("Reader"))
    val_loader = DALIClassificationIterator(
        valpipes, head_val.epoch_size("Reader"))

    logging.info('dali dataloder was loaded.')

    return train_loader, val_loader
예제 #7
0
def get_rec_iter(args, kv=None, dali_cpu=False):
    """Build DALI train/validation iterators, with optional Horovod support.

    Args:
        args: Namespace read for ``gpus``, ``dali_threads``,
            ``dali_validation_threads``, ``image_shape``, ``input_layout``,
            ``kv_store``, ``batch_size``, ``data_train``, ``data_train_idx``,
            ``data_val``, ``data_val_idx``, ``dali_separ_val``,
            ``data_val_resize``, ``dtype``, ``dali_nvjpeg_memory_padding``,
            ``dali_prefetch_queue``, ``dali_nvjpeg_width_hint``,
            ``dali_nvjpeg_height_hint`` and ``num_examples``. It is also
            passed to every pipeline as ``args``.
        kv: Optional object exposing ``rank`` and ``num_workers``. Ignored
            when ``'horovod'`` occurs in ``args.kv_store``.
        dali_cpu: Forwarded unchanged to every pipeline.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter)``. The validation iterator
        is ``None`` when ``args.data_val`` is falsy.
    """
    gpus = args.gpus
    num_threads = args.dali_threads
    num_validation_threads = args.dali_validation_threads
    pad_output = (args.image_shape[0] == 4)

    # The model's input layout is the output layout of the image pipeline.
    if args.input_layout == 'NHWC':
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    if 'horovod' in args.kv_store:
        rank, nWrk = hvd.rank(), hvd.size()
    elif kv:
        rank, nWrk = kv.rank, kv.num_workers
    else:
        rank, nWrk = 0, 1

    # ``args.batch_size`` is divided by the worker count and then by the
    # number of GPUs handled by this worker.
    batch_size = args.batch_size // nWrk // len(gpus)

    trainpipes = []
    for gpu_id in gpus:
        trainpipes.append(
            HybridTrainPipe(
                args=args,
                batch_size=batch_size,
                num_threads=num_threads,
                device_id=gpu_id,
                rec_path=args.data_train,
                idx_path=args.data_train_idx,
                shard_id=gpus.index(gpu_id) + len(gpus) * rank,
                num_shards=len(gpus) * nWrk,
                crop_shape=args.image_shape[1:],
                output_layout=output_layout,
                dtype=args.dtype,
                pad_output=pad_output,
                dali_cpu=dali_cpu,
                nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                prefetch_queue=args.dali_prefetch_queue,
                nvjpeg_width_hint=args.dali_nvjpeg_width_hint,
                nvjpeg_height_hint=args.dali_nvjpeg_height_hint))

    if args.data_val:
        valpipes = []
        for gpu_id in gpus:
            # With ``dali_separ_val`` every pipeline reads the whole
            # validation set as one shard; otherwise it is sharded like the
            # training set.
            if args.dali_separ_val:
                val_shard = 0
            else:
                val_shard = gpus.index(gpu_id) + len(gpus) * rank
            val_shard_count = 1 if args.dali_separ_val else len(gpus) * nWrk
            valpipes.append(
                HybridValPipe(
                    args=args,
                    batch_size=batch_size,
                    num_threads=num_validation_threads,
                    device_id=gpu_id,
                    rec_path=args.data_val,
                    idx_path=args.data_val_idx,
                    shard_id=val_shard,
                    num_shards=val_shard_count,
                    crop_shape=args.image_shape[1:],
                    resize_shp=args.data_val_resize,
                    output_layout=output_layout,
                    dtype=args.dtype,
                    pad_output=pad_output,
                    dali_cpu=dali_cpu,
                    nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                    prefetch_queue=args.dali_prefetch_queue,
                    nvjpeg_width_hint=args.dali_nvjpeg_width_hint,
                    nvjpeg_height_hint=args.dali_nvjpeg_height_hint))

    # Build the first training pipeline so its reader can report the epoch
    # size.
    trainpipes[0].build()
    if args.data_val:
        valpipes[0].build()
        worker_val_examples = valpipes[0].epoch_size("Reader")
        if not args.dali_separ_val:
            # Split across workers; the first ``total % nWrk`` ranks take one
            # extra example.
            worker_val_examples = worker_val_examples // nWrk
            if rank < valpipes[0].epoch_size("Reader") % nWrk:
                worker_val_examples += 1

    if args.num_examples < trainpipes[0].epoch_size("Reader"):
        warnings.warn("{} training examples will be used, although full training set contains {} examples".format(args.num_examples, trainpipes[0].epoch_size("Reader")))
    dali_train_iter = DALIClassificationIterator(trainpipes, args.num_examples // nWrk)

    dali_val_iter = None
    if args.data_val:
        dali_val_iter = DALIClassificationIterator(
            valpipes, worker_val_examples, fill_last_batch=False)

    return dali_train_iter, dali_val_iter
예제 #8
0
    def get(data_shape, label_shape, labels_range, args, kv_store=None):
        """Creates data iterator.

        The first matching source wins, in this order: synthetic data (empty
        ``args.data_dir``), a single ``.npz`` file in ``args.data_dir``, a
        DALI pipeline over ``train.rec``/``train.idx`` (only when
        ``args.use_dali`` is True), and finally the standard MXNet
        ``ImageRecordIter`` over the same two files.

        Args:
            data_shape (tuple): Shape of input data tensor (X) including batch size. The batch size is the 0th
                dimension (bsz = data_shape[0]). This batch size must be an effective batch for a whole node.
            label_shape (tuple): Shape of input label tensor (Y) including batch size. The batch size is the 0th
                dimension (bsz = labels_shape[0]). This batch size must be an effective batch for a whole node.
            labels_range (list): List of output labels. For ImageNet, that would be a list with integers from 0 to 999.
            args (argparse.Namespace): Command line arguments.
            kv_store (mxnet.kvstore.KVStore): An object returned by mx.kvstore.create('...').

        The data_shape and label_shape have first dimension to be batch dimension. It is a local batch, i.e.:
            replica_batch * num_devices
        Returns:
            Data iterator. The DALI branch returns a ``DALIClassificationIterator``; every other branch
            returns an ``mx.io.ResizeIter`` limited to ``args.num_warmup_batches + args.num_batches`` batches.
        Raises:
            ValueError: If the ``.npz`` file has no ``data`` entry, if Horovod or DALI is requested but the
                library is missing, if DALI is requested without GPUs, if the standard record iterator is
                asked for a layout other than NCHW, or if no supported data set is found in ``args.data_dir``.
        """
        logging.info("Creating data iterator: data_shape=%s, label_shape=%s.",
                     data_shape, label_shape)
        # 1. Synthetic Iterator ----------------------------------------------------------------------------------------
        if args.data_dir is None or args.data_dir == "":
            logging.info(
                "Creating synthetic data iterator with data shape = %s.",
                data_shape)
            return mx.io.ResizeIter(
                SyntheticDataIterator(data_shape, label_shape, labels_range,
                                      args.dtype),
                args.num_warmup_batches + args.num_batches)
        # 2. Numpy Array Iterator --------------------------------------------------------------------------------------
        fnames = [
            f for f in os.listdir(args.data_dir)
            if os.path.isfile(os.path.join(args.data_dir, f))
        ]
        if len(fnames) == 1 and fnames[0].endswith('.npz'):
            npz_path = os.path.join(args.data_dir, fnames[0])
            # Read both arrays inside the ``with`` block so the archive's
            # file handle is closed afterwards. Membership is checked through
            # ``files`` because ``NpzFile.get`` is not available on every
            # NumPy release.
            with np.load(npz_path) as dataset:
                data = dataset['data'] if 'data' in dataset.files else None
                labels = dataset['labels'] if 'labels' in dataset.files else None
            if data is None:
                raise ValueError("The dataset ({}) does not contain 'data' "
                                 "field.".format(npz_path))
            # 'labels' is optional, so it must not be dereferenced blindly.
            labels_shape = None if labels is None else labels.shape
            logging.info("Creating NDArray iterator: data=%s, labels=%s",
                         data.shape, labels_shape)
            nd_arr_iter = mx.io.NDArrayIter(data=data,
                                            label=labels,
                                            batch_size=data_shape[0],
                                            shuffle=False,
                                            last_batch_handle='discard')
            return mx.io.ResizeIter(nd_arr_iter,
                                    args.num_warmup_batches + args.num_batches)
        # 3. DALI Iterator ---------------------------------------------------------------------------------------------
        if 'horovod' in args.kv_store:
            if not hvd:
                raise ValueError("Horovod library not found")
            rank, nworker = hvd.rank(), hvd.size()
        else:
            rank, nworker = (kv_store.rank,
                             kv_store.num_workers) if kv_store else (0, 1)
        dataset_files = [
            os.path.join(args.data_dir, 'train.rec'),
            os.path.join(args.data_dir, 'train.idx')
        ]
        if os.path.exists(dataset_files[0]) and os.path.exists(
                dataset_files[1]):
            if args.use_dali is True:
                # https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/examples/mxnet/mxnet-resnet50.html
                if dali is None:
                    raise ValueError(
                        "DALI library not found (use_dali is true).")
                if len(args.gpus) == 0:
                    raise ValueError(
                        "DALI can only be used with GPU devices (gpus={})".
                        format(args.gpus))
                logging.info("Creating DALI iterator")
                output_layout = dali.types.NHWC if args.input_layout == 'NHWC' else dali.types.NCHW
                # Spatial dimensions and the channel axis depend on the
                # requested layout of ``data_shape``.
                cropshape = (
                    data_shape[1],
                    data_shape[2]) if args.input_layout == 'NHWC' else (
                        data_shape[2], data_shape[3])
                channel_idx = 3 if args.input_layout == 'NHWC' else 1
                trainpipes = [
                    HybridTrainPipe(
                        batch_size=data_shape[0] //
                        len(args.gpus),  # Replica batch.
                        num_threads=3,  # Per GPU
                        device_id=gpu_id,
                        rec_path=dataset_files[0],
                        idx_path=dataset_files[1],
                        shard_id=args.gpus.index(gpu_id) +
                        len(args.gpus) * rank,
                        num_shards=len(args.gpus) * nworker,
                        crop_shape=cropshape,
                        output_layout=output_layout,
                        pad_output=data_shape[channel_idx] == 4,
                        dtype=args.dtype,
                        nvjpeg_padding=16 * 1024 * 1024,
                        prefetch_queue=3) for gpu_id in args.gpus
                ]
                trainpipes[0].build()
                # The epoch is capped at the number of samples the benchmark
                # consumes rather than the reader's full epoch size.
                epoch_size = data_shape[0] * (args.num_warmup_batches +
                                              args.num_batches)
                return DALIClassificationIterator(
                    trainpipes,  # List of pipelines to use
                    epoch_size,  # Epoch size.
                    'data',  # Data name for provided symbols.
                    'softmax_label',  # Label name for provided symbols.
                    args.
                    input_layout  # Layout of the pipeline outputs (NCHW / NHWC).
                )

            # 4. MXNET Image Record Iterator ---------------------------------------------------------------------------
            # https://mxnet.incubator.apache.org/api/python/io.html#mxnet.io.imagerecorditer
            # https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/common/data.py
            # this iterator supports channels first format only.
            if args.input_layout != 'NCHW':
                raise ValueError(
                    "Standard mxnet image record iterator only supports channel first format (NCHW), "
                    "requested format: {}.".format(args.input_layout))

            logging.info(
                "Creating standard image record iterator (ImageRecordIter) with data layout = %s.",
                args.input_layout)
            num_preprocess_threads = args.preprocess_threads
            if num_preprocess_threads <= 4:
                logging.warning(
                    "[Number of pre-process threads is %d. This may be too small for large number of GPUs. "
                    "If you do not see speedup as you add more GPUs, increase this number.",
                    num_preprocess_threads)
            img_rec_iter = mx.io.ImageRecordIter(
                path_imgrec=dataset_files[0],
                path_imgidx=dataset_files[1],
                data_name='data',
                label_name='softmax_label',
                data_shape=(data_shape[1], data_shape[2], data_shape[3]),
                batch_size=data_shape[0],
                rand_crop=True,
                rand_mirror=True,
                preprocess_threads=num_preprocess_threads,
                prefetch_buffer=args.prefetch_buffer,
                dtype='float32',
                num_parts=nworker,
                part_index=rank)
            return mx.io.ResizeIter(img_rec_iter,
                                    args.num_warmup_batches + args.num_batches)

        # 5. All Failed ------------------------------------------------------------------------------------------------
        raise ValueError(
            "Cannot find data set files. MXNET benchmark backend supports the following data sets:\n"
            "  1. Synthetic data set. It is used when data_dir parameter is none or empty:\n"
            "     -Pexp.data_dir='\"\"'\n"
            "  2. Real data set in a file with 'npz' extension. This data set is used if data_dir value\n"
            "     is a valid directory and contains one file with npz extension. If found, this file\n"
            "     must contain a dictionary with at least one key - `data`. It can also contain 'labels'\n"
            "     key for labels.\n"
            "  3. Real image data set in standard RecordIO format. This data set is used if provided data directory\n"
            "     contains 'train.rec' and 'train.idx' files.'")
예제 #9
0
def get_rec_iter(args, kv=None):
    """Build DALI train/validation iterators and log MLPerf input metadata.

    One ``HybridTrainPipe`` (and, when ``args.data_val`` is set, one
    ``HybridValPipe``) is created per GPU listed in ``args.gpus``.

    Args:
        args: Namespace read for ``resize``, ``image_shape``, ``gpus``,
            ``batch_size``, ``dali_threads``, ``input_layout``,
            ``data_train``, ``data_train_idx``, ``data_val``,
            ``data_val_idx``, ``separ_val``, ``min_random_area``,
            ``max_random_area``, ``min_random_aspect_ratio``,
            ``max_random_aspect_ratio``, ``dali_nvjpeg_memory_padding``,
            ``dali_prefetch_queue``, ``seed``, ``dtype`` and
            ``num_examples``.
        kv: Optional object exposing ``rank`` and ``num_workers``. When
            falsy, rank 0 of 1 worker is assumed.

    Returns:
        Tuple ``(dali_train_iter, dali_val_iter)``. The validation iterator
        is ``None`` when ``args.data_val`` is falsy.
    """
    # Passed to the validation pipelines as ``resize_shp``; per the original
    # comment this is the base length of the shorter image edge.
    resize = int(args.resize)
    # Final shape of the images fed to the network, parsed from a
    # comma-separated string. The first entry is the channel count.
    target_shape = tuple(int(dim) for dim in args.image_shape.split(','))

    pad_output = target_shape[0] == 4
    # Skip empty tokens so stray commas in ``args.gpus`` are harmless.
    gpus = [int(tok) for tok in args.gpus.split(',') if tok]
    batch_size = args.batch_size // len(gpus)

    mx_resnet_print(
            key=mlperf_log.INPUT_BATCH_SIZE,
            val=batch_size) # TODO MPI WORLD SIZE

    num_threads = args.dali_threads

    # The model's input layout is the output layout of the image pipeline.
    output_layout = types.NHWC if args.input_layout == 'NHWC' else types.NCHW

    rank = kv.rank if kv else 0
    nWrk = kv.num_workers if kv else 1

    trainpipes = [HybridTrainPipe(batch_size      = batch_size,
                                  num_threads     = num_threads,
                                  device_id       = gpu_id,
                                  rec_path        = args.data_train,
                                  idx_path        = args.data_train_idx,
                                  shard_id        = gpus.index(gpu_id) + len(gpus)*rank,
                                  num_shards      = len(gpus)*nWrk,
                                  crop_shape      = target_shape[1:],
                                  min_random_area = args.min_random_area,
                                  max_random_area = args.max_random_area,
                                  min_random_aspect_ratio = args.min_random_aspect_ratio,
                                  max_random_aspect_ratio = args.max_random_aspect_ratio,
                                  nvjpeg_padding  = args.dali_nvjpeg_memory_padding * 1024 * 1024,
                                  prefetch_queue  = args.dali_prefetch_queue,
                                  seed            = args.seed,
                                  output_layout   = output_layout,
                                  pad_output      = pad_output,
                                  dtype           = args.dtype,
                                  mlperf_print    = gpu_id == gpus[0]) for gpu_id in gpus]

    # With ``separ_val`` every pipeline reads the whole validation set as a
    # single shard; otherwise it is sharded like the training set.
    valpipes = [HybridValPipe(batch_size     = batch_size,
                              num_threads    = num_threads,
                              device_id      = gpu_id,
                              rec_path       = args.data_val,
                              idx_path       = args.data_val_idx,
                              shard_id       = 0 if args.separ_val
                                                 else gpus.index(gpu_id) + len(gpus)*rank,
                              num_shards     = 1 if args.separ_val else len(gpus)*nWrk,
                              crop_shape     = target_shape[1:],
                              nvjpeg_padding = args.dali_nvjpeg_memory_padding * 1024 * 1024,
                              prefetch_queue = args.dali_prefetch_queue,
                              seed           = args.seed,
                              resize_shp     = resize,
                              output_layout  = output_layout,
                              pad_output     = pad_output,
                              dtype          = args.dtype,
                              mlperf_print   = gpu_id == gpus[0]) for gpu_id in gpus] if args.data_val else None

    # Build the first pipeline of each kind so its reader can report the
    # epoch size.
    trainpipes[0].build()
    if args.data_val:
        valpipes[0].build()

    train_epoch_size = trainpipes[0].epoch_size("Reader")

    mx_resnet_print(
            key=mlperf_log.INPUT_SIZE,
            val=train_epoch_size)

    mx_resnet_print(
            key=mlperf_log.PREPROC_NUM_TRAIN_EXAMPLES,
            val=train_epoch_size)

    if args.data_val:
        mx_resnet_print(
                key=mlperf_log.EVAL_SIZE,
                val=valpipes[0].epoch_size("Reader"))

        mx_resnet_print(
                key=mlperf_log.PREPROC_NUM_EVAL_EXAMPLES,
                val=valpipes[0].epoch_size("Reader"))

    if args.num_examples < train_epoch_size:
        warnings.warn("{} training examples will be used, although full training set contains {} examples".format(args.num_examples, train_epoch_size))
    dali_train_iter = DALIClassificationIterator(trainpipes, args.num_examples // nWrk)

    # ``valpipes`` is None without validation data, so the per-worker size
    # is computed only when there is something to iterate over.
    dali_val_iter = None
    if args.data_val:
        val_epoch_size = valpipes[0].epoch_size("Reader")
        worker_val_examples = val_epoch_size
        if not args.separ_val:
            # Split across workers; the first ``val_epoch_size % nWrk`` ranks
            # take one extra example.
            worker_val_examples = val_epoch_size // nWrk
            if rank < val_epoch_size % nWrk:
                worker_val_examples += 1

        dali_val_iter = DALIClassificationIterator(valpipes, worker_val_examples, fill_last_batch = False)
    return dali_train_iter, dali_val_iter
예제 #10
0
def inst_iterators(data_train,
                   data_dev,
                   batch_size=1,
                   data_shape=(3, 224, 224),
                   resize=(-1, -1),
                   resize_scale=(1, 1),
                   resize_area=(1, 1),
                   use_svm_label=False,
                   use_dali=False):
    '''
    Instantiate the training and developing data iterators.

    The loader type is selected from the global config, not from the
    arguments: ``cfg.TRAIN.USE_DALI`` and ``cfg.TRAIN.USE_REC`` choose between
    MXNet recordio iterators, MXNet image-list iterators and DALI pipelines.

    :params:
    data_train      training rec/lst
    data_dev        developing rec/lst
    batch_size      mini batch size, sum of all device (the DALI branch
                    divides it by the number of GPUs in cfg.TRAIN.GPU_IDX)
    data_shape      input shape
    resize          resize shorter edge of (train,dev) data, -1 means no resize
    resize_scale    (min, max) range passed as min/max_random_scale to the
                    training iterator
    resize_area     (min, max) range passed as min/max_random_area to the
                    recordio training iterator
    use_svm_label   set as True if classifier needs svm label name
    use_dali        currently unused; cfg.TRAIN.USE_DALI decides whether DALI
                    is used
    :return:
    train, dev      tuple of 2 iterators
    :raises:
    AssertionError  if either data path is empty or mean/std are not 3 items
    ValueError      if the config requests DALI without recordio input, which
                    no branch below supports
    '''
    # initialization
    # NOTE: logging.error() is only evaluated when the assertion fails; it
    # logs the message and returns None, which becomes the assertion message.
    assert data_train and data_dev, logging.error(
        "Please input training or developing data")
    mean, std = cfg.TRAIN.MEAN_RGB, cfg.TRAIN.STD_RGB
    assert len(mean) == 3 and len(std) == 3, logging.error(
        "Mean or Std should be a list of 3 items")
    mean_r, mean_g, mean_b, std_r, std_g, std_b = mean[:] + std[:]
    min_random_scale, max_random_scale = resize_scale
    min_random_area, max_random_area = resize_area
    # A falsy configured value (0, None, ...) is normalised to None.
    min_aspect_ratio = cfg.TRAIN.MIN_ASPECT_RATIO if cfg.TRAIN.MIN_ASPECT_RATIO else None
    logging.info('Input normalization : Mean-RGB {}, Std-RGB {}'.format(
        [mean_r, mean_g, mean_b], [std_r, std_g, std_b]))
    logging.info(
        'Input scale augmentation : Max-random-sclae {}, Min-random-scale {}'.
        format(max_random_scale, min_random_scale))
    logging.info(
        'Input area augmentation : Max-random-area {}, Min-random-area {}'.
        format(max_random_area, min_random_area))
    resize_train, resize_dev = resize
    label_name = 'softmax_label' if not use_svm_label else 'svm_label'

    # build iterators
    if not cfg.TRAIN.USE_DALI and cfg.TRAIN.USE_REC:
        logging.info("Creating recordio iterators")
        train = mx.io.ImageRecordIter(
            dtype=cfg.TRAIN.DATA_TYPE,
            path_imgrec=data_train,
            preprocess_threads=cfg.TRAIN.PROCESS_THREAD,
            data_name='data',
            label_name=label_name,
            label_width=cfg.TRAIN.LABEL_WIDTH,
            data_shape=data_shape,
            batch_size=batch_size,
            resize=resize_train,
            max_random_scale=max_random_scale,
            min_random_scale=min_random_scale,
            shuffle=cfg.TRAIN.SHUFFLE,
            rand_crop=cfg.TRAIN.RAND_CROP,
            rand_mirror=cfg.TRAIN.RAND_MIRROR,
            max_rotate_angle=cfg.TRAIN.MAX_ROTATE_ANGLE,
            max_aspect_ratio=cfg.TRAIN.MAX_ASPECT_RATIO,
            min_aspect_ratio=min_aspect_ratio,
            random_resized_crop=cfg.TRAIN.RANDOM_RESIZED_CROP,
            max_random_area=max_random_area,
            min_random_area=min_random_area,
            max_img_size=cfg.TRAIN.MAX_IMG_SIZE,
            min_img_size=cfg.TRAIN.MIN_IMG_SIZE,
            max_shear_ratio=cfg.TRAIN.MAX_SHEAR_RATIO,
            brightness=cfg.TRAIN.BRIGHTNESS_JITTER,
            contrast=cfg.TRAIN.CONTRAST_JITTER,
            saturation=cfg.TRAIN.SATURATION_JITTER,
            hue=cfg.TRAIN.HUE_JITTER,
            pca_noise=cfg.TRAIN.PCA_NOISE,
            random_h=cfg.TRAIN.RANDOM_H,
            random_s=cfg.TRAIN.RANDOM_S,
            random_l=cfg.TRAIN.RANDOM_L,
            mean_r=mean_r,
            mean_g=mean_g,
            mean_b=mean_b,
            std_r=std_r,
            std_g=std_g,
            std_b=std_b,
            inter_method=cfg.TRAIN.INTERPOLATION_METHOD)
        dev = mx.io.ImageRecordIter(
            dtype=cfg.TRAIN.DATA_TYPE,
            path_imgrec=data_dev,
            preprocess_threads=cfg.TRAIN.PROCESS_THREAD,
            data_name='data',
            label_name=label_name,
            label_width=cfg.TRAIN.LABEL_WIDTH,
            batch_size=batch_size,
            data_shape=data_shape,
            resize=resize_dev,
            shuffle=False,
            rand_crop=False,  # center crop
            rand_mirror=False,
            mean_r=mean_r,
            mean_g=mean_g,
            mean_b=mean_b,
            std_r=std_r,
            std_g=std_g,
            std_b=std_b,
            inter_method=cfg.TRAIN.INTERPOLATION_METHOD)

    elif not cfg.TRAIN.USE_DALI and not cfg.TRAIN.USE_REC:
        logging.info("Creating image iterators")
        # set decoding thread number
        os.environ['MXNET_CPU_WORKER_NTHREADS'] = str(cfg.TRAIN.PROCESS_THREAD)
        # set rand_crop and rand_resize as default, and append separately
        aug_list_train = mx.image.CreateAugmenter(
            data_shape=data_shape,
            resize=resize_train,
            rand_mirror=cfg.TRAIN.RAND_MIRROR,
            mean=np.asarray(mean),
            std=np.asarray(std),
            brightness=cfg.TRAIN.BRIGHTNESS_JITTER,
            contrast=cfg.TRAIN.CONTRAST_JITTER,
            saturation=cfg.TRAIN.SATURATION_JITTER,
            hue=cfg.TRAIN.HUE_JITTER,
            pca_noise=cfg.TRAIN.PCA_NOISE,
            inter_method=cfg.TRAIN.INTERPOLATION_METHOD)

        if cfg.TRAIN.RAND_CROP and min_random_scale != 1:
            aug_list_train.append(
                mx.image.RandomSizedCropAug((data_shape[2], data_shape[1]),
                                            min_random_scale**2,
                                            (1 - cfg.TRAIN.MAX_ASPECT_RATIO,
                                             1 + cfg.TRAIN.MAX_ASPECT_RATIO),
                                            cfg.TRAIN.INTERPOLATION_METHOD))
        elif cfg.TRAIN.RAND_CROP:
            aug_list_train.append(
                mx.image.RandomCropAug((data_shape[2], data_shape[1]),
                                       cfg.TRAIN.INTERPOLATION_METHOD))

        # set rand_crop and rand_resize as default to use center-crop
        aug_list_dev = mx.image.CreateAugmenter(
            data_shape=data_shape,
            resize=resize_dev,
            mean=np.asarray(mean),
            std=np.asarray(std),
            inter_method=cfg.TRAIN.INTERPOLATION_METHOD)

        try:
            train = mx.image.ImageIter(
                dtype=cfg.TRAIN.DATA_TYPE,
                path_imglist=data_train,
                data_name='data',
                label_name=label_name,
                label_width=cfg.TRAIN.LABEL_WIDTH,
                data_shape=data_shape,
                batch_size=batch_size,
                path_root=cfg.TRAIN.TRAIN_IMG_PREFIX,
                shuffle=cfg.TRAIN.SHUFFLE,
                last_batch_handle=cfg.TRAIN.LAST_BATCH_HANDLE,
                aug_list=aug_list_train)
            # NOTE(review): the dev iterator reuses cfg.TRAIN.SHUFFLE, whereas
            # the recordio dev iterator above never shuffles - confirm intent.
            dev = mx.image.ImageIter(
                dtype=cfg.TRAIN.DATA_TYPE,
                path_imglist=data_dev,
                data_name='data',
                label_name=label_name,
                label_width=cfg.TRAIN.LABEL_WIDTH,
                data_shape=data_shape,
                batch_size=batch_size,
                path_root=cfg.TRAIN.DEV_IMG_PREFIX,
                shuffle=cfg.TRAIN.SHUFFLE,
                last_batch_handle=cfg.TRAIN.LAST_BATCH_HANDLE,
                aug_list=aug_list_dev)
        except Exception:
            # Surface the real cause instead of swallowing it and failing
            # later with an unrelated UnboundLocalError at the return.
            logging.exception("Failed to create image iterators")
            raise

    elif cfg.TRAIN.USE_DALI and cfg.TRAIN.USE_REC:
        from dali_util import HybridTrainPipe, HybridValPipe
        from nvidia.dali.plugin.mxnet import DALIClassificationIterator
        num_gpus = len(cfg.TRAIN.GPU_IDX)
        # Floor division keeps the per-GPU batch size an integer; true
        # division would hand the pipelines a float under Python 3.
        batch_size //= num_gpus
        train_pipes = [
            HybridTrainPipe(batch_size=batch_size,
                            num_threads=cfg.TRAIN.PROCESS_THREAD,
                            device_id=i,
                            num_gpus=num_gpus) for i in range(num_gpus)
        ]
        dev_pipes = [
            HybridValPipe(batch_size=batch_size,
                          num_threads=cfg.TRAIN.PROCESS_THREAD,
                          device_id=i,
                          num_gpus=num_gpus) for i in range(num_gpus)
        ]
        # Only the first pipeline is built here, to query the epoch size.
        train_pipes[0].build()
        dev_pipes[0].build()
        train = DALIClassificationIterator(train_pipes,
                                           train_pipes[0].epoch_size("Reader"))
        dev = DALIClassificationIterator(dev_pipes,
                                         dev_pipes[0].epoch_size("Reader"))

    else:
        # Remaining combination: USE_DALI without USE_REC. No iterators can
        # be built, so fail explicitly rather than at the return statement.
        logging.error('Invalid data loader type')
        raise ValueError('Invalid data loader type')
    logging.info("Data iters created successfully")
    return train, dev
예제 #11
0
# Merge the loss block's parameters into the set handed to the trainer, so
# the single SGD trainer below updates them together with the rest.
train_params.update(loss.params)
trainer = gluon.Trainer(train_params, 'sgd', {
    'learning_rate': lr,
    'momentum': momentum,
    'wd': wd
})
# Index into lr_steps of the next learning-rate decay step.
lr_counter = 0

# Record the hyper-parameters of this run.
logger.info([lamda, r_init, lr_steps, lr, momentum, wd, batch_size])

# Iteration and epoch counters.
it, epoch = 0, 0

# Running loss / accuracy metrics and wall-clock timestamps.
loss_mtc, acc_mtc = mx.metric.Loss(), mx.metric.Accuracy()
tic = time.time()
btic = time.time()
# MXNet iterator over the DALI pipelines; `size` is defined elsewhere -
# presumably the number of samples per epoch (TODO confirm).
dali_iter = DALIClassificationIterator(train_pipes, size)

while it < iters + 1:
    if it == lr_steps[lr_counter]:
        trainer.set_learning_rate(trainer.learning_rate * 0.1)
        lr_counter += 1

    for batches in tqdm(dali_iter):
        datas, labels = split_and_load(batches, num_gpu)

        with ag.record():
            ots = [net(X) for X in datas]
            embedds = [ot[0] for ot in ots]
            outputs = [ot[1] for ot in ots]
            losses = [
                loss(yhat, y, emb)
예제 #12
0
    # 多卡测试,速度和单卡一样,也是18000samples/s,可能主要卡在 SSD 读取速度上了,2080Ti GPU占用20%左右
    # 测试 HHD 8000 samples/s, SSD 18000 samples/s
    # trainpipes = [HybridTrainPipe(path_imgidx, path_imgrec, batch_size=batch_size, num_threads=6, device_id = i, num_gpus = N) for i in range(N)]
    # htp = trainpipes[0]
    # 单卡测试
    htp = HybridTrainPipe(path_imgrec,
                          batch_size,
                          6,
                          device_id=0,
                          num_gpus=N,
                          initial_fill=batch_size)
    trainpipes = [htp]

    htp.build()
    print("Training pipeline epoch size: {}".format(htp.epoch_size("Reader")))
    dali_train_iter = DALIClassificationIterator(trainpipes,
                                                 htp.epoch_size("Reader"))
    print([dali_train_iter.provide_data[0][:2]],
          [dali_train_iter.provide_label[0][:2]])
    import time
    time_start = time.time()
    batch_num = 0
    while True:
        batch = dali_train_iter.next()
        batch_num += 1
        # # print("batch num:", len(batch))
        # # # print("batch:", batch[0].asnumpy())
        # # print("elem num:", len(batch[0].data))
        # # print("image num:", batch[0].data[0].shape)
        # # print("label num:", batch[0].label[0].shape)
        # 查看图像结果
        # for image, label in zip(batch[0].data[0], batch[0].label[0]):
예제 #13
0
파일: dali.py 프로젝트: zgerem/gluon-cv
def get_rec_iter(args, kv=None, batch_fn=None, dali_cpu=False):
    """Build DALI training and validation iterators over RecordIO data.

    One pipeline is created per device. The data is split into
    ``num_devices * num_workers`` shards, with this worker's shard ids
    starting at ``num_devices * rank``.

    Args:
        args: parsed options. Reads gpus, image_shape, input_layout,
            kv_store, batch_size, dtype, num_examples, the rec_* paths and
            the dali_* tuning options.
        kv: optional kvstore supplying ``rank`` and ``num_workers`` when
            horovod is not named in ``args.kv_store``.
        batch_fn: returned unchanged as the third tuple element.
        dali_cpu: when true, a single device id ``0`` is used instead of
            ``args.gpus`` and the flag is forwarded to the pipelines.

    Returns:
        ``(train_iter, val_iter, batch_fn)``; ``val_iter`` is ``None`` when
        ``args.rec_val`` is empty.
    """
    if dali_cpu:
        devices = [0]
    else:
        devices = args.gpus
    num_devices = len(devices)
    # A 4-channel input shape means the pipeline must pad its output.
    pad_output = (args.image_shape[0] == 4)

    # The model's input layout is the image pipeline's output layout.
    if args.input_layout == 'NHWC':
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    # Resolve this process' position among the distributed workers.
    if 'horovod' in args.kv_store:
        rank, num_workers = hvd.rank(), hvd.size()
    elif kv:
        rank, num_workers = kv.rank, kv.num_workers
    else:
        rank, num_workers = 0, 1

    batch_size = args.batch_size // num_workers * num_devices

    trainpipes = []
    for dev_id in devices:
        trainpipes.append(
            HybridTrainPipe(
                args=args,
                batch_size=batch_size,
                num_threads=args.dali_threads,
                device_id=dev_id,
                rec_path=args.rec_train,
                idx_path=args.rec_train_idx,
                shard_id=devices.index(dev_id) + num_devices * rank,
                num_shards=num_devices * num_workers,
                crop_shape=args.image_shape[1:],
                output_layout=output_layout,
                dtype=args.dtype,
                pad_output=pad_output,
                dali_cpu=dali_cpu,
                nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                prefetch_queue=args.dali_prefetch_queue))

    # Build the first pipeline only, to learn the size of the full set.
    trainpipes[0].build()
    full_train_size = trainpipes[0].epoch_size("Reader")
    if args.num_examples < full_train_size:
        message = ("{} training examples will be used, although full "
                   "training set contains {} examples")
        warnings.warn(message.format(args.num_examples, full_train_size))

    dali_train_iter = DALIClassificationIterator(
        trainpipes, args.num_examples // num_workers)

    # Without a validation record file there is nothing more to build.
    if not args.rec_val:
        return dali_train_iter, None, batch_fn

    valpipes = []
    for dev_id in devices:
        # With separate validation every pipeline reads the single shard 0;
        # otherwise validation is sharded the same way as training.
        if args.dali_separ_val:
            val_shard_id, val_num_shards = 0, 1
        else:
            val_shard_id = devices.index(dev_id) + num_devices * rank
            val_num_shards = num_devices * num_workers
        valpipes.append(
            HybridValPipe(
                args=args,
                batch_size=batch_size,
                num_threads=args.dali_validation_threads,
                device_id=dev_id,
                rec_path=args.rec_val,
                idx_path=args.rec_val_idx,
                shard_id=val_shard_id,
                num_shards=val_num_shards,
                crop_shape=args.image_shape[1:],
                resize_shp=args.data_val_resize,
                output_layout=output_layout,
                dtype=args.dtype,
                pad_output=pad_output,
                dali_cpu=dali_cpu,
                nvjpeg_padding=args.dali_nvjpeg_memory_padding * 1024 * 1024,
                prefetch_queue=args.dali_prefetch_queue))

    valpipes[0].build()
    worker_val_examples = valpipes[0].epoch_size("Reader")
    if not args.dali_separ_val:
        # Split evenly across workers; the first `leftover` ranks take one
        # extra example each.
        share, leftover = divmod(worker_val_examples, num_workers)
        worker_val_examples = (1 if rank < leftover else 0) + share

    dali_val_iter = DALIClassificationIterator(valpipes, worker_val_examples)
    return dali_train_iter, dali_val_iter, batch_fn
예제 #14
0
def train_net(args):
    """Set up and run training with a DALI-backed data iterator.

    Builds the compute contexts, the checkpoint prefix, the symbols, the
    ParallModule model, the data iterator, optimizer and callbacks, then
    calls ``model.fit``. It only returns when ``fit`` does; the batch
    callback exits the process once ``config.max_steps`` is exceeded.

    Args:
        args: parsed options. Read: extra_model_name, models_root, network,
            loss, dataset, per_batch_size, worker_id, lr, wd, mom,
            pretrained, pretrained_epoch, frequent, lr_steps, verbose, ckpt,
            kvstore. Written: ctx_num, per_batch_size (defaulted to 128 when
            0), batch_size, rescale_threshold, image_channel,
            ctx_num_classes, local_num_classes, local_class_start.

    Side effects:
        Creates the checkpoint directory, mutates ``config.batch_size`` and
        ``config.per_batch_size``, and writes checkpoints under the computed
        prefix according to ``args.ckpt``.
    """
    # One GPU context per entry of CUDA_VISIBLE_DEVICES. An unset or empty
    # variable falls back to the CPU instead of raising KeyError.
    cvd = os.environ.get('CUDA_VISIBLE_DEVICES', '').strip()
    ctx = []
    if len(cvd) > 0:
        ctx = [mx.gpu(i) for i in range(len(cvd.split(',')))]
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))

    # Checkpoint prefix: <models_root>/<network>-<loss>-<dataset>[-<extra>]/model
    if len(args.extra_model_name) == 0:
        prefix = os.path.join(
            args.models_root,
            '%s-%s-%s' % (args.network, args.loss, args.dataset), 'model')
    else:
        prefix = os.path.join(
            args.models_root, '%s-%s-%s-%s' %
            (args.network, args.loss, args.dataset, args.extra_model_name),
            'model')
    prefix_dir = os.path.dirname(prefix)
    print('prefix', prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)

    args.ctx_num = len(ctx)
    if args.per_batch_size == 0:
        args.per_batch_size = 128
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = config.image_shape[2]
    config.batch_size = args.batch_size
    config.per_batch_size = args.per_batch_size
    data_dir = config.dataset_path
    image_size = config.image_shape[0:2]
    assert len(image_size) == 2
    assert image_size[0] == image_size[1]
    print('image_size', image_size)
    print('num_classes', config.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    data_shape = (args.image_channel, image_size[0], image_size[1])

    # Spread the classes over every context of every worker, rounding the
    # per-context class count up when the division is not exact.
    num_workers = config.num_workers
    global_num_ctx = num_workers * args.ctx_num
    if config.num_classes % global_num_ctx == 0:
        args.ctx_num_classes = config.num_classes // global_num_ctx
    else:
        args.ctx_num_classes = config.num_classes // global_num_ctx + 1
    args.local_num_classes = args.ctx_num_classes * args.ctx_num
    args.local_class_start = args.local_num_classes * args.worker_id

    print('Called with argument:', args, config)
    mean = None

    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    arg_params = None
    aux_params = None
    if len(args.pretrained) == 0:
        esym = get_symbol_embedding()
        asym = get_symbol_arcface
    else:
        print('loading', args.pretrained, args.pretrained_epoch)
        pretrain_esym, arg_params, aux_params = mx.model.load_checkpoint(
            args.pretrained, args.pretrained_epoch)
        esym = get_symbol_embedding(pretrain_esym)
        asym = get_symbol_arcface

    if config.count_flops:
        all_layers = esym.get_internals()
        _sym = all_layers['fc1_output']
        FLOPs = flops_counter.count_flops(_sym,
                                          data=(1, 3, image_size[0],
                                                image_size[1]))
        _str = flops_counter.flops_str(FLOPs)
        print('Network FLOPs: %s' % _str)

    model = ParallModule(
        context=ctx,
        symbol=esym,
        data_names=['data'],
        label_names=['softmax_label'],
        asymbol=asym,
        args=args,
    )
    val_dataiter = None
    # TODO: if config.use_dali:
    if True:
        from dali_image_iter import HybridTrainPipe
        from nvidia.dali.plugin.mxnet import DALIClassificationIterator
        # NOTE(review): a single pipeline with hard-coded positional values
        # (4, 0, 4, batch_size * 1000) - presumably num_threads, device_id,
        # num_gpus and initial_fill; confirm against HybridTrainPipe.
        htp = HybridTrainPipe(path_imgrec, args.batch_size, 4, 0, 4,
                              args.batch_size * 1000)
        trainpipes = [htp]
        htp.build()
        print("Training pipeline epoch size: {}".format(
            htp.epoch_size("Reader")))
        dali_train_iter = DALIClassificationIterator(trainpipes,
                                                     htp.epoch_size("Reader"))
        train_dataiter = dali_train_iter
    else:
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            color_jittering=config.data_color,
            images_filter=config.data_images_filter,
        )

    if config.net_name == 'fresnet' or config.net_name == 'fmobilefacenet':
        initializer = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type="out",
                                     magnitude=2)  #resnet style
    else:
        initializer = mx.init.Xavier(rnd_type='uniform',
                                     factor_type="in",
                                     magnitude=2)

    _rescale = 1.0 / args.batch_size
    opt = optimizer.SGD(learning_rate=base_lr,
                        momentum=base_mom,
                        wd=base_wd,
                        rescale_grad=_rescale)
    _cb = mx.callback.Speedometer(args.batch_size, args.frequent)

    # Load every verification set listed in config.val_targets that exists
    # on disk as <data_dir>/<name>.bin.
    ver_list = []
    ver_name_list = []
    for name in config.val_targets:
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        """Run every verification set and return the flip accuracies."""
        results = []
        for ver_name, ver_data in zip(ver_name_list, ver_list):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
                ver_data, model, args.batch_size, 10, None, None)
            print('[%s][%d]XNorm: %f' % (ver_name, nbatch, xnorm))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' %
                  (ver_name, nbatch, acc2, std2))
            results.append(acc2)
        return results

    # highest_acc[0]: best summed score, highest_acc[-1]: best accuracy on
    # the last verification target. One-element lists are used so the
    # nested callback can mutate the counters.
    highest_acc = [0.0, 0.0]  #lfw and target
    global_step = [0]
    save_step = [0]
    lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        """Per-batch hook: lr decay, speed logging, verification, saving."""
        global_step[0] += 1
        mbatch = global_step[0]
        # Decay the learning rate tenfold at each configured step.
        for step in lr_steps:
            if mbatch == step:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break

        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            is_highest = False
            if len(acc_list) > 0:
                score = sum(acc_list)
                if acc_list[-1] >= highest_acc[-1]:
                    if acc_list[-1] > highest_acc[-1]:
                        is_highest = True
                    else:
                        # Tie on the last target: fall back to the summed
                        # score over all targets.
                        if score >= highest_acc[0]:
                            is_highest = True
                            highest_acc[0] = score
                    highest_acc[-1] = acc_list[-1]
            if is_highest:
                do_save = True
            # args.ckpt overrides: 0 = never save, 2 = always save,
            # 3 = save on improvement but always under checkpoint index 1.
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt == 2:
                do_save = True
            elif args.ckpt == 3:
                msave = 1

            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                # TODO: a way to save all parameters is wanted here so that
                # training can be resumed; the author was unsure whether
                # this change is correct.
                if config.ckpt_embedding:
                    # Save only the embedding network: cut the symbol at
                    # fc1_output and drop the fc7* weights.
                    all_layers = model.symbol.get_internals()
                    _sym = all_layers['fc1_output']
                    _arg = {}
                    for k in arg:
                        if not k.startswith('fc7'):
                            _arg[k] = arg[k]
                    mx.model.save_checkpoint(prefix, msave, _sym, _arg, aux)
                else:
                    mx.model.save_checkpoint(prefix, msave, model.symbol, arg,
                                             aux)

            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        if config.max_steps > 0 and mbatch > config.max_steps:
            sys.exit(0)

    epoch_cb = None

    # num_epoch is effectively unbounded; training ends via config.max_steps
    # in the batch callback above.
    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=999999,
        eval_data=val_dataiter,
        kvstore=args.kvstore,
        optimizer=opt,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
예제 #15
0
# Logger writing under ./log with the "mobile_facenet" prefix.
logger = Logger(root="./log", prefix="mobile_facenet", local_rank=local_rank)

# Training pipeline: a single FacePipe for this process, placed on the
# device `local_rank` and reading shard `rank` out of `num_gpu` shards.
train_pipes = [
    FacePipe(name="emore",
             batch_size=batch_size_per_gpu,
             num_threads=num_worker,
             device_id=local_rank,
             num_shards=num_gpu,
             shard_id=rank)
]
# Dataset size and class count are read from the pipeline object.
train_size = train_pipes[0].size
num_classes = train_pipes[0].num_classes
# Each process iterates over its 1/num_gpu share of the data per epoch;
# auto_reset=True is passed so the iterator resets itself between epochs.
train_iter = DALIClassificationIterator(train_pipes,
                                        train_size // num_gpu,
                                        auto_reset=True)

# Validation helper over `val_targets`.
validator = ParallelValidation(val_targets,
                               batch_size_per_gpu,
                               rank,
                               local_rank,
                               logger=logger)

# Network: MobileFaceNet with one output per class, MSRAPrelu-initialised
# on `ctx` and hybridized with static allocation.
net = get_mobile_facenet(num_classes, weight_norm=True)
net.initialize(init=mx.init.MSRAPrelu(), ctx=ctx)
net.hybridize(static_alloc=True)

# Loss: RingLoss configured with `lamda` and `r_init`; it is initialised
# on `ctx` like the network.
loss = RingLoss(lamda, r_init)
loss.initialize(ctx=ctx)
예제 #16
0
# One more than the requested epoch count - presumably so that a
# range(1, epochs) loop runs args.epochs times (TODO confirm at the loop).
epochs = args.epochs + 1
alpha = args.alpha
# Best accuracy seen so far.
max_accuracy = 0.0

# This process computes on the GPU matching its BytePS local rank.
ctx = mx.gpu(bps.local_rank())

# Data loading. `batch_size` here is the global batch size across all
# GPUs; the pipelines below receive the per-GPU args.batch_size.
batch_size = args.batch_size * num_gpu

# Training pipeline: a single CifarPipe on device `local_rank`, given
# `num_gpu` and `rank` - presumably shard count and shard id (TODO confirm
# against the CifarPipe signature).
train_pipes = [
    CifarPipe(args.batch_size, args.num_workers, local_rank, num_gpu, rank,
              use_float16)
]
train_size = train_pipes[0].size
# Each process iterates over its 1/num_gpu share of the training set.
train_data = DALIClassificationIterator(train_pipes,
                                        train_size // num_gpu,
                                        auto_reset=True)
# Validation pipeline: receives 1 and 0 where the training pipe received
# num_gpu and rank, and its iterator size is the full val_size -
# presumably every process evaluates the whole validation set (TODO
# confirm).
val_pipes = [
    CifarPipe(args.batch_size,
              args.num_workers,
              local_rank,
              1,
              0,
              use_float16,
              train=False)
]
val_size = val_pipes[0].size
val_data = DALIClassificationIterator(val_pipes, val_size, auto_reset=True)

# set the network and trainer
net = get_attention_cifar(10, num_layers=args.num_layers)