Ejemplo n.º 1
0
              os.path.join(data_dir, "cifar10_val.rec"))
    download_file('http://data.mxnet.io/data/cifar10/cifar10_val.rec', fnames[1])
    download_file('http://data.mxnet.io/data/cifar10/cifar10_train.rec', fnames[0])
    return fnames

if __name__ == '__main__':
    # download data
    (train_fname, val_fname) = download_cifar10()

    # parse args
    parser = argparse.ArgumentParser(description="train cifar10",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    fit.add_fit_args(parser)
    data.add_data_args(parser)
    data.add_data_aug_args(parser)
    data.set_data_aug_level(parser, 2)
    parser.set_defaults(
        # network
        network        = 'resnet',
        num_layers     = 110,
        # data
        data_train     = train_fname,
        data_val       = val_fname,
        num_classes    = 10,
        num_examples  = 50000,
        image_shape    = '3,28,28',
        pad_size       = 4,
        # train
        batch_size     = 512,

        num_epochs     = 300,
Ejemplo n.º 2
0

if __name__ == "__main__":
    # parse args
    parser = argparse.ArgumentParser(description="fine-tune a dataset",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    train = fit.add_fit_args(parser)
    data.add_data_args(parser)
    aug = data.add_data_aug_args(parser)

    parser.add_argument('--pretrained-model', type=str,
                        help='the pre-trained model')
    parser.add_argument('--layer-before-fullc', type=str, default='flatten0',
                        help='the name of the layer before the last fullc layer')
    # use less augmentations for fine-tune
    data.set_data_aug_level(parser, 1)
    set_imagenet_aug(parser)
    # use a small learning rate and less regularizations
    parser.set_defaults(image_shape='3,224,224', num_epochs=30,
                        lr=.01, lr_step_epochs='20', wd=0, mom=0)

    args = parser.parse_args()

    # load pretrained model
    dir_path = os.path.dirname(os.path.realpath(__file__))
    (prefix, epoch) = modelzoo.download_model(
        args.pretrained_model, os.path.join(dir_path, 'model'))
    if args.load_epoch is not None:
        (prefix, epoch) = (args.model_prefix, args.load_epoch)
    logging.info(prefix)
    logging.info(epoch)
Ejemplo n.º 3
0
def start_new_training():
    # binding signals
    SUPPORTED_SIGNALS = (
        signal.SIGINT,
        signal.SIGTERM,
    )
    for signum in SUPPORTED_SIGNALS:
        try:
            signal.signal(signum, signal_handler)
            logger.info("Bind signal '%s' success to %s", signum,
                        signal_handler)
        except Exception as identifier:
            logger.warning("Bind signal '%s' failed, err: %s", signum,
                           identifier)
    try:
        # parse args
        parser = argparse.ArgumentParser(
            description="train imagenet-1k",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        fit.add_fit_args(parser)
        data.add_data_args(parser)
        data.add_data_aug_args(parser)
        # use a large aug level
        data.set_data_aug_level(parser, 3)
        parser.set_defaults(
            # network
            network='resnet',
            num_layers=50,
            # data
            num_classes=10,
            num_examples=60000,
            image_shape='3,28,28',
            min_random_scale=1,  # if input image has min size k, suggest to use
            # 256.0/x, e.g. 0.533 for 480
            # train
            num_epochs=80,
            lr_step_epochs='30,60',
            dtype='float32',
            batch_size=32)
        args = parser.parse_args()

        # AVA-SDK  new an Instance
        train_ins = train.TrainInstance()
        # add CALLBACK
        batch_end_cb = train_ins.get_monitor_callback(
            "mxnet", batch_size=args.batch_size, batch_freq=10)
        args.batch_end_callback = batch_end_cb

        # load network
        from importlib import import_module
        net = import_module('symbols.' + args.network)
        sym = net.get_symbol(**vars(args))

        # train
        fit.fit(args, sym, data.get_rec_iter)

        logger.info("training finish")
        err_msg = ""
        if train_ins == None:
            return
        train_ins.done(err_msg=err_msg)
    except Exception as err:
        err_msg = "training failed, err: %s" % (err)
        logger.info(err_msg)
        traceback.print_exc(file=sys.stderr)

        if train_ins == None:
            return
        train_ins.done(err_msg=err_msg)