def train(args, print_log=False):
    """Train a text classifier with Chainer and return the final reported score.

    The function loads the dataset named by ``args.dataset``, builds the
    encoder selected by ``args.model`` wrapped in a ``TextClassifier``,
    optionally swaps the embedding layer for a ``PredictiveEmbed`` driven by
    a pre-trained bidirectional language model (``args.bilm``), and trains
    with Adam until ``FailMaxValueTrigger`` stops the run.

    Args:
        args: Parsed program arguments. The attributes read here are
            ``seed``, ``dataset``, ``data_dir``, ``batchsize``, ``model``,
            ``layer``, ``unit``, ``dropout``, ``bilm`` and the ``bilm_*``
            options, ``no_label``, ``gpu``, ``learning_rate``,
            ``stop_epoch``, ``epoch``, ``output_dir``, ``validation`` and
            ``save_model``.
        print_log (bool): When true, the printed report also contains the
            epoch number and the loss columns; otherwise only the three
            accuracy columns are reported.

    Returns:
        float: ``float(out[-1])``, the last value captured by the ``Outer``
        report sink (labelled as test accuracy in the final log line).

    Raises:
        ValueError: If ``args.dataset`` or ``args.model`` is not recognised,
            or if no held-out test split is available for the second
            evaluator (``args.validation`` unset, or a dataset loader that
            does not return one).
    """
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)

    vocab = None
    # Only ``read_text_dataset`` returns a separate held-out split; for the
    # other loaders this stays None and is rejected explicitly further down.
    real_test = None

    # Load a dataset. ``test`` is the split used for the 'validation/...'
    # metrics that drive early stopping and snapshotting.
    if args.dataset == 'dbpedia':
        train, test, vocab = text_datasets.get_dbpedia(vocab=vocab)
    elif args.dataset.startswith('imdb.'):
        train, test, vocab = text_datasets.get_imdb(
            fine_grained=args.dataset.endswith('.fine'), vocab=vocab)
    elif args.dataset in [
            'TREC', 'stsa.binary', 'stsa.fine', 'custrev', 'mpqa',
            'rt-polarity', 'subj'
    ]:
        train, test, real_test, vocab = text_datasets.read_text_dataset(
            args.dataset, vocab=None, dir=args.data_dir)
    else:
        # Previously fell through and failed later on an unbound local.
        raise ValueError('unknown dataset: {}'.format(args.dataset))

    print('# train data: {}'.format(len(train)))
    print('# test  data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    n_class = len({int(d[1]) for d in train})
    print('# class: {}'.format(n_class))

    # Re-seed before every stage that may consume random numbers.
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)
    train = UnkDropout(train, vocab['<unk>'], 0.01)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Setup a model
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)
    if args.model == 'rnn':
        Encoder = class_nets.RNNEncoder
    elif args.model == 'cnn':
        Encoder = class_nets.CNNEncoder
    elif args.model == 'bow':
        Encoder = class_nets.BOWMLPEncoder
    else:
        # Previously fell through and failed on an unbound ``Encoder``.
        raise ValueError('unknown model: {}'.format(args.model))
    encoder = Encoder(n_layers=args.layer,
                      n_vocab=len(vocab),
                      n_units=args.unit,
                      dropout=args.dropout)
    model = class_nets.TextClassifier(encoder, n_class)

    if args.bilm:
        # ``args.bilm`` is the path of the saved language-model weights.
        bilm = bilm_nets.BiLanguageModel(len(vocab), args.bilm_unit,
                                         args.bilm_layer, args.bilm_dropout)
        n_labels = len({int(v[1]) for v in test})
        print('# labels =', n_labels)
        if not args.no_label:
            print('add label')
            bilm.add_label_condition_nets(n_labels, args.bilm_unit)
        else:
            print('not using label')
        chainer.serializers.load_npz(args.bilm, bilm)
        with model.encoder.init_scope():
            # Carry the already-initialised embedding weights over to the
            # replacement layer.
            initialW = numpy.array(model.encoder.embed.W.data)
            del model.encoder.embed
            model.encoder.embed = bilm_nets.PredictiveEmbed(len(vocab),
                                                            args.unit,
                                                            bilm,
                                                            args.dropout,
                                                            initialW=initialW)
            model.encoder.use_predict_embed = True

            model.encoder.embed.setup(mode=args.bilm_mode,
                                      temp=args.bilm_temp,
                                      word_lower_bound=0.,
                                      gold_lower_bound=0.,
                                      gumbel=args.bilm_gumbel,
                                      residual=args.bilm_residual,
                                      wordwise=args.bilm_wordwise,
                                      add_original=args.bilm_add_original,
                                      augment_ratio=args.bilm_ratio,
                                      ignore_unk=vocab['<unk>'])

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
        model.xp.random.seed(args.seed)
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(args.learning_rate)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert_seq,
                                       device=args.gpu)

    from triggers import FailMaxValueTrigger
    stop_trigger = FailMaxValueTrigger(key='validation/main/accuracy',
                                       trigger=(1, 'epoch'),
                                       n_times=args.stop_epoch,
                                       max_trigger=args.epoch)
    trainer = training.Trainer(updater, stop_trigger, out=args.output_dir)

    # Evaluate the model on the validation split for each epoch
    trainer.extend(
        MicroEvaluator(test_iter,
                       model,
                       converter=convert_seq,
                       device=args.gpu))

    # Evaluate on the held-out test split as well, reported under 'test/...'.
    # Both conditions below previously surfaced as an UnboundLocalError.
    if not args.validation:
        raise ValueError(
            'train() needs args.validation to be set: the held-out test '
            'evaluator cannot be built without it')
    if real_test is None:
        raise ValueError(
            'dataset {!r} does not provide a held-out test split'.format(
                args.dataset))
    real_test_iter = chainer.iterators.SerialIterator(real_test,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False)
    eval_on_real_test = MicroEvaluator(real_test_iter,
                                       model,
                                       converter=convert_seq,
                                       device=args.gpu)
    eval_on_real_test.default_name = 'test'
    trainer.extend(eval_on_real_test)

    # Take a best snapshot: fires when validation accuracy reaches a new max
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    if args.save_model:
        trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                       trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch.
    # ``out`` receives what PrintReport writes; its last two entries are read
    # back after training.
    out = Outer()
    trainer.extend(
        extensions.LogReport(filename=args.output_dir + '/classifier.log'))
    if print_log:
        report_keys = [
            'epoch', 'main/loss', 'validation/main/loss',
            'test/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'test/main/accuracy'
        ]
    else:
        report_keys = [
            'main/accuracy', 'validation/main/accuracy', 'test/main/accuracy'
        ]
    trainer.extend(extensions.PrintReport(report_keys, out=out),
                   trigger=record_trigger)

    # Run the training
    trainer.run()

    # free all unused memory blocks "cached" in the memory pool
    mempool = cupy.get_default_memory_pool()
    mempool.free_all_blocks()
    # The values must go through format(); passing them as extra print()
    # arguments left the '{}' placeholders unfilled.
    print('val_acc:{}, test_acc:{}\n'.format(out[-2], out[-1]))
    return float(out[-1])
Esempio n. 2
0
def train(args):
    """Train a speech-translation model with the given args.

    The function reads the input/output dimensions from the first utterance
    of ``args.valid_json``, builds the model (optionally initialised from
    pre-trained modules), writes the model configuration to
    ``<args.outdir>/model.json``, prepares the optimizer, data iterators and
    a Chainer ``Trainer`` with evaluation, plotting, snapshot, decay and
    reporting extensions, and finally runs the training loop.

    Args:
        args (namespace): The program arguments.

    Raises:
        NotImplementedError: If ``args.opt`` is not one of ``"adadelta"``,
            ``"adam"`` or ``"noam"``.
        ImportError: If an apex opt level is requested through
            ``args.train_dtype`` but ``apex`` cannot be imported.

    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning("cuda is not available")

    # get input and output dimension info
    # (taken from the last shape entry of the first utterance)
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]["input"][0]["shape"][-1])
    odim = int(valid_json[utts[0]]["output"][0]["shape"][-1])
    logging.info("#input dims : " + str(idim))
    logging.info("#output dims: " + str(odim))

    # Initialize with pre-trained ASR encoder and MT decoder
    if args.enc_init is not None or args.dec_init is not None:
        model = load_trained_modules(idim, odim, args, interface=STInterface)
    else:
        model_class = dynamic_import(args.model_module)
        model = model_class(idim, odim, args)
    assert isinstance(model, STInterface)

    # Optionally attach a pre-trained RNN language model to the model
    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer,
                             rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        model.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + "/model.json"
    with open(model_conf, "wb") as f:
        logging.info("writing a model config file to " + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode("utf_8"))
    for key in sorted(vars(args).keys()):
        logging.info("ARGS: " + key + ": " + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    # (the batch size is scaled by the number of GPUs, mutating ``args``)
    if args.ngpu > 1:
        if args.batch_size != 0:
            logging.warning(
                "batch size is automatically increased (%d -> %d)" %
                (args.batch_size, args.batch_size * args.ngpu))
            args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    # Any other train_dtype value (e.g. an apex opt level) falls back to
    # float32 here.
    if args.train_dtype in ("float16", "float32", "float64"):
        dtype = getattr(torch, args.train_dtype)
    else:
        dtype = torch.float32
    model = model.to(device=device, dtype=dtype)

    # Setup an optimizer
    if args.opt == "adadelta":
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps,
                                         weight_decay=args.weight_decay)
    elif args.opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.opt == "noam":
        from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt

        optimizer = get_std_opt(
            model.parameters(),
            args.adim,
            args.transformer_warmup_steps,
            args.transformer_lr,
        )
    else:
        raise NotImplementedError("unknown optimizer: " + args.opt)

    # setup apex.amp
    if args.train_dtype in ("O0", "O1", "O2", "O3"):
        try:
            from apex import amp
        except ImportError as e:
            logging.error(
                f"You need to install apex for --train-dtype {args.train_dtype}. "
                "See https://github.com/NVIDIA/apex#linux")
            raise e
        # The noam optimizer is a wrapper; amp is applied to the optimizer it
        # holds in ``.optimizer``.
        if args.opt == "noam":
            model, optimizer.optimizer = amp.initialize(
                model, optimizer.optimizer, opt_level=args.train_dtype)
        else:
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.train_dtype)
        use_apex = True
    else:
        use_apex = False

    # FIXME: TOO DIRTY HACK
    # Gives the torch optimizer the ``target`` and ``serialize`` attributes,
    # both backed by the model's reporter.
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(
        subsampling_factor=model.subsample[0],
        dtype=dtype,
        use_source_text=args.asr_weight > 0 or args.mt_weight > 0,
    )

    # read json data
    with open(args.train_json, "rb") as f:
        train_json = json.load(f)["utts"]
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]

    # sortagrad is enabled for -1 or any positive value
    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    # make minibatch list (variable length)
    train = make_batchset(
        train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        shortest_first=use_sortagrad,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        iaxis=0,
        oaxis=0,
    )
    valid = make_batchset(
        valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        iaxis=0,
        oaxis=0,
    )

    # Separate loaders for training and validation; they differ only in the
    # "train" flag passed to preprocessing.
    load_tr = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": True},  # Switch the mode of preprocessing
    )
    load_cv = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": False},  # Switch the mode of preprocessing
    )
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    # default collate function converts numpy array to pytorch tensor
    # we used an empty collate function instead which returns list
    train_iter = ChainerDataLoader(
        dataset=TransformDataset(train,
                                 lambda data: converter([load_tr(data)])),
        batch_size=1,
        num_workers=args.n_iter_processes,
        shuffle=not use_sortagrad,
        collate_fn=lambda x: x[0],
    )
    valid_iter = ChainerDataLoader(
        dataset=TransformDataset(valid,
                                 lambda data: converter([load_cv(data)])),
        batch_size=1,
        shuffle=False,
        collate_fn=lambda x: x[0],
        num_workers=args.n_iter_processes,
    )

    # Set up a trainer
    updater = CustomUpdater(
        model,
        args.grad_clip,
        {"main": train_iter},
        optimizer,
        device,
        args.ngpu,
        args.grad_noise,
        args.accum_grad,
        use_apex=use_apex,
    )
    trainer = training.Trainer(updater, (args.epochs, "epoch"),
                               out=args.outdir)

    # With sortagrad, ShufflingEnabler is triggered after ``args.sortagrad``
    # epochs, or after ``args.epochs`` epochs when sortagrad is -1.
    # NOTE(review): presumably it turns shuffling back on for train_iter;
    # confirm against ShufflingEnabler.
    if use_sortagrad:
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs,
                     "epoch"),
        )

    # Resume from a snapshot
    if args.resume:
        logging.info("resumed from %s" % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    # (or every ``save_interval_iters`` iterations when that is positive)
    if args.save_interval_iters > 0:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu))

    # Save attention weight at each epoch
    if args.num_save_attention > 0:
        # Take the first N validation utterances and sort them in descending
        # order of input shape[1].
        data = sorted(
            list(valid_json.items())[:args.num_save_attention],
            key=lambda x: int(x[1]["input"][0]["shape"][1]),
            reverse=True,
        )
        # Use the wrapped model when ``model`` exposes a ``module`` attribute.
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
            plot_class = model.module.attention_plot_class
        else:
            att_vis_fn = model.calculate_all_attentions
            plot_class = model.attention_plot_class
        att_reporter = plot_class(
            att_vis_fn,
            data,
            args.outdir + "/att_ws",
            converter=converter,
            transform=load_cv,
            device=device,
        )
        trainer.extend(att_reporter, trigger=(1, "epoch"))
    else:
        att_reporter = None

    # Save CTC prob at each epoch
    if (args.asr_weight > 0 and args.mtlalpha > 0) and args.num_save_ctc > 0:
        # NOTE: sort it by output lengths
        data = sorted(
            list(valid_json.items())[:args.num_save_ctc],
            key=lambda x: int(x[1]["output"][0]["shape"][0]),
            reverse=True,
        )
        if hasattr(model, "module"):
            ctc_vis_fn = model.module.calculate_all_ctc_probs
            plot_class = model.module.ctc_plot_class
        else:
            ctc_vis_fn = model.calculate_all_ctc_probs
            plot_class = model.ctc_plot_class
        ctc_reporter = plot_class(
            ctc_vis_fn,
            data,
            args.outdir + "/ctc_prob",
            converter=converter,
            transform=load_cv,
            device=device,
            ikey="output",
            iaxis=1,
        )
        trainer.extend(ctc_reporter, trigger=(1, "epoch"))
    else:
        ctc_reporter = None

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport(
            [
                "main/loss",
                "validation/main/loss",
                "main/loss_asr",
                "validation/main/loss_asr",
                "main/loss_mt",
                "validation/main/loss_mt",
                "main/loss_st",
                "validation/main/loss_st",
            ],
            "epoch",
            file_name="loss.png",
        ))
    trainer.extend(
        extensions.PlotReport(
            [
                "main/acc",
                "validation/main/acc",
                "main/acc_asr",
                "validation/main/acc_asr",
                "main/acc_mt",
                "validation/main/acc_mt",
            ],
            "epoch",
            file_name="acc.png",
        ))
    trainer.extend(
        extensions.PlotReport(["main/bleu", "validation/main/bleu"],
                              "epoch",
                              file_name="bleu.png"))

    # Save best models
    # (one snapshot on minimum validation loss, one on maximum validation
    # accuracy)
    trainer.extend(
        snapshot_object(model, "model.loss.best"),
        trigger=training.triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(
        snapshot_object(model, "model.acc.best"),
        trigger=training.triggers.MaxValueTrigger("validation/main/acc"),
    )

    # save snapshot which contains model and optimizer states
    if args.save_interval_iters > 0:
        trainer.extend(
            torch_snapshot(filename="snapshot.iter.{.updater.iteration}"),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(torch_snapshot(), trigger=(1, "epoch"))

    # epsilon decay in the optimizer
    # For adadelta (eps decay) and adam (lr decay): when the comparison
    # lambda holds for the chosen criterion, restore the corresponding best
    # snapshot and then apply the decay. No such extensions are registered
    # for the noam optimizer.
    if args.opt == "adadelta":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
    elif args.opt == "adam":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(args.report_interval_iters,
                                      "iteration")))
    # Columns of the printed report; optional metrics are appended below
    # depending on the task weights and the reporting flags.
    report_keys = [
        "epoch",
        "iteration",
        "main/loss",
        "main/loss_st",
        "main/loss_asr",
        "validation/main/loss",
        "validation/main/loss_st",
        "validation/main/loss_asr",
        "main/acc",
        "validation/main/acc",
    ]
    if args.asr_weight > 0:
        report_keys.append("main/acc_asr")
        report_keys.append("validation/main/acc_asr")
    report_keys += ["elapsed_time"]
    # Also observe the optimizer hyperparameter that the decay extensions
    # modify (eps for adadelta, lr for adam/noam), read from param_groups[0].
    if args.opt == "adadelta":
        trainer.extend(
            extensions.observe_value(
                "eps",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["eps"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("eps")
    elif args.opt in ["adam", "noam"]:
        trainer.extend(
            extensions.observe_value(
                "lr",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["lr"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("lr")
    if args.asr_weight > 0:
        if args.mtlalpha > 0:
            report_keys.append("main/cer_ctc")
            report_keys.append("validation/main/cer_ctc")
        if args.mtlalpha < 1:
            if args.report_cer:
                report_keys.append("validation/main/cer")
            if args.report_wer:
                report_keys.append("validation/main/wer")
    if args.report_bleu:
        report_keys.append("main/bleu")
        report_keys.append("validation/main/bleu")
    trainer.extend(
        extensions.PrintReport(report_keys),
        trigger=(args.report_interval_iters, "iteration"),
    )

    trainer.extend(
        extensions.ProgressBar(update_interval=args.report_interval_iters))
    set_early_stop(trainer, args)

    # Optional TensorBoard logging; the attention/CTC reporters are passed
    # through and may be None.
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        trainer.extend(
            TensorboardLogger(
                SummaryWriter(args.tensorboard_dir),
                att_reporter=att_reporter,
                ctc_reporter=ctc_reporter,
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
Esempio n. 3
0
def train(args):
    """Run training of the end-to-end (attention / CTC / hybrid) model.

    The function reads the input/output dimensions from the first utterance
    of ``args.valid_label``, builds ``Loss(E2E(...))``, pickles the model
    configuration to ``<args.outdir>/model.conf``, prepares the optimizer,
    the minibatch iterators and the feature readers, registers the
    evaluation, snapshot, plotting, decay and reporting extensions on a
    Chainer ``Trainer``, and runs the training loop.

    Args:
        args (namespace): The program arguments. Among others this reads
            ``seed``, ``debugmode``, ``valid_label``, ``train_label``,
            ``train_feat``, ``valid_feat``, ``mtlalpha``, ``outdir``,
            ``ngpu``, ``batch_size``, ``opt``, ``eps``, ``eps_decay``,
            ``criterion``, ``grad_clip``, ``epochs`` and ``resume``.
            ``args.batch_size`` is multiplied by ``args.ngpu`` in place when
            more than one GPU is used.

    Raises:
        NotImplementedError: If ``args.opt`` is neither ``'adadelta'`` nor
            ``'adam'``.
    """
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['idim'])
    odim = int(valid_json[utts[0]]['odim'])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        # A space was missing between the message and the path.
        logging.info('writing a model config file to ' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    reporter = model.reporter
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model.cuda()
    elif ngpu > 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model = DataParallel(model, device_ids=gpu_id)
        model.cuda()
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu
    else:
        # CPU is marked with the device id -1
        gpu_id = [-1]

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    else:
        # Previously fell through and failed on an unbound ``optimizer``.
        raise NotImplementedError('unknown optimizer: ' + args.opt)

    # FIXME: TOO DIRTY HACK
    # Gives the torch optimizer the ``target`` and ``serialize`` attributes,
    # both backed by the model's reporter.
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # read json data
    with open(args.train_label, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  1,
                                                  repeat=False,
                                                  shuffle=False)

    # prepare Kaldi reader
    train_reader = lazy_io.read_dict_scp(args.train_feat)
    valid_reader = lazy_io.read_dict_scp(args.valid_feat)

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(model, args.grad_clip, train_iter,
                                     optimizer, train_reader, gpu_id)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    # NOTE(review): the weights are always reloaded from 'model.acc.best',
    # which this function does not save in pure CTC mode — confirm that
    # resuming a CTC-only run is expected to work.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        if ngpu > 1:
            model.module.load_state_dict(
                torch.load(args.outdir + '/model.acc.best'))
        else:
            model.load_state_dict(torch.load(args.outdir + '/model.acc.best'))
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        PytorchSeqEvaluaterKaldi(model,
                                 valid_iter,
                                 reporter,
                                 valid_reader,
                                 device=gpu_id))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    def torch_save(path, _):
        """Save the state dict to ``path`` and the whole model to ``path``.pkl."""
        if ngpu > 1:
            torch.save(model.module.state_dict(), path)
            torch.save(model.module, path + ".pkl")
        else:
            torch.save(model.state_dict(), path)
            torch.save(model, path + ".pkl")

    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # Compare strings by value: ``is not`` against a literal checks identity,
    # which only works by accident of interning and warns on Python 3.8+.
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model,
                                       'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        """Load the state dict stored at ``path`` into the model; return ``obj``."""
        if ngpu > 1:
            model.module.load_state_dict(torch.load(path))
        else:
            model.load_state_dict(torch.load(path))
        return obj

    # When the comparison lambda holds for the chosen criterion, restore the
    # best snapshot and then decay eps.
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics every 100 iterations
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        # Also report the current eps read from the optimizer's first
        # parameter group.
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
Esempio n. 4
0
                                            optimizer,
                                            device=gpu_id)

# ---- Set up the Trainer: stop after MAX_EPOCH epochs, write to 'mnist_result'
stop_condition = (MAX_EPOCH, 'epoch')
trainer = training.Trainer(updater, stop_condition, out='mnist_result')

# ---- Add extensions to the Trainer object (registration order is kept)
# Logging and snapshots of the whole trainer and of the bare predictor.
trainer.extend(extensions.LogReport())
trainer_snapshot_name = 'snapshot_epoch-{.updater.epoch}'
trainer.extend(extensions.snapshot(filename=trainer_snapshot_name))
predictor_snapshot_name = 'model_epoch-{.updater.epoch}'
trainer.extend(
    extensions.snapshot_object(model.predictor,
                               filename=predictor_snapshot_name))

# Evaluation on the test iterator.
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))

# Console report.
printed_columns = [
    'epoch',
    'main/loss',
    'main/accuracy',
    'validation/main/loss',
    'validation/main/accuracy',
    'elapsed_time',
]
trainer.extend(extensions.PrintReport(printed_columns))

# Loss and accuracy curves plotted against the epoch.
loss_curves = ['main/loss', 'validation/main/loss']
trainer.extend(
    extensions.PlotReport(loss_curves, x_key='epoch', file_name='loss.png'))
accuracy_curves = ['main/accuracy', 'validation/main/accuracy']
trainer.extend(
    extensions.PlotReport(accuracy_curves,
                          x_key='epoch',
                          file_name='accuracy.png'))

# Dump of the computational graph rooted at the training loss.
trainer.extend(extensions.DumpGraph('main/loss'))

# ---- Start training
trainer.run()
Esempio n. 5
0
def main():
    """Train FCIS (ResNet-101) on the SBD dataset using ChainerMN.

    Command-line options control the output directory, random seed, base
    learning rate, global batch size, total number of epochs and the epoch
    at which the learning rate is reduced ("cooldown").

    Every process builds the model and trains on its own shard of the
    training set; logging, snapshotting, plotting and evaluation extensions
    are registered on rank 0 only.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr',
                        '-l',
                        type=float,
                        default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # Switch to the 'forkserver' start method and spawn/join one dummy
    # process before the communicator is created, as recommended in:
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn: one process per GPU; the intra-node rank is used as the
    # GPU id of this process.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISResNet101(n_fg_class=len(sbd_instance_segmentation_label_names),
                         pretrained_model='imagenet',
                         iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    # Only rank 0 supplies the index array; scatter_dataset shuffles it and
    # hands every process its own part.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    # --batchsize is the global batch size; each process takes an equal share.
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  batch_size=args.batchsize //
                                                  comm.size)

    # The validation iterator exists on rank 0 only; it is used solely by the
    # evaluator registered in the rank-0 section below.
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    # NOTE(review): args.lr is None unless --lr is given, so the optimizer may
    # be created with lr=None; the lr_scheduler extension below assigns 'lr'.
    # Confirm that make_shift applies it before the first update.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)

    # Scale the gradients of the head's first convolution by 3 and apply
    # weight decay to all parameters.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze every parameter named 'beta' or 'gamma' as well as the first
    # two stages of the feature extractor.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        """Return the learning rate for the trainer's current epoch."""
        # Without --lr the base rate is 0.0005 per sample of the global batch.
        if args.lr is None:
            base_lr = 0.0005 * args.batchsize
        else:
            base_lr = args.lr

        # Full rate before the cooldown epoch, one tenth of it afterwards.
        epoch = trainer.updater.epoch
        if epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    # Reporting, snapshotting and evaluation run on rank 0 only.
    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        # Save model.fcis once, when the final epoch is reached.
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        # Evaluate at two fixed iteration counts.
        # NOTE(review): len(train_dataset) here is the size of this process'
        # shard, so these counts coincide with the cooldown epoch and the
        # last epoch only when the per-process batch size is 1 -- confirm
        # this is intended for other --batchsize / world-size combinations.
        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter,
            model.fcis,
            iou_thresh=0.5,
            use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Esempio n. 6
0
def main():
    """Train an SSD300 detector on a VoTT-exported, VOC-style dataset.

    All settings (GPU id, batch size, schedule, dataset location) are
    hard-coded below. The trainer writes logs, plots and periodic snapshots
    to the ``results`` directory and, once training has finished, the model
    weights are saved to ``my_ssd_model.npz`` in the working directory.
    """
    # Enable cuDNN autotune.
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]
    # Raw string: in a normal literal the sequences "\P", "\c" and "\T" are
    # invalid escapes (a warning today, an error in future Python versions).
    # The resulting path is identical to the one used previously.
    dataset_dir = r'C:\Python_Programs\chainer_practice\Telescope_corner'

    # Model setup.
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup.
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup.
    train_dataset = MyVoTTVOCDataset(dataset_dir, 'train')
    valid_dataset = MyVoTTVOCDataset(dataset_dir, 'val')

    # Data augmentation (applied to the training split only).
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup.
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid_dataset,
                                                  batchsize,
                                                  repeat=False,
                                                  shuffle=False)

    # Optimizer setup: gradients of parameters named 'b' are scaled by 2,
    # every other parameter gets weight decay instead.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup.
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=gpu_id)

    # Trainer setup.
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)
    # Start from initial_lr and multiply by lr_decay_rate at each scheduled
    # epoch.
    trainer.extend(extensions.ExponentialShift('lr',
                                               lr_decay_rate,
                                               init=initial_lr),
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_timing, 'epoch'))
    trainer.extend(DetectionVOCEvaluator(valid_iter,
                                         model,
                                         use_07_metric=False,
                                         label_names=voc_labels),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map', 'elapsed_time'
    ]),
                   trigger=log_interval)

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch',
                file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  'epoch',
                                  file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume an interrupted run, load a snapshot into the trainer before
    # calling run(), e.g.:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run the training.
    trainer.run()

    # Save the trained model (moved back to the CPU first).
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
def main():
    """Train a ThinStackRecursiveNet on the tree corpora under ``trees/``.

    Options are read from the command line (device, number of epochs, unit
    and label counts, minibatch size, evaluation period). Training trees are
    read from ``trees/train.txt`` and evaluation trees from
    ``trees/test.txt``; both are linearized with a vocabulary that is shared
    between the two splits and filled while linearizing.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--device', '-d', type=str, default='-1',
        help='Device specifier. Either ChainerX device '
        'specifier or an integer. If non-negative integer, '
        'CuPy arrays with specified device id are used. If '
        'negative integer, NumPy arrays are used')
    cli.add_argument(
        '--epoch', '-e', default=400, type=int,
        help='number of epochs to learn')
    cli.add_argument(
        '--unit', '-u', default=30, type=int,
        help='number of units')
    cli.add_argument(
        '--batchsize', '-b', type=int, default=25,
        help='learning minibatch size')
    cli.add_argument(
        '--label', '-l', type=int, default=5,
        help='number of labels')
    cli.add_argument(
        '--epocheval', '-p', type=int, default=5,
        help='number of epochs per evaluation')
    cli.add_argument('--test', dest='test', action='store_true')
    cli.set_defaults(test=False)
    # Legacy spelling: --gpu writes into the same destination as --device.
    deprecated = cli.add_argument_group('deprecated arguments')
    deprecated.add_argument(
        '--gpu', '-g', dest='device', type=int, nargs='?', const=0,
        help='GPU ID (negative value indicates CPU)')
    args = cli.parse_args()

    vocab = {}
    corpus_limit = None
    train_trees = data.read_corpus('trees/train.txt', corpus_limit)
    test_trees = data.read_corpus('trees/test.txt', corpus_limit)

    device = chainer.get_device(args.device)
    device.use()
    xp = device.xp

    def linearize_all(trees):
        """Linearize every tree against the shared vocabulary."""
        return [linearize_tree(vocab, tree, xp) for tree in trees]

    # The training split is linearized first, then the evaluation split.
    train_data = linearize_all(train_trees)
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_data = linearize_all(test_trees)
    test_iter = chainer.iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)

    # len(vocab) is read only after both splits have been linearized.
    model = ThinStackRecursiveNet(len(vocab), args.unit, args.label)
    model.to_device(device)

    optimizer = chainer.optimizers.AdaGrad(0.1)
    optimizer.setup(model)

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))

    # Evaluate every `--epocheval` epochs.
    evaluator = extensions.Evaluator(
        test_iter, model, converter=convert, device=device)
    trainer.extend(evaluator, trigger=(args.epocheval, 'epoch'))
    trainer.extend(extensions.LogReport())

    # Derive accuracies from the reported correct/total counts.
    train_accuracy = extensions.MicroAverage(
        'main/correct', 'main/total', 'main/accuracy')
    trainer.extend(train_accuracy)
    validation_accuracy = extensions.MicroAverage(
        'validation/main/correct',
        'validation/main/total',
        'validation/main/accuracy')
    trainer.extend(validation_accuracy)

    columns = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(columns))

    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Esempio n. 8
0
def train(config):
    """Train a TGAN2 model (optionally class-conditional) with ChainerMN.

    Args:
        config (dict): Experiment configuration. Keys read here are
            ``gpu``, ``communicator``, ``seed``, ``batchsize``, ``label``,
            ``dataset``, ``gen``, ``dis``, ``gen_opt``, ``dis_opt``,
            ``updater``, ``iteration``, ``out``, ``snapshot_interval``,
            ``display_interval`` and ``report_keys``, plus the optional
            sections ``inception_score``, ``preview`` and ``linear_decay``.

    Side effects:
        Sets ``chainer.config.comm``, creates the ``snapshots`` directory
        and the ``config['out']`` directory, and writes a copy of the
        configuration (as passed in) to ``<out>/config.yml``.
    """
    # Keep a pristine copy: it is hashed for the checkpoint path and dumped
    # to the result directory at the end of the setup.
    config_backup = copy.deepcopy(config)

    # Setup
    device, comm = get_device_communicator(config['gpu'],
                                           config['communicator'],
                                           config['seed'], config['batchsize'])
    chainer.config.comm = comm  # To use from the inside of models

    if config.get('seed', None) is not None:
        random.seed(config['seed'])
        numpy.random.seed(config['seed'])
        cuda.cupy.random.seed(config['seed'])

    # Prepare dataset and models. Only rank 0 instantiates the dataset;
    # scatter_dataset then distributes a shuffled shard to every process.
    if not config['label']:
        if comm.mpi_comm.rank == 0:
            dataset = make_instance(tgan2, config['dataset'])
        else:
            dataset = None
        dataset = chainermn.scatter_dataset(dataset, comm, shuffle=True)
        # Retrieve property from the original of SubDataset
        n_channels = dataset._dataset.n_channels
        gen = make_instance(tgan2,
                            config['gen'],
                            args={'out_channels': n_channels})
        dis = make_instance(tgan2,
                            config['dis'],
                            args={'in_channels': n_channels})
    else:
        if comm.mpi_comm.rank == 0:
            print('## NOTE: Training Conditional TGAN')
            dataset = make_instance(tgan2,
                                    config['dataset'],
                                    args={'label': True})
        else:
            dataset = None
        dataset = chainermn.scatter_dataset(dataset, comm, shuffle=True)
        # Retrieve property from the original of SubDataset
        n_channels = dataset._dataset.n_channels
        n_classes = dataset._dataset.n_classes
        gen = make_instance(tgan2,
                            config['gen'],
                            args={
                                'out_channels': n_channels,
                                'n_classes': n_classes
                            })
        dis = make_instance(tgan2,
                            config['dis'],
                            args={
                                'in_channels': n_channels,
                                'n_classes': n_classes
                            })

    if device >= 0:
        # get_device_from_id replaces the deprecated chainer.cuda.get_device.
        chainer.cuda.get_device_from_id(device).use()
        gen.to_gpu()
        dis.to_gpu()

    if comm.mpi_comm.rank == 0:

        def print_params(link):
            """Print the total number of parameter elements of ``link``."""
            n_params = sum([p.size for n, p in link.namedparams()])
            print('# of params in {}:\t{}'.format(link.__class__.__name__,
                                                  n_params))

        print_params(gen)
        print_params(dis)

    # Prepare optimizers
    gen_optimizer = chainermn.create_multi_node_optimizer(
        make_instance(chainer.optimizers, config['gen_opt']), comm)
    dis_optimizer = chainermn.create_multi_node_optimizer(
        make_instance(chainer.optimizers, config['dis_opt']), comm)
    gen_optimizer.setup(gen)
    dis_optimizer.setup(dis)
    optimizers = {
        'generator': gen_optimizer,
        'discriminator': dis_optimizer,
    }

    iterator = chainer.iterators.MultithreadIterator(
        dataset, batch_size=config['batchsize'])
    updater = make_instance(tgan2,
                            config['updater'],
                            args={
                                'iterator': iterator,
                                'optimizer': optimizers,
                                'device': device
                            })

    # Prepare trainer and its extensions
    trainer = training.Trainer(updater, (config['iteration'], 'iteration'),
                               out=config['out'])
    snapshot_interval = (config['snapshot_interval'], 'iteration')
    display_interval = (config['display_interval'], 'iteration')

    # Evaluation, snapshot, preview and logging extensions: rank 0 only.
    if comm.rank == 0:
        # Inception score
        if config.get('inception_score', None) is not None:
            conf_classifier = config['inception_score']['classifier']
            classifier = make_instance(tgan2, conf_classifier)
            if 'model_path' in conf_classifier:
                chainer.serializers.load_npz(conf_classifier['model_path'],
                                             classifier,
                                             path=conf_classifier['npz_path'])
            if device >= 0:
                classifier = classifier.to_gpu()
            is_conf = config['inception_score']
            is_args = {
                'batchsize': is_conf['batchsize'],
                'n_samples': is_conf['n_samples'],
                'splits': is_conf['splits'],
                'n_frames': is_conf['n_frames'],
            }
            trainer.extend(tgan2.make_inception_score_extension(
                gen, classifier, **is_args),
                           trigger=(is_conf['interval'], 'iteration'))

        # Snapshot
        trainer.extend(extensions.snapshot_object(
            gen, 'generator_iter_{.updater.iteration}.npz'),
                       trigger=snapshot_interval)
        # Do not save discriminator to save the space
        # trainer.extend(
        #     extensions.snapshot_object(
        #         dis, 'discriminator_iter_{.updater.iteration}.npz'),
        #     trigger=snapshot_interval)

        # Save movie
        if config.get('preview', None) is not None:
            preview_batchsize = config['preview']['batchsize']
            trainer.extend(tgan2.out_generated_movie(
                gen,
                dis,
                rows=config['preview']['rows'],
                cols=config['preview']['cols'],
                seed=0,
                dst=config['out'],
                batchsize=preview_batchsize),
                           trigger=snapshot_interval)

        # Log
        trainer.extend(extensions.LogReport(trigger=display_interval))
        # Work on a copy so that appending 'IS_mean' does not modify the
        # caller's config (repeated calls would otherwise accumulate
        # duplicate columns).
        report_keys = list(config['report_keys'])
        if config.get('inception_score', None) is not None:
            report_keys.append('IS_mean')
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=display_interval)
        trainer.extend(
            extensions.ProgressBar(update_interval=display_interval[0]))

    # Linear decay of 'alpha' for both optimizers, from the configured
    # start iteration down to 0 at the final iteration.
    if ('linear_decay' in config) and (config['linear_decay']['start']
                                       is not None):
        if comm.rank == 0:
            print('Use linear decay: {}:{} -> {}:{}'.format(
                config['linear_decay']['start'], config['iteration'],
                config['gen_opt']['args']['alpha'], 0.))
        trainer.extend(
            extensions.LinearShift(
                'alpha', (config['gen_opt']['args']['alpha'], 0.),
                (config['linear_decay']['start'], config['iteration']),
                gen_optimizer))
        trainer.extend(
            extensions.LinearShift(
                'alpha', (config['dis_opt']['args']['alpha'], 0.),
                (config['linear_decay']['start'], config['iteration']),
                dis_optimizer))

    # Checkpointer: the checkpoint directory is keyed by the SHA-1 of the
    # original configuration, so a run with the same config resumes from it.
    config_hash = hashlib.sha1()
    config_hash.update(
        yaml.dump(config_backup, default_flow_style=False).encode('utf-8'))
    os.makedirs('snapshots', exist_ok=True)
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='tgan2', comm=comm, path=f'snapshots/{config_hash.hexdigest()}')
    checkpointer.maybe_load(trainer, gen_optimizer)
    if trainer.updater.epoch > 0:
        print('Resuming from checkpoints: epoch =', trainer.updater.epoch)
    trainer.extend(checkpointer, trigger=snapshot_interval)

    # Copy config to result dir
    os.makedirs(config['out'], exist_ok=True)
    config_path = os.path.join(config['out'], 'config.yml')
    with open(config_path, 'w') as fp:
        fp.write(yaml.dump(config_backup, default_flow_style=False))

    # Run the training
    trainer.run()
Esempio n. 9
0
def train(args):
    """Run end-to-end ASR training with the chainer backend.

    Args:
        args: Parsed options (attribute access). Attributes read here:
            ``seed``, ``debugmode``, ``train_json``, ``valid_json``,
            ``atype``, ``mtlalpha``, ``outdir``, ``ngpu``, ``opt``, ``eps``,
            ``eps_decay``, ``grad_clip``, ``batch_size``, ``maxlen_in``,
            ``maxlen_out``, ``minibatches``, ``n_iter_processes``,
            ``epochs``, ``resume``, ``num_save_attention`` and
            ``criterion``. The whole namespace is also passed to ``E2E``
            and dumped to ``<outdir>/model.json``.

    Raises:
        NotImplementedError: If ``args.atype`` is not one of ``noatt``,
            ``dot`` or ``location``, or if ``args.opt`` is neither
            ``adadelta`` nor ``adam``.
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ['CHAINER_SEED'] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('chainer type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        chainer.config.cudnn_deterministic = False
        logging.info('chainer cudnn deterministic is disabled')
    else:
        chainer.config.cudnn_deterministic = True

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError(
            'chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        # 'main' is GPU 0; the remaining GPUs are registered as 'sub_<id>'.
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    else:
        # Fail with a clear message instead of a NameError on `optimizer`.
        raise NotImplementedError('unknown optimizer: ' + str(args.opt))
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data (valid_json was already loaded above and is reused)
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(e2e.subsample[0])
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                              args.maxlen_out, args.minibatches)
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iter = chainer.iterators.MultiprocessIterator(
                TransformDataset(train, converter.transform),
                batch_size=1,
                n_processes=args.n_iter_processes,
                n_prefetch=8,
                maxtasksperchild=20)
        else:
            train_iter = chainer.iterators.SerialIterator(
                TransformDataset(train, converter.transform), batch_size=1)

        # set up updater
        updater = CustomUpdater(train_iter,
                                optimizer,
                                converter=converter,
                                device=gpu_id)
    else:
        # set up minibatches: utterance i goes to GPU (i mod ngpu)
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {
                k: v
                for i, (k, v) in enumerate(train_json.items())
                if i % ngpu == gid
            }
            # make minibatch list (variable length)
            train_subsets += [
                make_batchset(train_json_subset, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches)
            ]

        # each subset must have same length for MultiprocessParallelUpdater;
        # shorter subsets are padded by repeating their leading minibatches
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]

        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [
                chainer.iterators.MultiprocessIterator(
                    TransformDataset(train_subsets[gid], converter.transform),
                    batch_size=1,
                    n_processes=args.n_iter_processes,
                    n_prefetch=8,
                    maxtasksperchild=20) for gid in six.moves.xrange(ngpu)
            ]
        else:
            train_iters = [
                chainer.iterators.SerialIterator(
                    TransformDataset(train_subsets[gid], converter.transform),
                    batch_size=1) for gid in six.moves.xrange(ngpu)
            ]

        # set up updater
        updater = CustomParallelUpdater(train_iters,
                                        optimizer,
                                        converter=converter,
                                        devices=devices)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    if args.n_iter_processes > 0:
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(valid_iter,
                             model,
                             converter=converter,
                             device=gpu_id))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        # Take the first N validation utterances, longest input first.
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=gpu_id),
                       trigger=(1, 'epoch'))

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'),
        trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # Compare strings by value: `is not` on a literal tests object identity,
    # which is implementation-dependent and raises a SyntaxWarning on
    # Python 3.8+.
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best'),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer: when the monitored validation value
    # gets worse than the best one, restore the best model and decay eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        # Report the current AdaDelta eps alongside the other columns.
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').eps),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Esempio n. 10
0
def main(args):
    """Train an image classifier, or only evaluate it when ``args.test`` is set.

    The model is built from ``models.archs[args.arch]``, optionally loaded
    from fine-tuning weights and/or ``args.initmodel``, and driven by a
    Chainer ``Trainer``. After training (or immediately, in test mode) the
    validation evaluator is invoked once more and its result is returned.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``arch``,
            ``finetune``, ``ignore``, ``initmodel``, ``gpu``, ``out``,
            ``batchsize``, ``val_batchsize``, ``test``, ``mean``, ``train``,
            ``val``, ``root``, ``loaderjob``, ``opt``, ``baselr``,
            ``momentum``, ``epoch``, ``gamma``, ``resume`` and, in test mode,
            ``categories``.

    Returns:
        The result of the final validation pass, with an extra
        ``'outputdir'`` entry holding the output directory of this run.
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model,
                                    args.ignore)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Default output directory: <out>/<arch>/<timestamp>_bs<batchsize>.
    # In test mode with a given model, results go next to that model instead.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime(
        "%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)

    # Load the datasets and mean file. A model-provided mean value takes
    # precedence over the mean file given on the command line.
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize,
                                   False)
    # The training iterator loads the images with subprocesses running in
    # parallel to the training; validation uses a plain serial iterator.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.val_batchsize,
                                                repeat=False,
                                                shuffle=False)

    # Set up an optimizer; only set the hyperparameters it actually exposes.
    optimizer = optimizers[args.opt]()
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter,
                                             eval_model,
                                             device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter,
                                            eval_model,
                                            device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
                       trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.test:
        # The Trainer creates its output directory only once run() starts,
        # so make sure it exists before the initial weights are written.
        if not os.path.isdir(outputdir):
            os.makedirs(outputdir)
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    # Final validation pass (the only pass in test mode).
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        if args.initmodel is not None:
            # Store the confusion matrix next to the evaluated model.
            confmat_base = args.initmodel
        else:
            # No model file to derive a name from: fall back to the output
            # directory instead of failing on ``None + '.csv'``.
            if not os.path.isdir(outputdir):
                os.makedirs(outputdir)
            confmat_base = outputdir + '/confmat'
        confmat_csv_name = confmat_base + '.csv'
        confmat_fig_name = confmat_base + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat,
                                  categories)
        utils.io.save_confmat_fig(confmat_fig_name,
                                  val_evaluator.confmat,
                                  categories,
                                  mode="rate",
                                  saveFormat="eps")
    return results
Esempio n. 11
0
def main():
    """Command-line entry point: train AEFINetConcat under a VGG16Evaluator.

    Parses the command-line options, writes them to ``arg_param.txt`` in the
    result directory, loads the train/test sequence datasets, builds the
    model and optimizer, registers the trainer extensions (evaluation,
    learning-rate shift, snapshots, logging and plots) and runs the training
    loop. When ``--resume`` is given, the trainer state is restored from that
    snapshot before training starts.
    """
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel',
                        '-p',
                        action='store_true',
                        default=False,
                        help='Load data in parallel with MultiprocessIterator')
    parser.add_argument('--opt',
                        '-o',
                        type=str,
                        choices=('adam', 'sgd'),
                        default='adam')
    args = parser.parse_args()

    # Print the run parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # Result directory (one per optimizer); also record the parsed arguments
    outdir = path.join(
        ROOT_PATH,
        'results/FI/AEFINet/AEFINetConcat_ch4_fsize5_VGG_content_loss_opt_{}'.
        format(args.opt))
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in vars(args).items():
            f.write('{}:{}\n'.format(k, v))

    print('# loading dataet(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = SequenceDataset(dataset='train')
        test = SequenceDataset(dataset='test')
    else:
        train = SequenceDatasetOnMem(dataset='train')
        test = SequenceDatasetOnMem(dataset='test')

    # prepare model: VGG16 weights are loaded from models/VGG16.npz and the
    # network is handed to the evaluator that wraps AEFINetConcat
    vgg16 = N.VGG16()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        vgg16.to_gpu()
    chainer.serializers.load_npz(path.join(ROOT_PATH, 'models/VGG16.npz'),
                                 vgg16)
    model = N.VGG16Evaluator(N.AEFINetConcat(ch=4, f_size=5), vgg16)
    if args.gpu >= 0:
        model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iter
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift: the hyperparameter name differs between the two optimizers
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    if args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/loss_mse',
            'main/loss_cont', 'main/PSNR', 'validation/main/PSNR', 'lr',
            'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # Restore the trainer state (must happen after all extensions are
    # registered so that their state is restored as well).
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Esempio n. 12
0
def main():
    """Command-line entry point: train a VGG classifier on CIFAR-10/100.

    Parses the options, loads the selected dataset, builds the classifier and
    a MomentumSGD optimizer with weight decay, registers the trainer
    extensions and runs the training loop, optionally resuming from a
    snapshot first.

    Raises:
        RuntimeError: If ``--dataset`` is neither ``cifar10`` nor
            ``cifar100``.
    """
    cli = argparse.ArgumentParser(description='Chainer CIFAR example:')
    # (flags, keyword options) pairs, registered in this order.
    option_specs = (
        (('--dataset', '-d'),
         {'default': 'cifar10',
          'help': 'The dataset to use: cifar10 or cifar100'}),
        (('--batchsize', '-b'),
         {'type': int, 'default': 64,
          'help': 'Number of images in each mini-batch'}),
        (('--learnrate', '-l'),
         {'type': float, 'default': 0.05,
          'help': 'Learning rate for SGD'}),
        (('--epoch', '-e'),
         {'type': int, 'default': 300,
          'help': 'Number of sweeps over the dataset to train'}),
        (('--gpu', '-g'),
         {'type': int, 'default': 0,
          'help': 'GPU ID (negative value indicates CPU)'}),
        (('--out', '-o'),
         {'default': 'result',
          'help': 'Directory to output the result'}),
        (('--resume', '-r'),
         {'default': '',
          'help': 'Resume the training from snapshot'}),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    args = cli.parse_args()

    for banner in ('GPU: {}'.format(args.gpu),
                   '# Minibatch-size: {}'.format(args.batchsize),
                   '# epoch: {}'.format(args.epoch),
                   ''):
        print(banner)

    # Pick the dataset; anything other than the two known names is rejected
    # before any data is loaded.
    if args.dataset not in ('cifar10', 'cifar100'):
        raise RuntimeError('Invalid dataset choice.')
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        n_classes = 10
        train_set, test_set = get_cifar10()
    else:
        print('Using CIFAR100 dataset.')
        n_classes = 100
        train_set, test_set = get_cifar100()

    # The Classifier link reports softmax cross entropy loss and accuracy at
    # every iteration; PrintReport below picks those values up.
    classifier = L.Classifier(models.VGG.VGG(n_classes))
    if args.gpu >= 0:
        # Select the requested GPU, then move the parameters onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        classifier.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(args.learnrate)
    sgd.setup(classifier)
    sgd.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_batches = chainer.iterators.SerialIterator(train_set,
                                                     args.batchsize)
    test_batches = chainer.iterators.SerialIterator(test_set,
                                                    args.batchsize,
                                                    repeat=False,
                                                    shuffle=False)

    # Trainer that stops after ``args.epoch`` epochs.
    step = training.StandardUpdater(train_batches, sgd, device=args.gpu)
    trainer = training.Trainer(step, (args.epoch, 'epoch'), out=args.out)

    # Evaluation on the test split (reported under the 'validation' prefix).
    evaluator = extensions.Evaluator(test_batches, classifier,
                                     device=args.gpu)
    trainer.extend(evaluator)

    # Halve the learning rate every 25 epochs.
    lr_decay = extensions.ExponentialShift('lr', 0.5)
    trainer.extend(lr_decay, trigger=(25, 'epoch'))

    # Dump the computational graph rooted at the main optimizer's loss.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Full trainer snapshot every ``args.epoch`` epochs, i.e. at the same
    # interval as the stop trigger rather than after each epoch.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Model-only snapshot, triggered by the validation loss.
    best_model_saver = extensions.snapshot_object(classifier, 'model_best')
    trainer.extend(best_model_saver,
                   trigger=MinValueTrigger('validation/main/loss'))

    # Per-epoch log of the reported statistics.
    trainer.extend(extensions.LogReport())

    # Loss and accuracy plots, only when plotting is available.
    if extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch',
            file_name='loss.png')
        trainer.extend(loss_plot)
        accuracy_plot = extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch',
            file_name='accuracy.png')
        trainer.extend(accuracy_plot)

    # Columns printed to stdout. "main" is the target link of the "main"
    # optimizer and "validation" is the default name of the Evaluator.
    printed_columns = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(printed_columns))

    # Progress bar on stdout.
    trainer.extend(extensions.ProgressBar())

    # Restore trainer state from a snapshot when one was given.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Esempio n. 13
0
def main():
    """Command-line entry point: train the MDN-RNN and render a dream rollout.

    Steps, in order:

    1. Parse the options and derive the output, rollout and vision
       directories from ``--data_dir``, ``--game`` and ``--experiment_name``.
    2. Find the newest ``snapshot_iter_<n>`` file in the output directory as
       an auto-resume candidate.
    3. Build the ``MDN_RNN`` model, load the trained ``CVAE`` vision model,
       and optionally load saved MDN-RNN weights (``--model``).
    4. Set up the Adam optimizer, the rollout dataset, the TBPTT updater and
       the trainer extensions, and save reference image collages decoded
       from the first rollout.
    5. Resume from a snapshot if requested, then either train and save the
       model, or (``--test``) only generate samples.
    6. Roll the model forward from a noisy real starting frame, following
       the actions of a real rollout, and save the result as
       ``dream_rollout.gif``.
    """
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir',
                        '-d',
                        default="data/wm",
                        help='The base data/output directory')
    parser.add_argument(
        '--game', default='CarRacing-v0',
        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name',
                        default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument(
        '--load_batch_size',
        default=100,
        type=int,
        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument(
        '--model',
        '-m',
        default='',
        help=
        'Initialize the model from given file, or "default" for one in data folder'
    )
    parser.add_argument('--no_resume',
                        action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument(
        '--resume_from',
        '-r',
        default='',
        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu',
                        '-g',
                        default=0,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--snapshot_interval',
                        '-s',
                        default=200,
                        type=int,
                        help='snapshot every x games')
    parser.add_argument('--z_dim',
                        '-z',
                        default=32,
                        type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--hidden_dim',
                        default=256,
                        type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures',
                        default=5,
                        type=int,
                        help='number of gaussian mixtures for MDN')
    parser.add_argument('--no_progress_bar',
                        '-p',
                        action='store_true',
                        help='Disable the progress bar during training')
    parser.add_argument('--predict_done',
                        action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature',
                        default=1.,
                        type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip',
                        default=0.,
                        type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length',
                        type=int,
                        default=128,
                        help='sequence length for LSTM for TBPTT')

    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    experiment_dir = os.path.join(args.data_dir, args.game,
                                  args.experiment_name)
    output_dir = os.path.join(experiment_dir, ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(experiment_dir, 'random_rollouts')
    vision_dir = os.path.join(experiment_dir, 'vision')

    log(ID, "Starting")

    # Look for the snapshot with the highest iteration number. Names that
    # start with the prefix but carry no number are skipped.
    max_iter = 0
    auto_resume_file = None
    snapshot_pattern = re.compile(r'snapshot_iter_(\d+)')
    for name in os.listdir(output_dir):
        found = snapshot_pattern.match(name)
        if found is not None:
            max_iter = max(max_iter, int(found.group(1)))
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures,
                    args.predict_done)
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"),
                                 vision)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size,
                         verbose=False)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=1,
                                                  shuffle=False)

    updater = TBPTTUpdater(train_iter, optimizer, args.gpu,
                           model.get_loss_func(), args.sequence_length)

    # Report every iteration on CPU, every 10 iterations on GPU.
    report_every = 10 if args.gpu >= 0 else 1
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(report_every, 'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss', 'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=report_every))

    # Decode the first frames of the first rollout as reference images and
    # hand the same latents/actions to the periodic image sampler.
    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        img_t_plus_1[np.where(
            done[0:sample_size] >= 0.5), :, :, :] = 0  # Make done black
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1,
                        os.path.join(output_dir, 'train_t_plus_1.png'))
    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler,
                   trigger=(args.snapshot_interval, 'iteration'))

    # An explicit snapshot wins over the automatically detected one.
    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID,
            "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)
    else:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    rollout_z_t, rollout_z_t_plus_1, rollout_action, done = train[
        np.random.randint(len(train))]  # Pick a random real rollout
    # Start from the first real frame plus some noise. The sum is a new
    # array, so the frame returned by the dataset is not modified in place.
    start_frame = rollout_z_t[0]
    noise = np.random.normal(0, 0.5, start_frame.shape).astype(np.float32)
    current_z_t = start_frame + noise
    all_z_t = [current_z_t]
    for i in range(rollout_z_t.shape[0]):
        current_action = np.expand_dims(
            rollout_action[i], 0)  # follow actions performed in a real rollout
        output = model(current_z_t,
                       current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        # Stop dreaming once the model predicts the episode has ended.
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(np.asarray(all_z_t).astype(
        np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs,
                    fps=20)

    log(ID, "Done")
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('train', help='Path to csv file')
    parser.add_argument('--root',
                        '-R',
                        default="betti",
                        help='Path to image files')
    parser.add_argument('--val',
                        help='Path to validation csv file',
                        required=True)
    parser.add_argument('--regress',
                        '-r',
                        action='store_true',
                        help='set for regression, otherwise classification')
    parser.add_argument('--time_series',
                        '-ts',
                        action='store_true',
                        help='set for time series data')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--layer',
                        '-l',
                        type=str,
                        choices=['res5', 'pool5'],
                        default='pool5',
                        help='output layer of the pretrained ResNet')
    parser.add_argument('--fch',
                        type=int,
                        nargs="*",
                        default=[],
                        help='numbers of channels for the last fc layers')
    parser.add_argument('--cols',
                        '-c',
                        type=int,
                        nargs="*",
                        default=[1],
                        help='column indices in csv of target variables')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot',
                        '-s',
                        type=int,
                        default=100,
                        help='snapshot interval')
    parser.add_argument('--initmodel',
                        '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--random',
                        '-rt',
                        type=int,
                        default=1,
                        help='random translation')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--outdir',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer',
                        '-op',
                        choices=optim.keys(),
                        default='Adam',
                        help='optimizer')
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='Resume the training from snapshot')
    parser.add_argument('--predict',
                        '-p',
                        action='store_true',
                        help='prediction with a specified model')
    parser.add_argument('--tuning_rate',
                        '-tr',
                        type=float,
                        default=0.1,
                        help='learning rate for pretrained layers')
    parser.add_argument('--dropout',
                        '-dr',
                        type=float,
                        default=0,
                        help='dropout ratio for the FC layers')
    parser.add_argument('--cw',
                        '-cw',
                        type=int,
                        default=128,
                        help='crop image width')
    parser.add_argument('--ch',
                        '-ch',
                        type=int,
                        default=128,
                        help='crop image height')
    parser.add_argument('--weight_decay',
                        '-w',
                        type=float,
                        default=1e-6,
                        help='weight decay for regularization')
    parser.add_argument('--wd_norm',
                        '-wn',
                        choices=['none', 'l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--dtype',
                        '-dt',
                        choices=dtypes.keys(),
                        default='fp32',
                        help='floating point precision')
    args = parser.parse_args()

    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))
    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    # read csv file
    train = Dataset(args.root,
                    args.train,
                    cw=args.cw,
                    ch=args.ch,
                    random=args.random,
                    regression=args.regress,
                    time_series=args.time_series,
                    cols=args.cols)
    test = Dataset(args.root,
                   args.val,
                   cw=args.cw,
                   ch=args.ch,
                   regression=args.regress,
                   time_series=args.time_series,
                   cols=args.cols)

    ##
    if not args.gpu:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1
    print(args)
    save_args(args, args.outdir)

    if args.regress:
        accfun = F.mean_absolute_error
        lossfun = F.mean_squared_error
        args.chs = len(args.cols)
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy
        args.chs = max(train.chs, test.chs)
        if len(args.cols) > 1:
            print("\n\nClassification only works with a single target.\n\n")
            exit()

    # Set up a neural network to train
    model = L.Classifier(Resnet(args), lossfun=lossfun, accfun=accfun)

    # Set up an optimizer
    optimizer = optim[args.optimizer]()
    optimizer.setup(model)
    if args.weight_decay > 0:
        if args.wd_norm == 'l2':
            optimizer.add_hook(chainer.optimizer.WeightDecay(
                args.weight_decay))
        elif args.wd_norm == 'l1':
            optimizer.add_hook(chainer.optimizer_hooks.Lasso(
                args.weight_decay))
    # slow update for pretrained layers
    if args.optimizer in ['Adam']:
        for func_name in model.predictor.base._children:
            for param in model.predictor.base[func_name].params():
                param.update_rule.hyperparam.alpha *= args.tuning_rate

    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    #    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    #    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    train_iter = iterators.MultithreadIterator(train,
                                               args.batchsize,
                                               shuffle=True,
                                               n_threads=args.loaderjob)
    test_iter = iterators.MultithreadIterator(test,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_threads=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = 20, 'epoch'  # frequency/10, 'epoch'

    #    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=val_interval)

    if args.optimizer in ['Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift("alpha",
                                                   0.5,
                                                   optimizer=optimizer),
                       trigger=(args.epoch / 5, 'epoch'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]),
                   trigger=log_interval)

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ChainerUI
    #trainer.extend(CommandsExtension())
    trainer.extend(extensions.LogReport(trigger=log_interval))

    if not args.predict:
        trainer.run()

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test,
                                         args.batchsize,
                                         repeat=False,
                                         shuffle=False)
    converter = concat_examples
    idx = 0
    with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model.predictor(x).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        y = y * test.std + test.mean
                        t = t * test.std + test.mean
                    else:
                        y = F.softmax(model.predictor(x)).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            for i in range(y.shape[0]):
                output.write(os.path.basename(test.ids[idx]))
                if (len(t.shape) > 1):
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    output.write(",{}".format(t[i]))
                    output.write(",{}".format(np.argmax(y[i, :])))
                    for yy in y[i]:
                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
def main():
    """Run the ChainerMN pipelined-network example on MNIST.

    Command-line options choose the mini-batch size, the number of epochs,
    whether GPUs are used, the output directory and the number of units.
    Rank 0 holds ``L.Classifier(MLP0(...))`` and iterates over the real
    MNIST data; rank 1 holds ``MLP1`` and is given placeholder datasets
    built with ``chainermn.datasets.create_empty_dataset``.

    Raises:
        ValueError: If the communicator does not span exactly 2 processes.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = [
        (('--batchsize', '-b'),
         {'type': int, 'default': 100,
          'help': 'Number of images in each mini-batch'}),
        (('--epoch', '-e'),
         {'type': int, 'default': 20,
          'help': 'Number of sweeps over the dataset to train'}),
        (('--gpu', '-g'),
         {'action': 'store_true', 'help': 'Use GPU'}),
        (('--out', '-o'),
         {'default': 'result', 'help': 'Directory to output the result'}),
        (('--unit', '-u'),
         {'type': int, 'default': 1000, 'help': 'Number of units'}),
    ]
    cli = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    for flags, spec in option_table:
        cli.add_argument(*flags, **spec)
    args = cli.parse_args()

    # Prepare ChainerMN communicator: 'hierarchical' with GPUs, 'naive' on CPU.
    comm = chainermn.create_communicator(
        'hierarchical' if args.gpu else 'naive')
    device = comm.intra_rank if args.gpu else -1

    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    rank = comm.rank

    # Only the first worker prints the run configuration.
    if rank == 0:
        summary = ['==========================================']
        if args.gpu:
            summary.append('Using GPUs')
        summary.append('Num unit: {}'.format(args.unit))
        summary.append('Num Minibatch-size: {}'.format(args.batchsize))
        summary.append('Num epoch: {}'.format(args.epoch))
        summary.append('==========================================')
        for line in summary:
            print(line)

    # Each process owns one half of the pipeline.
    if rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate dataset only on worker 0.
    train, test = chainer.datasets.get_mnist()
    if rank == 1:
        train, test = [chainermn.datasets.create_empty_dataset(split)
                       for split in (train, test)]

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Some display and output extensions are necessary only for worker 0.
    if rank == 0:
        reported_keys = [
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time']
        for extension in (
                extensions.DumpGraph('main/loss'),
                extensions.LogReport(),
                extensions.PrintReport(reported_keys),
                extensions.ProgressBar()):
            trainer.extend(extension)

    trainer.run()
Esempio n. 16
0
def main():
    """Train a GAN configured from command-line options.

    The generator and discriminator are either the default
    ``DCGANGenerator`` / ``DCGANEncoder`` or the result of evaluating the
    ``--gen_class`` / ``--dis_class`` strings. Pre-trained weights can be
    loaded with ``--load_gen_model`` / ``--load_dis_model``. Training runs
    for ``--max_iter`` iterations; both networks are snapshotted every 4000
    iterations, losses are logged every 20 iterations, and ``gan_sampling``
    is run every ``--eval_interval`` iterations.
    """
    parser = argparse.ArgumentParser(
        description='Train GAN')
    parser.add_argument('--batch_size', '-b', type=int, default=64)
    parser.add_argument('--max_iter', '-m', type=int, default=60000)
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--eval_interval', '-e', type=int, default=200,
                        help='Interval of evaluating generator')

    parser.add_argument("--learning_rate_g", type=float, default=0.0002,
                        help="Learning rate for generator")
    parser.add_argument("--learning_rate_d", type=float, default=0.0002,
                        help="Learning rate for discriminator")

    parser.add_argument('--gen_class', default='', help='generator class')
    parser.add_argument('--dis_class', default='', help='discriminator class')

    parser.add_argument("--load_gen_model", default='', help='load generator model')
    parser.add_argument("--load_dis_model", default='', help='load discriminator model')

    parser.add_argument("--lambda_gp", type=float, default=10, help='gradient penalty')

    parser.add_argument("--image_size", type=int, default=64, help='image size')
    parser.add_argument("--image_channels", type=int, default=3, help='number of image channels')
    parser.add_argument("--latent_len", type=int, default=128, help='latent vector length')

    parser.add_argument("--load_dataset", default='celeba_train', help='load dataset')
    parser.add_argument("--dataset_path", "-d", default=settings.CELEBA_PATH,
                        help='dataset directory')

    args = parser.parse_args()
    print(args)

    if args.gpu >= 0:
        # get_device() is deprecated; get_device_from_id() is the supported
        # way to select a GPU by integer ID.
        chainer.cuda.get_device_from_id(args.gpu).use()

    # SECURITY: --gen_class / --dis_class are passed to eval(), which executes
    # arbitrary Python taken from the command line. Only run this script with
    # trusted arguments.
    if args.gen_class != '':
        gen = eval(args.gen_class)
    else:
        gen = DCGANGenerator(latent=args.latent_len, out_ch=args.image_channels)

    if args.dis_class != '':
        dis = eval(args.dis_class)
    else:
        dis = DCGANEncoder(in_ch=args.image_channels, use_bn=False, out_len=256)

    # Optionally warm-start either network from saved weights.
    if args.load_gen_model != '':
        serializers.load_npz(args.load_gen_model, gen)
        print("Generator model loaded")

    if args.load_dis_model != '':
        serializers.load_npz(args.load_dis_model, dis)
        print("Discriminator model loaded")

    if args.gpu >= 0:
        gen.to_gpu()
        dis.to_gpu()
        print("use gpu {}".format(args.gpu))

    opt_g = make_adam(gen, lr=args.learning_rate_g, beta1=0.5)
    opt_d = make_adam(dis, lr=args.learning_rate_d, beta1=0.5)

    # --load_dataset names a factory in the `datasets` module.
    train_dataset = getattr(datasets, args.load_dataset)(path=args.dataset_path)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_dataset, args.batch_size, n_processes=4)

    updater = Updater(
        models=(gen, dis),
        iterator={
            'main': train_iter,
        },
        optimizer={
            'gen': opt_g,
            'dis': opt_d},
        device=args.gpu,
        params={
            'batch_size': args.batch_size,
            'img_size': args.image_size,
            'img_chan': args.image_channels,
            'lambda_gp': args.lambda_gp,
            'latent_len': args.latent_len,
        },
    )

    trainer = training.Trainer(updater, (args.max_iter, 'iteration'), out=args.out)

    model_save_interval = (4000, 'iteration')
    eval_interval = (args.eval_interval, 'iteration')

    trainer.extend(extensions.snapshot_object(
        gen, 'gen_{.updater.iteration}.npz'), trigger=model_save_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_{.updater.iteration}.npz'), trigger=model_save_interval)

    log_keys = ['epoch', 'iteration', 'gen/loss', 'dis/loss', 'dis/loss_gp']
    trainer.extend(extensions.LogReport(keys=log_keys, trigger=(20, 'iteration')))
    trainer.extend(extensions.PrintReport(log_keys), trigger=(20, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=50))

    # Periodically run the sampling extension with the preview directory.
    trainer.extend(
        gan_sampling(gen, args.out+"/preview/", args.gpu), trigger=eval_interval
    )

    trainer.run()
Esempio n. 17
0
def main():
    """Train an MNIST classifier with Chainer's Trainer from CLI options.

    ``--model`` selects the predictor wrapped in ``L.Classifier`` (``MLP`` or
    ``MLPSideEffect``). The model is optimised with Adam, evaluated on the
    MNIST test split, snapshotted every ``--frequency`` epochs (once, at the
    final epoch, when the value is -1) and can be resumed with ``--resume``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    # Restricting the choices makes argparse reject an unknown model name up
    # front; previously such a value left `model` unbound and the script
    # crashed later with an UnboundLocalError.
    parser.add_argument('--model',
                        '-m',
                        default='MLP',
                        choices=['MLP', 'MLPSideEffect'],
                        help='Choose the model: MLP or MLPSideEffect')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    # `choices` above guarantees args.model is one of the two names.
    predictor_cls = MLP if args.model == 'MLP' else MLPSideEffect
    model = L.Classifier(predictor_cls(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    # (-1 means a single snapshot at the final epoch; other values are
    # clamped to at least 1).
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Esempio n. 18
0
def main():
    """Train the seq2seq translation model from command-line options.

    Loads source/target vocabularies and sentence lists, keeps only the
    training pairs whose source and target lengths fall inside the
    configured bounds, and trains ``Seq2seq`` with Adam. When both
    ``--validation-source`` and ``--validation-target`` are given, a sample
    translation is printed and a BLEU score is computed every
    ``--validation-interval`` iterations.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1024,
                        help='number of units')
    parser.add_argument('--layer',
                        '-l',
                        type=int,
                        default=3,
                        help='number of layers')
    parser.add_argument('--min-source-sentence',
                        type=int,
                        default=1,
                        help='minimium length of source sentence')
    parser.add_argument('--max-source-sentence',
                        type=int,
                        default=50,
                        help='maximum length of source sentence')
    parser.add_argument('--min-target-sentence',
                        type=int,
                        default=1,
                        help='minimium length of target sentence')
    parser.add_argument('--max-target-sentence',
                        type=int,
                        default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        help='number of iteration to show log')
    parser.add_argument('--validation-interval',
                        type=int,
                        default=4000,
                        help='number of iteration to evlauate the model '
                        'with validation dataset')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='directory to output the result')
    args = parser.parse_args()

    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)
    # Source length is checked against the source bounds and target length
    # against the target bounds. (The target side used to be compared with
    # the source bounds, so --min/--max-target-sentence had no effect.)
    train_data = [
        (s, t) for s, t in six.moves.zip(train_source, train_target)
        if args.min_source_sentence <= len(s) <= args.max_source_sentence
        and args.min_target_sentence <= len(t) <= args.max_target_sentence
    ]
    train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio([t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    # Inverse vocabularies (id -> word) used to print sample translations.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit)
    if args.gpu >= 0:
        # get_device() is deprecated; select the GPU by its integer ID.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/perp',
        'validation/main/perp', 'validation/main/bleu', 'elapsed_time'
    ]),
                   trigger=(args.log_interval, 'iteration'))

    # Validation is optional and needs both files.
    if args.validation_source and args.validation_target:
        test_source = load_data(source_ids, args.validation_source)
        test_target = load_data(target_ids, args.validation_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        # Drop pairs where either side is empty.
        test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate(trainer):
            """Translate one randomly chosen validation pair and print it."""
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]

            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('#  result : ' + result_sentence)
            print('#  expect : ' + target_sentence)

        trainer.extend(translate,
                       trigger=(args.validation_interval, 'iteration'))
        trainer.extend(CalculateBleu(model,
                                     test_data,
                                     'validation/main/bleu',
                                     device=args.gpu),
                       trigger=(args.validation_interval, 'iteration'))

    print('start training')
    trainer.run()
def train(mode):
    """Train ``Multi_modal_GEINet`` on three GEI datasets and save the model.

    Three datasets are loaded with ``load_GEI`` from hard-coded directories
    and fed, through one unshuffled ``SerialIterator`` each, to
    ``Multi_modal_Updater`` on GPU 0. Training uses momentum SGD with weight
    decay for 6250 epochs, scaling the learning rate by 0.56234 and taking
    snapshots every 1250 epochs.

    Args:
        mode: When ``mode == True`` the trainer runs from a fresh state.
            Otherwise the trainer state is first restored from the saved npz
            file, and written back to that file after the run.
    """
    Dt1_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_(Gallery&Probe)_2nd"
    train1 = load_GEI(path_dir=Dt1_train_dir, mode=True)

    Dt2_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt2_(Gallery&Probe)"
    train2 = load_GEI(path_dir=Dt2_train_dir, mode=True)

    Dt3_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt3_(Gallery&Probe)"
    train3 = load_GEI(path_dir=Dt3_train_dir, mode=True)

    # Single location used for both the trainer state and the final model.
    model_path = "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model"

    model = Multi_modal_GEINet()

    model.to_gpu()

    # train_iter = iterators.MultiprocessIterator(train, batch_size=239)
    Dt1_train_iter = iterators.SerialIterator(train1,
                                              batch_size=239,
                                              shuffle=False)
    Dt2_train_iter = iterators.SerialIterator(train2,
                                              batch_size=239,
                                              shuffle=False)
    Dt3_train_iter = iterators.SerialIterator(train3,
                                              batch_size=239,
                                              shuffle=False)

    # optimizer = chainer.optimizers.SGD(lr=0.02)
    optimizer = chainer.optimizers.MomentumSGD(lr=0.02, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.01))

    # updater = training.ParallelUpdater(train_iter, optimizer, devices={'main': 0, 'second': 1})
    updater = Multi_modal_Updater(model,
                                  Dt1_train_iter,
                                  Dt2_train_iter,
                                  Dt3_train_iter,
                                  optimizer,
                                  device=0)
    epoch = 6250

    trainer = training.Trainer(
        updater, (epoch, 'epoch'),
        out='/home/wutong/Setoguchi/chainer_files/result')

    # trainer.extend(extensions.Evaluator(test_iter, model, device=0))
    trainer.extend(extensions.ExponentialShift(attr='lr', rate=0.56234),
                   trigger=(1250, 'epoch'))
    trainer.extend(
        extensions.LogReport(log_name='SFDEI_log', trigger=(20, "epoch")))
    # The filename template is formatted with the trainer, whose attribute is
    # `updater`; the former `{.update.epoch}` raised AttributeError the first
    # time this snapshot fired.
    trainer.extend((extensions.snapshot_object(
        model, filename='model_shapshot_{.updater.epoch}')),
                   trigger=(1250, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(1250, 'epoch'))
    trainer.extend(extensions.PrintReport(['epoch', 'accuracy', 'loss']))
    # 'validation/main/accuracy']),
    # trigger=(1, "epoch"))
    trainer.extend(
        extensions.dump_graph(root_name="loss", out_name="multi_modal_3.dot"))
    trainer.extend(extensions.PlotReport(["loss"]), trigger=(50, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    if mode == True:
        # Run the trainer
        trainer.run()
    else:
        # Restore the trainer state, continue training, then persist it.
        serializers.load_npz(model_path, trainer)
        trainer.run()
        serializers.save_npz(model_path, trainer)

    # NOTE(review): this writes the model to the same path the trainer state
    # was just saved to, so the trainer state appears to be overwritten;
    # confirm whether a separate file was intended.
    serializers.save_npz(model_path, model)
Esempio n. 20
0
def main():
    """Parse command-line options and run GAN training with a Chainer trainer.

    The training images come from ``--image-npz`` when it is given, otherwise
    from ``--data-dir``.  A ``C128Generator`` and an ``SND128Discriminator``
    are built, each gets its own optimizer from ``make_optimizer``, and both
    are handed to ``dcgan.updater.Updater``.  Per-model snapshots, log/print
    reports, sample generation and full trainer snapshots are registered as
    extensions before the trainer runs for ``--max_iter`` iterations.

    Exits through ``parser.error`` (status 2) when neither ``--image-npz`` nor
    ``--data-dir`` is supplied, instead of failing later on an unbound
    ``train_dataset``.
    """
    parser = argparse.ArgumentParser(description='Train script')
    parser.add_argument('--batchsize', type=int, default=64)
    parser.add_argument('--max_iter', type=int, default=100000)
    parser.add_argument('--gpu', '-g', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--snapshot_interval', type=int, default=10000, help='Interval of snapshot')
    parser.add_argument('--evaluation_interval', type=int, default=10000, help='Interval of evaluation')
    parser.add_argument('--display_interval', type=int, default=100, help='Interval of displaying log to console')
    # NOTE: --n_dis, --gamma, --lam and --output_dim are parsed but not read
    # anywhere in this function.
    parser.add_argument('--n_dis', type=int, default=1, help='number of discriminator update per generator update') # 5
    parser.add_argument('--gamma', type=float, default=0.5, help='hyperparameter gamma')
    parser.add_argument('--lam', type=float, default=10, help='gradient penalty')
    parser.add_argument('--adam_alpha', type=float, default=0.0002, help='alpha in Adam optimizer')
    parser.add_argument('--adam_beta1', type=float, default=0.5, help='beta1 in Adam optimizer') # 0.0
    parser.add_argument('--adam_beta2', type=float, default=0.9, help='beta2 in Adam optimizer') # 0.9
    parser.add_argument('--output_dim', type=int, default=256, help='output dimension of the discriminator (for cramer GAN)')
    parser.add_argument('--data-dir', type=str, default="")
    parser.add_argument('--image-npz', type=str, default="")
    parser.add_argument('--n-hidden', type=int, default=128)
    parser.add_argument('--resume', type=str, default="")
    parser.add_argument('--ch', type=int, default=512)
    parser.add_argument('--snapshot-iter', type=int, default=0)

    args = parser.parse_args()
    record_setting(args.out)
    report_keys = ["loss_dis", "loss_gen"]

    # Set up dataset; --image-npz takes precedence over --data-dir.
    if args.image_npz != '':
        from c128dcgan.dataset import NPZColorDataset
        train_dataset = NPZColorDataset(npz=args.image_npz)
    elif args.data_dir != '':
        from c128dcgan.dataset import Color128x128Dataset
        train_dataset = Color128x128Dataset(args.data_dir)
    else:
        # Without this branch train_dataset would be unbound below.
        parser.error('either --image-npz or --data-dir must be given')
    train_iter = chainer.iterators.SerialIterator(train_dataset, args.batchsize)

    # Setup algorithm specific networks and updaters
    opts = {}
    updater_args = {
        "iterator": {'main': train_iter},
        "device": args.gpu
    }

    # fixed algorithm
    generator = common.net.C128Generator(ch=args.ch, n_hidden=args.n_hidden)
    discriminator = common.net.SND128Discriminator(ch=args.ch)
    models = [generator, discriminator]
    from dcgan.updater import Updater

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        print("use gpu {}".format(args.gpu))
        for m in models:
            m.to_gpu()

    # Set up optimizers (one per network, same Adam hyperparameters)
    opts["opt_gen"] = make_optimizer(generator, args.adam_alpha, args.adam_beta1, args.adam_beta2)
    opts["opt_dis"] = make_optimizer(discriminator, args.adam_alpha, args.adam_beta1, args.adam_beta2)

    updater_args["optimizer"] = opts
    updater_args["models"] = models

    # Set up updater and trainer
    updater = Updater(**updater_args)
    trainer = training.Trainer(updater, (args.max_iter, 'iteration'), out=args.out)

    # Set up logging
    for m in models:
        # One weights-only snapshot file per model class and iteration.
        trainer.extend(extensions.snapshot_object(
            m, m.__class__.__name__ + '_{.updater.iteration}.npz'), trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.LogReport(keys=report_keys,
                                        trigger=(args.display_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(report_keys), trigger=(args.display_interval, 'iteration'))
    trainer.extend(sample_generate(generator, args.out), trigger=(args.evaluation_interval, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    # The "light" sampler runs ten times as often as the full one.
    trainer.extend(sample_generate_light(generator, args.out), trigger=(args.evaluation_interval // 10, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # --snapshot-iter 0 means "snapshot the trainer every 1% of max_iter".
    if args.snapshot_iter == 0:
        snap_iter = args.max_iter // 100
    else:
        snap_iter = args.snapshot_iter
    trainer.extend(extensions.snapshot(), trigger=(snap_iter, 'iteration'))

    # resume
    if args.resume != "":
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Esempio n. 21
0
if __name__=='__main__':
	# Script entry point: train a Classifier-wrapped MyModel on the "mouth"
	# dataset on CPU, caching the train/test split in ./dataset.pickle.
	model = L.Classifier(MyModel())
	if os.path.isfile('./dataset.pickle'):
		print("dataset.pickle is exist. loading...")
		# NOTE(review): pickle.load can execute arbitrary code from the file;
		# only use a dataset.pickle that this script wrote itself.
		with open('./dataset.pickle', mode='rb') as f:
			train, test = pickle.load(f)
			print("Loaded")
	else:
		# No cache yet: build the split and store it for the next run.
		datasets = dataset.Dataset("mouth")
		train, test = datasets.get_dataset()
		with open('./dataset.pickle', mode='wb') as f:
			pickle.dump((train, test), f)
			print("saving train and test...")
	optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9)
	optimizer.setup(model)
	train_iter = iterators.SerialIterator(train, 64)
	test_iter = iterators.SerialIterator(test, 64, repeat=False, shuffle=True)

	updater = training.StandardUpdater(train_iter, optimizer, device=-1)
	# Use the class's own name for the output directory.  The previous
	# MyModel.__class__.__name__ is the name of the metaclass ('type'), so
	# every model ended up writing to 'type_model_result'.
	trainer = training.Trainer(updater, (800, 'epoch'), out='{}_model_result'.format(MyModel.__name__))
	trainer.extend(extensions.dump_graph("main/loss"))
	trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
	trainer.extend(extensions.LogReport())
	trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
	trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
	trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
	trainer.extend(extensions.ProgressBar())
	trainer.run()
	print("Learn END")

Esempio n. 22
0
def train(args):
    '''Train a Tacotron2 model, with a Chainer trainer driving PyTorch code.

    The function reads feature dimensions from ``args.valid_json``, writes
    ``model.json`` into ``args.outdir``, builds ``Tacotron2`` wrapped in
    ``Tacotron2Loss``, sets up an Adam optimizer, creates Chainer iterators
    over pre-built minibatches and runs a ``training.Trainer`` with
    evaluation, snapshot, plotting and reporting extensions.

    :param Namespace args: The program arguments. This function reads, among
        others, ``seed``, ``debugmode``, ``train_json``, ``valid_json``,
        ``outdir``, ``ngpu``, ``batch_size``, ``epochs``, ``resume`` and
        ``num_save_attention``. It also writes back to ``args``:
        ``spc_dim`` (when ``use_cbhg``), ``spk_embed_dim`` (always) and
        ``batch_size`` (multiplied by ``ngpu`` when ``ngpu > 1``).
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())

    # reverse input and output dimension: the model's input size is read from
    # the json 'output' entry and its output size from the 'input' entry
    # NOTE(review): presumably the json uses the opposite (recognition-style)
    # orientation -- confirm against the data preparation scripts
    idim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    # the second 'input' entry is interpreted differently by the two options
    # below (shape[1] for CBHG, shape[0] for the speaker embedding)
    if args.use_cbhg:
        args.spc_dim = int(valid_json[utts[0]]['input'][1]['shape'][1])
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]['input'][1]['shape'][0])
    else:
        args.spk_embed_dim = None
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # write model config: (idim, odim, vars(args)) as UTF-8 encoded JSON
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    tacotron2 = Tacotron2(idim, odim, args)
    logging.info(tacotron2)

    # check the use of multi-gpu
    if args.ngpu > 1:
        tacotron2 = torch.nn.DataParallel(tacotron2,
                                          device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        # args is mutated here; the larger value is what make_batchset sees
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    tacotron2 = tacotron2.to(device)

    # define loss
    model = Tacotron2Loss(tacotron2, args.use_masking, args.bce_pos_weight)
    reporter = model.reporter

    # Setup an optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 eps=args.eps,
                                 weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    # attach 'target' and 'serialize' attributes to the torch optimizer
    # NOTE(review): presumably so Chainer's reporting/snapshot machinery can
    # treat it like a Chainer optimizer -- confirm against CustomUpdater
    setattr(optimizer, 'target', reporter)
    setattr(optimizer, 'serialize', lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(True, args.use_speaker_embedding,
                                args.use_cbhg)

    # read json data
    # (valid_json is loaded a second time from the same file as above)
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train_batchset = make_batchset(train_json, args.batch_size, args.maxlen_in,
                                   args.maxlen_out, args.minibatches,
                                   args.batch_sort_key)
    valid_batchset = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                                   args.maxlen_out, args.minibatches,
                                   args.batch_sort_key)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train_batchset, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid_batchset, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # Save best models
    # (the bare tacotron2 module is saved, not the loss wrapper, whenever
    # 'validation/main/loss' reaches a new minimum)
    trainer.extend(
        extensions.snapshot_object(tacotron2,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        # take the first num_save_attention validation utterances, then order
        # them by descending input[0] shape[1]
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        # under DataParallel the real model sits behind the .module attribute
        if hasattr(tacotron2, "module"):
            att_vis_fn = tacotron2.module.calculate_all_attentions
        else:
            att_vis_fn = tacotron2.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + '/att_ws',
                                           converter=CustomConverter(
                                               False,
                                               args.use_speaker_embedding),
                                           device=device,
                                           reverse=True),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    # (plot_keys doubles as the base of the PrintReport columns below)
    plot_keys = [
        'main/loss', 'validation/main/loss', 'main/l1_loss',
        'validation/main/l1_loss', 'main/mse_loss', 'validation/main/mse_loss',
        'main/bce_loss', 'validation/main/bce_loss'
    ]
    trainer.extend(
        extensions.PlotReport(['main/l1_loss', 'validation/main/l1_loss'],
                              'epoch',
                              file_name='l1_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                              'epoch',
                              file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/bce_loss', 'validation/main/bce_loss'],
                              'epoch',
                              file_name='bce_loss.png'))
    if args.use_cbhg:
        plot_keys += [
            'main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss',
            'main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'
        ]
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss'],
                'epoch',
                file_name='cbhg_l1_loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'],
                'epoch',
                file_name='cbhg_mse_loss.png'))
    # combined plot of every key collected above
    trainer.extend(
        extensions.PlotReport(plot_keys, 'epoch', file_name='loss.png'))

    # Write a log of evaluation statistics every REPORT_INTERVAL iterations
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    # copy plot_keys and prepend the bookkeeping columns to the copy
    report_keys = plot_keys[:]
    report_keys[0:0] = ['epoch', 'iteration', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Esempio n. 23
0
def main():
    """Train a convnet on ILSVRC2012 image-label lists from the command line.

    Parses the options, builds the selected architecture (optionally
    initialised from ``--initmodel`` and moved to the GPU), wraps the
    training and validation lists in ``PreprocessedDataset``, and runs a
    Chainer trainer with MomentumSGD.  Evaluation and snapshots share one
    iteration trigger; logging and printing share another.  ``--test``
    shortens both triggers to 500 iterations.
    """
    # Only NIN is enabled.  The alex, alex_fp16, googlenet, googlenetbn,
    # googlenetbn_fp16 and resnet50 entries are disabled in this copy.
    archs = {'nin': nin.NIN}

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    # (flags, keyword arguments) for every option, in registration order.
    option_specs = [
        (('train',), {'help': 'Path to training image-label list file'}),
        (('val',), {'help': 'Path to validation image-label list file'}),
        (('--arch', '-a'), {'choices': archs.keys(), 'default': 'nin',
                            'help': 'Convnet architecture'}),
        (('--batchsize', '-B'), {'type': int, 'default': 32,
                                 'help': 'Learning minibatch size'}),
        (('--epoch', '-E'), {'type': int, 'default': 10,
                             'help': 'Number of epochs to train'}),
        (('--gpu', '-g'), {'type': int, 'default': -1,
                           'help': 'GPU ID (negative value indicates CPU'}),
        (('--initmodel',), {'help': 'Initialize the model from given file'}),
        (('--loaderjob', '-j'),
         {'type': int,
          'help': 'Number of parallel data loading processes'}),
        (('--mean', '-m'), {'default': 'mean.npy',
                            'help': 'Mean file (computed by compute_mean.py)'}),
        (('--resume', '-r'), {'default': '',
                              'help': 'Initialize the trainer from given file'}),
        (('--out', '-o'), {'default': 'result', 'help': 'Output directory'}),
        (('--root', '-R'), {'default': '.',
                            'help': 'Root directory path of image files'}),
        (('--val_batchsize', '-b'), {'type': int, 'default': 250,
                                     'help': 'Validation minibatch size'}),
        (('--test',), {'action': 'store_true'}),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Build the network, optionally from saved weights, and place it.
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        # Make the GPU current before moving the parameters onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Datasets share the mean image and the model's input size; the trailing
    # False is forwarded as-is to the validation dataset.
    mean_image = np.load(args.mean)
    train_set = PreprocessedDataset(
        args.train, args.root, mean_image, model.insize)
    val_set = PreprocessedDataset(
        args.val, args.root, mean_image, model.insize, False)

    # Multiprocess iterators load images in worker processes while the
    # training/validation loop runs.
    train_iter = chainer.iterators.MultiprocessIterator(
        train_set, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val_set, args.val_batchsize, repeat=False,
        n_processes=args.loaderjob)

    # Optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    if args.test:
        eval_trigger = (500, 'iteration')
        report_trigger = (500, 'iteration')
    else:
        eval_trigger = (100000, 'iteration')
        report_trigger = (1000, 'iteration')

    evaluator = extensions.Evaluator(val_iter, model, device=args.gpu)
    trainer.extend(evaluator, trigger=eval_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=eval_trigger)
    model_snapshot = extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}')
    trainer.extend(model_snapshot, trigger=eval_trigger)
    # The interval goes to LogReport itself: it decides when a log entry is
    # emitted, not when observations are collected.
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(extensions.observe_lr(), trigger=report_trigger)
    printed_columns = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(printed_columns),
                   trigger=report_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Esempio n. 24
0
    test_interval = 1, 'epoch'
    snapshot_interval = 10, 'epoch'
    log_interval = 100, 'iteration'

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]),
                   trigger=test_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    log_list = [
        'epoch', 'iteration', 'main/loss', 'main/accuracy',
        'validation/main/loss', 'validation/main/accuracy', 'lr',
        'elapsed_time'
    ]

    trainer.extend(extensions.PrintReport(log_list), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Esempio n. 25
0
def main():
    """Train an RNN language model on ``lyric_indexes.json`` and report perplexity.

    The JSON file must provide ``train``, ``val`` and ``test`` index
    sequences plus ``num_vocab``.  Training uses truncated BPTT
    (``BPTTUpdater``) with SGD and gradient clipping; validation runs on a
    copy of the model that shares parameters but keeps its own recurrent
    state.  After training, the test perplexity is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Load the pre-indexed lyric dataset.  The context manager closes the
    # file handle, which the previous json.load(open(...)) form left open.
    with open("lyric_indexes.json") as f:
        data = json.load(f)
    train = np.array(data['train'], dtype=np.int32)
    val = np.array(data['val'], dtype=np.int32)
    test = np.array(data['test'], dtype=np.int32)
    n_vocab = data['num_vocab']  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        # Tiny slices for a quick smoke test.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 10 if args.test else 500
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']
    ), trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    # Perplexity is exp of the reported mean loss.
    print('test perplexity:', np.exp(float(result['main/loss'])))
Esempio n. 26
0
def main():
    """Train Faster R-CNN (VGG16) on the VOC2007 trainval split.

    Builds a ``FasterRCNNVGG16`` initialised from the 'imagenet' pretrained
    weights, wraps it in ``FasterRCNNTrainChain`` and trains it with
    MomentumSGD plus weight decay for ``--iteration`` iterations.  The
    learning rate is multiplied by 0.1 every ``--step_size`` iterations, and
    the detector is evaluated on the VOC2007 test split at ``--step_size``
    and at ``--iteration``.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Datasets
    raw_train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(
        split='test', year='2007',
        use_difficult=True, return_difficult=True)

    # Detector and its training wrapper
    n_fg_class = len(voc_detection_label_names)
    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=n_fg_class, pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        # Prepare one training sample: resize the image through the
        # detector's own preprocessing, rescale the boxes to the new size,
        # then randomly flip image and boxes horizontally together.
        image, boxes, labels = in_data
        _, src_h, src_w = image.shape
        image = faster_rcnn.prepare(image)
        _, dst_h, dst_w = image.shape
        resize_ratio = dst_h / src_h
        boxes = transforms.resize_bbox(boxes, (src_w, src_h), (dst_w, dst_h))

        image, flip_params = transforms.random_flip(
            image, x_random=True, return_param=True)
        boxes = transforms.flip_bbox(
            boxes, (dst_w, dst_h), flip_params['x_flip'])

        return image, boxes, labels, resize_ratio

    train_data = TransformDataset(raw_train_data, transform)

    # Iterators: one sample per batch for both training and evaluation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Save only the detector (not the train chain) once, at the last
    # iteration.
    final_snapshot = extensions.snapshot_object(
        model.faster_rcnn, 'snapshot_model.npz')
    trainer.extend(final_snapshot, trigger=(args.iteration, 'iteration'))

    lr_decay = extensions.ExponentialShift('lr', 0.1)
    trainer.extend(lr_decay, trigger=(args.step_size, 'iteration'))

    every_20_for_log = (20, 'iteration')
    every_3000_for_plot = (3000, 'iteration')
    every_20_for_print = (20, 'iteration')

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=every_20_for_log)
    trainer.extend(extensions.LogReport(trigger=every_20_for_log))

    printed_columns = [
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]
    trainer.extend(extensions.PrintReport(printed_columns),
                   trigger=every_20_for_print)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Plotting is registered only when PlotReport reports itself available.
    if extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss'], file_name='loss.png',
            trigger=every_3000_for_plot)
        trainer.extend(loss_plot, trigger=every_3000_for_plot)

    # Evaluate at the learning-rate step and at the end of training.
    voc_evaluator = DetectionVOCEvaluator(
        test_iter, model.faster_rcnn, use_07_metric=True)
    evaluation_schedule = ManualScheduleTrigger(
        (args.step_size, args.iteration), 'iteration')
    trainer.extend(voc_evaluator,
                   trigger=evaluation_schedule,
                   invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Esempio n. 27
0
def train(args):
    """Train an RNN language model with the given args

    The function reads the training and validation token files, builds an
    ``RNNLM`` wrapped in ``ClassifierWithState``, writes the arguments to
    ``<outdir>/model.json`` and runs a chainer ``Trainer`` for ``args.epoch``
    epochs.  When ``args.test_label`` is set, the model stored as
    ``<outdir>/rnnlm.model.best`` is reloaded afterwards and its perplexity
    on the test data is logged.

    :param Namespace args: The program arguments
    :raises ValueError: If ``args.opt`` is neither ``'sgd'`` nor ``'adam'``
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    set_deterministic_chainer(args)

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get special label ids
    unk = args.char_list_dict['<unk>']
    eos = args.char_list_dict['<eos>']
    # read tokens as a sequence of sentences
    train = read_tokens(args.train_label, args.char_list_dict)
    val = read_tokens(args.valid_label, args.char_list_dict)
    # count tokens
    n_train_tokens, n_train_oovs = count_tokens(train, unk)
    n_val_tokens, n_val_oovs = count_tokens(val, unk)
    logging.info('#vocab = ' + str(args.n_vocab))
    logging.info('#sentences in the training data = ' + str(len(train)))
    logging.info('#tokens in the training data = ' + str(n_train_tokens))
    logging.info('oov rate in the training data = %.2f %%' %
                 (n_train_oovs / n_train_tokens * 100))
    logging.info('#sentences in the validation data = ' + str(len(val)))
    logging.info('#tokens in the validation data = ' + str(n_val_tokens))
    logging.info('oov rate in the validation data = %.2f %%' %
                 (n_val_oovs / n_val_tokens * 100))

    # sortagrad is active for -1 or any positive value; while it is active
    # the training iterator is created with shuffling turned off
    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0

    # Create the dataset iterators
    # (the <eos> id is passed as both the sos and the eos symbol)
    train_iter = ParallelSentenceIterator(train,
                                          args.batchsize,
                                          max_length=args.maxlen,
                                          sos=eos,
                                          eos=eos,
                                          shuffle=not use_sortagrad)
    val_iter = ParallelSentenceIterator(val,
                                        args.batchsize,
                                        max_length=args.maxlen,
                                        sos=eos,
                                        eos=eos,
                                        repeat=False)
    logging.info('#iterations per epoch = ' +
                 str(len(train_iter.batch_indices)))
    logging.info('#total iterations = ' +
                 str(args.epoch * len(train_iter.batch_indices)))
    # Prepare an RNNLM model
    rnn = RNNLM(args.n_vocab, args.layer, args.unit, args.type)
    model = ClassifierWithState(rnn)
    if args.ngpu > 1:
        logging.warning(
            "currently, multi-gpu is not supported. use single gpu.")
    if args.ngpu > 0:
        # Make the specified GPU current (always device 0)
        gpu_id = 0
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    else:
        # -1 selects the CPU
        gpu_id = -1

    # Save model conf to json (encoded by hand, hence the binary mode)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps(vars(args),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode('utf_8'))

    # Set up an optimizer
    if args.opt == 'sgd':
        optimizer = chainer.optimizers.SGD(lr=1.0)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # `optimizer.setup` call below
        raise ValueError(
            "unsupported optimizer '{}': expected 'sgd' or 'adam'".format(
                args.opt))

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = BPTTUpdater(train_iter, optimizer, gpu_id)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)
    trainer.extend(LMEvaluator(val_iter, model, device=gpu_id))
    trainer.extend(
        extensions.LogReport(postprocess=compute_perplexity,
                             trigger=(args.report_interval_iters,
                                      'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity',
         'elapsed_time']),
                   trigger=(args.report_interval_iters, 'iteration'))
    trainer.extend(
        extensions.ProgressBar(update_interval=args.report_interval_iters))
    # Snapshot file names carry the current epoch number
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'))
    trainer.extend(
        extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}'))
    # MEMO(Hori): wants to use MinValueTrigger, but it seems to fail in resuming
    trainer.extend(
        MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model'))

    if use_sortagrad:
        # ShufflingEnabler fires after `args.sortagrad` epochs, or after
        # `args.epoch` epochs when sortagrad is -1
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1 else args.epoch,
                     'epoch'))

    if args.resume:
        logging.info('resumed from %s' % args.resume)
        chainer.serializers.load_npz(args.resume, trainer)

    set_early_stop(trainer, args, is_lm=True)
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer),
                       trigger=(args.report_interval_iters, 'iteration'))

    trainer.run()
    check_early_stop(trainer, args.epoch)

    # compute perplexity for test set
    if args.test_label:
        logging.info('test the best model')
        chainer.serializers.load_npz(args.outdir + '/rnnlm.model.best', model)
        test = read_tokens(args.test_label, args.char_list_dict)
        n_test_tokens, n_test_oovs = count_tokens(test, unk)
        logging.info('#sentences in the test data = ' + str(len(test)))
        logging.info('#tokens in the test data = ' + str(n_test_tokens))
        logging.info('oov rate in the test data = %.2f %%' %
                     (n_test_oovs / n_test_tokens * 100))
        test_iter = ParallelSentenceIterator(test,
                                             args.batchsize,
                                             max_length=args.maxlen,
                                             sos=eos,
                                             eos=eos,
                                             repeat=False)
        evaluator = LMEvaluator(test_iter, model, device=gpu_id)
        # Evaluate with chainer's train flag switched off
        with chainer.using_config('train', False):
            result = evaluator()
        # Perplexity is the exponential of the reported loss
        logging.info('test perplexity: ' +
                     str(np.exp(float(result['main/loss']))))
Esempio n. 28
0
# Move the generator to GPU 0 before the optimizers are attached.
G.to_gpu(0)

# The discriminator and the generator each get their own Adam optimizer,
# both configured with alpha = 2e-5.
d_optimizer = chainer.optimizers.Adam(alpha=2e-5)
g_optimizer = chainer.optimizers.Adam(alpha=2e-5)
d_optimizer.setup(D)
g_optimizer.setup(G)

updater = GANUpdater(
    train_iter, D, G, d_optimizer, g_optimizer, latent_size,
    device=device_id)
trainer = training.Trainer(
    updater, stop_trigger=(num_epochs, 'epoch'), out='mnist_result')

# Log the per-epoch losses and print them.
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'd_loss', 'g_loss', 'elapsed_time']))
trainer.run()

# Draw 10 random latent vectors, run them through the trained generator on
# GPU 0 and bring the result back to the CPU for plotting.
# NOTE(review): the latent width is hard-coded to 64 here; presumably it
# should equal `latent_size` - confirm.
z = Variable(np.random.randn(10, 64).astype(np.float32))
z.to_gpu(0)
fake_images = G(z)
fake_images.to_cpu()

# Display every generated sample as a 28x28 image, one window at a time.
for i in range(10):
    plt.imshow(fake_images.data[i].reshape(28, 28))
    plt.show()
def main():
    """Train a binary LeNet classifier on imbalanced MNIST data.

    Parses the command-line options, builds the iterators, the classifier,
    the optimizer and the updater selected by ``--updater-type``, attaches
    the evaluation/reporting extensions chosen by ``--eval-mode``, runs the
    trainer and finally pickles the classifier into the output directory.

    :raises ValueError: If ``--updater-type`` is not one of ``standard``,
        ``proposed`` or ``LRE``, or if ``--eval-mode`` is not 0 or 1.
    """
    parser = argparse.ArgumentParser(
        description='Imbalanced MNIST classification')
    parser.add_argument('--eval-mode',
                        type=int,
                        default=1,
                        help='Evaluation mode.'
                        '0: only binary_accuracy is calculated.'
                        '1: binary_accuracy and ROC-AUC score is calculated')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='batch size')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code in CPU.')
    parser.add_argument('--out',
                        '-o',
                        type=str,
                        default='result',
                        help='path to output directory')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10,
                        help='number of epochs')
    parser.add_argument('--resume',
                        '-r',
                        type=str,
                        default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol',
                        type=int,
                        default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename',
                        type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--updater-type', type=str, default='standard')
    parser.add_argument('--sampling-size', type=int, default=32)
    parser.add_argument('--optimizer-type', type=str, default='Adam')
    # NOTE(review): alpha is parsed as a string and forwarded unchanged to
    # LRE below; confirm that LRE converts it to a number itself.
    parser.add_argument('--alpha', type=str, default='0.001')

    args = parser.parse_args()
    # Dataset preparation
    train, train_val, val = get_binary_imbalanced_data()

    train_iter = iterators.SerialIterator(train, args.batchsize)
    val_iter = iterators.SerialIterator(val,
                                        args.batchsize,
                                        repeat=False,
                                        shuffle=False)

    model = LeNet(n_class=1, binary=True)
    classifier = Classifier(model,
                            lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy,
                            device=args.gpu)

    # Any value other than 'Adam' falls back to plain SGD
    if args.optimizer_type == 'Adam':
        optimizer = optimizers.Adam()
    else:
        optimizer = optimizers.SGD(lr=1e-3)
    optimizer.setup(classifier)

    updater_type = args.updater_type
    if updater_type == 'standard':
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu)
    elif updater_type == 'proposed':
        updater = Proposed(train_iter,
                           optimizer,
                           device=args.gpu,
                           sampling_size=args.sampling_size)
    elif updater_type == 'LRE':
        # The 'val' iterator serves the whole train_val set as one batch
        train_val_iter = iterators.SerialIterator(train_val, len(train_val))
        updater = LRE({
            'main': train_iter,
            'val': train_val_iter
        },
                      optimizer,
                      device=args.gpu,
                      alpha=args.alpha)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # when the trainer is built below
        raise ValueError('Invalid updater_type {}'.format(updater_type))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu))
    trainer.extend(E.LogReport())

    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
                'validation/main/accuracy', 'elapsed_time'
            ]))
    elif eval_mode == 1:
        # A second, non-repeating pass over the training data for ROC-AUC
        train_eval_iter = iterators.SerialIterator(train,
                                                   args.batchsize,
                                                   repeat=False,
                                                   shuffle=False)
        trainer.extend(
            ROCAUCEvaluator(train_eval_iter,
                            classifier,
                            eval_func=model,
                            device=args.gpu,
                            name='train',
                            pos_labels=1,
                            ignore_labels=-1,
                            raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # instead extension name `val` is used.
        trainer.extend(
            ROCAUCEvaluator(val_iter,
                            classifier,
                            eval_func=model,
                            device=args.gpu,
                            name='val',
                            pos_labels=1,
                            ignore_labels=-1))
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc',
                'validation/main/loss', 'validation/main/accuracy',
                'val/main/roc_auc', 'elapsed_time'
            ]))
    else:
        raise ValueError('Invalid eval_mode {}'.format(eval_mode))
    trainer.extend(E.ProgressBar(update_interval=10))
    # -1 means a single snapshot at the last epoch; otherwise every
    # `frequency` epochs (at least 1)
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
Esempio n. 30
0
def main():
    """Train an iMaterialist image classifier with a chainer Trainer.

    Parses the command-line options, instantiates the model class selected
    by ``--model`` from ``mymodel``, splits the photo ids 80/20 into
    training and validation subsets, registers the reporting extensions and
    runs the trainer.  When both ``--stream`` and ``--parallel`` are given,
    training runs in a worker thread while the images are downloaded
    concurrently.

    :raises KeyError: If ``--model`` is not one of the known model ids (0-8).
    """
    # Set up the command-line parameters
    parser = argparse.ArgumentParser(description='iMaterialist_Challenge:')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='1バッチあたり何枚か')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10,
                        help='何epochやるか')
    parser.add_argument('--out', '-o', default='result', help='結果を出力するディレクトリ')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='指定したsnapshopから継続して学習します')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=1,
                        help='指定したepotchごとに重みを保存します')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='使うGPUの番号')
    parser.add_argument('--size',
                        '-s',
                        type=int,
                        default=256,
                        help='正規化する時の一辺のpx')
    parser.add_argument('--label_variety',
                        type=int,
                        default=228,
                        help='確認できたlabelの総数 この中で判断する')
    parser.add_argument('--total_photo_num',
                        '-n',
                        type=int,
                        default=-1,
                        help='使用する写真データの数')  # (9815, 39269)
    parser.add_argument('--object',
                        type=str,
                        default='train',
                        help='train or test のどちらか選んだ方のデータを使用する')
    parser.add_argument('--cleanup',
                        '-c',
                        dest='cleanup',
                        action='store_false',
                        help='付与すると 邪魔な画像を取り除き trashディレクトリに移動させる機能を停止させます')
    parser.add_argument('--interval',
                        '-i',
                        type=int,
                        default=10,
                        help='何iteraionごとに画面に出力するか')
    parser.add_argument('--model', '-m', type=int, default=0, help='使うモデルの種類')
    parser.add_argument('--lossfunc',
                        '-l',
                        type=int,
                        default=0,
                        help='使うlossの種類')
    parser.add_argument('--stream',
                        '-d',
                        dest='stream',
                        action='store_true',
                        help='画像のダウンロードを同時に行う')
    # The parsed value of --parallel is stored as `args.douji`, not
    # `args.parallel`
    parser.add_argument('--parallel',
                        '-p',
                        dest='douji',
                        action='store_true',
                        help='画像ダウンロードを並列処理するか')
    args = parser.parse_args()

    # args.model = -1
    # args.batchsize = 8
    # NOTE: this unconditionally overrides whatever --size was given on the
    # command line
    args.size = 224
    # args.interval = 1
    # args.cleanup = False
    # args.lossfunc = 3
    # # args.stream = True
    # # args.total_photo_num = 200
    # args.resume = 'serverresult/snapshot_iter_74'

    # Models with the "lite" suffix are designed on the assumption that size
    # is half of the default.
    # RES_SPP_net was abandoned because variable-size input turned out to be
    # hard to implement in chainer.
    model_name = {
        0: 'ResNet',
        1: 'ResNet_lite',
        2: 'Bottle_neck_RES_net',
        3: 'Bottle_neck_RES_net_lite',
        4: 'Mymodel',
        5: 'RES_SPP_net',
        6: 'VGGTrans',
        7: 'RESNetTrans',
        8: 'Lite'
    }[args.model]

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# model: {}'.format(model_name))
    print('# size: {}'.format(args.size))
    print('')

    # Define the model: look the class up by name in `mymodel`
    model = getattr(mymodel, model_name)(args.label_variety, args.lossfunc)

    # Run on the GPU when one is requested
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up the optimizer
    optimizer = chainer.optimizers.Adam()
    # optimizer = chainer.optimizers.MomentumSGD(0.1, 0.9)  # https://arxiv.org/pdf/1605.07146.pdf
    # chainer.optimizer.WeightDecay(0.0005)

    optimizer.setup(model)

    # Set up the datasets
    photo_nums = photos(args)
    train, val = chainer.datasets.split_dataset_random(
        photo_nums, int(len(photo_nums) * 0.8), seed=0)  # hold out 20% for validation
    # The last flag is switched off only for model 5 (RES_SPP_net)
    trans = Transform(args, photo_nums, True, args.model != 5)
    train = chainer.datasets.TransformDataset(train, trans)
    val = chainer.datasets.TransformDataset(val, trans)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)

    # How long to train
    stop_trigger = (args.epoch, 'epoch')
    # if args.early_stopping:  # pointless when the optimizer is Adam
    #     stop_trigger = training.triggers.EarlyStoppingTrigger(
    #         monitor=args.early_stopping, verbose=True,
    #         max_trigger=(args.epoch, 'epoch'))

    # Set up the updater and the trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu,
                                                loss_func=model.loss_func)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluation on the held-out data, once per epoch
    evaluator = MyEvaluator(val_iter,
                            model,
                            device=args.gpu,
                            eval_func=model.loss_func)
    evaluator.trigger = 1, 'epoch'
    trainer.extend(evaluator)

    # Models 6 and 7 (VGGTrans / RESNetTrans) keep their base frozen
    if args.model in (6, 7):
        model.base.disable_update()

    # Dump the computational graph of the model as a dot file
    trainer.extend(extensions.dump_graph('main/loss'))

    # Save snapshots (the training state) every `frequency` epochs
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # How often the training metrics are logged
    logreport = extensions.LogReport(trigger=(args.interval, 'iteration'))
    trainer.extend(logreport)

    # Save plots of the metrics, one image file per metric pair
    if extensions.PlotReport.available():
        plots = (
            (['main/loss', 'val/loss'], 'loss.png'),
            (['main/acc', 'val/acc'], 'accuracy.png'),
            (['main/freq_err', 'val/freq_err'], 'frequent_error.png'),
            (['main/acc2', 'val/acc2'], 'accuracy2.png'),
            (['main/f1', 'val/f1'], 'f1.png'),
        )
        for y_keys, file_name in plots:
            trainer.extend(
                extensions.PlotReport(y_keys,
                                      'iteration',
                                      trigger=(5, 'iteration'),
                                      file_name=file_name))

    # Columns shown in the printed report
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'val/loss', 'main/acc',
            'main/acc2', 'val/acc2', 'main/precision', 'main/recall',
            'main/f1', 'val/f1', 'main/labelnum', 'main/fpk', 'elapsed_time'
        ]))
    # ['epoch', 'iteration', 'main/loss', 'val/loss','main/acc', 'val/acc', 'main/acc2', 'val/acc2',
    #  'main/precision', 'main/recall', 'main/f1', 'val/f1', 'main/labelnum', 'main/tpk', 'elapsed_time']))

    # Progress bar
    trainer.extend(extensions.ProgressBar(update_interval=args.interval))

    # trainer.extend(MyShift("lr", 1 / 5, logreport, 0.1))

    # Load previously trained weights
    if args.resume:
        chainer.serializers.load_npz(
            args.resume, model,
            path='updater/model:main/')  # for some reason loading fails without `path`; cause unknown

    # Run the training
    # `--parallel` is stored under dest='douji', so that is the attribute
    # to read here (`args.parallel` does not exist).
    if args.stream and args.douji:
        import concurrent.futures
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
        training_future = executor.submit(trainer.run)

        def train_download():
            # Download the training images in the order the iterator will
            # visit them
            for num in [
                    train_iter.dataset._dataset[i] for i in train_iter._order
            ]:
                trans.download(-num)
                time.sleep(0.01)

        def val_download():
            # NOTE(review): this reads `_start` from the TransformDataset
            # wrapped by train_iter; presumably the validation split was
            # intended - confirm before relying on it.
            for num in train_iter.dataset._dataset[train_iter.dataset._start:]:
                trans.download(-num)
                time.sleep(0.01)

        # Only two workers exist, so val_download starts once one of the
        # earlier tasks has finished
        executor.submit(train_download)
        executor.submit(val_download)
        # Wait for training so that an exception raised inside
        # trainer.run() is re-raised here instead of being silently dropped
        training_future.result()
    else:
        trainer.run()