Example 1
def train(args):
    """Train with the given args

    :param Namespace args: The program arguments
    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['output'][1]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify model architecture
    model_class = dynamic_import(args.model_module)
    model = model_class(idim, odim, args)
    assert isinstance(model, MTInterface)

    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(
                len(args.char_list), rnnlm_args.layer, rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        model.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(json.dumps((idim, odim, vars(args)),
                           indent=4, ensure_ascii=False, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        if args.batch_size != 0:
            logging.info('batch size is automatically increased (%d -> %d)' % (
                args.batch_size, args.batch_size * args.ngpu))
            args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(
            model.parameters(), rho=0.95, eps=args.eps,
            weight_decay=args.weight_decay)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     weight_decay=args.weight_decay)
    elif args.opt == 'noam':
        from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt
        optimizer = get_std_opt(model, args.adim, args.transformer_warmup_steps, args.transformer_lr)
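        # noam: Adam wrapped in the inverse-square-root warmup schedule of the
        # Transformer paper; args.adim sets the model-dimension factor of that schedule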
    else:
        raise NotImplementedError("unknown optimizer: " + args.opt)

    # FIXME: TOO DIRTY HACK
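    # Chainer's training.Trainer drives the optimizer through `optimizer.target`
    # and `optimizer.serialize`, so both are patched onto the torch optimizer to
    # let the Chainer training loop report and snapshot through `reporter`.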
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(idim=idim)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
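    # SortaGrad: serve minibatches shortest-first for the first args.sortagrad
    # epochs (-1 means all epochs); shuffling is switched on afterwards by the
    # ShufflingEnabler extension registered below.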
    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1,
                          shortest_first=use_sortagrad,
                          count=args.batch_count,
                          batch_bins=args.batch_bins,
                          batch_frames_in=args.batch_frames_in,
                          batch_frames_out=args.batch_frames_out,
                          batch_frames_inout=args.batch_frames_inout,
                          mt=True, iaxis=1, oaxis=0)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1,
                          count=args.batch_count,
                          batch_bins=args.batch_bins,
                          batch_frames_in=args.batch_frames_in,
                          batch_frames_out=args.batch_frames_out,
                          batch_frames_inout=args.batch_frames_inout,
                          mt=True, iaxis=1, oaxis=0)

    load_tr = LoadInputsAndTargets(
        mode='mt', load_output=True, preprocess_conf=args.preprocess_conf,
        preprocess_args={'train': True}  # Switch the mode of preprocessing
    )
    load_cv = LoadInputsAndTargets(
        mode='mt', load_output=True, preprocess_conf=args.preprocess_conf,
        preprocess_args={'train': False}  # Switch the mode of preprocessing
    )
    # hack to make the batchsize argument 1
    # the actual batchsize is included in a list
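    # (make_batchset returns a list of minibatches, each itself a list of
    # utterance entries, so the iterators below always use batch_size=1)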
    if args.n_iter_processes > 0:
        train_iter = ToggleableShufflingMultiprocessIterator(
            TransformDataset(train, load_tr),
            batch_size=1, n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20,
            shuffle=not use_sortagrad)
        valid_iter = ToggleableShufflingMultiprocessIterator(
            TransformDataset(valid, load_cv),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = ToggleableShufflingSerialIterator(
            TransformDataset(train, load_tr),
            batch_size=1, shuffle=not use_sortagrad)
        valid_iter = ToggleableShufflingSerialIterator(
            TransformDataset(valid, load_cv),
            batch_size=1, repeat=False, shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(
        model, args.grad_clip, train_iter, optimizer, converter, device, args.ngpu, args.accum_grad)
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    if use_sortagrad:
        trainer.extend(ShufflingEnabler([train_iter]),
                       trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs, 'epoch'))

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention weight each epoch
    if args.num_save_attention > 0:
        # sort it by output lengths
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['output'][0]['shape'][0]), reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
            plot_class = model.module.attention_plot_class
        else:
            att_vis_fn = model.calculate_all_attentions
            plot_class = model.attention_plot_class
        att_reporter = plot_class(
            att_vis_fn, data, args.outdir + "/att_ws",
            converter=converter, transform=load_cv, device=device,
            ikey="output", iaxis=1)
        trainer.extend(att_reporter, trigger=(1, 'epoch'))
    else:
        att_reporter = None

    # Make a plot for training and validation values
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss',
                                          'main/loss_att', 'validation/main/loss_att'],
                                         'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/acc', 'validation/main/acc'],
                                         'epoch', file_name='acc.png'))
    trainer.extend(extensions.PlotReport(['main/ppl', 'validation/main/ppl'],
                                         'epoch', file_name='ppl.png'))

    # Save best models
    trainer.extend(snapshot_object(model, 'model.loss.best'),
                   trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(snapshot_object(model, 'model.acc.best'),
                   trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        if args.criterion == 'acc':
            trainer.extend(restore_snapshot(model, args.outdir + '/model.acc.best', load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model, args.outdir + '/model.loss.best', load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(args.report_interval_iters, 'iteration')))
    report_keys = ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
                   'main/acc', 'validation/main/acc',
                   'main/ppl', 'validation/main/ppl',
                   'elapsed_time']
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').param_groups[0]["eps"]),
            trigger=(args.report_interval_iters, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(
        report_keys), trigger=(args.report_interval_iters, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=args.report_interval_iters))
    set_early_stop(trainer, args)

    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer, att_reporter),
                       trigger=(args.report_interval_iters, 'iteration'))
    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
Example 2
max_epoch = 10
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='result')

trainer.extend(extensions.LogReport())
trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
trainer.extend(extensions.Evaluator(valid_iter, net, device=gpu_id),
               name='val')
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
    ]))
trainer.extend(
    extensions.ParameterStatistics(net.predictor.l1, {'std': np.std}))
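# ParameterStatistics reports its values under keys of the form
# <link>/<param>/<attribute>/<statistic>, which is where the 'l1/W/data/std'
# entry used by PrintReport and PlotReport above/below comes from.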
trainer.extend(
    extensions.PlotReport(['l1/W/data/std'],
                          x_key='epoch',
                          file_name='std.png'))
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'],
                          x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                          x_key='epoch',
                          file_name='accuracy.png'))
trainer.extend(extensions.dump_graph('main/loss'))

trainer.run()
Example 3
def train(args):
    '''Run training'''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['idim'])
    odim = int(valid_json[utts[0]]['odim'])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    reporter = model.reporter
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model.cuda()
    elif ngpu > 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model = DataParallel(model, device_ids=gpu_id)
        model.cuda()
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu
    else:
        gpu_id = [-1]

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # read json data
    with open(args.train_label, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    # hack to make the batchsize argument 1
    # the actual batchsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  1,
                                                  repeat=False,
                                                  shuffle=False)

    # prepare Kaldi reader
    train_reader = lazy_io.read_dict_scp(args.train_feat)
    valid_reader = lazy_io.read_dict_scp(args.valid_feat)

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(model, args.grad_clip, train_iter,
                                     optimizer, train_reader, gpu_id)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        if ngpu > 1:
            model.module.load_state_dict(
                torch.load(args.outdir + '/model.acc.best'))
        else:
            model.load_state_dict(torch.load(args.outdir + '/model.acc.best'))
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        PytorchSeqEvaluaterKaldi(model,
                                 valid_iter,
                                 reporter,
                                 valid_reader,
                                 device=gpu_id))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['ilen']),
                      reverse=True)
        data = converter_kaldi(data, valid_reader)
        trainer.extend(PlotAttentionReport(model, data,
                                           args.outdir + "/att_ws"),
                       trigger=(1, 'epoch'))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
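    # Custom savefun for snapshot_object: the default NPZ serializer cannot
    # handle a torch model, so the parameters are written with torch.save,
    # unwrapping DataParallel's .module when more than one GPU is used.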
    def torch_save(path, _):
        if ngpu > 1:
            torch.save(model.module.state_dict(), path)
            torch.save(model.module, path + ".pkl")
        else:
            torch.save(model.state_dict(), path)
            torch.save(model, path + ".pkl")

    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model,
                                       'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        if ngpu > 1:
            model.module.load_state_dict(torch.load(path))
        else:
            model.load_state_dict(torch.load(path))
        return obj

    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
Example 4
def main(config_path):
    # Init args
    args = parse_config(config_path)

    # Load sentences
    train_sentences = load_sentences(args["path_train"], args["replace_digit"])
    dev_sentences = load_sentences(args["path_dev"], args["replace_digit"])

    # Update tagging scheme (IOB/IOBES)
    update_tag_scheme(train_sentences, args["tag_scheme"])
    update_tag_scheme(dev_sentences, args["tag_scheme"])

    # Create a dictionary / mapping of words
    if args['path_pre_emb']:
        dico_words_train = word_mapping(train_sentences, args["lowercase"])[0]
        dico_words, word_to_id, id_to_word, pretrained = augment_with_pretrained(
            dico_words_train.copy(),
            args['path_pre_emb'],
            list(itertools.chain.from_iterable([[w[0] for w in s] for s in dev_sentences])))
    else:
        dico_words, word_to_id, id_to_word = word_mapping(train_sentences, args["lowercase"])
        dico_words_train = dico_words

    # Create a dictionary and a mapping for words / POS tags / tags
    dico_chars, char_to_id, id_to_char = char_mapping(train_sentences + dev_sentences)
    dico_entities, entity_to_id, id_to_entity = entity_mapping(train_sentences + dev_sentences)

    # Set the id of tag 'O' to 0 to make padding easier
    # Re-sort id_to_tag
    id_to_tag, tag_to_id = entity_tags(id_to_entity)

    if args["use_singletons"]:
        singletons = set([word_to_id[k] for k, v in dico_words_train.items() if v == 1])
    else:
        singletons = None

    # Index data
    train_data = prepare_dataset(train_sentences, word_to_id, char_to_id, tag_to_id, singletons, args["lowercase"])
    dev_data = prepare_dataset(dev_sentences, word_to_id, char_to_id, tag_to_id, None, args["lowercase"])
    print("%i / %i sentences in train / dev." % (len(train_data), len(dev_data)))

    # Init model
    model = Model(len(word_to_id), len(char_to_id), len(tag_to_id), args)

    if args['gpus']['main'] >= 0:
        cuda.get_device_from_id(args['gpus']['main']).use()
        model.to_gpu()

    print('Saving the mappings to disk...')
    model.save_mappings(id_to_word, id_to_char, id_to_tag, args)

    if args['path_pre_emb']:
        print("Loading pretrained embedding...")
        model.load_pretrained(args['path_pre_emb'])

    result_path = '../result/'

    # Init Iterators
    train_iter = chainer.iterators.SerialIterator(train_data, model.batch_size)
    dev_iter = chainer.iterators.SerialIterator(dev_data, model.batch_size, repeat=False)

    # Reset cost matrix
    id_to_tag = model.id_to_tag
    cost = model.crf.cost.data
    model.crf.cost.data = load_cost_matrix(id_to_tag, cost)
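    # (load_cost_matrix is assumed to rewrite the CRF transition costs so that
    # transitions that are invalid under the chosen tagging scheme are penalized;
    # the helper is defined elsewhere in this project)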

    # Init Optimizer
    optimizer = chainer.optimizers.Adam(model.lr_param)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(model.threshold))
    optimizer.add_hook(chainer.optimizer.WeightDecay(model.decay_rate))

    # Init early_stopping_trigger
    early_stopping_trigger = EarlyStoppingTrigger(args["epoch"],
                                                  key='dev/main/fscore',
                                                  eps=args["early_stopping_eps"],
                                                  early_stopping=args["early_stopping"])

    # Init Updater, Trainer and Evaluator
    updater = Updater(train_iter, optimizer, args['gpus'])
    trainer = training.Trainer(updater, stop_trigger=early_stopping_trigger, out=result_path)
    trainer.extend(Evaluator(dev_iter, optimizer.target, args['gpus']))

    # Save the best model
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=training.triggers.MaxValueTrigger('dev/main/fscore'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'dev/main/loss',
         'main/accuracy', 'dev/main/accuracy',
         'elapsed_time']))

    if extensions.PlotReport.available():
        # Plot loss, accuracy and fscore for each epoch
        trainer.extend(extensions.PlotReport(['main/loss', 'dev/main/loss'], x_key='epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'dev/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
        trainer.extend(extensions.PlotReport(['dev/main/fscore'], x_key='epoch', file_name='fscore.png'))

    trainer.run()
Example 5
def train(args):
    '''Run training'''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ['CHAINER_SEED'] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('chainer type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        chainer.config.cudnn_deterministic = False
        logging.info('chainer cudnn deterministic is disabled')
    else:
        chainer.config.cudnn_deterministic = True

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError(
            'chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(e2e.subsample[0])
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                              args.maxlen_out, args.minibatches)
        # hack to make the batchsize argument 1
        # actual batchsize is included in a list
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            1,
            n_processes=1,
            n_prefetch=8,
            maxtasksperchild=20)

        # set up updater
        updater = CustomUpdater(train_iter,
                                optimizer,
                                converter=converter,
                                device=gpu_id)
    else:
        # set up minibatches
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {
                k: v
                for i, (k, v) in enumerate(train_json.items())
                if i % ngpu == gid
            }
            # make minibatch list (variable length)
            train_subsets += [
                make_batchset(train_json_subset, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches)
            ]

        # each subset must have same length for MultiprocessParallelUpdater
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]

        # hack to make the batchsize argument 1
        # actual batchsize is included in a list
        train_iters = [
            chainer.iterators.MultiprocessIterator(TransformDataset(
                train_subsets[gid], converter.transform),
                                                   1,
                                                   n_processes=1,
                                                   n_prefetch=8,
                                                   maxtasksperchild=20)
            for gid in six.moves.xrange(ngpu)
        ]

        # set up updater
        updater = CustomParallelUpdater(train_iters,
                                        optimizer,
                                        converter=converter,
                                        devices=devices)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    valid_iter = chainer.iterators.SerialIterator(TransformDataset(
        valid, converter.transform),
                                                  1,
                                                  repeat=False,
                                                  shuffle=False)
    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(valid_iter,
                             model,
                             converter=converter,
                             device=gpu_id))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=gpu_id),
                       trigger=(1, 'epoch'))

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'),
        trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best'),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').eps),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Example 6
def main(arg_list=None):
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        nargs='+',
                        default=[20],
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--optimizer',
                        '-o',
                        nargs='+',
                        default=['momentumsgd'],
                        help='Optimizer (sgd, momentumsgd, adam)')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        nargs='+',
                        default=[128],
                        help='Number of training points in each mini-batch')
    parser.add_argument('--lr',
                        type=float,
                        nargs='+',
                        default=[1e-2, 1e-3, 1e-4, 1e-5],
                        help='Learning rate')
    parser.add_argument(
        '--network',
        '-n',
        default='ff',
        help=
        'Neural network type, either "ff", "tdnn", "lstm", "zoneoutlstm", "peepholelstm" or "gru". Setting any recurrent network implies "--shuffle-sequences"'
    )
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--units',
                        '-u',
                        type=int,
                        nargs='+',
                        default=[1024],
                        help='Number of units')
    parser.add_argument('--layers',
                        '-l',
                        type=int,
                        default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation',
                        '-a',
                        default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize',
                        type=int,
                        nargs='+',
                        default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--bproplen',
                        type=int,
                        default=20,
                        help='Backpropagation length')
    parser.add_argument('--timedelay',
                        type=int,
                        default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout',
        '-d',
        type=float,
        nargs='+',
        default=[0],
        help=
        'Dropout rate (0 to disable). In case of Zoneout LSTM, this parameter has 2 arguments: c_ratio h_ratio'
    )
    parser.add_argument('--ft',
                        default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument(
        '--shuffle-sequences',
        action='store_true',
        help=
        'True if sequences should be shuffled as a whole, otherwise all frames will be shuffled independently of each other'
    )
    parser.add_argument(
        '--data-dir',
        default='data/fmllr',
        help=
        'Data directory, this will be prepended to data files and feature transform'
    )
    parser.add_argument(
        '--offset-dir',
        default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--target-dir',
        default='data/targets',
        help='Data directory, this will be prepended to target files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--data', default='data_{}.npy', help='Training data')
    parser.add_argument('--offsets',
                        default='offsets_{}.npy',
                        help='Training offsets')
    parser.add_argument('--targets',
                        default='targets_{}.npy',
                        help='Training targets')
    parser.add_argument('--ivectors',
                        default='ivectors_{}.npy',
                        help='Training ivectors')
    parser.add_argument('--no-validation',
                        dest='use_validation',
                        action='store_false',
                        help='Do not evaluate validation data while training')
    parser.add_argument('--train-fold',
                        type=int,
                        help='Train fold network with this ID')
    parser.add_argument('--train-rpl',
                        action='store_true',
                        help='Train RPL layer')
    parser.add_argument('--rpl-model',
                        default="result_rpl/model",
                        help='RPL layer model')
    parser.add_argument('--fold-data-dir',
                        default="fold_data",
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir',
                        default="fold_data_out",
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir',
                        default="fold_models",
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-data-pattern',
        default='data_{0}.npy',
        help=
        'Filename pattern of each fold data, {0} will be replaced by fold ID')
    parser.add_argument('--fold-offset-pattern',
                        default='offsets_{0}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument('--fold-target-pattern',
                        default='targets_{0}.npy',
                        help='Filename pattern of each fold targets')
    parser.add_argument(
        '--fold-ivector-pattern',
        default='ivectors_{}.npy',
        help=
        'Filename pattern of each fold i-vectors file, {} will be replaced by fold ID'
    )
    parser.add_argument('--fold-output-pattern',
                        default='data_{0}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern',
                        default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--no-progress',
                        action='store_true',
                        help='Disable progress bar')
    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # set options implied by other options
    if is_nn_recurrent(args.network):
        args.shuffle_sequences = True

    # create output directories
    Path(args.out).mkdir(exist_ok=True, parents=True)
    if args.train_fold is not None:
        file_out = Path(args.fold_model_dir,
                        args.fold_network_pattern.format(args.train_fold))
        Path(file_out.parent).mkdir(exist_ok=True, parents=True)

    # print arguments to the file
    with open(args.out + "/args.txt", "w") as f:
        for attr in dir(args):
            if not attr.startswith('_'):
                f.write('# {}: {}\n'.format(attr, getattr(args, attr)))
        f.write(' '.join(
            map(lambda x: "'" + x + "'" if ' ' in x else x, sys.argv)) + '\n')

    # print arguments to stdout
    for attr in dir(args):
        if not attr.startswith('_'):
            print('# {}: {}'.format(attr, getattr(args, attr)))
    print('')

    # number of output classes (triphone vs. monophone targets)
    num_classes = 1909 if args.tri else 39

    # create model
    if args.train_rpl:
        model = RPL4(num_classes)
        model_cls = L.Classifier(model)
    else:
        if args.activation == "sigmoid":
            activation = F.sigmoid
        elif args.activation == "tanh":
            activation = F.tanh
        elif args.activation == "relu":
            activation = F.relu
        else:
            print("Wrong activation function specified")
            return
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)

        # classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        model_cls = L.Classifier(model)
    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    offsets = offsets_dev = None

    if args.train_rpl:
        # load training data
        fold = 0
        x = []
        y = []

        while True:
            x_file = Path(args.fold_output_dir,
                          args.fold_output_pattern.format(fold))
            y_file = Path(args.fold_data_dir,
                          args.fold_target_pattern.format(fold))
            if not x_file.is_file() or not y_file.is_file():
                break
            print("Loading fold {} data".format(fold))
            x_ = np.load(str(x_file))
            y_ = np.load(str(y_file))
            x.append(x_)
            y.append(y_)
            fold += 1

        if fold == 0:
            print("Error: No fold data found")
            return

        x = np.concatenate(x, axis=0)
        y = np.concatenate(y, axis=0)

        if args.use_validation:  #TODO: use args.data instead of args.dev_data
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            # offsets_dev = loadBin(str(Path(args.datadir, args.dev_offsets)), np.int32)
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
    else:
        # load training data
        ivectors = None
        ivectors_dev = None
        if args.train_fold is not None:
            x = []
            offsets = [0]
            y = []
            ivectors = []
            num = 0
            fold = 0
            while True:
                if fold != args.train_fold:
                    x_file = Path(args.fold_data_dir,
                                  args.fold_data_pattern.format(fold))
                    if not x_file.is_file():
                        break
                    offsets_file = Path(args.fold_data_dir,
                                        args.fold_offset_pattern.format(fold))
                    y_file = Path(args.fold_data_dir,
                                  args.fold_target_pattern.format(fold))
                    if args.ivector_dir is not None:
                        ivectors_file = Path(
                            args.fold_data_dir,
                            args.fold_ivector_pattern.format(fold))
                        if not ivectors_file.is_file():
                            print("Error: missing ivectors for fold data {}".
                                  format(fold))
                            return

                    print("Loading fold {} data".format(fold))
                    x_fold = np.load(str(x_file))
                    x.append(x_fold)
                    if is_nn_recurrent(args.network):
                        offsets_fold = np.load(str(offsets_file))
                        offsets.extend(offsets_fold[1:] + num)
                    y_fold = np.load(str(y_file))
                    y.append(y_fold)
                    if args.ivector_dir is not None:
                        ivectors_fold = np.load(str(ivectors_file))
                        ivectors.append(ivectors_fold)
                    num += x_fold.shape[0]
                fold += 1

            if len(x) == 0:
                print("Error: No fold data found")
                return

            x = np.concatenate(x, axis=0)
            if is_nn_recurrent(args.network):
                offsets = np.array(offsets, dtype=np.int32)
            y = np.concatenate(y, axis=0)
            if args.ivector_dir is not None:
                ivectors = np.concatenate(ivectors, axis=0)
        else:
            x = np.load(str(Path(args.data_dir, args.data.format("train"))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.offset_dir, args.offsets.format("train"))))
            y = np.load(
                str(Path(args.target_dir, args.targets.format("train"))))
            if args.ivector_dir is not None:
                ivectors = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("train"))))

        if args.use_validation:
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            if is_nn_recurrent(args.network):
                offsets_dev = np.load(
                    str(Path(args.offset_dir, args.offsets.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
            if args.ivector_dir is not None:
                ivectors_dev = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("dev"))))

        # apply splicing
        if args.network == "tdnn":
            splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
        else:
            splice = args.splice
        if splice > 0:
            x = splicing(x, range(-splice, splice + 1))
            x_dev = splicing(x_dev, range(-splice, splice + 1))

        # load feature transform
        if args.ft and args.ft != '-':
            ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
            if is_nn_recurrent(
                    args.network
            ):  # select transform middle frame if the network is recurrent
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
                ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
                ft["shape"][0] = dim
                ft["shifts"] = [0]
            elif args.network == "tdnn":
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                winlen = 2 * splice + 1
                ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                        winlen)
                ft["addShift"] = np.tile(
                    ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
                ft["shape"][0] = dim * winlen
                ft["shifts"] = list(range(-splice, splice + 1))
            # apply feature transform
            x = applyKaldiFeatureTransform(x, ft)
            if args.use_validation:
                x_dev = applyKaldiFeatureTransform(x_dev, ft)

        if ivectors is not None:
            x = np.concatenate((x, ivectors), axis=1)
        if ivectors_dev is not None:
            x_dev = np.concatenate((x_dev, ivectors_dev), axis=1)

        # shift the input dataset according to time delay
        if is_nn_recurrent(args.network) and args.timedelay != 0:
            x, y, offsets = apply_time_delay(x, y, offsets, args.timedelay)
            if args.use_validation:
                x_dev, y_dev, offsets_dev = apply_time_delay(
                    x_dev, y_dev, offsets_dev, args.timedelay)

    # create chainer datasets
    train_dataset = chainer.datasets.TupleDataset(x, y)
    if args.use_validation:
        dev_dataset = chainer.datasets.TupleDataset(x_dev, y_dev)

    # prepare train stages
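    # Each stage can use its own optimizer, batch size and learning rate;
    # index_padded is assumed to return the i-th element of a list, falling
    # back to the last element when the list is shorter.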
    train_stages_len = max(len(args.batchsize), len(args.lr))
    train_stages = [{
        'epoch': index_padded(args.epoch, i),
        'opt': index_padded(args.optimizer, i),
        'bs': index_padded(args.batchsize, i),
        'lr': index_padded(args.lr, i)
    } for i in range(train_stages_len)]

    for i, ts in enumerate(train_stages):
        if ts['opt'] == 'adam':  # learning rate not used, don't print it
            print(
                "=== Training stage {}: epoch = {}, batchsize = {}, optimizer = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt']))
        else:
            print(
                "=== Training stage {}: epoch = {}, batchsize = {}, optimizer = {}, learning rate = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['lr']))

        # reset state to allow training with different batch size in each stage
        if not args.train_rpl and is_nn_recurrent(args.network):
            model.reset_state()

        # setup an optimizer
        if ts['opt'] == "sgd":
            optimizer = chainer.optimizers.SGD(lr=ts['lr'])
        elif ts['opt'] == "momentumsgd":
            optimizer = chainer.optimizers.MomentumSGD(lr=ts['lr'])
        elif ts['opt'] == "adam":
            optimizer = chainer.optimizers.Adam()
        else:
            print("Wrong optimizer specified: {}".format(ts['opt']))
            exit(1)
        optimizer.setup(model_cls)

        if args.shuffle_sequences:
            train_iter = SequenceShuffleIterator(train_dataset, offsets,
                                                 ts['bs'])
            if args.use_validation:
                dev_iter = SequenceShuffleIterator(dev_dataset,
                                                   None,
                                                   ts['bs'],
                                                   repeat=False,
                                                   shuffle=False)
        else:
            train_iter = SerialIterator(train_dataset, ts['bs'])
            if args.use_validation:
                dev_iter = SerialIterator(dev_dataset,
                                          ts['bs'],
                                          repeat=False,
                                          shuffle=False)

        # set up a trainer
        if is_nn_recurrent(args.network):
            updater = BPTTUpdater(train_iter,
                                  optimizer,
                                  args.bproplen,
                                  device=args.gpu)
        else:
            updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
        if args.use_validation:
            stop_trigger = EarlyStoppingTrigger(ts['epoch'],
                                                key='validation/main/loss',
                                                eps=-0.001)
        else:
            stop_trigger = (ts['epoch'], 'epoch')
        trainer = training.Trainer(updater,
                                   stop_trigger,
                                   out="{}/{}".format(args.out, i))

        trainer.extend(model_saver)

        # evaluate the model with the development dataset for each epoch
        if args.use_validation:
            trainer.extend(
                extensions.Evaluator(dev_iter, model_cls, device=args.gpu))

        # dump a computational graph from 'loss' variable at the first iteration
        # the "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # take a snapshot for each specified epoch
        frequency = ts['epoch'] if args.frequency == -1 else max(
            1, args.frequency)
        trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

        # write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            plot_vars_loss = ['main/loss']
            plot_vars_acc = ['main/accuracy']
            if args.use_validation:
                plot_vars_loss.append('validation/main/loss')
                plot_vars_acc.append('validation/main/accuracy')
            trainer.extend(
                extensions.PlotReport(plot_vars_loss,
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(plot_vars_acc,
                                      'epoch',
                                      file_name='accuracy.png'))

        # print selected entries of the log to stdout
        # here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        if args.use_validation:
            print_report_vars = [
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]
        else:
            print_report_vars = [
                'epoch', 'main/loss', 'main/accuracy', 'elapsed_time'
            ]
        trainer.extend(extensions.PrintReport(print_report_vars))

        # print a progress bar to stdout
        # trainer.extend(extensions.ProgressBar())

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # Run the training
        trainer.run()

        # load the last saved model if the max epoch was not reached (i.e. the
        # early stopping trigger ended training because the validation loss
        # increased)
        if updater.epoch_detail < ts['epoch']:
            chainer.serializers.load_npz("{}/{}/model_tmp".format(args.out, i),
                                         model_cls)

        # remove temporary model from this training stage
        os.remove("{}/{}/model_tmp".format(args.out, i))

    # save the final model
    chainer.serializers.save_npz("{}/model".format(args.out), model_cls)
    if args.train_fold is not None:
        chainer.serializers.save_npz(
            str(
                Path(args.fold_model_dir,
                     args.fold_network_pattern.format(args.train_fold))),
            model_cls)
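
BPTTUpdater and SequenceShuffleIterator above are project-specific helpers. For reference, a minimal truncated-BPTT updater in the spirit of Chainer's official ptb example could look like the sketch below; the class name and the assumption that the converter yields (x, t) pairs are illustrative, not the project's actual implementation.

import chainer
from chainer import training

class TruncatedBPTTUpdater(training.StandardUpdater):
    """Sketch: accumulate the loss for bprop_len steps, then truncate the graph."""

    def __init__(self, train_iter, optimizer, bprop_len, device=-1):
        super(TruncatedBPTTUpdater, self).__init__(train_iter, optimizer,
                                                   device=device)
        self.bprop_len = bprop_len

    def update_core(self):
        loss = 0
        train_iter = self.get_iterator('main')
        optimizer = self.get_optimizer('main')
        for _ in range(self.bprop_len):
            batch = train_iter.__next__()
            x, t = self.converter(batch, self.device)
            loss += optimizer.target(x, t)  # target is e.g. an L.Classifier link
        optimizer.target.cleargrads()
        loss.backward()
        loss.unchain_backward()  # stop gradients from flowing past bprop_len steps
        optimizer.update()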
Esempio n. 7
0
def main():
    # initial settings
    parser = argparse.ArgumentParser(description='Chainer')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='batch size')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='number of epochs')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='whether to use a GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='folder for result files')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=unitSize,
                        help='number of hidden units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    ## pull in the MLP here
    model = L.Classifier(ADPPD(args.unit, outUnit), lossfun=F.mean_squared_error)  # output: 10 classes (to classify the digits 0-9)
    model.compute_accuracy = False
    # check whether to use the GPU
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        xp = cp
        print('I use GPU and cupy')

    ## set up the optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    ## set up the datasets for chainer
    train_data = DatasetPourDot(trainQuePath,trainAnsPath)
    test_data = DatasetPourDot(testQuePath,testAnsPath)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize,
                                                 repeat=False, shuffle=False)

    ## the updater adjusts the weights; StandardUpdater is used here
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    ## attach the updater to the trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    ## use Evaluator for evaluation
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))


    ## extensions for reporting training progress
    trainer.extend(extensions.dump_graph('main/loss'))
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    # if there is a snapshot from an interrupted run, resume from it
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    # start training; the trainer handles the rest
    trainer.run()
    # move the model back to the CPU so it can be used without a GPU
    model.to_cpu()
    # save the model in npz format
    chainer.serializers.save_npz(args.out+'/mymodel.npz', model)
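
To reuse the saved weights later, the npz file written above can be loaded back into a freshly built model of the same shape. A minimal sketch follows; ADPPD, unitSize, outUnit and x_new are assumed to be importable/defined as in this project's code.

import chainer
import chainer.functions as F
import chainer.links as L

# rebuild the same architecture, then load the trained parameters (CPU is fine,
# since the model was moved back to the CPU before saving)
model = L.Classifier(ADPPD(unitSize, outUnit), lossfun=F.mean_squared_error)
chainer.serializers.load_npz('result/mymodel.npz', model)

# inference without building a backprop graph
with chainer.using_config('train', False), chainer.no_backprop_mode():
    y = model.predictor(x_new)  # x_new: float32 array shaped like the training inputs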
Esempio n. 8
0
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gpu', type=int, default=0)
    args = parser.parse_args()

    args.max_iteration = 10000
    args.interval_eval = 1000
    args.interval_print = 10

    args.git_hash = instance_occlsegm_lib.utils.git_hash(__file__)
    args.hostname = socket.gethostname()

    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(here, 'logs/train_fcn_fgbg',
                        now.strftime('%Y%m%d_%H%M%S'))
    try:
        os.makedirs(args.out)
    except OSError:
        pass

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    data = contrib.datasets.InstanceImageDataset()
    class_names = data.class_names
    data_train = chainer.datasets.TransformDataset(data, Transform(train=True))
    iter_train = chainer.iterators.SerialIterator(data_train, batch_size=1)
    iter_test = chainer.iterators.SerialIterator(data, batch_size=1)

    model = contrib.models.FCN8sAtOnce(n_class=len(class_names))
    vgg16 = fcn.models.VGG16()
    chainer.serializers.load_npz(vgg16.pretrained_model, vgg16)
    model.init_from_vgg16(vgg16)
    model = chainercv.links.PixelwiseSoftmaxClassifier(predictor=model)
    if args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=1e-5)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
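    # the upscore (deconvolution) layers of FCN are commonly initialized as fixed
    # bilinear upsampling, so they are excluded from parameter updates below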
    model.predictor.upscore2.disable_update()
    model.predictor.upscore_pool4.disable_update()
    model.predictor.upscore8.disable_update()

    updater = training.StandardUpdater(iter_train, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, stop_trigger=(args.max_iteration, 'iteration'), out=args.out)

    trainer.extend(contrib.extensions.ParamsReport(args.__dict__))

    trainer.extend(extensions.snapshot_object(
        target=model.predictor, filename='model_{.updater.iteration:08}.npz'),
        trigger=(args.interval_eval, 'iteration'))

    trainer.extend(extensions.LogReport(
        trigger=(args.interval_print, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time', 'main/loss']))

    assert extensions.PlotReport.available()
    trainer.extend(extensions.PlotReport(
        y_keys=['main/loss'], x_key='iteration', file_name='loss.png',
        trigger=(args.interval_print, 'iteration')))

    trainer.extend(
        contrib.extensions.SemanticSegmentationVisReport(
            iter_test, transform=Transform(train=False),
            class_names=class_names, device=args.gpu, shape=(15, 5)),
        trigger=(args.interval_print, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=5))

    trainer.run()
Esempio n. 9
0
File: tts.py Progetto: akreal/espnet
def train(args):
    """Train E2E-TTS model."""
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning("cuda is not available")

    # get input and output dimension info
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]
    utts = list(valid_json.keys())

    # reverse input and output dimension
    idim = int(valid_json[utts[0]]["output"][0]["shape"][1])
    odim = int(valid_json[utts[0]]["input"][0]["shape"][1])
    logging.info("#input dims : " + str(idim))
    logging.info("#output dims: " + str(odim))

    # get extra input and output dimension
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]["input"][1]["shape"][0])
    else:
        args.spk_embed_dim = None
    if args.use_second_target:
        args.spc_dim = int(valid_json[utts[0]]["input"][1]["shape"][1])
    else:
        args.spc_dim = None

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + "/model.json"
    with open(model_conf, "wb") as f:
        logging.info("writing a model config file to" + model_conf)
        f.write(
            json.dumps(
                (idim, odim, vars(args)), indent=4, ensure_ascii=False, sort_keys=True
            ).encode("utf_8")
        )
    for key in sorted(vars(args).keys()):
        logging.info("ARGS: " + key + ": " + str(vars(args)[key]))

    # specify model architecture
    if args.enc_init is not None or args.dec_init is not None:
        model = load_trained_modules(idim, odim, args, TTSInterface)
    else:
        model_class = dynamic_import(args.model_module)
        model = model_class(idim, odim, args)
    assert isinstance(model, TTSInterface)
    logging.info(model)
    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        if args.batch_size != 0:
            logging.warning(
                "batch size is automatically increased (%d -> %d)"
                % (args.batch_size, args.batch_size * args.ngpu)
            )
            args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # freeze modules, if specified
    if args.freeze_mods:
        if hasattr(model, "module"):
            freeze_mods = ["module." + x for x in args.freeze_mods]
        else:
            freeze_mods = args.freeze_mods

        for mod, param in model.named_parameters():
            if any(mod.startswith(key) for key in freeze_mods):
                logging.info(f"{mod} is frozen not to be updated.")
                param.requires_grad = False

        model_params = filter(lambda x: x.requires_grad, model.parameters())
    else:
        model_params = model.parameters()

    logging.warning(
        "num. model params: {:,} (num. trained: {:,} ({:.1f}%))".format(
            sum(p.numel() for p in model.parameters()),
            sum(p.numel() for p in model.parameters() if p.requires_grad),
            sum(p.numel() for p in model.parameters() if p.requires_grad)
            * 100.0
            / sum(p.numel() for p in model.parameters()),
        )
    )

    # Setup an optimizer
    if args.opt == "adam":
        optimizer = torch.optim.Adam(
            model_params, args.lr, eps=args.eps, weight_decay=args.weight_decay
        )
    elif args.opt == "noam":
        from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt

        optimizer = get_std_opt(
            model_params, args.adim, args.transformer_warmup_steps, args.transformer_lr
        )
    else:
        raise NotImplementedError("unknown optimizer: " + args.opt)

    # FIXME: TOO DIRTY HACK
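    # the chainer Trainer drives the training loop, and its updater expects the
    # optimizer to expose `target` (a reporting link) and `serialize`; attaching
    # the reporter here lets the torch optimizer be plugged into that loop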
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # read json data
    with open(args.train_json, "rb") as f:
        train_json = json.load(f)["utts"]
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]

    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    if use_sortagrad:
        args.batch_sort_key = "input"
    # make minibatch list (variable length)
    train_batchset = make_batchset(
        train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        batch_sort_key=args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        shortest_first=use_sortagrad,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        swap_io=True,
        iaxis=0,
        oaxis=0,
    )
    valid_batchset = make_batchset(
        valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        batch_sort_key=args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        swap_io=True,
        iaxis=0,
        oaxis=0,
    )

    load_tr = LoadInputsAndTargets(
        mode="tts",
        use_speaker_embedding=args.use_speaker_embedding,
        use_second_target=args.use_second_target,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": True},  # Switch the mode of preprocessing
        keep_all_data_on_mem=args.keep_all_data_on_mem,
    )

    load_cv = LoadInputsAndTargets(
        mode="tts",
        use_speaker_embedding=args.use_speaker_embedding,
        use_second_target=args.use_second_target,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": False},  # Switch the mode of preprocessing
        keep_all_data_on_mem=args.keep_all_data_on_mem,
    )

    converter = CustomConverter()
    # hack to keep the batch_size argument of the data loader at 1;
    # the actual batch size is encoded inside each minibatch list
    train_iter = {
        "main": ChainerDataLoader(
            dataset=TransformDataset(
                train_batchset, lambda data: converter([load_tr(data)])
            ),
            batch_size=1,
            num_workers=args.num_iter_processes,
            shuffle=not use_sortagrad,
            collate_fn=lambda x: x[0],
        )
    }
    valid_iter = {
        "main": ChainerDataLoader(
            dataset=TransformDataset(
                valid_batchset, lambda data: converter([load_cv(data)])
            ),
            batch_size=1,
            shuffle=False,
            collate_fn=lambda x: x[0],
            num_workers=args.num_iter_processes,
        )
    }

    # Set up a trainer
    updater = CustomUpdater(
        model, args.grad_clip, train_iter, optimizer, device, args.accum_grad
    )
    trainer = training.Trainer(updater, (args.epochs, "epoch"), out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info("resumed from %s" % args.resume)
        torch_resume(args.resume, trainer)

    # set intervals
    eval_interval = (args.eval_interval_epochs, "epoch")
    save_interval = (args.save_interval_epochs, "epoch")
    report_interval = (args.report_interval_iters, "iteration")

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, device), trigger=eval_interval
    )

    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=save_interval)

    # Save best models
    trainer.extend(
        snapshot_object(model, "model.loss.best"),
        trigger=training.triggers.MinValueTrigger(
            "validation/main/loss", trigger=eval_interval
        ),
    )

    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        data = sorted(
            list(valid_json.items())[: args.num_save_attention],
            key=lambda x: int(x[1]["output"][0]["shape"][0]),
        )
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
            plot_class = model.module.attention_plot_class
            reduction_factor = model.module.reduction_factor
        else:
            att_vis_fn = model.calculate_all_attentions
            plot_class = model.attention_plot_class
            reduction_factor = model.reduction_factor
        if reduction_factor > 1:
            # fix the length to crop attention weight plot correctly
            data = copy.deepcopy(data)
            for idx in range(len(data)):
                ilen = data[idx][1]["input"][0]["shape"][0]
                data[idx][1]["input"][0]["shape"][0] = ilen // reduction_factor
        att_reporter = plot_class(
            att_vis_fn,
            data,
            args.outdir + "/att_ws",
            converter=converter,
            transform=load_cv,
            device=device,
            reverse=True,
        )
        trainer.extend(att_reporter, trigger=eval_interval)
    else:
        att_reporter = None

    # Make a plot for training and validation values
    if hasattr(model, "module"):
        base_plot_keys = model.module.base_plot_keys
    else:
        base_plot_keys = model.base_plot_keys
    plot_keys = []
    for key in base_plot_keys:
        plot_key = ["main/" + key, "validation/main/" + key]
        trainer.extend(
            extensions.PlotReport(plot_key, "epoch", file_name=key + ".png"),
            trigger=eval_interval,
        )
        plot_keys += plot_key
    trainer.extend(
        extensions.PlotReport(plot_keys, "epoch", file_name="all_loss.png"),
        trigger=eval_interval,
    )

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=report_interval))
    report_keys = ["epoch", "iteration", "elapsed_time"] + plot_keys
    trainer.extend(extensions.PrintReport(report_keys), trigger=report_interval)
    trainer.extend(extensions.ProgressBar(), trigger=report_interval)

    set_early_stop(trainer, args)
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        from torch.utils.tensorboard import SummaryWriter

        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer, att_reporter), trigger=report_interval)

    if use_sortagrad:
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs, "epoch"),
        )

    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
Esempio n. 10
0
def train_model():
    archs = {
        'mymodel': mymodel.MyModel,
        'nin': nin.NIN,
        'alex': alex.Alex,
        'lenet': lenet5.Lenet5,
        'vgg': vgg16.VGG16,
        'googlenet': googlenet.GoogLeNet,
        'deepface': deepface.DeepFace
    }

    parser = argparse.ArgumentParser(
        description='Training convnet from dataset (only 3 channels image)')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('test', help='Path to test image-label list file')
    parser.add_argument('--arch',
                        '-a',
                        choices=archs.keys(),
                        default='nin',
                        help='Convnet architecture')
    parser.add_argument('--epoch',
                        '-E',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--batchsize',
                        '-B',
                        type=int,
                        default=32,
                        help='Training minibatch size')
    parser.add_argument('--test_batchsize',
                        '-b',
                        type=int,
                        default=250,
                        help='Test minibatch size')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--mean',
                        '-m',
                        default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize model to train
    model = archs[args.arch]()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Load datasets
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    test = PreprocessedDataset(args.test, args.root, mean, model.insize)

    # Set up iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.test_batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up optimizer
    optimizer = chainer.optimizers.AdaDelta()
    optimizer.setup(model)

    # Set up trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Copy chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(test_iter, eval_model,
                                        device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch',
                              file_name='accuracy.png'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    # Run trainer
    date = datetime.datetime.today()
    start_time = time.clock()
    trainer.run()
    total_time = datetime.timedelta(seconds=time.clock() - start_time)

    # Save trained model
    print('')
    print('Training has been finished.')
    print('Total training time: {}.'.format(total_time))
    print('Saving the trained model...', end=' ')
    chainer.serializers.save_npz(
        os.path.join(args.out, 'model_final_' + args.arch), model)
    print('----> done')

    info = open(os.path.join(args.out, 'info'), 'a')
    info.write('Date: {}.\n'.format(date.strftime("%Y/%m/%d %H:%M:%S")))
    info.write('----> Total training time: {}.'.format(total_time))
Esempio n. 11
0
def main():
    parser = argparse.ArgumentParser(description='ChainerMN example: VGG16')
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        action='store_true',
                        default=False,
                        help='use GPU')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    # Create ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        device = comm.rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1
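    # 'hierarchical' assumes one process per GPU (NCCL inside a node, MPI across
    # nodes), while 'naive' is a pure-MPI communicator that also works on CPU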

    if comm.rank == 0:
        print('GPU: {}'.format(args.gpu))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))
        print('')

    # Load the CIFAR10 dataset
    if args.dataset == 'cifar10':
        class_labels = 10
        train, test = chainer.datasets.get_cifar10()
    elif args.dataset == 'cifar100':
        class_labels = 100
        train, test = chainer.datasets.get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(VGG.VGG(comm, class_labels))

    if args.gpu:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    if comm.rank != 0:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)
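    # non-root ranks hold empty datasets; the multi-node iterators below
    # broadcast the batches generated on rank 0 so that every process (each
    # holding its part of the model) sees the same minibatch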

    train_iter = chainermn.iterators.create_multi_node_iterator(
        chainer.iterators.SerialIterator(train, args.batchsize), comm)
    test_iter = chainermn.iterators.create_multi_node_iterator(
        chainer.iterators.SerialIterator(test,
                                         args.batchsize,
                                         repeat=False,
                                         shuffle=False), comm)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    if comm.rank == 0:
        # Dump a computational graph from 'loss' variable
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.DumpGraph('main/loss'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch',
                    file_name='accuracy.png'))

        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))

        trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
Esempio n. 12
0
def train(args):
    '''Run training'''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')
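    # mtlalpha weights the multitask objective:
    # loss = mtlalpha * loss_ctc + (1 - mtlalpha) * loss_att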

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer,
                             rnnlm_args.unit))
        torch.load(args.rnnlm, rnnlm)
        e2e.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(e2e.subsample[0])

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to keep the batch_size argument of the iterators at 1;
    # the actual batch size is encoded inside each minibatch list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device, args.ngpu)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=device),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model,
                                       'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    if args.report_cer:
        report_keys.append('validation/main/cer')
    if args.report_wer:
        report_keys.append('validation/main/wer')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Esempio n. 13
0
def main():
    # Introduce argparse for clarity and organization.
    # Starting to use higher capacity models, thus set up for GPU.
    parser = argparse.ArgumentParser(description='Chainer-Tutorial: MLP')
    parser.add_argument('--batch_size', '-b', type=int, default=128,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of times to train on data set')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID: -1 indicates CPU')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    # Load mnist data
    # http://docs.chainer.org/en/latest/reference/datasets.html
    train, test = chainer.datasets.get_mnist()

    # Define iterators.
    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Initialize model: Loss function defaults to softmax_cross_entropy.
    # 784 is dimension of the inputs, 625 is n_units in hidden layer
    # and 10 is the output dimension.
    model = L.Classifier(ModernMLP(625, 10))

    # Set up GPU usage if necessary. args.gpu is a condition as well as an
    # identification when passed to get_device().
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Define optimizer (SGD, Adam, RMSprop, etc)
    # http://docs.chainer.org/en/latest/reference/optimizers.html
    # RMSprop default parameter setting:
    # lr=0.01, alpha=0.99, eps=1e-8
    optimizer = chainer.optimizers.RMSprop()
    optimizer.setup(model)

    # Set up trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))

    # Evaluate the model at end of each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Helper functions (extensions) to monitor progress on stdout.
    report_params = [
        'epoch',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'elapsed_time'
    ]
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_params))
    trainer.extend(extensions.ProgressBar())

    # Here we add a bit more boiler plate code to help in output of useful
    # information in related to training. Very intuitive and great for post
    # analysis.
    # source:
    # https://github.com/pfnet/chainer/blob/master/examples/mnist/train_mnist.py

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    if args.resume:
        # Resume from a snapshot (NumPy NPZ format and HDF5 format available)
        # http://docs.chainer.org/en/latest/reference/serializers.html
        chainer.serializers.load_npz(args.resume, trainer)

    # Run trainer
    trainer.run()
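
ModernMLP is defined elsewhere in this tutorial; a definition consistent with the call ModernMLP(625, 10) above could look like the sketch below (the actual class may differ).

import chainer
import chainer.functions as F
import chainer.links as L

class ModernMLP(chainer.Chain):
    """Sketch of a two-hidden-layer MLP; loss/accuracy are handled by L.Classifier."""

    def __init__(self, n_units, n_out):
        super(ModernMLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)  # input size inferred (784 for MNIST)
            self.l2 = L.Linear(None, n_units)
            self.l3 = L.Linear(None, n_out)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        h = F.relu(self.l2(h))
        return self.l3(h)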
Esempio n. 14
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', type=str, default='result',
                        help='Output directory')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--mscoco-root', type=str, default='data',
                        help='MSCOCO dataset root directory')
    parser.add_argument('--max-iters', type=int, default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio', type=float, default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity', type=int, default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter', type=int, default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter', type=int, default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter', type=int, default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn', type=str, default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length', type=int, default=30,
                        help='Maximum caption length when using LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has been downloaded
    # already using e.g. the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to monitor overfitting and to see how well the
    # model generalizes to yet unseen data. However, since the number of these
    # samples in MSCOCO is quite large (~200k) and evaluating them takes time,
    # you may choose to use only a fraction of the available samples
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(
        vocab_size, dropout_ratio=args.dropout_ratio, rnn=args.rnn)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(
        train, args.batch_size, shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batch_size, repeat=False, shuffle=False, shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on
        # whether we are using LSTM layers or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(
            batch, device, max_caption_length=max_caption_length)

    updater = training.updater.StandardUpdater(
        train_iter, optimizer=optimizer, device=args.gpu, converter=converter)

    trainer = training.Trainer(
        updater, out=args.out, stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(
        extensions.Evaluator(
            val_iter,
            target=model,
            converter=converter,
            device=args.gpu
        ),
        trigger=(args.val_iter, 'iteration')
    )
    trainer.extend(
        extensions.LogReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PrintReport(
            ['elapsed_time', 'epoch', 'iteration', 'main/loss',
             'validation/main/loss']
        ),
        trigger=(args.log_iter, 'iteration')
    )

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(
        extensions.snapshot(filename='snapshot_{.updater.iteration}'),
        trigger=(args.snapshot_iter, 'iteration')
    )
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.iteration}'),
        trigger=(args.snapshot_iter, 'iteration')
    )
    trainer.extend(extensions.ProgressBar())

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()
Esempio n. 15
0
def main():
    '''
    main function, start point
    '''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true', default=False,
                        help='load the dataset from disk with parallel iterators')
    parser.add_argument('--test', action='store_true', default=False,
                        help='test mode: use a small dataset')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'), default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    parser.add_argument('--decay', '-d', type=str, default='exp', choices=('exp', 'lin'))
    parser.add_argument('--weight', '-w', type=float, default=1.0)
    args = parser.parse_args()

    # print parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print("# coef. decay : {}".format(args.decay))
    print("# contloss' weight : {}".format(args.weight))
    print('# Train Dataset: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory
    model_dir_name = 'CAEFINet_opt_{}_ch_{}_fsize_{}_decay_{}_weight_{}'.format(args.opt, args.ch, args.fsize, args.decay, args.weight)
    outdir = path.join(ROOT_PATH, 'results','FI' ,'CAEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # loading dataset
    if args.test:
        print('# loading test dataset (UCF101_minimam_test_size64_frame3_group2_max4_p) ...')
        train_dataset = 'UCF101_minimam_test_size64_frame3_group2_max4_p'
        test_dataset = 'UCF101_minimam_test_size64_frame3_group2_max4_p'
    else:
        print('# loading train/test datasets (UCF101_train_size64_frame3_group10_max100_p, UCF101_test_size64_frame3_group25_max5_p) ...')
        train_dataset = 'UCF101_train_size64_frame3_group10_max100_p'
        test_dataset = 'UCF101_test_size64_frame3_group25_max5_p'

    if args.iter_parallel:
        train = ds.SequenceDataset(dataset=train_dataset)
        test = ds.SequenceDataset(dataset=test_dataset)
    else:
        train = ds.SequenceDatasetOnMem(dataset=train_dataset)
        test = ds.SequenceDatasetOnMem(dataset=test_dataset)

    # prepare model
    model = N.CAEFINet(vgg_path=path.join(ROOT_PATH, 'models', 'VGG16.npz'), f_size=args.fsize, n_ch=args.ch, size=64)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iter
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, loss_func=model.get_loss_func(weight=args.weight, coef_decay=args.decay))
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # evaluate on the test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu, eval_func=model.get_loss_func(weight=args.weight, coef_decay=args.decay)))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1), trigger=(50, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1), trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    #  plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                            'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                            'epoch', file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/cont_loss', 'validation/main/cont_loss'],
                            'epoch', file_name='cont_loss.png'))
    # plot acc graph
    trainer.extend(extensions.PlotReport(['main/psnr', 'validation/main/psnr'],
                            'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss','main/mse_loss', 'validation/main/mse_loss',
        'main/cont_loss', 'validation/main/cont_loss', 'main/psnr', 'validation/main/psnr', 'lr', 'elapsed_time']))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save final model
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'CAEFINet_{}_ch_{}_fsize_{}_decay_{}_weight_{}.npz'.format(args.opt, args.ch, args.fsize, args.decay, args.weight)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    model_parameter = {
        'name': 'CAEFINetConcat',
        'parameter': {'f_size':args.fsize, 'ch':args.ch}
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
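
For reference, a minimal sketch (not part of the snippet above) of how a model serialized with chainer.serializers.save_npz plus the accompanying model_parameter.json could be loaded back for inference; N.CAEFINet, ROOT_PATH and the file names come from the snippet, everything else is an assumption.

import json
from os import path

import chainer


def load_trained_caefinet(model_outdir, model_name):
    # Read the architecture parameters written next to the weights.
    with open(path.join(model_outdir, 'model_parameter.json')) as f:
        params = json.load(f)['parameter']
    # N and ROOT_PATH are assumed to be the same module / constant as above.
    model = N.CAEFINet(vgg_path=path.join(ROOT_PATH, 'models', 'VGG16.npz'),
                       f_size=params['f_size'], n_ch=params['ch'], size=64)
    chainer.serializers.load_npz(path.join(model_outdir, model_name), model)
    return model
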
Esempio n. 16
0
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset',
                        choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    test_data = VOCBboxDataset(split='test',
                               year='2007',
                               use_difficult=True,
                               return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model.faster_rcnn,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=ManualScheduleTrigger(
                       [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
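
The evaluator above is attached with a ManualScheduleTrigger, which fires only at the listed iteration counts (here args.step_size and args.iteration). A minimal, self-contained sketch of the same trigger pattern, with placeholder iteration numbers:

from chainer import training
from chainer.training import triggers


# Fires only at iterations 50000 and 70000, mirroring the
# [args.step_size, args.iteration] schedule used above.
@training.make_extension(
    trigger=triggers.ManualScheduleTrigger([50000, 70000], 'iteration'))
def report_milestone(trainer):
    print('reached iteration', trainer.updater.iteration)

# trainer.extend(report_milestone)  # attach to an existing trainer
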
Esempio n. 17
0
def get_trainer(args):
    config = yaml.safe_load(open(args.config))

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Prepare ChainerMN communicator
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Show the setup information
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs - max workspace size:',
                  chainer.cuda.get_max_workspace_size())
        print('Using {} communicator'.format(args.communicator))

    # Output version info
    if comm.rank == 0:
        print('Chainer version: {}'.format(chainer.__version__))
        print('ChainerMN version: {}'.format(chainermn.__version__))
        print('cuda: {}, cudnn: {}'.format(chainer.cuda.available,
                                           chainer.cuda.cudnn_enabled))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    if comm.rank == 0:
        print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config, comm)
    if args.gpu:
        chainer.cuda.get_device(device).use()
        model.to_gpu()
    if comm.rank == 0:
        print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    if comm.rank == 0:
        print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    if comm.rank == 0:
        train_dataset, valid_dataset = get_dataset_from_config(config)
        print('train_dataset: {}'.format(len(train_dataset)),
              train_dataset.__class__.__name__)
        print('valid_dataset: {}'.format(len(valid_dataset)),
              valid_dataset.__class__.__name__)
    else:
        train_dataset, valid_dataset = [], []
    train_dataset = chainermn.scatter_dataset(train_dataset, comm)
    valid_dataset = chainermn.scatter_dataset(valid_dataset, comm)

    # Create iterators
    # multiprocessing.set_start_method('forkserver')
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    if comm.rank == 0:
        print('train_iter:', train_iter.__class__.__name__)
        print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, device=device)
    else:
        updater = create_updater(train_iter, optimizer, device=device)
    if comm.rank == 0:
        print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    if comm.rank == 0:
        print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport' and comm.rank == 0:
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr' and comm.rank == 0:
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph' and comm.rank == 0:
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=device)
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport' and comm.rank == 0:
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport' and comm.rank == 0:
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar' and comm.rank == 0:
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot' and comm.rank == 0:
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        # fn = '{}.bak'.format(args.resume)
        # shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        if comm.rank == 0:
            print('Resumed from:', args.resume)

    if comm.rank == 0:
        print('==========================================')

    return trainer
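
A standalone sketch of the lr_drop pattern used above: any callable that accepts the trainer can be registered as an extension, so dropping the learning rate at scheduled points needs no dedicated extension class. The ratio and schedule below are placeholders.

from chainer.training import triggers


def make_lr_drop(ratio):
    def lr_drop(trainer):
        # Multiply the learning rate of the 'main' optimizer in place.
        trainer.updater.get_optimizer('main').lr *= ratio
    return lr_drop

# trainer.extend(make_lr_drop(0.1),
#                trigger=triggers.ManualScheduleTrigger([100, 200], 'epoch'))
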
Esempio n. 18
0
# Extensions
snapshot_interval = (args.snapshot_interval, 'iteration')
display_interval = (args.display_interval, 'iteration')
trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu))
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
trainer.extend(extensions.LogReport(trigger=display_interval))
trainer.extend(extensions.PrintReport([
    'epoch', 'iteration', 'main/loss', 'main/accuracy', 'validation/main/loss',
    'validation/main/accuracy'
]),
               trigger=display_interval)
trainer.extend(
    extensions.PlotReport(['main/loss', 'validation/main/loss'],
                          'iteration',
                          file_name='loss.png',
                          trigger=display_interval))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                          'iteration',
                          file_name='accuracy.png',
                          trigger=display_interval))
trainer.extend(extensions.ProgressBar(update_interval=10))

# Resume
if args.resume:
    chainer.serializers.load_npz(args.resume, trainer)

# Run
trainer.run()
def main():
    parser = argparse.ArgumentParser(description='ColumnNet')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true') 
    args = parser.parse_args()
 

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    Model = ColumnNet()
    model = L.Classifier(Model)
   
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    
    # Load the ColumnNet dataset
    f = open('train_list.txt')
    train_lines = f.readlines()
    f.close()

    f = open('val_list.txt')
    val_lines = f.readlines()
    f.close()

    #dataset = LabeledImageDataset(list(zip(fnames, labels)))
    #transform_dataset = TransformDataset(dataset, transform)

    #train, val = datasets.split_dataset_random(transform_dataset, int(len(dataset) * 0.8), seed=0)

    train = load_dataset(train_lines)
    val = load_dataset(val_lines)

    train_iter = iterators.MultiprocessIterator(train, args.batchsize)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, shuffle=False)


    if args.test:
        val_interval = 5, 'epoch'
        log_interval = 1, 'epoch'
    else:
        val_interval = 100000, 'iteration'
        log_interval = 1000, 'iteration'


    # Set up the updater and trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='result')
 
    # Set up trainer extensions
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),trigger=val_interval)
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'validation/main/map', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/map'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/map'], x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))
    # Run the training
    trainer.run()
    chainer.serializers.save_npz('result/columnnet.model', Model)
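
A minimal sketch of the dataset pipeline hinted at by the commented-out lines above: build a LabeledImageDataset from (filename, label) pairs, wrap it with a transform, and split it 80/20. The transform callable is assumed to be defined elsewhere.

from chainer.datasets import (LabeledImageDataset, TransformDataset,
                              split_dataset_random)


def build_datasets(fnames, labels, transform):
    dataset = LabeledImageDataset(list(zip(fnames, labels)))
    transformed = TransformDataset(dataset, transform)
    n_train = int(len(dataset) * 0.8)
    # Returns (train, val) subsets with a fixed split for reproducibility.
    return split_dataset_random(transformed, n_train, seed=0)
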
Esempio n. 20
0
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=256,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--model',
                        '-m',
                        default='resnet',
                        help='using model name')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    train = AugmentedDataset(train)

    if args.model == 'resnet':
        model = models.resnet_shift.ResNet(False, class_labels)
    elif args.model == 'shift':
        model = models.resnet_shift.ResNet(True, class_labels)
    else:
        raise RuntimeError('Invalid model choice.')
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # snapshot model
    trainer.extend(extensions.snapshot_object(
        model, filename='model_epoch-{.updater.epoch}'),
                   trigger=(10, 'epoch'))

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/accuracy',
            'elapsed_time'
        ]))

    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
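
As a side note on the schedule above: ExponentialShift multiplies the named optimizer attribute by its rate each time the trigger fires, so with rate 0.5 and a (25, 'epoch') trigger the learning rate effectively follows lr * 0.5 ** (epoch // 25). A tiny sketch of that arithmetic:

import math


def lr_at_epoch(initial_lr, epoch, rate=0.5, period=25):
    # Learning rate after all shifts triggered up to and including `epoch`.
    return initial_lr * rate ** (epoch // period)


assert math.isclose(lr_at_epoch(0.05, 24), 0.05)
assert math.isclose(lr_at_epoch(0.05, 25), 0.025)
assert math.isclose(lr_at_epoch(0.05, 75), 0.00625)
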
def train(mode):

    Dt1_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_(Gallery&Probe)_2nd"
    train1 = load_GEI(path_dir=Dt1_train_dir, mode=True)

    Dt2_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt2_(Gallery&Probe)"
    train2 = load_GEI(path_dir=Dt2_train_dir, mode=True)

    Dt3_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt3_(Gallery&Probe)"
    train3 = load_GEI(path_dir=Dt3_train_dir, mode=True)

    Dt4_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt4_(Gallery&Probe)"
    train4 = load_GEI(path_dir=Dt4_train_dir, mode=True)

    model = Multi_modal_GEINet()

    model.to_gpu()

    # train_iter = iterators.MultiprocessIterator(train, batch_size=239)
    Dt1_train_iter = iterators.SerialIterator(train1,
                                              batch_size=239,
                                              shuffle=False)
    Dt2_train_iter = iterators.SerialIterator(train2,
                                              batch_size=239,
                                              shuffle=False)
    Dt3_train_iter = iterators.SerialIterator(train3,
                                              batch_size=239,
                                              shuffle=False)
    Dt4_train_iter = iterators.SerialIterator(train4,
                                              batch_size=239,
                                              shuffle=False)

    # optimizer = chainer.optimizers.SGD(lr=0.02)
    optimizer = chainer.optimizers.MomentumSGD(lr=0.02, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.01))

    # updater = training.ParallelUpdater(train_iter, optimizer, devices={'main': 0, 'second': 1})
    updater = Multi_modal_Updater(model,
                                  Dt1_train_iter,
                                  Dt2_train_iter,
                                  Dt3_train_iter,
                                  Dt4_train_iter,
                                  optimizer,
                                  device=0)
    epoch = 6250

    trainer = training.Trainer(
        updater, (epoch, 'epoch'),
        out='/home/wutong/Setoguchi/chainer_files/result')

    # trainer.extend(extensions.Evaluator(test_iter, model, device=0))
    trainer.extend(extensions.ExponentialShift(attr='lr', rate=0.56234),
                   trigger=(1250, 'epoch'))
    trainer.extend(
        extensions.LogReport(log_name='SFDEI_log', trigger=(20, "epoch")))
    trainer.extend(extensions.snapshot_object(
        model, filename='model_snapshot_{.updater.epoch}'),
                   trigger=(1250, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(1250, 'epoch'))
    trainer.extend(extensions.PrintReport(['epoch', 'accuracy', 'loss']))
    # 'validation/main/accuracy']),
    # trigger=(1, "epoch"))
    trainer.extend(
        extensions.dump_graph(root_name="loss", out_name="multi_modal_3.dot"))
    trainer.extend(extensions.PlotReport(["loss"]), trigger=(50, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    if mode:
        # Run the trainer
        trainer.run()
    else:
        serializers.load_npz(
            "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
            trainer)
        trainer.run()
        serializers.save_npz(
            "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
            trainer)

    serializers.save_npz(
        "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
        model)
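
A short sketch separating the two serialization targets used in the snippet above: a whole-trainer snapshot (updater, optimizer state, iteration count) is what load_npz can resume training from, while a bare model .npz holds only the weights needed at test time. Paths below are placeholders.

from chainer import serializers


def save_for_resume(trainer, out_path):
    # Serializes the whole trainer so training can continue where it stopped.
    serializers.save_npz(out_path, trainer)


def save_for_inference(model, out_path):
    # Serializes only the link parameters; this is the file to load at test time.
    serializers.save_npz(out_path, model)
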
Esempio n. 22
0
def main():
    parser = argparse.ArgumentParser(description='Chainer Darknet53 Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=100000)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='darknet53-voc-result')
    parser.add_argument('--seed', default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--validation_size', type=int, default=2048)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    random.seed(args.seed)
    np.random.seed(args.seed)

    darknet53 = Darknet53(20)
    model = L.Classifier(darknet53)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005),
                       'hook_decay')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train,
                           classifier=True,
                           jitter=0.2,
                           hue=0.1,
                           sat=.75,
                           val=.75)
    test = YOLOVOCDataset(test, classifier=True, crop_size=(256, 256))
    test = test[np.random.permutation(np.arange(
        len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices=devices)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(test_iter, model, device=device),
                   trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'iteration',
                                  display_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'iteration',
                display_interval,
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_snapshot.npz'),
                   trigger=training.triggers.MinValueTrigger(
                       'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_final.npz'),
                   trigger=snapshot_interval)

    trainer.extend(DarknetShift(optimizer, 'poly', args.iteration))

    trainer.extend(CropSizeUpdater(train,
                                   [(4 + i) * 32 for i in range(0, 11)]))

    trainer.run()
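
The best-model snapshot above relies on MinValueTrigger, which fires only when the watched key reaches a new minimum among the values seen at each check interval; snapshot_object then keeps overwriting the same file with the best weights so far. A condensed sketch of that pattern:

from chainer.training import extensions, triggers


def extend_best_snapshot(trainer, target, key='validation/main/loss',
                         check_interval=(100, 'iteration')):
    # Writes best_model.npz whenever `key` improves at a check point.
    trainer.extend(
        extensions.snapshot_object(target, 'best_model.npz'),
        trigger=triggers.MinValueTrigger(key, check_interval))
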
Esempio n. 23
0
def train(args):
    '''RUN TRAINING'''
    # seed setting
    torch.manual_seed(args.seed)

    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())

    # reverse input and output dimension
    idim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]['input'][1]['shape'][0])
    else:
        args.spk_embed_dim = None
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    tacotron2 = Tacotron2(idim, odim, args)
    logging.info(tacotron2)

    # check the use of multi-gpu
    if args.ngpu > 1:
        tacotron2 = torch.nn.DataParallel(tacotron2,
                                          device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    tacotron2 = tacotron2.to(device)

    # define loss
    model = Tacotron2Loss(tacotron2, args.use_masking, args.bce_pos_weight,
                          args.monotonic)
    reporter = model.reporter

    # Setup an optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 eps=args.eps,
                                 weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, 'target', reporter)
    setattr(optimizer, 'serialize', lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(True, args.use_speaker_embedding)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train_batchset = make_batchset(train_json, args.batch_size, args.maxlen_in,
                                   args.maxlen_out, args.minibatches,
                                   args.batch_sort_key)
    valid_batchset = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                                   args.maxlen_out, args.minibatches,
                                   args.batch_sort_key)
    # hack to make the batchsize argument 1
    # the actual batchsize is included in a list
    train_iter = chainer.iterators.MultiprocessIterator(TransformDataset(
        train_batchset, converter.transform),
                                                        batch_size=1,
                                                        n_processes=2,
                                                        n_prefetch=8)
    #, maxtasksperchild=20)
    valid_iter = chainer.iterators.MultiprocessIterator(TransformDataset(
        valid_batchset, converter.transform),
                                                        batch_size=1,
                                                        repeat=False,
                                                        shuffle=False,
                                                        n_processes=2,
                                                        n_prefetch=8)
    #maxtasksperchild=20)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(tacotron2, "module"):
            att_vis_fn = tacotron2.module.calculate_all_attentions
        else:
            att_vis_fn = tacotron2.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + '/att_ws',
                                           converter=CustomConverter(
                                               False,
                                               args.use_speaker_embedding),
                                           device=device,
                                           reverse=True),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/l1_loss',
            'validation/main/l1_loss', 'main/mse_loss',
            'validation/main/mse_loss', 'main/bce_loss',
            'validation/main/bce_loss', 'main/monotonic_loss',
            'validation/main/monotonic_loss'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/l1_loss', 'validation/main/l1_loss'],
                              'epoch',
                              file_name='l1_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                              'epoch',
                              file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/bce_loss', 'validation/main/bce_loss'],
                              'epoch',
                              file_name='bce_loss.png'))
    trainer.extend(
        extensions.PlotReport(
            ['main/monotonic_loss', 'validation/main/monotonic_loss'],
            'epoch',
            file_name='monotonic_loss.png'))

    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(tacotron2,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'elapsed_time', 'main/loss', 'main/l1_loss',
        'main/mse_loss', 'main/bce_loss', 'main/monotonic_loss',
        'validation/main/loss', 'validation/main/l1_loss',
        'validation/main/mse_loss', 'validation/main/bce_loss',
        'validation/main/monotonic_loss'
    ]
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
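
A compact sketch of the batch_size=1 trick described by the comment in the code above: each element of the batch set is already a complete minibatch (a list of utterances), so the Chainer iterator's batch size stays at 1 and the real batching happens inside the converter. SerialIterator is used here only to keep the sketch dependency-free; the script above uses MultiprocessIterator.

import chainer
from chainer.datasets import TransformDataset


def make_minibatch_iterator(batchset, transform, train=True):
    # One dataset item == one pre-built minibatch, hence batch_size=1.
    dataset = TransformDataset(batchset, transform)
    return chainer.iterators.SerialIterator(
        dataset, batch_size=1, repeat=train, shuffle=train)
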
def main():
    rospack = rospkg.RosPack()
    jsk_perception_datasets_path = osp.join(rospack.get_path('jsk_perception'),
                                            'learning_datasets')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gpu', default=0, type=int, help='GPU id')
    parser.add_argument('-d',
                        '--dataset_dir',
                        default=osp.join(jsk_perception_datasets_path,
                                         'human_size_mirror_dataset'),
                        type=str,
                        help='Path to root directory of dataset')
    parser.add_argument('-m',
                        '--model',
                        default='FCN8sDepthPredictionConcatFirst',
                        type=str,
                        help='Model class name')
    parser.add_argument('-b',
                        '--batch_size',
                        default=1,
                        type=int,
                        help='Batch size')
    parser.add_argument('-e',
                        '--epoch',
                        default=100,
                        type=int,
                        help='Training epoch')
    parser.add_argument('-o',
                        '--out',
                        type=str,
                        default=None,
                        help='Output directory')
    args = parser.parse_args()

    gpu = args.gpu
    out = args.out

    # 0. config

    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    if out is None:
        out = osp.join(rospkg.get_ros_home(), 'learning_logs', timestamp)

    max_iter_epoch = args.epoch, 'epoch'
    progress_bar_update_interval = 10  # iteration
    print_interval = 100, 'iteration'
    log_interval = 100, 'iteration'
    test_interval = 5, 'epoch'
    save_interval = 5, 'epoch'

    # 1. dataset

    dataset_train = DepthPredictionDataset(args.dataset_dir,
                                           split='train',
                                           aug=True)
    dataset_valid = DepthPredictionDataset(args.dataset_dir,
                                           split='test',
                                           aug=False)

    dataset_train_transformed = TransformDataset(dataset_train, transform)
    dataset_valid_transformed = TransformDataset(dataset_valid, transform)

    iter_train = chainer.iterators.MultiprocessIterator(
        dataset_train_transformed,
        batch_size=args.batch_size,
        shared_mem=10**8)
    iter_valid = chainer.iterators.MultiprocessIterator(
        dataset_valid_transformed,
        batch_size=1,
        shared_mem=10**8,
        repeat=False,
        shuffle=False)

    # 2. model

    vgg = fcn.models.VGG16()
    vgg_path = vgg.download()
    chainer.serializers.load_npz(vgg_path, vgg)

    n_class = len(dataset_train.class_names)
    assert n_class == 2

    if args.model == 'FCN8sDepthPredictionConcatFirst':
        model = FCN8sDepthPredictionConcatFirst(n_class=n_class, masking=True)
    else:
        print('Invalid model class.')
        exit(1)

    model.init_from_vgg16(vgg)

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    # 3. optimizer

    optimizer = chainer.optimizers.Adam(alpha=1.0e-5)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    updater = chainer.training.updater.StandardUpdater(iter_train,
                                                       optimizer,
                                                       device=gpu)

    trainer = chainer.training.Trainer(updater, max_iter_epoch, out=out)

    trainer.extend(extensions.ExponentialShift("alpha", 0.99997))

    if not osp.isdir(out):
        os.makedirs(out)

    with open(osp.join(out, 'dataset.txt'), 'w') as f:
        f.write(dataset_train.__class__.__name__)

    with open(osp.join(out, 'model.txt'), 'w') as f:
        f.write(model.__class__.__name__)

    with open(osp.join(out, 'batch_size.txt'), 'w') as f:
        f.write(str(args.batch_size))

    trainer.extend(extensions.snapshot_object(
        model,
        savefun=chainer.serializers.save_npz,
        filename='model_snapshot.npz'),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       'validation/main/depth_acc<0.10', save_interval))

    trainer.extend(
        extensions.dump_graph(root_name='main/loss',
                              out_name='network_architecture.dot'))

    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))

    trainer.extend(extensions.PlotReport([
        'main/loss',
        'validation/main/loss',
    ],
                                         file_name='loss_plot.png',
                                         x_key='epoch',
                                         trigger=(5, 'epoch')),
                   trigger=(5, 'epoch'))

    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/seg_loss',
        'main/reg_loss',
        'main/miou',
        'main/depth_acc<0.03',
        'main/depth_acc<0.10',
        'main/depth_acc<0.30',
        'validation/main/miou',
        'validation/main/depth_acc<0.03',
        'validation/main/depth_acc<0.10',
        'validation/main/depth_acc<0.30',
    ]),
                   trigger=print_interval)

    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.ProgressBar(update_interval=progress_bar_update_interval))
    trainer.extend(extensions.Evaluator(iter_valid, model, device=gpu),
                   trigger=test_interval)

    trainer.run()
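
For completeness, a small sketch (class name and snapshot file name taken from the script above, everything else assumed) of reloading the best snapshot written by snapshot_object for offline evaluation.

import os.path as osp

import chainer


def load_best_model(out_dir, n_class=2):
    model = FCN8sDepthPredictionConcatFirst(n_class=n_class, masking=True)
    chainer.serializers.load_npz(osp.join(out_dir, 'model_snapshot.npz'), model)
    return model
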
Esempio n. 25
0
    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epochs, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(log_name=None))

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
Esempio n. 26
0
def train(args):
    config = yaml.safe_load(open(args.config))

    print('==========================================')

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Output version info
    print('chainer version: {}'.format(chainer.__version__))
    print('cuda: {}, cudnn: {}, nccl: {}'.format(chainer.cuda.available,
                                                 chainer.cuda.cudnn_enabled,
                                                 HAVE_NCCL))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Prepare devices
    devices = {'main': args.gpus[0]}
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid

    # Create iterators
    train_iter, valid_iter = create_iterators(
        train_dataset, config['dataset']['train']['batchsize'], valid_dataset,
        config['dataset']['valid']['batchsize'], devices)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater
    updater_creator = get_updater_creator_from_config(config)
    updater = updater_creator(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            evaluator_creator = get_evaluator_creator_from_config(values)
            evaluator = evaluator_creator(valid_iter, model, devices)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension, trigger=values['trigger'])

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Resume
    if args.resume is not None:
        fn = '{}.bak'.format(args.resume)
        shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    trainer.run()
    return 0
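
For clarity, a sketch of the configuration shape consumed by the trainer_extension loop above: a list of single-key mappings, each unpacked with popitem(). All concrete values are placeholders.

example_config_fragment = {
    'trainer_extension': [
        {'LogReport': {'trigger': (1, 'epoch')}},
        {'observe_lr': {'trigger': (1, 'epoch')}},
        {'dump_graph': {'root_name': 'main/loss'}},
        {'PrintReport': {'entries': ['epoch', 'main/loss', 'validation/main/loss'],
                         'trigger': (1, 'epoch')}},
        {'snapshot': {'filename': 'snapshot_epoch_{.updater.epoch}',
                      'trigger': (10, 'epoch')}},
    ]
}
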
Esempio n. 27
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--class-weight', type=str, default='class_weight.npy')
    parser.add_argument('--out', type=str, default='result')
    args = parser.parse_args()

    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (16000, 'iteration')

    # Dataset
    train = CamVidDataset(split='train')
    train = TransformDataset(train, transform)
    val = CamVidDataset(split='val')

    # Iterator
    train_iter = iterators.MultiprocessIterator(train, args.batchsize)
    val_iter = iterators.MultiprocessIterator(
        val, args.batchsize, shuffle=False, repeat=False)

    # Model
    class_weight = np.load(args.class_weight)
    model = SegNetBasic(n_class=len(camvid_label_names))
    model = PixelwiseSoftmaxClassifier(
        model, class_weight=class_weight)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    # Updater
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, out=args.out)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss'], x_key='iteration',
            file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['validation/main/miou'], x_key='iteration',
            file_name='miou.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time', 'lr',
         'main/loss', 'validation/main/miou',
         'validation/main/mean_class_accuracy',
         'validation/main/pixel_accuracy']),
        trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(
        SemanticSegmentationEvaluator(
            val_iter, model.predictor,
            camvid_label_names),
        trigger=validation_trigger)

    trainer.run()

    chainer.serializers.save_npz(
        os.path.join(args.out, 'snapshot_model.npz'),
        recalculate_bn_statistics(model.predictor, 24))
Esempio n. 28
0
def train(args):
    """Train with the given args.

    Args:
        args (namespace): The program arguments.

    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning("cuda is not available")

    # get input and output dimension info
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]["output"][1]["shape"][1])
    odim = int(valid_json[utts[0]]["output"][0]["shape"][1])
    logging.info("#input dims : " + str(idim))
    logging.info("#output dims: " + str(odim))

    # specify model architecture
    model_class = dynamic_import(args.model_module)
    model = model_class(idim, odim, args)
    assert isinstance(model, MTInterface)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + "/model.json"
    with open(model_conf, "wb") as f:
        logging.info("writing a model config file to " + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode("utf_8"))
    for key in sorted(vars(args).keys()):
        logging.info("ARGS: " + key + ": " + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        if args.batch_size != 0:
            logging.warning(
                "batch size is automatically increased (%d -> %d)" %
                (args.batch_size, args.batch_size * args.ngpu))
            args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    if args.train_dtype in ("float16", "float32", "float64"):
        dtype = getattr(torch, args.train_dtype)
    else:
        dtype = torch.float32
    model = model.to(device=device, dtype=dtype)

    logging.warning(
        "num. model params: {:,} (num. trained: {:,} ({:.1f}%))".format(
            sum(p.numel() for p in model.parameters()),
            sum(p.numel() for p in model.parameters() if p.requires_grad),
            sum(p.numel() for p in model.parameters() if p.requires_grad) *
            100.0 / sum(p.numel() for p in model.parameters()),
        ))

    # Setup an optimizer
    if args.opt == "adadelta":
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps,
                                         weight_decay=args.weight_decay)
    elif args.opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.opt == "noam":
        from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt

        optimizer = get_std_opt(
            model.parameters(),
            args.adim,
            args.transformer_warmup_steps,
            args.transformer_lr,
        )
    else:
        raise NotImplementedError("unknown optimizer: " + args.opt)

    # setup apex.amp
    if args.train_dtype in ("O0", "O1", "O2", "O3"):
        try:
            from apex import amp
        except ImportError as e:
            logging.error(
                f"You need to install apex for --train-dtype {args.train_dtype}. "
                "See https://github.com/NVIDIA/apex#linux")
            raise e
        if args.opt == "noam":
            model, optimizer.optimizer = amp.initialize(
                model, optimizer.optimizer, opt_level=args.train_dtype)
        else:
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.train_dtype)
        use_apex = True
    else:
        use_apex = False

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter()

    # read json data
    with open(args.train_json, "rb") as f:
        train_json = json.load(f)["utts"]
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]

    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    # make minibatch list (variable length)
    train = make_batchset(
        train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        shortest_first=use_sortagrad,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        mt=True,
        iaxis=1,
        oaxis=0,
    )
    valid = make_batchset(
        valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        mt=True,
        iaxis=1,
        oaxis=0,
    )

    load_tr = LoadInputsAndTargets(mode="mt", load_output=True)
    load_cv = LoadInputsAndTargets(mode="mt", load_output=True)
    # hack to keep the DataLoader batch-size argument at 1:
    # the actual batch size is baked into each dataset item (a list of samples),
    # the default collate function would convert numpy arrays to pytorch tensors,
    # so an identity collate function that simply unwraps the list is used instead
    # (a standalone sketch of this trick follows this example)
    train_iter = ChainerDataLoader(
        dataset=TransformDataset(train,
                                 lambda data: converter([load_tr(data)])),
        batch_size=1,
        num_workers=args.n_iter_processes,
        shuffle=not use_sortagrad,
        collate_fn=lambda x: x[0],
    )
    valid_iter = ChainerDataLoader(
        dataset=TransformDataset(valid,
                                 lambda data: converter([load_cv(data)])),
        batch_size=1,
        shuffle=False,
        collate_fn=lambda x: x[0],
        num_workers=args.n_iter_processes,
    )

    # Set up a trainer
    updater = CustomUpdater(
        model,
        args.grad_clip,
        {"main": train_iter},
        optimizer,
        device,
        args.ngpu,
        False,
        args.accum_grad,
        use_apex=use_apex,
    )
    trainer = training.Trainer(updater, (args.epochs, "epoch"),
                               out=args.outdir)

    if use_sortagrad:
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs,
                     "epoch"),
        )

    # Resume from a snapshot
    if args.resume:
        logging.info("resumed from %s" % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    if args.save_interval_iters > 0:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu))

    # Save attention weight each epoch
    if args.num_save_attention > 0:
        # NOTE: sort it by output lengths
        data = sorted(
            list(valid_json.items())[:args.num_save_attention],
            key=lambda x: int(x[1]["output"][0]["shape"][0]),
            reverse=True,
        )
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
            plot_class = model.module.attention_plot_class
        else:
            att_vis_fn = model.calculate_all_attentions
            plot_class = model.attention_plot_class
        att_reporter = plot_class(
            att_vis_fn,
            data,
            args.outdir + "/att_ws",
            converter=converter,
            transform=load_cv,
            device=device,
            ikey="output",
            iaxis=1,
        )
        trainer.extend(att_reporter, trigger=(1, "epoch"))
    else:
        att_reporter = None

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport(["main/loss", "validation/main/loss"],
                              "epoch",
                              file_name="loss.png"))
    trainer.extend(
        extensions.PlotReport(["main/acc", "validation/main/acc"],
                              "epoch",
                              file_name="acc.png"))
    trainer.extend(
        extensions.PlotReport(["main/ppl", "validation/main/ppl"],
                              "epoch",
                              file_name="ppl.png"))
    trainer.extend(
        extensions.PlotReport(["main/bleu", "validation/main/bleu"],
                              "epoch",
                              file_name="bleu.png"))

    # Save best models
    trainer.extend(
        snapshot_object(model, "model.loss.best"),
        trigger=training.triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(
        snapshot_object(model, "model.acc.best"),
        trigger=training.triggers.MaxValueTrigger("validation/main/acc"),
    )

    # save snapshot which contains model and optimizer states
    if args.save_interval_iters > 0:
        trainer.extend(
            torch_snapshot(filename="snapshot.iter.{.updater.iteration}"),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(torch_snapshot(), trigger=(1, "epoch"))

    # epsilon decay in the optimizer
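    # (when the chosen validation criterion gets worse than the best value so far,
    #  the best snapshot is restored and eps / lr is decayed)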
    if args.opt == "adadelta":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
    elif args.opt == "adam":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(args.report_interval_iters,
                                      "iteration")))
    report_keys = [
        "epoch",
        "iteration",
        "main/loss",
        "validation/main/loss",
        "main/acc",
        "validation/main/acc",
        "main/ppl",
        "validation/main/ppl",
        "elapsed_time",
    ]
    if args.opt == "adadelta":
        trainer.extend(
            extensions.observe_value(
                "eps",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["eps"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("eps")
    elif args.opt in ["adam", "noam"]:
        trainer.extend(
            extensions.observe_value(
                "lr",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["lr"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("lr")
    if args.report_bleu:
        report_keys.append("main/bleu")
        report_keys.append("validation/main/bleu")
    trainer.extend(
        extensions.PrintReport(report_keys),
        trigger=(args.report_interval_iters, "iteration"),
    )

    trainer.extend(
        extensions.ProgressBar(update_interval=args.report_interval_iters))
    set_early_stop(trainer, args)

    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        trainer.extend(
            TensorboardLogger(SummaryWriter(args.tensorboard_dir),
                              att_reporter),
            trigger=(args.report_interval_iters, "iteration"),
        )
    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
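The batch_size=1 / identity-collate trick used by the data loaders above can be shown in isolation; the sketch below uses a hypothetical toy dataset in place of the ESPnet TransformDataset and converter.

import torch
from torch.utils.data import DataLoader, Dataset

class PreBatchedDataset(Dataset):
    """Each item is already a complete minibatch (toy stand-in for TransformDataset)."""
    def __init__(self, batches):
        self.batches = batches
    def __len__(self):
        return len(self.batches)
    def __getitem__(self, i):
        xs, ys = self.batches[i]
        return torch.as_tensor(xs), torch.as_tensor(ys)

batches = [([[1.0, 2.0], [3.0, 4.0]], [0, 1]),   # a 2-sample minibatch
           ([[5.0, 6.0]], [1])]                  # a 1-sample minibatch
loader = DataLoader(PreBatchedDataset(batches),
                    batch_size=1,                # one pre-built batch per step
                    collate_fn=lambda x: x[0])   # unwrap instead of re-batching
for xs, ys in loader:
    print(xs.shape, ys.shape)                    # batch sizes vary per step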
Example no. 29
def main():
    parser = argparse.ArgumentParser(description='Chainer example: image classification')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=32,
                        help='Number of images in each mini-batch')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    args = parser.parse_args()
    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    dataset_gomi = []
    preprocessing(dataset_gomi, "sample_images_can1/pic", 10, 1)
    preprocessing(dataset_gomi, "sample_images_can2/pic", 10, 1)
    preprocessing(dataset_gomi, "sample_images_can3/pic", 20, 1)
    preprocessing(dataset_gomi, "sample_images_can4/pic", 10, 1)
    preprocessing(dataset_gomi, "sample_images_can5/pic", 10, 1)
    preprocessing(dataset_gomi, "sample_images_bin1/pic", 10, 2)
    preprocessing(dataset_gomi, "sample_images_bin2/pic", 10, 2)
    preprocessing(dataset_gomi, "sample_images_bin3/pic", 20, 2)
    preprocessing(dataset_gomi, "sample_images_pet1/pic", 10, 3)
    preprocessing(dataset_gomi, "sample_images_pet2/pic", 10, 3)
    preprocessing(dataset_gomi, "sample_images_pet3/pic", 10, 3)
    preprocessing(dataset_gomi, "sample_images_pet4/pic", 10, 3)

    train, test = split_dataset_random(dataset_gomi, 120, seed=0)

    model = L.Classifier(MLP(), lossfun=F.softmax_cross_entropy)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    #trainer.extend(extensions.PlotReport(['main/loss','validation/main/accuracy'],'epoch',file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch',
                              file_name='accuracy.png'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.LogReport())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    model.to_cpu()

    modelname = args.out + "/MLP.model"
    print('save the trained model: {}'.format(modelname))
    chainer.serializers.save_npz(modelname, model)
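A minimal sketch (not part of the example) of restoring the saved classifier for prediction; the example's own MLP class is defined outside this excerpt, so a same-interface stand-in and a hypothetical input shape are used here.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

class MLP(chainer.Chain):  # stand-in; the example defines its own MLP elsewhere
    def __init__(self, n_units=1000, n_out=4):
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)
            self.l2 = L.Linear(None, n_out)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))

model = L.Classifier(MLP(), lossfun=F.softmax_cross_entropy)
chainer.serializers.load_npz('result/MLP.model', model)  # path printed by the script

x = np.zeros((1, 32 * 32 * 3), dtype=np.float32)  # hypothetical flattened-image shape
with chainer.using_config('train', False), chainer.no_backprop_mode():
    probs = F.softmax(model.predictor(x)).data
print(probs.argmax(axis=1))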
Example no. 30
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(description='Multi-Perceptron classifier/regressor')
    parser.add_argument('dataset', help='Path to data file')
    parser.add_argument('--activation', '-a', choices=activ.keys(), default='sigmoid',
                        help='Activation function')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--dropout_ratio', '-dr', type=float, default=0,
                        help='dropout ratio')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=-1,
                        help='snapshot interval')
    parser.add_argument('--label_index', '-l', type=int, default=5,
                        help='Column number of the target variable (5=Melting)')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--outdir', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--out_ch', '-oc', type=int, default=1,
                        help='num of output channels. set to 1 for regression')
    parser.add_argument('--optimizer', '-op', default='AdaDelta',
                        help='optimizer {MomentumSGD,AdaDelta,AdaGrad,Adam}')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--skip_columns', '-sc', type=int, default=29,
                        help='num of columns which are not used as explanatory variables')
    parser.add_argument('--layers', '-nl', type=int, default=3,
                        help='Number of layers')
    parser.add_argument('--unit', '-nu', type=int, default=100,
                        help='Number of units in the hidden layers')
    parser.add_argument('--test_every', '-t', type=int, default=5,
                        help='use one in every ? entries in the dataset for validation')
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--weight_decay', '-w', type=float, default=0,
                        help='weight decay for regularization')
    args = parser.parse_args()
    args.regress = (args.out_ch == 1)

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    label_type = np.int32 if not args.regress else np.float32

    # read csv file
    dat = pd.read_csv(args.dataset, header=0)

    ##
    print('Target: {}, GPU: {}, Minibatch-size: {}, # epoch: {}'.format(
        dat.keys()[args.label_index], args.gpu, args.batchsize, args.epoch))

#    csvdata = np.loadtxt(args.dataset, delimiter=",", skiprows=args.skip_rows)
    ind = np.ones(dat.shape[1], dtype=bool)  # indices for unused columns
    dat = dat.dropna(axis='columns')
    x = dat.iloc[:,args.skip_columns:].values
    args.in_ch = x.shape[1]
    t = (dat.iloc[:,args.label_index].values)[:,np.newaxis]
    print('target column:', args.label_index)
#    print('excluded columns: {}'.format(np.where(ind==False)[0].tolist()))
    print("data shape: ",x.shape, t.shape)
    x = np.array(x, dtype=np.float32)
    if args.regress:
        t = np.array(t, dtype=label_type)
    else:
        t = np.array(np.ndarray.flatten(t), dtype=label_type)

    # standardize
    t_mean = np.mean(t)
    t_std = np.std(t)
    x_mean = np.mean(x)
    x_std = np.std(x)
    x = (x-x_mean)/x_std
    t = (t-t_mean)/t_std
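    # (the prediction loop at the end maps outputs back with the same t_mean / t_std)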

    # Set up a neural network to train
    model = MLP(args,std=t_std)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimiser
    if args.optimizer == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.003, momentum=0.9)
    elif args.optimizer == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta(rho=0.95, eps=1e-06)
    elif args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(lr=0.001, eps=1e-08)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
    else:
        print("Wrong optimiser")
        exit(-1)
    optimizer.setup(model)
    if args.weight_decay>0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    print('layers: {}, units: {}, optimiser: {}, Weight decay: {}, dropout ratio: {}'.format(args.layers,args.unit,args.optimizer,args.weight_decay,args.dropout_ratio))


## train-validation data
# random splitting
    #train, test = datasets.split_dataset_random(datasets.TupleDataset(x, t), int(0.8*t.size))
# splitting by modulus of index
    train_idx = [i for i in range(t.size) if (i+1) % args.test_every != 0]
    var_idx = [i for i in range(t.size) if (i+1) % args.test_every == 0]
    n = len(train_idx)
    train_idx.extend(var_idx)
    train, test = datasets.split_dataset(datasets.TupleDataset(x, t), n, train_idx)
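    # split_dataset uses the given order: the first n indices become `train`,
    # the remaining (every test_every-th sample) become `test`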

# dataset iterator
    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = frequency/10, 'epoch'

    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'main/MAE', 'validation/main/MAE',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=log_interval)

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.predict:
        trainer.run()
    else:
        test = datasets.TupleDataset(x, t)

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    converter=concat_examples
    idx=0
    with open(os.path.join(args.outdir,'result.txt'),'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model(x).data
                        if args.gpu>-1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        y = y * t_std + t_mean
                        t = t * t_std + t_mean
                    else:
                        y = F.softmax(model(x)).data
                        if args.gpu>-1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            for i in range(y.shape[0]):
                output.write(str(dat.iloc[var_idx[idx],0]))  # idx indexes across all batches
                if(len(t.shape)>1):
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i,j]))
                        output.write(",{}".format(y[i,j]))
                else:
                    output.write(",{0:1.5f},{0:1.5f}".format(t[i],y[i]))
#                    output.write(",{0:1.5f}".format(np.argmax(y[i,:])))
#                    for yy in y[i]:
#                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
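The standardization above and the de-standardization inside the prediction loop form a simple round trip; the sketch below shows the idea in plain NumPy with hypothetical numbers.

import numpy as np

t = np.array([300.0, 350.0, 420.0], dtype=np.float32)  # hypothetical raw targets
t_mean, t_std = t.mean(), t.std()

t_scaled = (t - t_mean) / t_std     # what the network is trained against
y_scaled = t_scaled + 0.05          # stand-in for model predictions
y = y_scaled * t_std + t_mean       # back to the original units, as in the loop above
print(np.round(y, 2))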