Example #1
    def setup_optimizers(self):
        params = self.params

        self.causal_conv_optimizers = []
        for layer in self.causal_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.causal_conv_optimizers.append(opt)

        self.residual_conv_optimizers = []
        for layer in self.residual_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.residual_conv_optimizers.append(opt)

        self.softmax_conv_optimizers = []
        for layer in self.softmax_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.softmax_conv_optimizers.append(opt)
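
Example #1 is an excerpt: params, the layer lists, and the GradientClipping hook are defined elsewhere in its module. A minimal self-contained sketch of the same per-layer optimizer pattern, assuming standard Chainer imports and a placeholder Linear layer, might look like this:

# Minimal sketch of the per-layer optimizer pattern above; the layer and the
# hyperparameter values are placeholders, not the original project's settings.
import chainer
from chainer import links as L
from chainer import optimizers

layer = L.Linear(100, 50)  # stand-in for one causal/residual/softmax conv layer

opt = optimizers.NesterovAG(lr=0.01, momentum=0.9)
opt.setup(layer)
opt.add_hook(chainer.optimizer.WeightDecay(1e-4))      # L2 weight decay hook
opt.add_hook(chainer.optimizer.GradientClipping(1.0))  # clip the gradient L2 norm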
Example #2
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
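
A brief usage sketch for the function above, assuming O is chainer.optimizers imported under that alias and logger is a standard logging logger, as the snippet's module presumably defines:

# Hypothetical calls: known keys map directly, unknown strings fall back to Adam.
opt = optimizer('n_ag')     # -> O.NesterovAG()
opt = optimizer('unknown')  # -> O.Adam(amsgrad=True), plus a warning log entry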
Example #3
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
Example #4
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.Adam(),
                      tag='Adam')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.SGD(),
                      tag='SGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.RMSpropGraves(),
                      tag='RMSpropGraves')
    #    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaDelta(),
                      tag='AdaDelta')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaGrad(),
                      tag='AdaGrad')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.MomentumSGD(),
                      tag='MomentumSGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.NesterovAG(),
                      tag='NesterovAG')
Example #5
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0,
                               beta1=alpha1,
                               beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
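
A usage sketch for get_opt, assuming its module imports chainer.optimizers as optimizers and sys; the argparse.Namespace below only carries the fields the function reads:

# Hypothetical call; alpha0/alpha1 values of 0 mean "use the built-in default".
import argparse

args = argparse.Namespace(opt_model="NAG", alpha0=0.01, alpha1=0.9,
                          alpha2=0, alpha3=0)
opt = get_opt(args)  # -> optimizers.NesterovAG(lr=0.01, momentum=0.9)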
Example #6
def get_optimizer(name, lr, momentum):
    if name == "sgd":
        return optimizers.SGD(lr=lr)
    if name == "msgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name == "nesterov":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    raise NotImplementedError()
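
A usage sketch for this dispatcher, with a small placeholder chainer.Link standing in for the real model:

# Hypothetical usage; any chainer.Link or Chain can be passed to setup().
import chainer.links as L

model = L.Linear(10, 2)
opt = get_optimizer("nesterov", lr=0.01, momentum=0.9)
opt.setup(model)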
Example #7
def main():
    opt = opts.parse()
    model = net.ConvNet(opt.n_classes, opt.BC, opt.nobias, opt.dropout_ratio)
    if opt.gpu > -1:
        chainer.cuda.get_device_from_id(opt.gpu).use()
        model.to_gpu()
    optimizer = optimizers.NesterovAG(lr=opt.LR, momentum=opt.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(opt.weight_decay))
    train_iter, val_iter = dataset.setup(opt)
    updater = training.StandardUpdater(train_iter, optimizer, device=opt.gpu)
    # Trainer
    trainer = training.Trainer(updater, (opt.n_epochs, 'epoch'), opt.save)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, opt.LR),
                   trigger=ManualScheduleTrigger(opt.schedule, 'epoch'))
    trainer.extend(extensions.Evaluator(val_iter, model,
                                        device=opt.gpu), trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(filename='min_loss'), trigger=MinValueTrigger(
        key='validation/main/loss', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot(filename='max_accuracy'), trigger=MaxValueTrigger(
        key='validation/main/accuracy', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'min_loss_model'),
                   trigger=MinValueTrigger(key='validation/main/loss', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'max_accuracy_model'),
                   trigger=MaxValueTrigger(key='validation/main/accuracy', trigger=(5, 'epoch')))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
        trainer.extend(extensions.PlotReport(
            ['lr'], 'epoch', file_name='learning_rate.png'))
    trainer.extend(extensions.PrintReport(['elapsed_time', 'epoch', 'iteration', 'lr',
                                           'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=25))
    if opt.resume and os.path.exists(opt.resume):
        chainer.serializers.load_npz(opt.resume, trainer)
    # Run the training
    try:
        trainer.run()
    except Exception as e:
        import shutil
        import traceback
        print('\nerror message')
        print(traceback.format_exc())
        shutil.rmtree(opt.save)
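
This script assumes project-specific modules (opts, net, dataset) plus several trigger classes; the triggers themselves ship with Chainer. A sketch of the imports it likely relies on (the project-specific ones are assumptions):

# Likely imports for the script above; opts, net and dataset are project modules (assumed).
import os
import chainer
from chainer import optimizers, training
from chainer.training import extensions
from chainer.training.triggers import (ManualScheduleTrigger,
                                        MinValueTrigger, MaxValueTrigger)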
Example #8
 def get_optimizer(self, name, lr, momentum=0.9):
     if name.lower() == "adam":
         return optimizers.Adam(alpha=lr, beta1=momentum)
     if name.lower() == "smorms3":
         return optimizers.SMORMS3(lr=lr)
     if name.lower() == "adagrad":
         return optimizers.AdaGrad(lr=lr)
     if name.lower() == "adadelta":
         return optimizers.AdaDelta(rho=momentum)
     if name.lower() == "nesterov" or name.lower() == "nesterovag":
         return optimizers.NesterovAG(lr=lr, momentum=momentum)
     if name.lower() == "rmsprop":
         return optimizers.RMSprop(lr=lr, alpha=momentum)
     if name.lower() == "momentumsgd":
         return optimizers.MomentumSGD(lr=lr, momentum=momentum)
     if name.lower() == "sgd":
         return optimizers.SGD(lr=lr)
Example #9
def select_optimizer(opt_name, learning_rate):
    if opt_name == "Adam":
        return optimizers.Adam(alpha=learning_rate)
    elif opt_name == "SGD":
        return optimizers.SGD(lr=learning_rate)
    elif opt_name == "RMSpropGraves":
        return optimizers.RMSpropGraves(lr=learning_rate)
    elif opt_name == "RMSprop":
        return optimizers.RMSprop(lr=learning_rate)
    elif opt_name == "AdaDelta":
        return optimizers.AdaDelta()
    elif opt_name == "AdaGrad":
        return optimizers.AdaGrad(lr=learning_rate)
    elif opt_name == "MomentumSGD":
        return optimizers.MomentumSGD(lr=learning_rate)
    elif opt_name == "NesterovAG":
        return optimizers.NesterovAG(lr=learning_rate)
    else:
        print('please select correct optimizer')
        exit()
Example #10
    def __init__(self):
        self.__start_time = time.time()
        self.__always_attn = get_arg('always_attn')
        self.__fold = 10
        self.__batch_size = 32
        self.__n_epoch = 10
        self.__label_num = 2
        self.__mem_units = get_arg('mem_units')
        self.__data_mode = get_arg('data')
        self.__is_toy = get_arg('toy')
        self.__dict = get_arg('pol_dict')
        self.__logdir = get_arg('logdir')
        self.__is_regression = get_arg('regression')
        self.__attention = get_arg('attention')
        self.__lr = get_arg('lr')
        self.__l2 = get_arg('l2')
        self.__clip_grad = get_arg('clip_grad')
        self.__composition = get_arg('composition')
        self.__n_units = 200
        self.__render_graph = get_arg('not_render')
        self.__dropout = get_arg('dropout')
        if self.__data_mode == 'ntcire' or self.__data_mode.startswith('sst'):
            self.__n_units = 300
        self.__not_embed = get_arg('not_embed')
        self.__attention_target = get_arg('attention_target')
        self.__forget_bias = get_arg('forget_bias')
        self.__only_attn = get_arg('only_attn')

        # optimizer
        self.__opt_name = get_arg('optimizer')
        if self.__opt_name == 'SGD':
            self.__opt = lambda: optimizers.SGD(lr=self.__lr)
        elif self.__opt_name == 'AdaDelta':
            self.__opt = lambda: optimizers.AdaDelta()
        elif self.__opt_name == 'Adam':
            self.__opt = lambda: optimizers.Adam(alpha=self.__lr)
        elif self.__opt_name == 'NesterovAg':
            self.__opt = lambda: optimizers.NesterovAG(lr=self.__lr)
        elif self.__opt_name == 'AdaGrad':
            self.__opt = lambda: optimizers.AdaGrad(lr=self.__lr)

        # data
        data_dir = utils.get_data_path()
        mecab_embedf = data_dir + '/vector/word2vec/wikiDump_mecab_size200_cbow.w2vModel'
        kytea_embedf = data_dir + '/vector/word2vec/wikiDump_kytea_size200_skipgram.w2vModel'
        en_embedf = data_dir + '/vector/glove/glove.840B.300d.txt'
        if self.__data_mode == 'ntcirj_con':
            data = data_dir + '/ntcirj/ckylark/data.pkl.bz2'
            self.__embedf = kytea_embedf
        if self.__data_mode == 'ntcirj_dep':
            data = data_dir + '/ntcirj/cabocha/data.pkl.bz2'
            self.__embedf = mecab_embedf
        elif self.__data_mode == 'ntcire':
            self.__embedf = en_embedf
            data = data_dir + '/ntcire/ckylark/data.pkl.bz2'
        elif self.__data_mode == 'tsukuba':
            self.__embedf = kytea_embedf
            data = data_dir + '/tsukuba/ckylark/data.pkl.bz2'
        elif self.__data_mode == 'sst_all':
            self.__embedf = en_embedf
            self.__label_num = 5
            data = data_dir + '/sst_all/data.pkl.bz2'
        elif self.__data_mode == 'sst_cut':
            self.__embedf = en_embedf
            self.__label_num = 5
            data = data_dir + '/sst_cut/data.pkl.bz2'
        data = utils.read_pkl_bz2(data)
        if self.__is_toy:
            data = data['toy']
            self.__n_epoch = 3
        if self.__dict == 'pn':
            data = data['poldict']
        elif self.__dict == 'pnn':
            data = data['poldict_neutral']
        self.__data = data
        self.mk_logfiles()
        self.print_params()
Example #11
def do_train(config_training):

    src_indexer, tgt_indexer = load_voc_and_update_training_config(config_training)

    save_prefix = config_training.training_management.save_prefix

    output_files_dict = {}
    output_files_dict["train_config"] = save_prefix + ".train.config"
    output_files_dict["model_ckpt"] = save_prefix + ".model." + "ckpt" + ".npz"
    output_files_dict["model_final"] = save_prefix + \
        ".model." + "final" + ".npz"
    output_files_dict["model_best"] = save_prefix + ".model." + "best" + ".npz"
    output_files_dict["model_best_loss"] = save_prefix + ".model." + "best_loss" + ".npz"

#     output_files_dict["model_ckpt_config"] = save_prefix + ".model." + "ckpt" + ".config"
#     output_files_dict["model_final_config"] = save_prefix + ".model." + "final" + ".config"
#     output_files_dict["model_best_config"] = save_prefix + ".model." + "best" + ".config"
#     output_files_dict["model_best_loss_config"] = save_prefix + ".model." + "best_loss" + ".config"

    output_files_dict["test_translation_output"] = save_prefix + ".test.out"
    output_files_dict["test_src_output"] = save_prefix + ".test.src.out"
    output_files_dict["dev_translation_output"] = save_prefix + ".dev.out"
    output_files_dict["dev_src_output"] = save_prefix + ".dev.src.out"
    output_files_dict["valid_translation_output"] = save_prefix + ".valid.out"
    output_files_dict["valid_src_output"] = save_prefix + ".valid.src.out"
    output_files_dict["sqlite_db"] = save_prefix + ".result.sqlite"
    output_files_dict["optimizer_ckpt"] = save_prefix + ".optimizer." + "ckpt" + ".npz"
    output_files_dict["optimizer_final"] = save_prefix + ".optimizer." + "final" + ".npz"

    save_prefix_dir, save_prefix_fn = os.path.split(save_prefix)
    ensure_path(save_prefix_dir)

    already_existing_files = []
    for key_info, filename in output_files_dict.iteritems():  # , valid_data_fn]:
        if os.path.exists(filename):
            already_existing_files.append(filename)
    if len(already_existing_files) > 0:
        print "Warning: existing files are going to be replaced / updated: ", already_existing_files
        if not config_training.training_management.force_overwrite:
            raw_input("Press Enter to Continue")

    save_train_config_fn = output_files_dict["train_config"]
    log.info("Saving training config to %s" % save_train_config_fn)
    config_training.save_to(save_train_config_fn)
#     json.dump(config_training, open(save_train_config_fn, "w"), indent=2, separators=(',', ': '))

    Vi = len(src_indexer)  # + UNK
    Vo = len(tgt_indexer)  # + UNK

    eos_idx = Vo

    data_fn = config_training.data.data_fn

    log.info("loading training data from %s" % data_fn)
    training_data_all = json.load(gzip.open(data_fn, "rb"))

    training_data = training_data_all["train"]

    log.info("loaded %i sentences as training data" % len(training_data))

    if "test" in training_data_all:
        test_data = training_data_all["test"]
        log.info("Found test data: %i sentences" % len(test_data))
    else:
        test_data = None
        log.info("No test data found")

    if "dev" in training_data_all:
        dev_data = training_data_all["dev"]
        log.info("Found dev data: %i sentences" % len(dev_data))
    else:
        dev_data = None
        log.info("No dev data found")

    if "valid" in training_data_all:
        valid_data = training_data_all["valid"]
        log.info("Found valid data: %i sentences" % len(valid_data))
    else:
        valid_data = None
        log.info("No valid data found")

    max_src_tgt_length = config_training.training_management.max_src_tgt_length
    if max_src_tgt_length is not None:
        log.info("filtering sentences of length larger than %i" % (max_src_tgt_length))
        filtered_training_data = []
        nb_filtered = 0
        for src, tgt in training_data:
            if len(src) <= max_src_tgt_length and len(
                    tgt) <= max_src_tgt_length:
                filtered_training_data.append((src, tgt))
            else:
                nb_filtered += 1
        log.info("filtered %i sentences of length larger than %i" % (nb_filtered, max_src_tgt_length))
        training_data = filtered_training_data

    if not config_training.training.no_shuffle_of_training_data:
        log.info("shuffling")
        import random
        random.shuffle(training_data)
        log.info("done")

    encdec, _, _, _ = create_encdec_and_indexers_from_config_dict(config_training,
                                                                  src_indexer=src_indexer, tgt_indexer=tgt_indexer,
                                                                  load_config_model="if_exists" if config_training.training_management.resume else "no")
#     create_encdec_from_config_dict(config_training.model, src_indexer, tgt_indexer,
#                             load_config_model = "if_exists" if config_training.training_management.resume else "no")

#     if config_training.training_management.resume:
#         if "model_parameters" not in config_training:
#             log.error("cannot find model parameters in config file")
#         if config_training.model_parameters.type == "model":
#             model_filename = config_training.model_parameters.filename
#             log.info("resuming from model parameters %s" % model_filename)
#             serializers.load_npz(model_filename, encdec)

    if config_training.training_management.load_model is not None:
        log.info("loading model parameters from %s", config_training.training_management.load_model)
        serializers.load_npz(config_training.training_management.load_model, encdec)

    gpu = config_training.training_management.gpu
    if gpu is not None:
        encdec = encdec.to_gpu(gpu)

    if config_training.training.optimizer == "adadelta":
        optimizer = optimizers.AdaDelta()
    elif config_training.training.optimizer == "adam":
        optimizer = optimizers.Adam()
    elif config_training.training.optimizer == "adagrad":
        optimizer = optimizers.AdaGrad(lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "sgd":
        optimizer = optimizers.SGD(lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "momentum":
        optimizer = optimizers.MomentumSGD(lr=config_training.training.learning_rate,
                                           momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "nesterov":
        optimizer = optimizers.NesterovAG(lr=config_training.training.learning_rate,
                                          momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "rmsprop":
        optimizer = optimizers.RMSprop(lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "rmspropgraves":
        optimizer = optimizers.RMSpropGraves(lr=config_training.training.learning_rate,
                                             momentum=config_training.training.momentum)
    else:
        raise NotImplementedError

    with cuda.get_device(gpu):
        optimizer.setup(encdec)

    if config_training.training.l2_gradient_clipping is not None and config_training.training.l2_gradient_clipping > 0:
        optimizer.add_hook(chainer.optimizer.GradientClipping(
            config_training.training.l2_gradient_clipping))

    if config_training.training.hard_gradient_clipping is not None and config_training.training.hard_gradient_clipping > 0:
        optimizer.add_hook(chainer.optimizer.GradientHardClipping(
            *config_training.training.hard_gradient_clipping))

    if config_training.training.weight_decay is not None:
        optimizer.add_hook(
            chainer.optimizer.WeightDecay(
                config_training.training.weight_decay))

    if config_training.training_management.load_optimizer_state is not None:
        with cuda.get_device(gpu):
            log.info("loading optimizer parameters from %s", config_training.training_management.load_optimizer_state)
            serializers.load_npz(config_training.training_management.load_optimizer_state, optimizer)

    if config_training.training_management.timer_hook:
        timer_hook = profiling_tools.MyTimerHook
    else:
        import contextlib

        @contextlib.contextmanager
        def timer_hook():
            yield

    import training_chainer
    with cuda.get_device(gpu):
        with timer_hook() as timer_infos:

            if config_training.training_management.max_nb_iters is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_iters,
                    "iteration")
                if config_training.training_management.max_nb_epochs is not None:
                    log.warn(
                        "max_nb_iters and max_nb_epochs both specified. Only max_nb_iters will be considered.")
            elif config_training.training_management.max_nb_epochs is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_epochs, "epoch")
            else:
                stop_trigger = None
            training_chainer.train_on_data_chainer(encdec, optimizer, training_data, output_files_dict,
                                                   src_indexer, tgt_indexer, eos_idx=eos_idx,
                                                   config_training=config_training,
                                                   stop_trigger=stop_trigger,
                                                   test_data=test_data, dev_data=dev_data, valid_data=valid_data
                                                   )
Example #12
    def set_optimizer(self,
                      optimizer,
                      learning_rate_init,
                      weight_decay=0,
                      clip_grad_norm=5,
                      lr_schedule=None,
                      factor=None,
                      patience_epoch=None):
        """Set the optimizer and add hooks
        Args:
            optimizer (string): sgd or adam or adadelta or adagrad or rmsprop
            learning_rate_init (float): An initial learning rate
            weight_decay (float, optional): L2 penalty
            clip_grad_norm (float):
            lr_schedule: not used here
            factor: not used here
            patience_epoch: not used here
        Returns:
            None (no scheduler is used in this implementation)
        """
        optimizer = optimizer.lower()
        if optimizer not in OPTIMIZER_CLS_NAMES:
            raise ValueError(
                "Optimizer name should be one of [%s], you provided %s." %
                (", ".join(OPTIMIZER_CLS_NAMES), optimizer))

        if optimizer == 'adadelta':
            self.optimizer = optimizers.AdaDelta(rho=0.95, eps=1e-6)
            # TODO: check learning rate
        elif optimizer == 'adagrad':
            self.optimizer = optimizers.AdaGrad(lr=learning_rate_init,
                                                eps=1e-8)
        elif optimizer == 'adam':
            self.optimizer = optimizers.Adam(alpha=0.001,
                                             beta1=0.9,
                                             beta2=0.999,
                                             eps=1e-8)
            # TODO: check learning rate
        elif optimizer == 'sgd':
            self.optimizer = optimizers.MomentumSGD(lr=learning_rate_init,
                                                    momentum=0.9)
        elif optimizer == 'nesterov':
            self.optimizer = optimizers.NesterovAG(lr=learning_rate_init,
                                                   momentum=0.9)
        elif optimizer == 'rmsprop':
            self.optimizer = optimizers.RMSprop(lr=learning_rate_init,
                                                alpha=0.99,
                                                eps=1e-8)
        elif optimizer == 'rmspropgraves':
            self.optimizer = optimizers.RMSpropGraves(lr=learning_rate_init,
                                                      alpha=0.95,
                                                      momentum=0.9,
                                                      eps=0.0001)
        else:
            raise NotImplementedError

        self.optimizer.setup(self)

        # Add hook
        self.optimizer.add_hook(
            chainer.optimizer.GradientClipping(clip_grad_norm))
        self.optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        # self.optimizer.add_hook(chainer.optimizer.GradientNoise(eta=0.01))

        return None
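
A usage sketch, assuming this method belongs to a chainer.Chain subclass and that OPTIMIZER_CLS_NAMES in its module lists the supported names:

# Hypothetical call on a model that defines set_optimizer as above.
model.set_optimizer(optimizer='nesterov',
                    learning_rate_init=0.01,
                    weight_decay=1e-5,
                    clip_grad_norm=5)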
Example #13
 def create(self):
     return optimizers.NesterovAG(0.1)
Example #14
# init RAM model and set optimizer
from ram import RAM

model = RAM(g_size=g_size,
            n_steps=n_steps,
            n_scales=n_scales,
            var=variance,
            use_lstm=args.lstm)

if not args.lstm:
    data = model.core_hh.W.data
    data[:] = np.identity(data.shape[0], dtype=np.float32)

lr_base = 1e-2
optimizer = optimizers.NesterovAG(lr=lr_base)
optimizer.use_cleargrads()
optimizer.setup(model)

if args.model is not None:
    print('load model from {}'.format(args.model))
    serializers.load_hdf5(args.model, model)

if args.resume is not None:
    print('load optimizer state from {}'.format(args.resume))
    serializers.load_hdf5(args.resume, optimizer)

# GPU/CPU
gpuid = args.gpuid
if gpuid >= 0:
    cuda.get_device(gpuid).use()
Example #15
def run_nn_vae(q,
               optimizer_nm,
               train_x,
               train_real,
               train_y,
               test_x,
               test_y,
               cross_val,
               nn_n_hidden,
               vae_n_hidden,
               n_z,
               n_batch,
               nn_n_epochs,
               vae_n_epochs,
               n_epochs_tuning,
               activation,
               grad_clip,
               noise_nm,
               gpu=-1):

    # np.random.seed(123)  # fix the random seed

    n_x = train_x.shape[1]
    n_real = train_real.shape[1]
    n_y = train_y.shape[1]

    nn_n_layers = len(nn_n_hidden)

    vae_n_hidden_recog = vae_n_hidden
    vae_n_hidden_gen = vae_n_hidden[::-1]
    vae_n_layers_recog = len(vae_n_hidden_recog)
    vae_n_layers_gen = len(vae_n_hidden_gen)
    """NN pre_train"""

    layers = {}

    # Recognition model.
    nn_layer_sizes = [(n_x, nn_n_hidden[0])]
    if nn_n_layers > 1:
        nn_layer_sizes += zip(nn_n_hidden[:-1], nn_n_hidden[1:])
    nn_layer_sizes += [(nn_n_hidden[-1], n_real)]

    for i, (n_incoming, n_outgoing) in enumerate(nn_layer_sizes):
        layers['nn_layer_%i' % i] = F.Linear(n_incoming, n_outgoing)
    """VAE pre_train"""

    # Recognition model.
    vae_rec_layer_sizes = [(n_real, vae_n_hidden_recog[0])]
    if vae_n_layers_recog > 1:
        vae_rec_layer_sizes += zip(vae_n_hidden_recog[:-1],
                                   vae_n_hidden_recog[1:])
    vae_rec_layer_sizes += [(vae_n_hidden_recog[-1], n_z)]

    for i, (n_incoming, n_outgoing) in enumerate(vae_rec_layer_sizes):
        layers['vae_recog_%i' % i] = F.Linear(n_incoming, n_outgoing)

    layers['log_sigma'] = F.Linear(vae_n_hidden_recog[-1], n_z)

    # Generating model.
    vae_gen_layer_sizes = [(n_z, vae_n_hidden_gen[0])]
    if vae_n_layers_recog > 1:
        vae_gen_layer_sizes += zip(vae_n_hidden_gen[:-1], vae_n_hidden_gen[1:])
    vae_gen_layer_sizes += [(vae_n_hidden_gen[-1], n_real)]

    for i, (n_incoming, n_outgoing) in enumerate(vae_gen_layer_sizes):
        layers['vae_gen_%i' % i] = F.Linear(n_incoming, n_outgoing)

    layers['output'] = F.Linear(n_z, n_y)

    model = NN_VAE(**layers)

    if gpu >= 0:
        cuda.init(gpu)
        model.to_gpu()

    # use Adam
    optimizers_dict = {
        "Adam": optimizers.Adam(),
        "AdaDelta": optimizers.AdaDelta(),
        "AdaGrad": optimizers.AdaGrad(),
        "MomentumSGD": optimizers.MomentumSGD(),
        "NesterovAG": optimizers.NesterovAG(),
        "RMSprop": optimizers.RMSprop(),
        "SGD": optimizers.SGD()
    }

    optimizer = optimizers_dict[optimizer_nm]
    optimizer.setup(model.collect_parameters())

    total_nn_losses = []

    if cross_val >= 0:
        print('{}s pre-train start ...'.format(cross_val))

    # pre_train_NN start

    for epoch in xrange(1, nn_n_epochs + 1):
        t1 = time.time()

        # np.random.seed(123)
        indexes = np.random.permutation(train_x.shape[0])

        nn_total_loss = 0.0
        nn_out_list = np.zeros(train_real.shape)
        noisy_train_x = np.array(noisy(noise_nm, train_x), dtype=np.float32)
        for i in xrange(0, train_x.shape[0], n_batch):
            noisy_x_batch = noisy_train_x[indexes[i:i + n_batch]]
            real_batch = train_real[indexes[i:i + n_batch]]

            if gpu >= 0:
                noisy_x_batch = cuda.to_gpu(noisy_x_batch)

            optimizer.zero_grads()

            loss, nn_out = model.nn_forward(noisy_x_batch,
                                            real_batch,
                                            nn_n_layers,
                                            nonlinear=activation,
                                            gpu=-1,
                                            train=True)

            nn_total_loss += float(loss.data) * len(noisy_x_batch)
            loss.backward()
            optimizer.clip_grads(grad_clip)
            optimizer.update()
            nn_out_list[indexes[i:i + n_batch]] = nn_out.data

        total_nn_losses.append(nn_total_loss / train_x.shape[0])

    #  pre_train_VAE start

    total_vae_losses = []

    if cross_val >= 0:
        print('{}s tuning start ...'.format(cross_val))

    nn_out_list = np.array(nn_out_list, dtype=np.float32)
    noisy_nn_out_list = np.array(noisy(noise_nm, nn_out_list),
                                 dtype=np.float32)

    for epoch in xrange(1, vae_n_epochs + 1):
        # np.random.seed(123)
        indexes = np.random.permutation(train_x.shape[0])
        total_loss = 0.0
        noisy_nn_out_list = np.array(noisy(noise_nm, nn_out_list),
                                     dtype=np.float32)
        for i in xrange(0, train_x.shape[0], n_batch):
            noisy_nn_out_list_batch = noisy_nn_out_list[indexes[i:i + n_batch]]
            nn_out_list_batch = nn_out_list[indexes[i:i + n_batch]]
            real_batch = train_real[indexes[i:i + n_batch]]

            if gpu >= 0:
                noisy_nn_out_list_batch = cuda.to_gpu(noisy_nn_out_list_batch)

            optimizer.zero_grads()

            rec_loss, kl_loss, output = model.vae_forward(
                noisy_nn_out_list_batch,
                real_batch,
                vae_n_layers_recog,
                vae_n_layers_gen,
                nonlinear_q=activation,
                nonlinear_p=activation,
                train=True)
            loss = rec_loss + kl_loss
            total_loss += float(loss.data) * len(noisy_nn_out_list_batch)
            loss.backward()
            optimizer.clip_grads(grad_clip)
            optimizer.update()
        total_vae_losses.append(total_loss / train_x.shape[0])

    #  train_test_NN_VAE start

    total_nn_vae_losses = []
    total_test_losses = []
    total_train_losses = []
    if cross_val >= 0:
        print('{}s tuning start ...'.format(cross_val))

    for epoch in xrange(1, n_epochs_tuning + 1):
        noisy_train_x = np.array(noisy(noise_nm, train_x), dtype=np.float32)
        #np.random.seed(123)
        indexes = np.random.permutation(train_x.shape[0])
        total_loss = 0.0
        for i in xrange(0, train_x.shape[0], n_batch):
            noisy_x_batch = noisy_train_x[indexes[i:i + n_batch]]
            y_batch = train_y[indexes[i:i + n_batch]]

            if gpu >= 0:
                noisy_x_batch = cuda.to_gpu(noisy_x_batch)
                y_batch = cuda.to_gpu(y_batch)

            optimizer.zero_grads()

            loss, predict_score = model.nn_vae_tuning(noisy_x_batch,
                                                      y_batch,
                                                      nn_n_layers,
                                                      vae_n_layers_recog,
                                                      nonlinear_q=activation,
                                                      train=True)
            loss = loss**0.5
            total_loss += float(loss.data) * len(noisy_x_batch)
            loss.backward()
            optimizer.clip_grads(grad_clip)
            optimizer.update()
        total_nn_vae_losses.append(total_loss / train_x.shape[0])

        # test

        sum_loss_train = 0

        for i in xrange(0, train_x.shape[0], n_batch):
            x_batch = train_x[indexes[i:i + n_batch]]
            y_batch = train_y[indexes[i:i + n_batch]]

            if gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)

            loss, predict_score = model.nn_vae_tuning(x_batch,
                                                      y_batch,
                                                      nn_n_layers,
                                                      vae_n_layers_recog,
                                                      nonlinear_q=activation,
                                                      train=False)
            loss = loss**0.5
            sum_loss_train += float(loss.data) * len(x_batch)
        total_train_losses.append(sum_loss_train / train_x.shape[0])

        x_batch = test_x
        y_batch = test_y

        loss, predict_score = model.nn_vae_tuning(x_batch,
                                                  y_batch,
                                                  nn_n_layers,
                                                  vae_n_layers_recog,
                                                  nonlinear_q=activation,
                                                  train=False)
        loss = loss**0.5
        total_test_losses.append(loss.data)
    q.put([
        total_nn_losses, total_vae_losses, total_nn_vae_losses,
        total_train_losses, total_test_losses
    ])
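
Example #15 is written against the early Chainer v1 interface (model.collect_parameters(), optimizer.zero_grads(), optimizer.clip_grads()). As a point of reference, a self-contained sketch of the equivalent update step with the later Link-based API; the tiny Linear model and the clipping threshold are placeholders:

# Later-API equivalent of the inner update step above (sketch only).
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

model = L.Linear(4, 2)
optimizer = optimizers.NesterovAG(lr=0.01, momentum=0.9)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))  # replaces clip_grads()

x = np.zeros((1, 4), dtype=np.float32)
model.cleargrads()                                           # replaces zero_grads()
loss = F.sum(model(x))
loss.backward()
optimizer.update()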
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=40)
    parser.add_argument('--looptimes', '-t', type=int, default=5)
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--batch', '-b', type=int, default=128)
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # Set up a neural network to train.
    model = L.Classifier(
        network.LocalPCN(class_labels=class_labels, LoopTimes=args.looptimes))

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.NesterovAG(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-3))

    num_train_samples = 45000
    train_iter = iterators.SerialIterator(train[:num_train_samples],
                                          batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(train[num_train_samples:],
                                         batch_size=args.batch,
                                         repeat=False,
                                         shuffle=False)

    if args.model != None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)

    if args.opt != None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=(10, 'iteration'))

    # Schedule of a learning rate (LinearShift)
    trainer.extend(
        extensions.LinearShift('lr', (args.lr, args.lr * 0.1),
                               (args.epoch * 0.5, args.epoch * 0.5 + 1)),
        trigger=(1, 'epoch'))

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'lr', 'elapsed_time'
    ]),
                   trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    #Plot computation graph
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)

    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)
Example #17
def SetupOptimizer(model):
    opt = optimizers.NesterovAG(lr=args.optimizer['lr'],
                                momentum=args.optimizer['momentum'])
    opt.setup(model)
    return opt
Example #18
        with self.init_scope():
            self.l1 = L.Linear(None, n_mid_units)
            self.l2 = L.Linear(None, n_mid_units)
            self.l3 = L.Linear(None, n_out)

    def forward(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        return self.l3(h2)


# Instantiate the model
model = MyNeuralNetwork(n_mid_units=mid_size, n_out=out_size)

# Give optimizing method to the model
optimizer = optimizers.NesterovAG()
optimizer.setup(model)

# Assign GPU or CPU to the model
if gpu_id >= 0:
    cuda.get_device(gpu_id).use()
    model.to_gpu(gpu_id)
    cp = cuda.cupy

# Problem 2
# Train the model defined in Problem 1 using the triplet loss.
# The distance between two normal items, and between two defective items, should be small,
# while the distance between a normal item and a defective item should be large.
# (A minimal triplet-loss sketch follows after this snippet.)

# Index used to select the anchor image of a triplet
triplet_pos = 0
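
A minimal sketch of one triplet-loss update with Chainer's built-in F.triplet, as referenced in the Problem 2 comment above. The input size (784) and the randomly generated batches are placeholders; in practice anchor and positive come from the same class (normal/normal or defective/defective), negative from the other class, and the arrays here stay on the CPU:

# Sketch of a single triplet update using the model and optimizer defined above.
import numpy as np
import chainer.functions as F

# Placeholder batches: anchor/positive share a class, negative is from the other class.
anchor_x = np.random.rand(8, 784).astype(np.float32)
positive_x = np.random.rand(8, 784).astype(np.float32)
negative_x = np.random.rand(8, 784).astype(np.float32)

anchor = model(anchor_x)      # embeddings from the network defined in Problem 1
positive = model(positive_x)
negative = model(negative_x)

loss = F.triplet(anchor, positive, negative, margin=0.2)
model.cleargrads()
loss.backward()
optimizer.update()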
Example #19
                        conv5=F.Convolution2D(32, 64, (1, 4)),
                        conv6=F.Convolution2D(64, 64, (1, 4)),
                        conv7=F.Convolution2D(64, 128, (1, 2)),
                        conv8=F.Convolution2D(128, 128, (1, 2)),
                        conv9=F.Convolution2D(128, 256, (1, 2)),
                        conv10=F.Convolution2D(256, 256, (1, 2)),
                        fc11=F.Linear(256 * 10 * 1, 1024),
                        norm1=F.BatchNormalization(1024),
                        fc12=F.Linear(1024, 1024),
                        norm2=F.BatchNormalization(1024),
                        fc13=F.Linear(1024, 3))

    #optimizer = optimizers.MomentumSGD(lr=LR, momentum=0.9)
    #optimizer = optimizers.SMORMS3(lr=LR, eps=1e-16)
    #optimizer = optimizers.AdaGrad(lr=LR)
    optimizer = optimizers.NesterovAG(lr=LR, momentum=0.9)
    optimizer.setup(model)

    if GPU_ID >= 0:
        cuda.get_device(GPU_ID).use()
        model.to_gpu(GPU_ID)

    print 'show train_index,test_index'
    print train_index
    print test_index
    print 'Fold %d ' % (c_f)

    N_test = test_index.shape[0]

    max_accuracy = 0
Example #20
def command_line(arguments=None):
    import argparse
    parser = argparse.ArgumentParser(
        description="Train a RNNSearch model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "data_prefix",
        help="prefix of the training data created by make_data.py")
    parser.add_argument(
        "save_prefix",
        help="prefix to be added to all files created during the training")
    parser.add_argument("--gpu",
                        type=int,
                        nargs="+",
                        default=None,
                        help="specify gpu number to use, if any")
    #parser.add_argument("--gpulist", type = int, nargs = "+", default = None, help = "specify gpu number to use, if any")
    parser.add_argument(
        "--load_model",
        help="load the parameters of a previously trained model")
    parser.add_argument("--load_optimizer_state",
                        help="load previously saved optimizer states")
    parser.add_argument("--Ei",
                        type=int,
                        default=620,
                        help="Source words embedding size.")
    parser.add_argument("--Eo",
                        type=int,
                        default=620,
                        help="Target words embedding size.")
    parser.add_argument("--Hi",
                        type=int,
                        default=1000,
                        help="Source encoding layer size.")
    parser.add_argument("--Ho",
                        type=int,
                        default=1000,
                        help="Target hidden layer size.")
    parser.add_argument("--Ha",
                        type=int,
                        default=1000,
                        help="Attention Module Hidden layer size.")
    parser.add_argument("--Hl",
                        type=int,
                        default=500,
                        help="Maxout output size.")
    parser.add_argument("--mb_size",
                        type=int,
                        default=80,
                        help="Minibatch size")
    parser.add_argument("--nb_batch_to_sort",
                        type=int,
                        default=20,
                        help="Sort this many batches by size.")
    parser.add_argument("--noise_on_prev_word",
                        default=False,
                        action="store_true")

    parser.add_argument(
        "--use_memory_optimization",
        default=False,
        action="store_true",
        help="Experimental option that could strongly reduce memory used.")

    parser.add_argument("--max_nb_iters",
                        type=int,
                        default=None,
                        help="maximum number of iterations")

    parser.add_argument("--max_src_tgt_length",
                        type=int,
                        help="Limit length of training sentences")

    parser.add_argument("--l2_gradient_clipping",
                        type=float,
                        default=1,
                        help="L2 gradient clipping. 0 for None")

    parser.add_argument("--hard_gradient_clipping",
                        type=float,
                        nargs=2,
                        help="hard gradient clipping.")

    parser.add_argument("--weight_decay",
                        type=float,
                        help="Weight decay value. ")

    parser.add_argument("--optimizer",
                        choices=[
                            "sgd", "rmsprop", "rmspropgraves", "momentum",
                            "nesterov", "adam", "adagrad", "adadelta"
                        ],
                        default="adam",
                        help="Optimizer type.")
    parser.add_argument("--learning_rate",
                        type=float,
                        default=0.01,
                        help="Learning Rate")
    parser.add_argument("--momentum",
                        type=float,
                        default=0.9,
                        help="Momentum term")
    parser.add_argument("--report_every",
                        type=int,
                        default=200,
                        help="report every x iterations")
    parser.add_argument("--randomized_data",
                        default=False,
                        action="store_true")
    parser.add_argument("--use_accumulated_attn",
                        default=False,
                        action="store_true")

    parser.add_argument("--use_deep_attn", default=False, action="store_true")

    parser.add_argument("--no_shuffle_of_training_data",
                        default=False,
                        action="store_true")
    parser.add_argument("--no_resume", default=False, action="store_true")

    parser.add_argument("--init_orth", default=False, action="store_true")

    parser.add_argument("--reverse_src", default=False, action="store_true")
    parser.add_argument("--reverse_tgt", default=False, action="store_true")

    parser.add_argument("--curiculum_training",
                        default=False,
                        action="store_true")

    parser.add_argument("--use_bn_length", default=0, type=int)
    parser.add_argument("--use_previous_prediction", default=0, type=float)

    parser.add_argument("--no_report_or_save",
                        default=False,
                        action="store_true")

    parser.add_argument(
        "--lexical_probability_dictionary",
        help=
        "lexical translation probabilities in zipped JSON format. Used to implement https://arxiv.org/abs/1606.02006"
    )
    parser.add_argument(
        "--lexicon_prob_epsilon",
        default=1e-3,
        type=float,
        help="epsilon value for combining the lexical probabilities")

    parser.add_argument(
        "--encoder_cell_type",
        default="lstm",
        help=
        "cell type of encoder. format: type,param1:val1,param2:val2,... where type is in [%s]"
        % (" ".join(rnn_cells.cell_dict.keys())))
    parser.add_argument(
        "--decoder_cell_type",
        default="lstm",
        help="cell type of decoder. format same as for encoder")

    parser.add_argument("--sample_every", default=200, type=int)

    parser.add_argument("--save_ckpt_every", default=4000, type=int)

    parser.add_argument("--use_reinf", default=False, action="store_true")

    parser.add_argument("--is_multitarget", default=False, action="store_true")
    parser.add_argument(
        "--postprocess",
        default=False,
        action="store_true",
        help=
        "This flag indicates whether the translations should be postprocessed or not. For now it simply indicates that the BPE segmentation should be undone."
    )

    args = parser.parse_args(args=arguments)

    output_files_dict = {}
    output_files_dict["train_config"] = args.save_prefix + ".train.config"
    output_files_dict[
        "model_ckpt"] = args.save_prefix + ".model." + "ckpt" + ".npz"
    output_files_dict[
        "model_final"] = args.save_prefix + ".model." + "final" + ".npz"
    output_files_dict[
        "model_best"] = args.save_prefix + ".model." + "best" + ".npz"
    output_files_dict[
        "model_best_loss"] = args.save_prefix + ".model." + "best_loss" + ".npz"

    output_files_dict[
        "test_translation_output"] = args.save_prefix + ".test.out"
    output_files_dict["test_src_output"] = args.save_prefix + ".test.src.out"
    output_files_dict["dev_translation_output"] = args.save_prefix + ".dev.out"
    output_files_dict["dev_src_output"] = args.save_prefix + ".dev.src.out"
    output_files_dict[
        "valid_translation_output"] = args.save_prefix + ".valid.out"
    output_files_dict["valid_src_output"] = args.save_prefix + ".valid.src.out"
    output_files_dict["sqlite_db"] = args.save_prefix + ".result.sqlite"
    output_files_dict[
        "optimizer_ckpt"] = args.save_prefix + ".optimizer." + "ckpt" + ".npz"
    output_files_dict[
        "optimizer_final"] = args.save_prefix + ".optimizer." + "final" + ".npz"

    save_prefix_dir, save_prefix_fn = os.path.split(args.save_prefix)
    ensure_path(save_prefix_dir)

    already_existing_files = []
    for key_info, filename in output_files_dict.iteritems(
    ):  #, valid_data_fn]:
        if os.path.exists(filename):
            already_existing_files.append(filename)
    if len(already_existing_files) > 0:
        print "Warning: existing files are going to be replaced / updated: ", already_existing_files
        #raw_input("Press Enter to Continue")

    config_fn = args.data_prefix + ".data.config"
    voc_fn = args.data_prefix + ".voc"
    data_fn = args.data_prefix + ".data.json.gz"

    log.info("loading training data from %s" % data_fn)
    training_data_all = json.load(gzip.open(data_fn, "rb"))

    training_data = training_data_all["train"]

    log.info("loaded %i sentences as training data" % len(training_data))

    if "test" in training_data_all:
        test_data = training_data_all["test"]
        log.info("Found test data: %i sentences" % len(test_data))
    else:
        test_data = None
        log.info("No test data found")

    if "dev" in training_data_all:
        dev_data = training_data_all["dev"]
        log.info("Found dev data: %i sentences" % len(dev_data))
    else:
        dev_data = None
        log.info("No dev data found")

    if "valid" in training_data_all:
        valid_data = training_data_all["valid"]
        log.info("Found valid data: %i sentences" % len(valid_data))
    else:
        valid_data = None
        log.info("No valid data found")

    log.info("loading voc from %s" % voc_fn)
    src_voc, tgt_voc = json.load(open(voc_fn))

    src_indexer = Indexer.make_from_serializable(src_voc)
    tgt_indexer = Indexer.make_from_serializable(tgt_voc)
    tgt_voc = None
    src_voc = None

    #     Vi = len(src_voc) + 1 # + UNK
    #     Vo = len(tgt_voc) + 1 # + UNK

    Vi = len(src_indexer)  # + UNK
    Vo = len(tgt_indexer)  # + UNK

    if args.lexical_probability_dictionary is not None:
        log.info("opening lexical_probability_dictionary %s" %
                 args.lexical_probability_dictionary)
        lexical_probability_dictionary_all = json.load(
            gzip.open(args.lexical_probability_dictionary, "rb"))
        log.info("computing lexical_probability_dictionary_indexed")
        lexical_probability_dictionary_indexed = {}
        for ws in lexical_probability_dictionary_all:
            ws_idx = src_indexer.convert([ws])[0]
            if ws_idx in lexical_probability_dictionary_indexed:
                assert src_indexer.is_unk_idx(ws_idx)
            else:
                lexical_probability_dictionary_indexed[ws_idx] = {}
            for wt in lexical_probability_dictionary_all[ws]:
                wt_idx = tgt_indexer.convert([wt])[0]
                if wt_idx in lexical_probability_dictionary_indexed[ws_idx]:
                    assert src_indexer.is_unk_idx(
                        ws_idx) or tgt_indexer.is_unk_idx(wt_idx)
                    lexical_probability_dictionary_indexed[ws_idx][
                        wt_idx] += lexical_probability_dictionary_all[ws][wt]
                else:
                    lexical_probability_dictionary_indexed[ws_idx][
                        wt_idx] = lexical_probability_dictionary_all[ws][wt]
        lexical_probability_dictionary = lexical_probability_dictionary_indexed
    else:
        lexical_probability_dictionary = None

    if args.max_src_tgt_length is not None:
        log.info("filtering sentences of length larger than %i" %
                 (args.max_src_tgt_length))
        filtered_training_data = []
        nb_filtered = 0
        for src, tgt in training_data:
            if len(src) <= args.max_src_tgt_length and len(
                    tgt) <= args.max_src_tgt_length:
                filtered_training_data.append((src, tgt))
            else:
                nb_filtered += 1
        log.info("filtered %i sentences of length larger than %i" %
                 (nb_filtered, args.max_src_tgt_length))
        training_data = filtered_training_data

    if not args.no_shuffle_of_training_data:
        log.info("shuffling")
        import random
        random.shuffle(training_data)
        log.info("done")

#
#     Vi = len(src_voc) + 1 # + UNK
#     Vo = len(tgt_voc) + 1 # + UNK

    is_multitarget = args.is_multitarget

    config_training = {
        "command_line": args.__dict__,
        "Vi": Vi,
        "Vo": Vo,
        "voc": voc_fn,
        "data": data_fn,
        "is_multitarget": is_multitarget
    }
    save_train_config_fn = output_files_dict["train_config"]
    log.info("Saving training config to %s" % save_train_config_fn)
    with io.open(save_train_config_fn, 'w', encoding="utf-8") as outfile:
        outfile.write(unicode(json.dumps(config_training, ensure_ascii=False)))
    #json.dump(config_training, open(save_train_config_fn, "w"), indent=2, separators=(',', ': '))

    eos_idx = Vo

    # Selecting Attention type
    attn_cls = models.AttentionModule
    if args.use_accumulated_attn:
        raise NotImplementedError
#         encdec = models.EncoderDecoder(Vi, args.Ei, args.Hi, Vo + 1, args.Eo, args.Ho, args.Ha, args.Hl,
#                                        attn_cls= models.AttentionModuleAcumulated,
#                                        init_orth = args.init_orth)
    if args.use_deep_attn:
        attn_cls = models.DeepAttentionModule

    # Creating encoder/decoder
    encdec = models.EncoderDecoder(
        Vi,
        args.Ei,
        args.Hi,
        Vo + 1,
        args.Eo,
        args.Ho,
        args.Ha,
        args.Hl,
        init_orth=args.init_orth,
        use_bn_length=args.use_bn_length,
        attn_cls=attn_cls,
        encoder_cell_type=args.encoder_cell_type,
        decoder_cell_type=args.decoder_cell_type,
        lexical_probability_dictionary=lexical_probability_dictionary,
        lex_epsilon=args.lexicon_prob_epsilon,
        is_multitarget=is_multitarget)

    if args.load_model is not None:
        serializers.load_npz(args.load_model, encdec)

    if args.gpu is not None:
        models_list = []
        models_list.append(encdec)
        import copy
        for i in range(len(args.gpu) - 1):
            log.info(
                "Creating copy #%d of model for data parallel computation." %
                (i + 1))
            encdec_copy = copy.deepcopy(encdec)
            models_list.append(encdec_copy)
        for i in range(len(args.gpu)):
            models_list[i] = models_list[i].to_gpu(args.gpu[i])
        assert models_list[0] == encdec

    #print len(models_list)

    if args.optimizer == "adadelta":
        optimizer = optimizers.AdaDelta()
    elif args.optimizer == "adam":
        optimizer = optimizers.Adam()
    elif args.optimizer == "adagrad":
        optimizer = optimizers.AdaGrad(lr=args.learning_rate)
    elif args.optimizer == "sgd":
        optimizer = optimizers.SGD(lr=args.learning_rate)
    elif args.optimizer == "momentum":
        optimizer = optimizers.MomentumSGD(lr=args.learning_rate,
                                           momentum=args.momentum)
    elif args.optimizer == "nesterov":
        optimizer = optimizers.NesterovAG(lr=args.learning_rate,
                                          momentum=args.momentum)
    elif args.optimizer == "rmsprop":
        optimizer = optimizers.RMSprop(lr=args.learning_rate)
    elif args.optimizer == "rmspropgraves":
        optimizer = optimizers.RMSpropGraves(lr=args.learning_rate,
                                             momentum=args.momentum)
    else:
        raise NotImplementedError("unknown optimizer: %s" % args.optimizer)
    with cuda.get_device(args.gpu):
        optimizer.setup(encdec)

    if args.l2_gradient_clipping is not None and args.l2_gradient_clipping > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(args.l2_gradient_clipping))

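    # GradientHardClipping clamps each gradient element to the given interval,
    # unlike the L2-norm-based GradientClipping hook above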
    if args.hard_gradient_clipping is not None and args.hard_gradient_clipping > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientHardClipping(
                *args.hard_gradient_clipping))

    if args.weight_decay is not None:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    if args.load_optimizer_state is not None:
        with cuda.get_device(args.gpu):
            serializers.load_npz(args.load_optimizer_state, optimizer)

    with cuda.get_device(args.gpu[0]):
        #         with MyTimerHook() as timer:
        #             try:
        train_on_data(
            encdec,
            optimizer,
            training_data,
            output_files_dict,
            src_indexer,
            tgt_indexer,
            eos_idx=eos_idx,
            mb_size=args.mb_size,
            nb_of_batch_to_sort=args.nb_batch_to_sort * len(args.gpu),
            test_data=test_data,
            dev_data=dev_data,
            valid_data=valid_data,
            gpu=args.gpu,
            report_every=args.report_every,
            randomized=args.randomized_data,
            reverse_src=args.reverse_src,
            reverse_tgt=args.reverse_tgt,
            max_nb_iters=args.max_nb_iters,
            do_not_save_data_for_resuming=args.no_resume,
            noise_on_prev_word=args.noise_on_prev_word,
            curiculum_training=args.curiculum_training,
            use_previous_prediction=args.use_previous_prediction,
            no_report_or_save=args.no_report_or_save,
            use_memory_optimization=args.use_memory_optimization,
            sample_every=args.sample_every,
            use_reinf=args.use_reinf,
            save_ckpt_every=args.save_ckpt_every,
            postprocess=args.postprocess,
            models_list=models_list
            #                     lexical_probability_dictionary = lexical_probability_dictionary,
            #                     V_tgt = Vo + 1,
            #                     lexicon_prob_epsilon = args.lexicon_prob_epsilon
        )


#             finally:
#                 print timer
#                 timer.print_sorted()
#                 print "total time:"
#                 print(timer.total_time())

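    # NOTE: the script exits here; the alternative training path below
    # (training_chainer.train_on_data_chainer) is kept for reference but never runs.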
    import sys
    sys.exit(0)

    import training_chainer
    with cuda.get_device(args.gpu):
        training_chainer.train_on_data_chainer(
            encdec,
            optimizer,
            training_data,
            output_files_dict,
            src_indexer,
            tgt_indexer,
            eos_idx=eos_idx,
            output_dir=args.save_prefix,
            stop_trigger=None,
            mb_size=args.mb_size,
            nb_of_batch_to_sort=args.nb_batch_to_sort,
            test_data=test_data,
            dev_data=dev_data,
            valid_data=valid_data,
            gpu=args.gpu,
            report_every=args.report_every,
            randomized=args.randomized_data,
            reverse_src=args.reverse_src,
            reverse_tgt=args.reverse_tgt,
            max_nb_iters=args.max_nb_iters,
            do_not_save_data_for_resuming=args.no_resume,
            noise_on_prev_word=args.noise_on_prev_word,
            curiculum_training=args.curiculum_training,
            use_previous_prediction=args.use_previous_prediction,
            no_report_or_save=args.no_report_or_save,
            use_memory_optimization=args.use_memory_optimization,
            sample_every=args.sample_every,
            use_reinf=args.use_reinf,
            save_ckpt_every=args.save_ckpt_every,
            postprocess=args.postprocess
            #                     lexical_probability_dictionary = lexical_probability_dictionary,
            #                     V_tgt = Vo + 1,
            #                     lexicon_prob_epsilon = args.lexicon_prob_epsilon
        )
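
# The branches above restore the encoder/decoder weights and optimizer state with
# serializers.load_npz; the matching save calls happen inside train_on_data, whose
# body is not shown here. A minimal checkpointing sketch with Chainer's NPZ
# serializer (the helper name and file names are illustrative, not taken from the
# original script):
from chainer import serializers

def save_checkpoint(encdec, optimizer, prefix):
    # write all registered parameters and optimizer state to NPZ files;
    # restoring later mirrors the --load_model / --load_optimizer_state branches above
    serializers.save_npz(prefix + ".model.npz", encdec)
    serializers.save_npz(prefix + ".optimizer.npz", optimizer)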
Beispiel #21
0
def main():
    parser = ArgumentParser()

    parser.add_argument('train_data', help='train data')
    parser.add_argument('train_labels', help='train labels')
    parser.add_argument('--val-data', default=None, help='val data')
    parser.add_argument('--val-labels', default=None, help='val labels')
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=5,
                        help='mini-batch size (default=5)')
    parser.add_argument('--beta2',
                        type=float,
                        default=0.999,
                        help='beta2 of Adam (default=0.999)')
    parser.add_argument('-g',
                        '--gpu-id',
                        type=int,
                        default=-1,
                        help='GPU ID (default=-1, indicates CPU)')
    parser.add_argument('--ignore-labels',
                        type=int,
                        default=[],
                        nargs='+',
                        help='labels to ignore (default=[])')
    parser.add_argument('-l',
                        '--learning-rate',
                        type=float,
                        default=0.1,
                        help='learning rate (default=0.1)')
    parser.add_argument('--max-iter',
                        type=int,
                        default=160000,
                        help='train model up to max-iter (default=160000)')
    parser.add_argument(
        '--mean-interval',
        type=int,
        default=1000,
        help='calculate mean of train/loss (and validation loss) ' +
        'every mean-interval iters (default=1000)')
    parser.add_argument('--model',
                        default=None,
                        help='resume to train the model')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='momentum rate (default=0.9)')
    parser.add_argument('--n-classes',
                        type=int,
                        default=5,
                        help='number of classes (default=5)')
    parser.add_argument('--noise',
                        default='no',
                        help='noise injection method. \'no\', \'patch\', ' +
                        'and \'permutation\' are available (default=\'no\')')
    parser.add_argument('--optim',
                        default='nesterov',
                        help='optimization method. \'sgd\', \'nesterov\', ' +
                        'and \'adam\' are available (default=\'nesterov\')')
    parser.add_argument(
        '-o',
        '--outdir',
        default='./',
        help='trained models and optimizer states are stored in outdir ' +
        '(default=\'./\')')
    parser.add_argument(
        '--queue-maxsize',
        type=int,
        default=10,
        help='maxsize of queues for training and validation (default=10)')
    parser.add_argument(
        '--save-interval',
        type=int,
        default=10000,
        help='save model & optimizer every save-interval iters (default=10000)'
    )
    parser.add_argument(
        '--state',
        default=None,
        help='optimizer state. resume to train the model with the optimizer')
    parser.add_argument('-w',
                        '--weight-decay',
                        type=float,
                        default=1e-4,
                        help='weight decay factor (default=1e-4)')

    args = parser.parse_args()

    print(argv2string(sys.argv) + '\n')
    for arg in dir(args):
        if arg[:1] == '_':
            continue
        print('{} = {}'.format(arg, getattr(args, arg)))
    print()

    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)
        print('mkdir ' + args.outdir + '\n')

    model = Model(in_ch=3, out_ch=args.n_classes)
    if args.model is not None:
        S.load_npz(args.model, model)
    loss_func = Loss(model)

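    # substring match: abbreviated names such as 'sgd', 'nesterov', 'adam' are accepted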
    if args.optim.lower() in 'sgd':
        if args.momentum > 0:
            optim = optims.CorrectedMomentumSGD(lr=args.learning_rate,
                                                momentum=args.momentum)
        else:
            optim = optims.SGD(lr=args.learning_rate)
    elif args.optim.lower() in 'nesterovag':
        optim = optims.NesterovAG(lr=args.learning_rate,
                                  momentum=args.momentum)
    elif args.optim.lower() in 'adam':
        optim = optims.Adam(alpha=args.learning_rate,
                            beta1=args.momentum,
                            beta2=args.beta2,
                            weight_decay_rate=args.weight_decay,
                            amsgrad=True)
    else:
        raise ValueError('Please specify an available optimizer name.\n' +
                         'SGD, NesterovAG, and Adam are available.')

    print('{}\n'.format(type(optim)))
    optim.setup(model)

    if args.state is not None:
        S.load_npz(args.state, optim)

    if (args.weight_decay > 0) and not isinstance(optim, optims.Adam):
        optim.add_hook(WeightDecay(args.weight_decay))

    optim.add_hook(GradientClipping(1))

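    # learning-rate decay schedule: scale the LR by 0.1 at 5/8 and again at 7/8 of
    # max_iter; presumably applied inside train() (see the sketch after this example)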
    lr_decay_iter_dict = {
        int(5 * args.max_iter / 8): 0.1,
        int(7 * args.max_iter / 8): 0.1,
    }

    with open(args.train_data, 'r') as f:
        train_data_path_list = [line.strip() for line in f.readlines()]
    with open(args.train_labels, 'r') as f:
        train_labels_path_list = [line.strip() for line in f.readlines()]

    assert len(train_data_path_list) == len(train_labels_path_list)

    if (args.val_data is not None) or (args.val_labels is not None):
        if (args.val_data is not None) and (args.val_labels is not None):
            with open(args.val_data, 'r') as f:
                val_data_path_list = [line.strip() for line in f.readlines()]
            with open(args.val_labels, 'r') as f:
                val_labels_path_list = [line.strip() for line in f.readlines()]
            assert len(val_data_path_list) == len(val_labels_path_list)
        else:
            raise ValueError('Either val_data or val_labels is not specified.')

    train_queue = mp.Queue(maxsize=args.queue_maxsize)
    train_generator = BatchGenerator(args.batch_size,
                                     train_data_path_list,
                                     train_labels_path_list,
                                     train_queue,
                                     train=True,
                                     noise_injection=args.noise,
                                     out_height=512,
                                     out_width=512,
                                     max_height=1216,
                                     max_width=1216,
                                     min_height=832,
                                     min_width=832)
    train_generator.start()

    if args.val_data is None:
        val_queue = None
    else:
        val_queue = mp.Queue(maxsize=args.queue_maxsize)
        try:
            val_generator = BatchGenerator(1,
                                           val_data_path_list,
                                           val_labels_path_list,
                                           val_queue,
                                           train=False,
                                           out_height=608,
                                           out_width=968)
            val_generator.start()
        except Exception:
            train_generator.terminate()
            train_queue.close()
            val_queue.close()
            raise

    try:
        train(loss_func, optim, train_queue, args.max_iter, args.mean_interval,
              args.save_interval, val_queue, lr_decay_iter_dict, args.gpu_id,
              args.ignore_labels, args.outdir)
    except BaseException:
        train_generator.terminate()
        train_queue.close()
        if val_queue is not None:
            val_generator.terminate()
            val_queue.close()
        raise

    train_generator.terminate()
    train_queue.close()
    if val_queue is not None:
        val_generator.terminate()
        val_queue.close()
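
# lr_decay_iter_dict above maps iteration numbers to multiplicative factors and is
# handed to train(), whose body is not part of this example. A minimal sketch of how
# such a schedule could be applied once per iteration (the helper name is
# illustrative), assuming the optimizer exposes 'lr' (SGD / NesterovAG) or 'alpha'
# (Adam) as Chainer optimizers do:
def apply_lr_decay(optim, iteration, lr_decay_iter_dict):
    factor = lr_decay_iter_dict.get(iteration)
    if factor is None:
        return
    if hasattr(optim, 'alpha'):   # Adam keeps its step size in 'alpha'
        optim.alpha *= factor
    else:                         # SGD / NesterovAG keep it in 'lr'
        optim.lr *= factor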
Beispiel #22
0
    n_symbol = 5
    n_epoch = 2  # number of epochs
    SIL_idx = 0  # index of blank symbol
    grad_clip = 10  # gradient norm threshold to clip; a larger value makes the model converge faster

    model = RNNASR(n_feature, n_units, n_symbol)

    ## use GPU or not
    useGPU = False
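    # xp is a drop-in alias for the array module: CuPy on GPU, NumPy on CPU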
    xp = cuda.cupy if useGPU else np
    if useGPU:
        cuda.get_device(0).use()
        model.to_gpu()

    ## Setup optimizer
    optimizer = optimizers.NesterovAG()  # somewhat faster than RMSpropGraves
    #optimizer = optimizers.RMSpropGraves()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))  # essential

    # train loop
    counter = 0
    trainsize = len(trainset)
    for epoch in range(1, n_epoch + 1):
        indexes = np.random.permutation(trainsize)
        for i in range(trainsize):

            x_data = os.path.join(data_root, trainset[indexes[i]][0])
            y_data = trainset[indexes[i]][1]