Code Example #1
File: snippet.py  Project: szabo92/gistable
    insize = 224
    train_list = load_image_list(args.train)
    val_list = load_image_list(args.val)
    mean_image = np.load(args.mean)
    encoders = []
    decoders = []

    # Train layer 1
    if args.conv1_1 is None:
        converter1_1 = converter_generator(encoders)
        loader1_1 = loader_generator(insize,
                                     train_list,
                                     mean_image,
                                     converter1_1,
                                     use_gpu=use_gpu)
        optimizer1_1 = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
        trainer1_1 = Trainer(conv1_1,
                             optimizer1_1,
                             loader1_1,
                             max_epochs=1,
                             logger=logger)

        trainer1_1.loop(len(train_list), args.batchsize)

        f1_1 = open('pkl/conv1_1.pkl', 'wb')
        pickle.dump(conv1_1, f1_1)
        f1_1.close()
    else:
        f1_1 = open(args.conv1_1, 'rb')
        conv1_1 = pickle.load(f1_1)
        f1_1.close()
Code Example #2
    def create(self):
        return optimizers.MomentumSGD(0.1)
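Example #2 only shows a factory method that constructs the optimizer. As a point of reference, the following is a minimal, self-contained sketch of how a MomentumSGD instance created this way is typically attached to a model and driven for a single update step in Chainer; the SimpleChain model and the random data are placeholder assumptions for illustration and are not part of the original project.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers


class SimpleChain(chainer.Chain):
    def __init__(self):
        super(SimpleChain, self).__init__()
        with self.init_scope():
            self.fc = L.Linear(None, 10)

    def __call__(self, x):
        return self.fc(x)


model = SimpleChain()
optimizer = optimizers.MomentumSGD(0.1)  # same constructor call as in create()
optimizer.setup(model)                   # attach the optimizer to the model's parameters

# one manual update step on dummy data
x = np.random.rand(8, 32).astype(np.float32)
t = np.random.randint(0, 10, size=8).astype(np.int32)
loss = F.softmax_cross_entropy(model(x), t)
model.cleargrads()
loss.backward()
optimizer.update()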
Code Example #3
elif args.feature == "mel":
    model = models.Mel(pre=is_pre)
elif args.feature == "mfcc":
    model = models.Mfcc(pre=is_pre)

if not is_pre:
    print 'Load model from', save_name + ".model"
    serializers.load_hdf5(save_name + ".model", model)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
xp = np if args.gpu < 0 else cuda.cupy

optimizer = optimizers.Adam(
) if args.learning == "adam" else optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)
print "algorithm is ", args.learning
for epoch in tqdm(xrange(1, args.epoch + 1)):
    #print 'epoch ', epoch,"/",args.epoch
    #if args.learning == "sgd":
    #    print "learning rate : ",optimizer.lr
    x_batch = np.ndarray((batchsize, model.insize), dtype=np.float32)
    y_batch = np.ndarray((batchsize, ), dtype=np.int32)
    random.shuffle(train_list)

    sum_accuracy = 0
    sum_loss = 0
    count = 0

    batch_range = range(N)
Code Example #4
        return h


train_data, test_data = get_mnist(n_train=1000,
                                  n_test=100,
                                  with_label=False,
                                  classes=[0])

num_train_it = 2000

batchsize = 10

dis = discriminator()
gen = generator()

optimizer4gen = optimizers.MomentumSGD()
optimizer4gen.setup(gen)

optimizer4dis = optimizers.MomentumSGD()
optimizer4dis.setup(dis)

losscoll_dis = []
losscoll_gen = []

for trainit in range(num_train_it):

    z = chainer.Variable(
        np.random.uniform(-1, 1, (batchsize, 100)).astype('float32'))

    gendata = gen(z)
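Example #4 breaks off right after the generator produces a batch of fake samples. The lines below are a hedged sketch of how the loop body could continue, driving both MomentumSGD optimizers for one GAN step; it assumes the discriminator returns a (batchsize, 2) real/fake score (as in Chainer's DCGAN example), that train_data indexes like a NumPy array of images shaped like the generator output, and that chainer.functions is imported as F. None of this is confirmed by the original snippet.

    # --- hedged continuation sketch (not from the original project) ---
    # assumption: dis(x) returns shape (batchsize, 2); label 1 = real, 0 = generated
    idx = np.random.choice(len(train_data), batchsize)
    x_real = chainer.Variable(np.asarray(train_data)[idx].astype('float32'))

    y_real = dis(x_real)
    y_fake = dis(gendata)

    ones = np.ones(batchsize, dtype='int32')
    zeros = np.zeros(batchsize, dtype='int32')

    # discriminator: push real samples toward label 1 and generated ones toward label 0
    loss_dis = (F.softmax_cross_entropy(y_real, ones) +
                F.softmax_cross_entropy(y_fake, zeros))
    dis.cleargrads()
    loss_dis.backward()
    optimizer4dis.update()

    # generator: try to make the discriminator predict label 1 for generated data
    loss_gen = F.softmax_cross_entropy(y_fake, ones)
    gen.cleargrads()
    loss_gen.backward()
    optimizer4gen.update()

    losscoll_dis.append(float(loss_dis.data))
    losscoll_gen.append(float(loss_gen.data))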
Code Example #5
File: train.py  Project: diceroll/kmnist
def main():
    parser = argparse.ArgumentParser(description='training mnist')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=0,
                        help='Random seed')
    parser.add_argument('--n_fold',
                        '-nf',
                        type=int,
                        default=5,
                        help='n_fold cross validation')
    parser.add_argument('--fold', '-f', type=int, default=1)
    parser.add_argument('--out_dir_name',
                        '-dn',
                        type=str,
                        default=None,
                        help='Name of the output directory')
    parser.add_argument('--report_trigger',
                        '-rt',
                        type=str,
                        default='1e',
                        help='Interval for reporting (e.g. 100i, default: 1e)')
    parser.add_argument('--save_trigger',
                        '-st',
                        type=str,
                        default='1e',
                        help='Interval for saving the model '
                        '(e.g. 100i, default: 1e)')
    parser.add_argument('--load_model',
                        '-lm',
                        type=str,
                        default=None,
                        help='Path of the model object to load')
    parser.add_argument('--load_optimizer',
                        '-lo',
                        type=str,
                        default=None,
                        help='Path of the optimizer object to load')
    args = parser.parse_args()

    if args.out_dir_name is None:
        start_time = datetime.now()
        out_dir = Path('output/{}'.format(start_time.strftime('%Y%m%d_%H%M')))
    else:
        out_dir = Path('output/{}'.format(args.out_dir_name))

    random.seed(args.seed)
    np.random.seed(args.seed)
    cupy.random.seed(args.seed)
    chainer.config.cudnn_deterministic = True

    # model = ModifiedClassifier(SEResNeXt50())
    # model = ModifiedClassifier(SERes2Net50())
    model = ModifiedClassifier(SEResNeXt101())

    if args.load_model is not None:
        serializers.load_npz(args.load_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))
    if args.load_optimizer is not None:
        serializers.load_npz(args.load_optimizer, optimizer)

    n_fold = args.n_fold
    slices = [slice(i, None, n_fold) for i in range(n_fold)]
    fold = args.fold - 1

    # model1
    # augmentation = [
    #     ('Rotate', {'p': 0.8, 'limit': 5}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 28, 'min_width': 30}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 30, 'min_width': 28}),
    #     ('Resize', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('RandomScale', {'p': 1.0, 'scale_limit': 0.1}),
    #     ('PadIfNeeded', {'p': 1.0, 'min_height': 32, 'min_width': 32}),
    #     ('RandomCrop', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('Mixup', {'p': 0.5}),
    #     ('Cutout', {'p': 0.5, 'num_holes': 4, 'max_h_size': 4,
    #                 'max_w_size': 4}),
    # ]
    # resize = None

    # model2
    augmentation = [
        ('Rotate', {'p': 0.8, 'limit': 5}),
        ('PadIfNeeded', {'p': 0.5, 'min_height': 28, 'min_width': 32}),
        ('PadIfNeeded', {'p': 0.5, 'min_height': 32, 'min_width': 28}),
        ('Resize', {'p': 1.0, 'height': 32, 'width': 32}),
        ('RandomScale', {'p': 1.0, 'scale_limit': 0.1}),
        ('PadIfNeeded', {'p': 1.0, 'min_height': 36, 'min_width': 36}),
        ('RandomCrop', {'p': 1.0, 'height': 32, 'width': 32}),
        ('Mixup', {'p': 0.5}),
        ('Cutout', {'p': 0.5, 'num_holes': 4, 'max_h_size': 4,
                    'max_w_size': 4}),
    ]
    resize = [('Resize', {'p': 1.0, 'height': 32, 'width': 32})]

    train_data = KMNIST(augmentation=augmentation,
                        drop_index=slices[fold],
                        pseudo_labeling=True)
    valid_data = KMNIST(augmentation=resize, index=slices[fold])

    train_iter = iterators.SerialIterator(train_data, args.batchsize)
    valid_iter = iterators.SerialIterator(valid_data,
                                          args.batchsize,
                                          repeat=False,
                                          shuffle=False)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=out_dir)

    report_trigger = (int(args.report_trigger[:-1]), 'iteration'
                      if args.report_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu),
                   name='val',
                   trigger=report_trigger)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'elapsed_time'
    ]),
                   trigger=report_trigger)
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='loss.png',
                              trigger=report_trigger))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='accuracy.png',
                              trigger=report_trigger))

    save_trigger = (int(args.save_trigger[:-1]),
                    'iteration' if args.save_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.snapshot_object(
        model,
        filename='model_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer,
        filename='optimizer_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(CosineAnnealing(lr_max=0.1, lr_min=1e-6, T_0=20),
                   trigger=(1, 'epoch'))

    best_model_trigger = triggers.MaxValueTrigger('val/main/accuracy',
                                                  trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              filename='best_model.npz'),
                   trigger=best_model_trigger)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              filename='best_optimizer.npz'),
                   trigger=best_model_trigger)
    best_loss_model_trigger = triggers.MinValueTrigger('val/main/loss',
                                                       trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              filename='best_loss_model.npz'),
                   trigger=best_loss_model_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer, filename='best_loss_optimizer.npz'),
                   trigger=best_loss_model_trigger)

    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir()

    # Write parameters text
    with open(out_dir / 'train_params.txt', 'w') as f:
        f.write('model: {}\n'.format(model.predictor.__class__.__name__))
        f.write('n_epoch: {}\n'.format(args.epoch))
        f.write('batch_size: {}\n'.format(args.batchsize))
        f.write('n_data_train: {}\n'.format(len(train_data)))
        f.write('n_data_val: {}\n'.format(len(valid_data)))
        f.write('seed: {}\n'.format(args.seed))
        f.write('n_fold: {}\n'.format(args.n_fold))
        f.write('fold: {}\n'.format(args.fold))
        f.write('augmentation: \n')
        for process, param in augmentation:
            f.write('  {}: {}\n'.format(process, param))

    trainer.run()
Code Example #6
def train():
    args = configuration()

    ## Display parameter information
    print('===================================================')
    if args.test:
        print('Num of Minibatch Size: 1')
    else:
        print('Num of Minibatch Size: {}'.format(args.batch))
    print('Num of Epoch         : {}'.format(args.epoch))
    if args.gpu >= 0:
        print('GPU Number           : {}'.format(args.gpu))
    else:
        print('Training with CPU only.')
    print('===================================================')

    ## Set up the Training Network
    model = Brief_CNN()
    if args.model is not None:
        print('Loading Brief CNN model from {}'.format(args.model))
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    ## Set up the Optimizer
    ## The optimizer used is MomentumSGD.
    optimizer = optimizers.MomentumSGD(lr=0.0001)
    optimizer.setup(model)
    if args.opt is not None:
        print('Loading Brief CNN Optimizer from {}'.format(args.opt))
        serializers.load_npz(args.opt, optimizer)

    if args.test:
        test = load_single_image(args.img)
        xp = model.xp
        test = xp.asarray(test, dtype=xp.float32)
        with chainer.using_config('train', False):
            val = model(test)
        prob = F.softmax(val).data
        prob = xp.reshape(prob, (prob.shape[1]))
        max_val_ind = xp.ndarray.argmax(prob)
        max_val = prob[max_val_ind] * 100
        print('>>> {} : {} %'.format(max_val_ind, max_val))

    else:
        train = make_dataset(args.img)
        train_iter = iterators.SerialIterator(train, batch_size=args.batch)
        log_filename = 'log_train'
        updater = CNNUpdater(net_model=model,
                             iterator={'main': train_iter},
                             optimizer={'optimizer': optimizer},
                             device=args.gpu)

        trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                   out='results')

        trainer.extend(
            extensions.LogReport(trigger=(1, 'epoch'), log_name=log_filename))
        trainer.extend(extensions.PrintReport(['epoch', 'Loss', 'Acc']))
        trainer.extend(extensions.snapshot_object(model, 'model'),
                       trigger=(10, 'epoch'))
        trainer.extend(extensions.snapshot_object(optimizer, 'optimizer'),
                       trigger=(10, 'epoch'))
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.run()

        modelname = 'results/model'
        print('Saving Brief CNN model to {}'.format(modelname))
        serializers.save_npz(modelname, model)

        optname = 'results/optimizer'
        print('Saving Brief CNN optimizer to {}'.format(optname))
        serializers.save_npz(optname, optimizer)

    print('OVER')
Code Example #7
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print args
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # TODO: check save_dir exist
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following: \n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])

    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
        xp.random.seed(1234)

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter,
                                               input_idx=0,
                                               output_idx=-1)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        sentences_train = util.read_raw_file(filename=data_file,
                                             delimiter=u' ')

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file,
                                             delimiter=delimiter,
                                             input_idx=0,
                                             output_idx=-1)
    if test_file:
        sentences_test = util.read_conll_file(test_file,
                                              delimiter=delimiter,
                                              input_idx=0,
                                              output_idx=-1)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown pos tags
    # TODO: compute unk words
    if is_train:
        sentences_words_train = [w_obj[0] for w_obj in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    def parse_to_word_ids(sentences):
        return util.parse_to_word_ids(sentences,
                                      xp=xp,
                                      vocab=vocab,
                                      UNK_IDX=UNK_IDX,
                                      idx=0)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences,
                                      xp=xp,
                                      vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX,
                                      idx=0)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences,
                                     xp=xp,
                                     vocab=vocab_tags,
                                     UNK_IDX=-1,
                                     idx=-1)

    # if is_train:
    x_train = parse_to_word_ids(sentences_train)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)

    # elif is_test:
    #     x_predict = parse_to_word_ids(sentences_predict)
    #     x_char_predict = parse_to_char_ids(sentences_predict)
    #     y_predict = parse_to_tag_ids(sentences_predict)

    x_dev = parse_to_word_ids(sentences_dev)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)

    x_test = parse_to_word_ids(sentences_test)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)

    cnt_train_unk = sum([xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)
    util.write_vocab(save_vocab, vocab)
    util.write_vocab(save_vocab_char, vocab_char)
    util.write_vocab(save_tags_vocab, vocab_tags)
    util.write_vocab(save_train_config, args)

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=None,
                         n_label=len(vocab_tags))

    if args['word_emb_file']:
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
        net.word_embed.W.data[word_ids] = word_vecs

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_data[i], x_char_data[i], y_data[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)
        return predict_lists, sum_loss

    if is_test:
        # predict
        model_filename = args['model_filename']
        model_filename = save_dir + model_filename
        serializers.load_hdf5(model_filename, net)

        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train
        predict_pairs, _ = eval_loop(x_predict, x_char_predict, y_predict)
        _, predict_tags = zip(*predict_pairs)
        for predict in predict_tags:
            predict = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            print predict

        return False

    tmax = args['max_iter']
    t = 0.0
    for epoch in xrange(args['max_iter']):

        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = util.eval_accuracy(predict_train)

        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev = eval_loop(x_dev, x_char_dev, y_dev)

        # Evaluation
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))

        # Save model
        model_filename = save_name + '_epoch' + str(epoch)
        serializers.save_hdf5(model_filename + '.model', net)
        serializers.save_hdf5(model_filename + '.state', opt)
Code Example #8
def pretraining():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--batchsize', type=int, default=256)
    args = parser.parse_args()

    xp = np
    gpu_id = args.gpu
    seed = args.seed
    train, _ = mnist.get_mnist()
    train, _ = convert.concat_examples(train, device=gpu_id)
    batchsize = args.batchsize
    model = StackedDenoisingAutoEncoder(input_dim=train.shape[1])
    if chainer.cuda.available and args.gpu >= 0:
        import cupy as cp
        xp = cp
        model.to_gpu(gpu_id)
    xp.random.seed(seed)

    # Layer-Wise Pretrain
    print("Layer-Wise Pretrain")
    for i, dae in enumerate(model.children()):
        print("Layer {}".format(i + 1))
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        clf = L.Classifier(dae, lossfun=mean_squared_error)
        clf.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            clf.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(clf)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater, (50000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()
        train = dae.encode(train).data

    # Finetuning
    print("fine tuning")
    with chainer.using_config("train", False):
        train, _ = mnist.get_mnist()
        train, _ = convert.concat_examples(train, device=gpu_id)
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        model = L.Classifier(model, lossfun=mean_squared_error)
        model.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            model.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(model)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater, (100000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()

    outfile = "StackedDenoisingAutoEncoder-seed{}.model".format(seed)
    serializers.save_npz(outfile, model.predictor)
Code Example #9
def main():
    parser = argparse.ArgumentParser(description='Train stargan voice convertor')
    parser.add_argument(
        '--gpu', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    parser.add_argument("--train_data", type=Path, required=True, help="training data")
    parser.add_argument("--speaker_id", type=Path, required=True, help="speaker_id file")
    parser.add_argument("--output_file", type=Path, required=True)
    parser.add_argument(
        '--epoch', default=6000, type=int, help='number of epochs to learn')
    parser.add_argument("--epoch_start", type=int, default=0)

    parser.add_argument(
        '--snapshot', default=100, type=int, help='interval of snapshot')
    parser.add_argument(
        '--batchsize', type=int, default=4, help='Batch size')
    parser.add_argument(
        '--optimizer', default='Adam', choices=["Adam", "MomentumSGD", "RMSprop"], type=str, help='optimizer to use: Adam, MomentumSGD, RMSprop')
    parser.add_argument(
        '--lrate', default='0.00001', type=float, help='learning rate for Adam, MomentumSGD or RMSprop')
    parser.add_argument(
        '--genpath', type=str, help='path for a pretrained generator')
    parser.add_argument(
        '--clspath', type=str, help='path for a pretrained classifier')
    parser.add_argument(
        '--advdispath', type=str, help='path for a pretrained real/fake discriminator')

    args = parser.parse_args()
    epsi = sys.float_info.epsilon

    output_file = args.output_file
    output_dir = output_file.with_suffix("")
    output_dir.mkdir(exist_ok=True, parents=True)

    all_source = np.load(args.train_data)
    Speakers, SpeakerIndividualKeys = separate_speaker(np.load(args.speaker_id))
    NormalizedAllData = get_separated_values(all_source, SpeakerIndividualKeys)
    SpeakerNum = len(Speakers)

    # Set input directories
    EpochNum = args.epoch
    BatchSize = args.batchsize

    SentenceNum = [len(SpeakerIndividualKeys[s]) for s in range(SpeakerNum)]
    MaxSentenceNum = max(SentenceNum)

    print('#GPU: {}'.format(args.gpu))
    print('#epoch: {}'.format(EpochNum))
    print('Optimizer: {}'.format(args.optimizer))
    print('Learning rate: {}'.format(args.lrate))
    print('Snapshot: {}'.format(args.snapshot))

    # Set up model
    num_mels = 36
    zdim = 5
    hdim = 32
    cdim = 8
    adim = 32

    # num_mels = data.shape[0] (36dim)
    # zdim = 8
    # hdim = 32
    generator_class = net.Generator1
    classifier_class = net.Classifier1
    discriminator_class = net.AdvDiscriminator1
    loss_class = net.Loss1

    generator = generator_class(SpeakerNum)
    # paranum = sum(p.data.size for p in generator.params())
    # print('Parameter #: {}'.format(paranum))

    # cdim = 8
    classifier = classifier_class(num_mels, SpeakerNum, cdim)
    # paranum = sum(p.data.size for p in classifier.params())
    # print('Parameter #: {}'.format(paranum))

    # adim = 32
    adverserial_discriminator = discriminator_class(num_mels, SpeakerNum, adim)
    # adverserial_discriminator = net.AdvDiscriminator_noactive(num_mels, SpeakerNum, adim)
    # paranum = sum(p.data.size for p in adverserial_discriminator.params())
    # print('Parameter #: {}'.format(paranum))

    if args.genpath is not None:
        try:
            serializers.load_npz(args.genpath, generator)
        except:
            print('Could not load generator.')
    if args.clspath is not None:
        try:
            serializers.load_npz(args.clspath, classifier)
        except:
            print('Could not load domain classifier.')
    if args.advdispath is not None:
        try:
            serializers.load_npz(args.advdispath, adverserial_discriminator)
        except:
            print('Could not load real/fake discriminator.')

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        generator.to_gpu()
        classifier.to_gpu()
        adverserial_discriminator.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy

    # Set up optimizers
    # loss = net.Loss1(generator, classifier, adverserial_discriminator)
    loss = loss_class(generator, classifier, adverserial_discriminator)
    w_adv = 1.0
    w_cls = 1.0
    w_cyc = 1.0
    w_rec = 1.0
    if args.optimizer == 'MomentumSGD':
        opt_gen = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
        opt_cls = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
        opt_advdis = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
    elif args.optimizer == 'Adam':
        opt_gen = optimizers.Adam(alpha=0.001, beta1=0.9)
        opt_cls = optimizers.Adam(alpha=0.00005, beta1=0.5)
        opt_advdis = optimizers.Adam(alpha=0.00001, beta1=0.5)
    elif args.optimizer == 'RMSprop':
        opt_gen = optimizers.RMSprop(lr=args.lrate)
        opt_cls = optimizers.RMSprop(lr=args.lrate)
        opt_advdis = optimizers.RMSprop(lr=args.lrate)
    opt_gen.setup(generator)
    opt_cls.setup(classifier)
    opt_advdis.setup(adverserial_discriminator)


    AllCombinationPairs = list(itertools.combinations(range(SpeakerNum), 2))
    # train
    for epoch in trange(args.epoch_start, EpochNum+1):

        # shuffled_indexes[speaker_idx][idx]: value is index of NormalizedAllData[speaker_idx][**here**]
        shuffled_indexes = [myperm(SentenceNum[s], MaxSentenceNum) for s in range(SpeakerNum)]

        for n in range(MaxSentenceNum//BatchSize):
            # batchlist_mcep[speaker_idx][sentence_idx_in_batch]
            batchlist_mcep = []
            begin_idx = n * BatchSize
            end_idx = begin_idx + BatchSize  # end_idx itself is not included
            for s in range(SpeakerNum):
                batch_tmp = []
                for idx in shuffled_indexes[s][begin_idx:end_idx]:
                    batch_tmp.append( NormalizedAllData[s][idx].T ) # Transpose here!!
                batchlist_mcep.append(batch_tmp)
            # Convert batchlist into a list of arrays
            X = [batchlist2array(batchlist) for batchlist in batchlist_mcep]

            xin = [chainer.Variable(xp.asarray(Xs, dtype=np.float32)) for Xs in X]

            # Iterate through all speaker pairs
            random.shuffle(AllCombinationPairs)
            for s0, s1 in AllCombinationPairs:
                AdvLoss_d, AdvLoss_g, ClsLoss_r, ClsLoss_f, CycLoss, RecLoss \
                    = loss.calc_loss(xin[s0], xin[s1], s0, s1, SpeakerNum)
                gen_loss = (w_adv * AdvLoss_g + w_cls * ClsLoss_f
                            + w_cyc * CycLoss + w_rec * RecLoss)
                cls_loss = ClsLoss_r
                advdis_loss = AdvLoss_d
                generator.cleargrads()
                gen_loss.backward()
                opt_gen.update()
                classifier.cleargrads()
                cls_loss.backward()
                opt_cls.update()
                adverserial_discriminator.cleargrads()
                advdis_loss.backward()
                opt_advdis.update()

            print('epoch {}, mini-batch {}:'.format(epoch, n+1))
            print('AdvLoss_d={}, AdvLoss_g={}, ClsLoss_r={}, ClsLoss_f={}'
                  .format(AdvLoss_d.data, AdvLoss_g.data, ClsLoss_r.data, ClsLoss_f.data))
            print('CycLoss={}, RecLoss={}'
                  .format(CycLoss.data, RecLoss.data))
        save_loss(output_dir, AdvLoss_d.data, AdvLoss_g.data, ClsLoss_r.data, ClsLoss_f.data, CycLoss.data, RecLoss.data)

        if epoch % args.snapshot == 0:
            snapshot_dir = output_dir / "snapshot"
            snapshot_dir.mkdir(exist_ok=True)
            snapshot(snapshot_dir, epoch, generator, classifier, adverserial_discriminator)
            snapshot_feature_dir = output_dir / "snapshot_feature"
            snapshot_feature_dir.mkdir(exist_ok=True)
            output = {}
            with chainer.no_backprop_mode():
                for s in range(SpeakerNum):
                    for key, mcep in zip(SpeakerIndividualKeys[s], NormalizedAllData[s]):
                        mcep_T = mcep.T
                        out = generator.hidden_layer(chainer.Variable(xp.asarray(mcep_T[np.newaxis,:,:], dtype=np.float32)))
                        out = np.squeeze(cuda.to_cpu(out.data))
                        output[key] = out.T
            np.savez(snapshot_feature_dir / f"{output_file.stem}_epoch_{epoch:05}.npz", **output)

    # output final result
    output = {}
    with chainer.no_backprop_mode():
        for s in range(SpeakerNum):
            for key, mcep in zip(SpeakerIndividualKeys[s], NormalizedAllData[s]):
                mcep_T = mcep.T
                out = generator.hidden_layer(chainer.Variable(xp.asarray(mcep_T[np.newaxis,:,:], dtype=np.float32)))
                out = np.squeeze(cuda.to_cpu(out.data))
                output[key] = out.T
    np.savez(output_file, **output)
Code Example #10
        cifar_net = net.IdentityMapping(args.res_depth, swapout=args.swapout, skip=args.skip_depth)
    elif args.model == 'vgg_no_fc':
        cifar_net = net.VGGNoFC()
    elif args.model == 'vgg_wide':
        cifar_net = net.VGGWide()
    elif args.model == 'vgg_crelu':
        cifar_net = net.VGGCReLU()
    elif args.model == 'inception':
        cifar_net = net.Inception()
    elif args.model == 'pyramid':
        cifar_net = net.PyramidNet(args.res_depth)
    else:
        cifar_net = net.VGG()

    if args.optimizer == 'sgd':
        optimizer = optimizers.MomentumSGD(lr=args.lr)
    else:
        optimizer = optimizers.Adam(alpha=args.alpha)
    optimizer.setup(cifar_net)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    cifar_trainer = trainer.CifarTrainer(cifar_net, optimizer, args.iter, args.batch_size, args.gpu)
    if args.prefix is None:
        model_prefix = '{}_{}'.format(args.model, args.optimizer)
    else:
        model_prefix = args.prefix

    state = {'best_valid_error': 100, 'best_test_error': 100, 'clock': time.clock()}
    def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc, test_loss, test_acc):
        error = 100 * (1 - acc)
        valid_error = 100 * (1 - valid_acc)
Code Example #11
File: train.py  Project: manuelschmidt/chainercv
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--class_weight', type=str, default='class_weight.npy')
    parser.add_argument('--out', type=str, default='result')
    args = parser.parse_args()

    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (16000, 'iteration')

    # Dataset
    train = CamVidDataset(split='train')

    def transform(in_data):
        img, label = in_data
        if np.random.rand() > 0.5:
            img = img[:, :, ::-1]
            label = label[:, ::-1]
        return img, label

    train = TransformDataset(train, transform)
    val = CamVidDataset(split='val')

    # Iterator
    train_iter = iterators.MultiprocessIterator(train, args.batchsize)
    val_iter = iterators.MultiprocessIterator(val,
                                              args.batchsize,
                                              shuffle=False,
                                              repeat=False)

    # Model
    class_weight = np.load(args.class_weight)
    model = SegNetBasic(n_class=11)
    model = PixelwiseSoftmaxClassifier(model, class_weight=class_weight)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, out=args.out)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(TestModeEvaluator(val_iter, model, device=args.gpu),
                   trigger=validation_trigger)

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  x_key='iteration',
                                  file_name='loss.png'))

    trainer.extend(
        extensions.snapshot_object(
            model.predictor,
            filename='model_iteration-{.updater.iteration}',
            trigger=end_trigger))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'elapsed_time', 'lr', 'main/loss',
        'validation/main/loss'
    ]),
                   trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Code Example #12
N = data.num_train
N_test = data.num_test

print('- number of training data: %d' % N)
print('- number of test data: %d' % N_test)
print('- number of labels: %d' % len(caltech['label_names']))
print('done.')

# prepare network
model = net.Classifier(net.AlexNet())

# initialize optimizer
if args.optimizer == 'adam':
    optimizer = optimizers.Adam(args.alpha)
elif args.optimizer == 'momentumsgd':
    optimizer = optimizers.MomentumSGD(args.learningrate)
optimizer.setup(model)

# training loop
print()
print('start learning')
if args.gpu >= 0:
    model.to_gpu()
for epoch in range(0, n_epoch):
    print('epoch', epoch + 1)

    perm = np.random.permutation(N)
    permed_data = np.asarray(x_train[perm])
    permed_target = xp.asarray(y_train[perm])

    sum_accuracy = 0
Code Example #13
File: net.py  Project: yyuzhongpv/branchynet
    def __init__(self,
                 network,
                 thresholdExits=None,
                 percentTestExits=.9,
                 percentTrainKeeps=1.,
                 lr=0.1,
                 momentum=0.9,
                 weight_decay=0.0001,
                 alpha=0.001,
                 opt="Adam",
                 joint=True,
                 verbose=False):
        self.opt = opt
        self.lr = lr
        self.alpha = alpha
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.joint = joint
        self.forwardMain = None

        self.main = Net()
        self.models = []
        starti = 0
        curri = 0
        for link in network:
            if not isinstance(link, Branch):
                curri += 1
                self.main.add_link(link)
            else:
                net = Net(link.weight)
                net.starti = starti
                starti = curri
                net.endi = curri
                for prevlink in self.main:
                    newlink = copy.deepcopy(prevlink)
                    newlink.name = None
                    net.add_link(newlink)
                for branchlink in link:
                    newlink = copy.deepcopy(branchlink)
                    newlink.name = None
                    net.add_link(newlink)
                self.models.append(net)
        for branchlink in link:
            newlink = copy.deepcopy(branchlink)
            newlink.name = None
            self.main.add_link(newlink)

        if self.opt == 'MomentumSGD':
            self.optimizer = optimizers.MomentumSGD(lr=self.lr,
                                                    momentum=self.momentum)
        else:
            self.optimizer = optimizers.Adam(alpha=self.alpha)
        self.optimizer.setup(self.main)

        if self.opt == 'MomentumSGD':
            self.optimizer.add_hook(
                chainer.optimizer.WeightDecay(self.weight_decay))

        self.optimizers = []

        for model in self.models:
            if self.opt == 'MomentumSGD':
                optimizer = optimizers.MomentumSGD(lr=self.lr, momentum=0.9)
            else:
                optimizer = optimizers.Adam()
            optimizer.setup(model)

            if self.opt == 'MomentumSGD':
                optimizer.add_hook(
                    chainer.optimizer.WeightDecay(self.weight_decay))

            self.optimizers.append(optimizer)

        self.percentTrainKeeps = percentTrainKeeps
        self.percentTestExits = percentTestExits
        self.thresholdExits = thresholdExits
        self.clearLearnedExitsThresholds()

        self.verbose = verbose
        self.gpu = False
        self.xp = np
Code Example #14
    train_dataset = VOC('train')
    valid_dataset = VOC('val')

    train_iter = iterators.SerialIterator(train_dataset, batchsize)
    model = FasterRCNN()

    chainer.serializers.load_npz('train_rpn/snapshot_571000', model)

    model.to_gpu(0)

    warmup(model, train_iter)
    model.rcnn_train = True

    # optimizer = optimizers.Adam()
    # optimizer.setup(model)
    optimizer = optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=0)
    trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn')
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'main/loss_cls',
        'main/cls_accuracy',
        'main/loss_bbox',
        'main/loss_rcnn',
        'elapsed_time',
    ]),
Code Example #15
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epochs', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--output_dir', '-o', default='./outputs',
                        help='Directory to output the result')
    parser.add_argument('--gpu_id', '-g', type=int, default=0,
                        help='ID of the GPU to be used. Set to -1 if you use CPU')
    args = parser.parse_args()

    # Download the MNIST data if you haven't downloaded it yet
    train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)

    gpu_id = args.gpu_id
    batchsize = args.batchsize
    epochs = args.epochs
    run.log('Batch size', np.int(batchsize))
    run.log('Epochs', np.int(epochs))

    train_iter = iterators.SerialIterator(train, batchsize)
    test_iter = iterators.SerialIterator(test, batchsize,
                                         repeat=False, shuffle=False)

    model = MyNetwork()

    if gpu_id >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU

    # Choose an optimizer algorithm
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)

    # Give the optimizer a reference to the model so that it
    # can locate the model's parameters.
    optimizer.setup(model)

    while train_iter.epoch < epochs:
        # ---------- One iteration of the training loop ----------
        train_batch = train_iter.next()
        image_train, target_train = concat_examples(train_batch, gpu_id)

        # Calculate the prediction of the network
        prediction_train = model(image_train)

        # Calculate the loss with softmax_cross_entropy
        loss = F.softmax_cross_entropy(prediction_train, target_train)

        # Calculate the gradients in the network
        model.cleargrads()
        loss.backward()

        # Update all the trainable parameters
        optimizer.update()
        # --------------------- until here ---------------------

        # Check the validation accuracy of prediction after every epoch
        if train_iter.is_new_epoch:  # If this iteration is the final iteration of the current epoch

            # Display the training loss
            print('epoch:{:02d} train_loss:{:.04f} '.format(
                train_iter.epoch, float(to_cpu(loss.array))), end='')

            test_losses = []
            test_accuracies = []
            while True:
                test_batch = test_iter.next()
                image_test, target_test = concat_examples(test_batch, gpu_id)

                # Forward the test data
                prediction_test = model(image_test)

                # Calculate the loss
                loss_test = F.softmax_cross_entropy(prediction_test, target_test)
                test_losses.append(to_cpu(loss_test.array))

                # Calculate the accuracy
                accuracy = F.accuracy(prediction_test, target_test)
                accuracy.to_cpu()
                test_accuracies.append(accuracy.array)

                if test_iter.is_new_epoch:
                    test_iter.epoch = 0
                    test_iter.current_position = 0
                    test_iter.is_new_epoch = False
                    test_iter._pushed_position = None
                    break

            val_accuracy = np.mean(test_accuracies)
            print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
                np.mean(test_losses), val_accuracy))

            run.log("Accuracy", np.float(val_accuracy))
Code Example #16
def train(view, set):
    # Load the data split into 5 folds
    gallery1, probe1 = load_res_invariant_dataset(view=view,
                                                  set=set,
                                                  type='SFDEI',
                                                  frame='1')
    gallery2, probe2 = load_res_invariant_dataset(view=view,
                                                  set=set,
                                                  type='SFDEI',
                                                  frame='2')

    # Set up the TensorBoard writer
    writer = SummaryWriter()

    # Training settings
    save_dir = '/home/common-ns/PycharmProjects/Multiscale_SFDEINet/4inputs/models' + view + '/set_' + str(
        set)
    # save_dir = '/home/common-ns/setoguchi/chainer_files/Two_by_two_in/SRGAN1-2/' + view + '/set_' + str(set)
    os.mkdir(save_dir)
    batch_size = 239
    max_iteration = 50000
    id = 1
    # gpu_id = chainer.backends.cuda.get_device_from_id(id)
    model = two_by_two_in()
    model.to_gpu(id)
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    iteration = 1.0
    i = 0
    # Training loop
    # for i in range(1, epock+1):
    while iteration < max_iteration + 1:

        accum_loss = 0.0
        count = 0

        # Build positive/negative pairs from the data; at least 10% of all data should be positive pairs
        # To avoid bias in the data, choose the pair type with probability 1/4
        select_seed = np.random.randint(0, 4)
        if select_seed == 0:
            train_data = create_pair(gallery1, gallery2, probe1, probe2)
        elif select_seed == 1:
            train_data = create_pair(probe1, probe2, gallery1, gallery2)
        elif select_seed == 2:
            train_data = create_pair(gallery1, gallery2, gallery1, gallery2)
        elif select_seed == 3:
            train_data = create_pair(probe1, probe2, probe1, probe2)

        # Shuffle before building batches
        shuffle = random.sample(train_data, len(train_data))
        # Split into mini-batches
        mini_batches = make_batch_list(shuffle, batch_size)

        for batch in mini_batches:
            # Merge the list into a single numpy batch
            pos1, pos2, neg1, neg2, signal = list2npy(batch, id)

            # Forward
            g_out, p_out = model(pos1, pos2, neg1, neg2)

            # Compute the loss
            signal = F.flatten(signal)  # flatten to 1-D to match the F.contrastive interface
            loss = F.contrastive(g_out, p_out, signal, margin=3, reduce='mean')
            # print cuda.to_cpu(loss.data)
            accum_loss += cuda.to_cpu(loss.data)

            # backward
            model.cleargrads()
            loss.backward()

            # Update the parameters
            optimizer.update()

            if iteration % 10000 == 0:
                optimizer.lr = optimizer.lr * 0.1
                print('Learning rate decreased to: {}'.format(optimizer.lr))
            if iteration % 5000 == 0:
                serializers.save_npz(
                    save_dir + '/model_snapshot_{}'.format(int(iteration)),
                    model)
            iteration += 1.0
        i += 1

        print('epoch:{}'.format(i), 'iteration:{}'.format(int(iteration)),
              'accum_loss:{}'.format(accum_loss / float(len(mini_batches))))
        writer.add_scalar('train/loss', accum_loss / float(len(mini_batches)),
                          i)

        # Save the model about every 1000 iterations
        # if iteration % 10000 == 0:
        #     serializers.save_npz(save_dir + '/model_snapshot_{}'.format(i), model)
    g = c.build_computational_graph(g_out[0])
    with open(save_dir + '/graph.dot', 'w') as o:
        o.write(g.dump())
Code Example #17
def main():
    print("\n---------------- argument check ------------------")
    if len(sys.argv) != 2:
        print("Invalid argument.################################" +
              str(len(sys.argv)))
        exit(1)

    dataset_path = sys.argv[1]

    print("\n------------------ preprocess --------------------")
    proj_root = os.path.dirname(os.path.dirname(os.path.dirname(root_path)))
    config_path = os.path.join(proj_root, "data", "config.json")
    layer_path = os.path.join(proj_root, "data", "layer_settings.json")

    # Load the config file
    try:
        with open(config_path, "r") as f:
            config = json.load(f)
            print("loaded config file")
    except IOError:
        print("config.json load error.")
        sys.exit(1)

    # Load the layer settings file
    try:
        with open(layer_path, "r") as f:
            layer_data = json.load(f)
            print("loaded layer info file")
    except IOError:
        print("layer_settings.json load error.")
        sys.exit(1)

    in_w = layer_data["nodes"][0]["settings"]["width"]
    in_h = layer_data["nodes"][0]["settings"]["height"]
    in_c = layer_data["nodes"][0]["settings"]["channel"]

    # Load the training and test data
    (x_train, t_train), (x_test, t_test) = \
        load_data(dataset_path,
                  input_width=in_w, input_height=in_h, input_channel=in_c,
                  test_size=0.1, flatten=False)
    # (x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
    print("loaded training data")

    print("\n---------------- hyper parameter -----------------")
    train_size = len(t_train)
    test_size = len(t_test)
    batch_size = config["batch"]
    epoch = config["epoch"]
    iter_per_epoch = int(train_size / batch_size)
    max_iter = epoch * iter_per_epoch
    opt_name = config["optimizer"]
    lr = config["lr"]
    if opt_name == "Adagrad":
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt_name == "Adam":
        optimizer = optimizers.Adam()
    elif opt_name == "MonmentumSGD":
        optimizer = optimizers.MomentumSGD(lr=lr,
                                           momentum=0.9)  #default lr = 0.01
    elif opt_name == "Adadelta":
        optimizer = optimizers.AdaDelta()
    else:
        print("unsupported optimizer type:" + opt_name)
        sys.exit(2)

    print("training size : " + str(train_size))
    print("test size     : " + str(test_size))
    print("batch size    : " + str(batch_size))
    print("epoch size    : " + str(epoch))
    print("optimizer type: " + opt_name)
    print("learning rate : " + str(lr))
    print("input width   : " + str(in_w))
    print("input height  : " + str(in_h))

    print("\n------------ CNN layer Consituation --------------")
    model = CNN(layer_data["layer_info"])

    optimizer.use_cleargrads()
    optimizer.setup(model)

    print("\n------------ training start --------------")
    for e in range(0, epoch):
        print("epoch[" + str(e) + "/" + str(epoch) + "]")
        # interval_start = time.clock()
        sum_loss = 0
        sum_accuracy = 0

        # ------------ Train on the training data and collect results ---------
        train_stroke_size = int(train_size / batch_size)
        for i in tqdm(range(0, train_stroke_size)):
            # for i in tqdm(range(max_iter)):
            # Fetch one batch of data
            batch_mask = np.random.choice(train_size, batch_size)
            X = x_train[batch_mask]
            # Reshape to 3-D (channel, width, height) for the convolution layers
            X = np.array(X).astype(np.float32).reshape(len(X), in_c, in_w,
                                                       in_h)
            # Real values must be np.float32 and integer labels must be np.int32
            x_batch = chainer.Variable(np.array(X).astype(np.float32))
            t_batch = np.array(t_train[batch_mask]).astype(np.int32)
            # Clear the gradients accumulated for the previous batch
            model.cleargrads()
            # Forward pass: compute the loss with the current parameters
            loss = model.forward(x_batch, t_batch, True)
            # Backward pass, then update the parameters to reduce the loss
            loss.backward()
            optimizer.update()

            accuracy = float(model.accuracy.data) * len(t_batch.data)
            sum_loss += float(model.loss.data) * len(t_batch.data)
            sum_accuracy += accuracy
            del X
            gc.collect()
            # print("epoch : " + str(epoch) + " / " + str(n_epoch))
        print("train loss: %f", sum_loss / train_stroke_size)
        print("train accuracy: %f", sum_accuracy / train_stroke_size)

        # log = "accuracy:" + str(sum_accuracy/iter_per_epoch) + "  loss:" + str(sum_loss/iter_per_epoch)

        sum_loss = 0
        sum_accuracy = 0

        # --------- Evaluate on the test data -----------
        test_stroke_size = int(test_size / batch_size)
        for i in tqdm(range(test_stroke_size)):

            batch_mask = np.random.choice(test_size, batch_size)
            X = x_test[batch_mask]
            X = np.array(X).astype(np.float32).reshape(len(X), in_c, in_w,
                                                       in_h)
            x_batch = chainer.Variable(np.array(X).astype(np.float32))
            t_batch = np.array(t_test[batch_mask]).astype(np.int32)

            # model.cleargrads()
            loss = model.forward(x_batch, t_batch, False)
            # loss.backward()
            # optimizer.update()

            # accuracy = float(model.accuracy.data) * len(t_batch.data)
            sum_loss += float(model.loss.data) * len(t_batch.data)
            sum_accuracy += float(model.accuracy.data) * len(t_batch.data)
            # print("accuracy:" + str(accuracy) + "  loss:" + str(sum_loss))
            del X
            gc.collect()

        print("train loss: %f", sum_loss / test_stroke_size)
        print("train accuracy: %f", sum_accuracy / test_stroke_size)

    # with open(os.path.join(root_path, "memory","train.txt"), "w") as f:
    #     f.write(log)

    model_path = os.path.join(root_path, "memory", "model.pkl")
    pickle.dump(model, open(model_path, "wb"), -1)
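The network is serialized above with pickle; the following is a minimal sketch, not part of the original snippet, of reloading it for evaluation. It assumes the project's CNN class is importable when unpickling, and that forward(x, t, train_flag) fills model.loss and model.accuracy as in the training loop above.

import pickle

import chainer
import numpy as np


def evaluate_saved_model(model_path, x_eval, t_eval):
    # x_eval / t_eval are assumed to be preprocessed exactly like
    # x_test / t_test above: float32 images of shape (N, C, W, H)
    # and int32 labels.
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    x_batch = chainer.Variable(np.asarray(x_eval, dtype=np.float32))
    t_batch = np.asarray(t_eval, dtype=np.int32)
    model.forward(x_batch, t_batch, False)  # False = evaluation mode
    return float(model.loss.data), float(model.accuracy.data)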
コード例 #18
0
x, t = load_iris(return_X_y=True)
x = x.astype('float32')
t = t.astype('int32')

dataset = TupleDataset(x, t)

train_val, test = split_dataset_random(dataset,
                                       int(len(dataset) * 0.7),
                                       seed=0)
train, valid = split_dataset_random(train_val,
                                    int(len(train_val) * 0.7),
                                    seed=0)

train_iter = SerialIterator(train, batch_size=4, repeat=True, shuffle=True)

optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9)
optimizer.setup(net)

for param in net.params():
    if param.name != 'b':  # if the parameter is not a bias
        param.update_rule.add_hook(WeightDecay(0.0001))  # apply weight decay

n_batch = 64  # batch size
n_epoch = 50  # number of epochs

# Logs
results_train, results_valid = {}, {}
results_train['loss'], results_train['accuracy'] = [], []
results_valid['loss'], results_valid['accuracy'] = [], []

train_iter.reset()  # because next() has already been called once above
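The excerpt stops right after resetting the iterator; below is a minimal sketch, not from the original, of the epoch loop it is building toward. It assumes net is the chainer.Chain classifier set up before the optimizer (not shown in the excerpt); note that the iterator above was created with batch_size=4, so n_batch is not used here.

import chainer.functions as F
from chainer.dataset import concat_examples

for epoch in range(n_epoch):
    while True:
        # Fetch the next mini-batch and stack it into arrays.
        x_batch, t_batch = concat_examples(train_iter.next())

        y = net(x_batch)
        loss = F.softmax_cross_entropy(y, t_batch)

        net.cleargrads()
        loss.backward()
        optimizer.update()

        if train_iter.is_new_epoch:
            # Log the last batch's loss/accuracy as a rough per-epoch value.
            results_train['loss'].append(float(loss.data))
            results_train['accuracy'].append(
                float(F.accuracy(y, t_batch).data))
            break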
コード例 #19
0
ファイル: trainer.py プロジェクト: funalab/QCANet
    def training(self, iterators):
        train_iter, val_iter = iterators

        if self.opt_method == 'Adam':
            #opt_nsn = optimizers.Adam(alpha=0.05767827010227712, beta1=0.9687170166672859,
            #                          beta2=0.9918705323205452, eps=0.03260658847351856)
            opt_nsn = optimizers.Adam()
            opt_nsn.setup(self.model)
            #opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00000416029939))
            opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.001))

        elif self.opt_method == 'SGD':
            opt_nsn = optimizers.SGD(lr=1.0)
            #opt_nsn = optimizers.SGD(lr=0.01)
            opt_nsn.setup(self.model)
            #opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.001))
            opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00009))

        elif self.opt_method == 'MomentumSGD':
            opt_nsn = optimizers.MomentumSGD(lr=0.0001, momentum=0.99)
            opt_nsn.setup(self.model)
            #opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00001))

        train_eval, test_eval = {}, {}
        train_eval['loss'], test_eval['loss'] = [], []
        for cri in self.criteria:
            train_eval[cri] = []
            test_eval[cri] = []
        N_train = train_iter.dataset.__len__()
        N_test = val_iter.dataset.__len__()
        with open(self.opbase + '/result.txt', 'w') as f:
            f.write('N_train: {}\n'.format(N_train))
            f.write('N_test: {}\n'.format(N_test))
        bestAccuracy, bestRecall, bestPrecision, bestSpecificity, bestFmeasure, bestIoU = 0, 0, 0, 0, 0, 0
        bestEpoch = 0

        for epoch in range(1, self.epoch + 1):
            print('[epoch {}]'.format(epoch))
            traeval, train_sum_loss = self._trainer(train_iter,
                                                    opt_nsn,
                                                    epoch=epoch)
            train_eval['loss'].append(train_sum_loss /
                                      (N_train * self.batchsize))

            if epoch % self.val_iteration == 0:
                teseval, test_sum_loss = self._validater(val_iter, epoch=epoch)
                test_eval['loss'].append(test_sum_loss /
                                         (N_test * self.batchsize))
            else:
                teseval = {}
                for cri in self.criteria:
                    teseval[cri] = 0
                test_eval['loss'].append(0)
                test_sum_loss = 0

            for cri in self.criteria:
                train_eval[cri].append(traeval[cri])
                test_eval[cri].append(teseval[cri])
            print('train mean loss={}'.format(train_sum_loss /
                                              (N_train * self.batchsize)))
            print('train accuracy={}, train recall={}'.format(
                traeval['Accuracy'], traeval['Recall']))
            print('train precision={}, specificity={}'.format(
                traeval['Precision'], traeval['Specificity']))
            print('train F-measure={}, IoU={}'.format(traeval['F-measure'],
                                                      traeval['IoU']))
            print('test mean loss={}'.format(test_sum_loss /
                                             (N_test * self.batchsize)))
            print('test accuracy={}, recall={}'.format(teseval['Accuracy'],
                                                       teseval['Recall']))
            print('test precision={}, specificity={}'.format(
                teseval['Precision'], teseval['Specificity']))
            print('test F-measure={}, IoU={}'.format(teseval['F-measure'],
                                                     teseval['IoU']))
            with open(self.opbase + '/result.txt', 'a') as f:
                f.write('========================================\n')
                f.write('[epoch' + str(epoch) + ']\n')
                f.write('train mean loss={}\n'.format(
                    train_sum_loss / (N_train * self.batchsize)))
                f.write('train accuracy={}, train recall={}\n'.format(
                    traeval['Accuracy'], traeval['Recall']))
                f.write('train precision={}, specificity={}\n'.format(
                    traeval['Precision'], traeval['Specificity']))
                f.write('train F-measure={}, IoU={}\n'.format(
                    traeval['F-measure'], traeval['IoU']))
                if epoch % self.val_iteration == 0:
                    f.write('validation mean loss={}\n'.format(
                        test_sum_loss / (N_test * self.batchsize)))
                    f.write('validation accuracy={}, recall={}\n'.format(
                        teseval['Accuracy'], teseval['Recall']))
                    f.write('validation precision={}, specificity={}\n'.format(
                        teseval['Precision'], teseval['Specificity']))
                    f.write('validation F-measure={}, IoU={}\n'.format(
                        teseval['F-measure'], teseval['IoU']))
            with open(self.opbase + '/TrainResult.csv', 'a') as f:
                c = csv.writer(f)
                c.writerow([
                    epoch, traeval['Accuracy'], traeval['Recall'],
                    traeval['Precision'], traeval['Specificity'],
                    traeval['F-measure'], traeval['IoU']
                ])
            with open(self.opbase + '/ValResult.csv', 'a') as f:
                c = csv.writer(f)
                c.writerow([
                    epoch, teseval['Accuracy'], teseval['Recall'],
                    teseval['Precision'], teseval['Specificity'],
                    teseval['F-measure'], teseval['IoU']
                ])

            if epoch == 1:
                pastLoss = train_sum_loss

            if train_sum_loss > pastLoss and self.opt_method == 'SGD':
                # NOTE: the factor is 1.0, so the rate is effectively unchanged
                learning_rate = opt_nsn.lr * 1.0
                opt_nsn = optimizers.SGD(learning_rate)
                opt_nsn.setup(self.model)
                with open(self.opbase + '/result.txt', 'a') as f:
                    f.write('lr: {}\n'.format(opt_nsn.lr))

            pastLoss = train_sum_loss

            if bestAccuracy <= teseval['Accuracy']:
                bestAccuracy = teseval['Accuracy']
            if bestRecall <= teseval['Recall']:
                bestRecall = teseval['Recall']
                # Save Model
                # model_name = 'NSN_Recall_p' + str(self.patchsize) + '.npz'
                # serializers.save_npz(self.opbase + '/' + model_name, self.model)
            if bestPrecision <= teseval['Precision']:
                bestPrecision = teseval['Precision']
                # Save Model
                # model_name = 'NSN_Precision_p' + str(self.patchsize) + '.npz'
                # serializers.save_npz(self.opbase + '/' + model_name, self.model)
            if bestSpecificity <= teseval['Specificity']:
                bestSpecificity = teseval['Specificity']
            if bestFmeasure <= teseval['F-measure']:
                bestFmeasure = teseval['F-measure']
            if bestIoU <= teseval['IoU']:
                bestIoU = teseval['IoU']
                bestEpoch = epoch
                # Save Model
                if epoch > 0:
                    model_name = 'NSN_IoU_p' + str(self.patchsize) + '.npz'
                    serializers.save_npz(self.opbase + '/' + model_name,
                                         self.model)
                else:
                    bestIoU = 0.0

        bestScore = [
            bestAccuracy, bestRecall, bestPrecision, bestSpecificity,
            bestFmeasure, bestIoU
        ]
        print('========================================')
        print('Best Epoch : ' + str(bestEpoch))
        print('Best Accuracy : ' + str(bestAccuracy))
        print('Best Recall : ' + str(bestRecall))
        print('Best Precision : ' + str(bestPrecision))
        print('Best Specificity : ' + str(bestSpecificity))
        print('Best F-measure : ' + str(bestFmeasure))
        print('Best IoU : ' + str(bestIoU))
        with open(self.opbase + '/result.txt', 'a') as f:
            f.write('################################################\n')
            f.write('BestAccuracy={}\n'.format(bestAccuracy))
            f.write('BestRecall={}, BestPrecision={}\n'.format(
                bestRecall, bestPrecision))
            f.write('BestSpecificity={}, BestFmeasure={}\n'.format(
                bestSpecificity, bestFmeasure))
            f.write('BestIoU={}, BestEpoch={}\n'.format(bestIoU, bestEpoch))
            f.write('################################################\n')

        return train_eval, test_eval, bestScore
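The per-epoch metrics are also appended to TrainResult.csv and ValResult.csv above; the following is a minimal sketch, not from the original, of reading ValResult.csv back to recover the best-IoU epoch. It assumes results_dir points at the trainer's output directory; the column order follows the csv.writer call in the loop.

import csv

results_dir = '.'  # hypothetical: the trainer's opbase output directory

with open(results_dir + '/ValResult.csv') as f:
    # Columns: epoch, Accuracy, Recall, Precision, Specificity, F-measure, IoU
    rows = [[float(v) for v in row] for row in csv.reader(f) if row]

best = max(rows, key=lambda row: row[6])  # row[6] is IoU
print('best epoch by IoU: {} (IoU={})'.format(int(best[0]), best[6]))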
コード例 #20
0
ファイル: darknet19_train.py プロジェクト: bledem/yolotest
with open(label_file, "r") as f:
    labels = f.read().strip().split("\n")

# load model
print("loading model...")
model = Darknet19Predictor(Darknet19())
backup_file = "%s/401.model" % (backup_path)
if os.path.isfile(backup_file):
    serializers.load_hdf5(backup_file, model)  # load saved model
model.predictor.train = True
cuda.get_device(0).use()
model.to_gpu(0)  # for gpu
start = time.time()

optimizer = optimizers.MomentumSGD(lr=learning_rate, momentum=momentum)
optimizer.use_cleargrads()
optimizer.setup(model)
# freeze the weights
# model.conv1.disable_update()
optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

item_files = glob.glob(item_path + "/*")
x_debug = []
t_debug = []

# start to train
print("start training")
for batch in range(max_batches):
    model.predictor.train = True
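    # The excerpt cuts off here; a sketch of one possible continuation
    # (hypothetical, not taken from the project): darknet-style training
    # often warms the learning rate up over the first iterations.
    burn_in = 1000  # hypothetical warm-up length
    if batch < burn_in:
        optimizer.lr = learning_rate * float(batch + 1) / burn_in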
コード例 #21
0
def do_train(config_training):
    if config_training["training_management"]["disable_cudnn_softmax"]:
        import nmt_chainer.models.feedforward.multi_attention
        nmt_chainer.models.feedforward.multi_attention.disable_cudnn_softmax = True

    src_indexer, tgt_indexer = load_voc_and_update_training_config(
        config_training)

    save_prefix = config_training.training_management.save_prefix

    output_files_dict = {}
    output_files_dict["train_config"] = save_prefix + ".train.config"
    output_files_dict["model_ckpt"] = save_prefix + ".model." + "ckpt" + ".npz"
    output_files_dict["model_final"] = save_prefix + \
        ".model." + "final" + ".npz"
    output_files_dict["model_best"] = save_prefix + ".model." + "best" + ".npz"
    output_files_dict[
        "model_best_loss"] = save_prefix + ".model." + "best_loss" + ".npz"

    #     output_files_dict["model_ckpt_config"] = save_prefix + ".model." + "ckpt" + ".config"
    #     output_files_dict["model_final_config"] = save_prefix + ".model." + "final" + ".config"
    #     output_files_dict["model_best_config"] = save_prefix + ".model." + "best" + ".config"
    #     output_files_dict["model_best_loss_config"] = save_prefix + ".model." + "best_loss" + ".config"

    output_files_dict["test_translation_output"] = save_prefix + ".test.out"
    output_files_dict["test_src_output"] = save_prefix + ".test.src.out"
    output_files_dict["dev_translation_output"] = save_prefix + ".dev.out"
    output_files_dict["dev_src_output"] = save_prefix + ".dev.src.out"
    output_files_dict["valid_translation_output"] = save_prefix + ".valid.out"
    output_files_dict["valid_src_output"] = save_prefix + ".valid.src.out"
    output_files_dict["sqlite_db"] = save_prefix + ".result.sqlite"
    output_files_dict[
        "optimizer_ckpt"] = save_prefix + ".optimizer." + "ckpt" + ".npz"
    output_files_dict[
        "optimizer_final"] = save_prefix + ".optimizer." + "final" + ".npz"

    save_prefix_dir, save_prefix_fn = os.path.split(save_prefix)
    ensure_path(save_prefix_dir)

    already_existing_files = []
    for key_info, filename in output_files_dict.iteritems():
        if os.path.exists(filename):
            already_existing_files.append(filename)
    if len(already_existing_files) > 0:
        print "Warning: existing files are going to be replaced / updated: ", already_existing_files
        if not config_training.training_management.force_overwrite:
            raw_input("Press Enter to Continue")

    save_train_config_fn = output_files_dict["train_config"]
    log.info("Saving training config to %s" % save_train_config_fn)
    config_training.save_to(save_train_config_fn)
    #     json.dump(config_training, open(save_train_config_fn, "w"), indent=2, separators=(',', ': '))

    Vi = len(src_indexer)  # + UNK
    Vo = len(tgt_indexer)  # + UNK

    eos_idx = Vo

    data_fn = config_training.data.data_fn

    log.info("loading training data from %s" % data_fn)
    training_data_all = json.load(gzip.open(data_fn, "rb"))

    training_data = training_data_all["train"]

    log.info("loaded %i sentences as training data" % len(training_data))

    if "test" in training_data_all:
        test_data = training_data_all["test"]
        log.info("Found test data: %i sentences" % len(test_data))
    else:
        test_data = None
        log.info("No test data found")

    if "dev" in training_data_all:
        dev_data = training_data_all["dev"]
        log.info("Found dev data: %i sentences" % len(dev_data))
    else:
        dev_data = None
        log.info("No dev data found")

    if "valid" in training_data_all:
        valid_data = training_data_all["valid"]
        log.info("Found valid data: %i sentences" % len(valid_data))
    else:
        valid_data = None
        log.info("No valid data found")

    max_src_tgt_length = config_training.training_management.max_src_tgt_length
    if max_src_tgt_length is not None:
        log.info("filtering sentences of length larger than %i" %
                 (max_src_tgt_length))
        filtered_training_data = []
        nb_filtered = 0
        for src, tgt in training_data:
            if len(src) <= max_src_tgt_length and len(
                    tgt) <= max_src_tgt_length:
                filtered_training_data.append((src, tgt))
            else:
                nb_filtered += 1
        log.info("filtered %i sentences of length larger than %i" %
                 (nb_filtered, max_src_tgt_length))
        training_data = filtered_training_data

    if not config_training.training.no_shuffle_of_training_data:
        log.info("shuffling")
        import random
        random.shuffle(training_data)
        log.info("done")

    encdec, _, _, _ = create_encdec_and_indexers_from_config_dict(
        config_training,
        src_indexer=src_indexer,
        tgt_indexer=tgt_indexer,
        load_config_model="if_exists"
        if config_training.training_management.resume else "no")

    if (config_training.training.get("load_initial_source_embeddings", None)
            is not None or config_training.training.get(
                "load_initial_target_embeddings", None) is not None):
        src_emb = None
        tgt_emb = None

        src_emb_fn = config_training.training.get(
            "load_initial_source_embeddings", None)
        tgt_emb_fn = config_training.training.get(
            "load_initial_target_embeddings", None)

        if src_emb_fn is not None:
            log.info("loading source embeddings from %s", src_emb_fn)
            src_emb = np.load(src_emb_fn)

        if tgt_emb_fn is not None:
            log.info("loading target embeddings from %s", tgt_emb_fn)
            tgt_emb = np.load(tgt_emb_fn)

        encdec.initialize_embeddings(src_emb,
                                     tgt_emb,
                                     no_unk_src=True,
                                     no_unk_tgt=True)

#     create_encdec_from_config_dict(config_training.model, src_indexer, tgt_indexer,
#                             load_config_model = "if_exists" if config_training.training_management.resume else "no")

#     if config_training.training_management.resume:
#         if "model_parameters" not in config_training:
#             log.error("cannot find model parameters in config file")
#         if config_training.model_parameters.type == "model":
#             model_filename = config_training.model_parameters.filename
#             log.info("resuming from model parameters %s" % model_filename)
#             serializers.load_npz(model_filename, encdec)

    if config_training.training_management.load_model is not None:
        log.info("loading model parameters from %s",
                 config_training.training_management.load_model)
        load_model_flexible(config_training.training_management.load_model,
                            encdec)
#         try:
#             serializers.load_npz(config_training.training_management.load_model, encdec)
#         except KeyError:
#             log.info("not model format, trying snapshot format")
#             with np.load(config_training.training_management.load_model) as fseri:
#                 dicseri = serializers.NpzDeserializer(fseri, path="updater/model:main/")
#                 dicseri.load(encdec)

    gpu = config_training.training_management.gpu
    if gpu is not None:
        encdec = encdec.to_gpu(gpu)

    if config_training.training.optimizer == "adadelta":
        optimizer = optimizers.AdaDelta()
    elif config_training.training.optimizer == "adam":
        optimizer = optimizers.Adam()
    elif config_training.training.optimizer == "scheduled_adam":
        from nmt_chainer.additional_links.scheduled_adam import ScheduledAdam
        optimizer = ScheduledAdam(d_model=config_training.model.ff_d_model)
    elif config_training.training.optimizer == "adagrad":
        optimizer = optimizers.AdaGrad(
            lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "sgd":
        optimizer = optimizers.SGD(lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "momentum":
        optimizer = optimizers.MomentumSGD(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "nesterov":
        optimizer = optimizers.NesterovAG(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "rmsprop":
        optimizer = optimizers.RMSprop(
            lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "rmspropgraves":
        optimizer = optimizers.RMSpropGraves(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    else:
        raise NotImplementedError(
            "unknown optimizer: %s" % config_training.training.optimizer)
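    # The accepted values for config_training.training.optimizer are therefore:
    # "adadelta", "adam", "scheduled_adam", "adagrad", "sgd", "momentum",
    # "nesterov", "rmsprop" and "rmspropgraves".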

    with cuda.get_device(gpu):
        optimizer.setup(encdec)

    if config_training.training.l2_gradient_clipping is not None and config_training.training.l2_gradient_clipping > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(
                config_training.training.l2_gradient_clipping))

    if config_training.training.hard_gradient_clipping is not None and config_training.training.hard_gradient_clipping > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientHardClipping(
                *config_training.training.hard_gradient_clipping))

    if config_training.training.weight_decay is not None:
        optimizer.add_hook(
            chainer.optimizer.WeightDecay(
                config_training.training.weight_decay))

    if config_training.training_management.load_optimizer_state is not None:
        with cuda.get_device(gpu):
            log.info("loading optimizer parameters from %s",
                     config_training.training_management.load_optimizer_state)
            serializers.load_npz(
                config_training.training_management.load_optimizer_state,
                optimizer)

    if config_training.training_management.timer_hook:
        timer_hook = profiling_tools.MyTimerHook
    else:
        import contextlib

        @contextlib.contextmanager
        def timer_hook():
            yield

    import training_chainer
    with cuda.get_device(gpu):
        with timer_hook() as timer_infos:

            if config_training.training_management.max_nb_iters is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_iters,
                    "iteration")
                if config_training.training_management.max_nb_epochs is not None:
                    log.warn(
                        "max_nb_iters and max_nb_epochs both specified. Only max_nb_iters will be considered."
                    )
            elif config_training.training_management.max_nb_epochs is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_epochs, "epoch")
            else:
                stop_trigger = None
            training_chainer.train_on_data_chainer(
                encdec,
                optimizer,
                training_data,
                output_files_dict,
                src_indexer,
                tgt_indexer,
                eos_idx=eos_idx,
                config_training=config_training,
                stop_trigger=stop_trigger,
                test_data=test_data,
                dev_data=dev_data,
                valid_data=valid_data)