Esempio n. 1
0
def infer_initial_states_sctrnn(params,
                                old_model,
                                testing_data,
                                num_timesteps=0,
                                epochs=None,
                                start_is='mean',
                                error_computation='standard',
                                single_recognition=False,
                                hyp_prior=None,
                                external_signal_variance=-1,
                                x_start=None,
                                use_init_state_loss=True):
    """Infer SCTRNN initial states for given trajectories via backprop.

    Each trajectory in ``testing_data`` is treated as its own "class" and
    one initial-state vector per class is inferred by gradient descent,
    while the network weights and biases of ``old_model`` stay frozen.
    Intermediate results (plots, evaluation logs, model snapshots) are
    written to a timestamped directory.

    Args:
        params: Parameter object of the trained network (num_io, num_c,
            tau_c, init_state_init, init_state_var, batch_size, ...).
        old_model: Pretrained SCTRNN whose weights are reused (frozen).
        testing_data: Array of shape
            (num_trajectories, num_timesteps_orig * params.num_io).
        num_timesteps: Timesteps used for inference; 0 means the full
            trajectory length (completion task uses a prefix otherwise).
        epochs: Number of inference epochs; None runs up to an internal
            maximum (500) or until the training error drops below 0.01.
        start_is: 'mean' or 'zero' to initialize the inferred initial
            states accordingly, or an array used directly.
        error_computation: 'standard' (prediction vs. ground truth) or
            'integrated' (prediction vs. posterior of perception).
        single_recognition: If True, skip the initial-state variance loss.
        hyp_prior: Optional hyper-prior to set on the model copy.
        external_signal_variance: External signal variance to set on the
            model; negative values keep the model's default.
        x_start: Optional first input frame passed to ``model.generate``.
        use_init_state_loss: If False, skip the initial-state variance
            loss (only the prediction loss is backpropagated).

    Returns:
        Tuple ``(initial_states, history_initial_states, results, resm,
        save_dir)`` where ``initial_states`` is the model's (trained)
        initial-state link, ``history_initial_states`` the per-epoch
        copies of its array, ``results``/``resm`` the last reactive
        generation results, and ``save_dir`` the output directory.
    """

    # each trajectory is handled as a separate "class", infer initial states per class
    num_classes = testing_data.shape[0]
    # full number of timesteps
    num_timesteps_orig = testing_data.shape[1] // params.num_io
    # timesteps to use for inference
    if num_timesteps == 0:
        num_timesteps = num_timesteps_orig

    gpu_id = 0  # -1 for CPU
    # Determine whether CPU or GPU should be used
    xp = np
    if gpu_id >= 0 and cuda.available:
        print("Use GPU!")
        cuda.get_device_from_id(gpu_id).use()
        xp = cuda.cupy
    else:
        print("Use CPU!")
        gpu_id = -1

    # one class label per trajectory (num_samples_per_class samples each)
    c = []
    num_samples_per_class = 1
    for i in range(num_classes):
        for j in range(num_samples_per_class):
            c.append(i)
    c_train = xp.array(c)

    save_location = "."
    if os.path.exists("/media/AnjaDataDrive"):
        save_location = "/media/AnjaDataDrive"
    save_location += "/results"

    # timestamped directory name, unique per run
    now = datetime.datetime.now()
    expStr = str(now.year).zfill(4) + "-" + str(
        now.month).zfill(2) + "-" + str(now.day).zfill(2) + "_" + str(
            now.hour).zfill(2) + "-" + str(now.minute).zfill(2) + "_" + str(
                now.microsecond).zfill(7) + "_inference"
    save_dir = os.path.join(save_location, expStr)
    print(save_dir)

    pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)

    save_interval = 100  # interval for testing the production capability of the network and saving initial state information
    save_model_interval = 100  # interval for storing the learned model

    # CUT PART OF THE TRAINING SIGNAL (COMPLETION TASK)
    testing_data_cut = testing_data[:, 0:params.num_io * num_timesteps]

    plot_results(xp.copy(testing_data_cut[0::num_samples_per_class]),
                 num_timesteps,
                 os.path.join(save_dir, 'target_trajectories.png'),
                 params.num_io,
                 twoDim=True)

    info = "same trajectories (original #timesteps: " + str(
        num_timesteps_orig) + "), used #timesteps: " + str(num_timesteps)

    # copy network model and prepare it for backpropagation inference:
    # only the initial states are learned, weights/biases stay fixed
    params.learn_weights = False
    params.learn_bias = False
    params.epochs = epochs
    max_epochs = 500
    if params.epochs:
        epoch_array_size = params.epochs
    else:
        epoch_array_size = max_epochs

    model = SCTRNN(params.num_io,
                   params.num_c,
                   params.tau_c,
                   num_classes,
                   init_state_init=params.init_state_init,
                   init_state_learning=params.learn_init_states,
                   weights_learning=params.learn_weights,
                   bias_learning=params.learn_bias,
                   tau_learning=params.learn_tau,
                   pretrained_model=old_model)
    if hyp_prior is not None:
        model.hyp_prior = hyp_prior
        params.hyp_prior = hyp_prior
    # negative external_signal_variance means "keep the model default"
    if external_signal_variance is None or external_signal_variance >= 0:
        model.external_signal_variance = external_signal_variance
        params.external_signal_variance = external_signal_variance
    params.lr = 0.01

    with open(os.path.join(save_dir, "info.txt"), 'w') as f:
        f.write(params.get_parameter_string())
        f.write("\n")
        f.write(info)
        f.write("\n")

    # fix: compare strings with ==, not identity ("is" relies on interning)
    if start_is == 'mean':
        model.set_initial_states_mean()
    elif start_is == 'zero':
        model.set_initial_states_zero()
    else:
        # use the provided array directly as initial states
        model.initial_states.W.array = start_is
    model.set_init_state_learning(c_train)

    if gpu_id >= 0:
        model.to_gpu(gpu_id)
        testing_data = cuda.to_gpu(testing_data)
        # fix: cuda.to_gpu(None) would fail when no start frame is given
        if x_start is not None:
            x_start = cuda.to_gpu(x_start)

    save_network(save_dir,
                 params=params,
                 model=model,
                 model_filename="network-initial")

    # Optimizer (only initial states have gradients enabled)
    optimizer = optimizers.Adam(params.lr)
    optimizer.setup(model)

    history_init_state_var = np.zeros((epoch_array_size + 1, ))
    history_init_state_var[0] = np.mean(
        np.var(model.initial_states.W.array, axis=0))
    history_generation_error_proactive = np.empty((num_classes, ),
                                                  dtype=object)
    history_generation_error_reactive = np.empty((num_classes, ), dtype=object)
    history_training_error = np.zeros((epoch_array_size + 1, ))
    history_training_variance_estimation = np.zeros(
        (epoch_array_size + 1, num_classes))

    history_initial_states = []

    likelihood_per_epoch = []

    print("actual variance of init_states_0: " +
          str(history_init_state_var[0]))

    # Evaluate the performance of the untrained network
    test_batch_size = np.min(
        [model.initial_states.W.array.shape[0], testing_data.shape[0]])
    res, resv, resm = model.generate(model.initial_states.W.array,
                                     num_timesteps_orig,
                                     add_variance_to_output=0,
                                     x_start=x_start)
    results = res

    for i in range(num_classes):
        generation_error = chainer.functions.mean_squared_error(
            results[i, :], testing_data[i, :]).array.tolist()
        history_generation_error_proactive[i] = [generation_error]

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("before learning: pattern generation error (proactive): " +
                    str(history_generation_error_proactive[i]) + "\n")

    plot_results(xp.copy(results),
                 num_timesteps_orig,
                 os.path.join(save_dir, "proactive_before-learning"),
                 params.num_io,
                 twoDim=True)

    res, resv, resm, pe, wpe, respost = model.generate(
        model.initial_states.W.array,
        num_timesteps_orig,
        external_input=xp.asarray(testing_data[0::num_samples_per_class, :]),
        add_variance_to_output=0,
        x_start=x_start)
    results = res

    for i in range(num_classes):
        generation_error = chainer.functions.mean_squared_error(
            results[i, :], testing_data[i, :]).array.tolist()
        history_generation_error_reactive[i] = [generation_error]

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("before learning: pattern generation error (reactive): " +
                    str(history_generation_error_reactive[i]) + "\n")

    plot_results(xp.copy(results),
                 num_timesteps_orig,
                 os.path.join(save_dir, "reactive_before-learning"),
                 params.num_io,
                 twoDim=True)

    # arrays for tracking likelihood and determining stop condition
    all_mean_diffs = []
    all_std_diffs = []
    m1s = []
    s1s = []

    for epoch in range(1, epoch_array_size + 1):
        epochStart = time.time()

        outv = np.zeros((num_timesteps, ))

        # permutate samples in each epoch so that they are randomly ordered
        perm = np.random.permutation(testing_data_cut.shape[0])

        # here, one batch equals the full training set; add small input noise
        x_batch = xp.asarray(testing_data_cut[perm])
        x_batch = x_batch + 0.01 * xp.random.randn(
            x_batch.shape[0], x_batch.shape[1]).astype('float32')
        model.set_init_state_learning(c_train[perm])

        # mean of the current initial states (target of the variance loss)
        mean_init_states = chainer.functions.average(model.initial_states.W,
                                                     axis=0)

        # initialize error accumulators
        acc_loss = chainer.Variable(xp.zeros(
            (), dtype=xp.float32))  # for weight backprop
        acc_init_loss = chainer.Variable(xp.zeros(
            (), dtype=xp.float32))  # for init states backprop
        err = xp.zeros(())  # for evaluation only

        # clear gradients from previous batch
        model.cleargrads()
        # clear output and variance estimations from previous batch
        model.reset_current_output()

        t = 0  # iterate through time
        x_t = x_batch[:, params.num_io * t:params.num_io * (t + 1)]
        # next time step to be predicted (for evaluation)
        x_t1 = x_batch[:, params.num_io * (t + 1):params.num_io * (t + 2)]

        # execute first forward step
        u_h, y, v = model(
            x_t, None
        )  # initial states of u_h are set automatically according to model.classes

        # compute prediction error, averaged over batch
        if error_computation == 'standard':
            # compare network prediction to ground truth
            loss_i = chainer.functions.gaussian_nll(chainer.Variable(x_t1), y,
                                                    exponential.log(v))
        elif error_computation == 'integrated':
            # compare network prediction to posterior of perception
            loss_i = chainer.functions.gaussian_nll(model.current_x, y,
                                                    exponential.log(v))
        else:
            # fix: previously an unknown mode left loss_i unbound (NameError)
            raise ValueError("unknown error_computation: " +
                             str(error_computation))
        # fix: accumulate the first timestep's loss exactly once
        # (the original added loss_i twice here, double-counting t=0)
        acc_loss += loss_i

        # compute error for evaluation purposes
        # NOTE(review): this compares the prediction y to the CURRENT input
        # x_t, not to the predicted next step x_t1 — confirm this is intended.
        err += chainer.functions.mean_squared_error(
            chainer.Variable(x_t), y).array.reshape(()) * params.batch_size

        outv[t] = xp.mean(v.array)

        # rollout trajectory
        for t in range(1, num_timesteps - 1):
            # current time step
            x_t = x_batch[:, params.num_io * t:params.num_io * (t + 1)]
            # next time step to be predicted (for evaluation)
            x_t1 = x_batch[:, params.num_io * (t + 1):params.num_io * (t + 2)]

            u_h, y, v = model(x_t, u_h)

            # compute error for backprop for weights
            if error_computation == 'standard':
                loss_i = chainer.functions.gaussian_nll(
                    chainer.Variable(x_t1), y, exponential.log(v))
            elif error_computation == 'integrated':
                integrated_x = params.training_external_contrib * chainer.Variable(
                    x_t1) + (1 - params.training_external_contrib) * (
                        y + chainer.functions.sqrt(v) * xp.random.randn())
                loss_i = chainer.functions.gaussian_nll(
                    integrated_x, y, exponential.log(v))
            else:
                raise ValueError("unknown error_computation: " +
                                 str(error_computation))
            acc_loss += loss_i

            # compute error for evaluation purposes
            # NOTE(review): compares y to x_t, not x_t1 (see note above)
            err += chainer.functions.mean_squared_error(
                chainer.Variable(x_t), y).array.reshape(()) * params.batch_size

            outv[t] = xp.mean(v.array)

        # for each training sequence of this batch: compute loss for maintaining desired initial state variance
        if not single_recognition and use_init_state_loss:
            for s in range(len(c_train)):
                if gpu_id >= 0:
                    acc_init_loss += chainer.functions.gaussian_nll(
                        model.initial_states()[model.classes][s],
                        mean_init_states,
                        xp.ones(mean_init_states.shape) * exponential.log(
                            cuda.to_gpu(params.init_state_var, device=gpu_id)))
                else:
                    acc_init_loss += chainer.functions.gaussian_nll(
                        model.initial_states()[model.classes][s],
                        mean_init_states,
                        exponential.log(params.init_state_var))

            # compute gradients
            # (gradients from L_out and L_init are summed up)
            # gradient of initial states equals:
            # 1/params.init_state_var * (c0[cl]-mean_init_states).array
            acc_init_loss.backward()
        else:
            epochBatchProcessed = time.time()

        acc_loss.backward()

        print("update")
        optimizer.update()

        print("Done epoch " + str(epoch))
        error = err / params.batch_size / num_timesteps
        mean_estimated_var = xp.mean(outv)
        history_training_error[epoch] = error
        history_training_variance_estimation[epoch, :] = mean_estimated_var

        print("train MSE = " + str(error) + "\nmean estimated var: " +
              str(mean_estimated_var))
        print("init_states = [" + str(model.initial_states.W.array[0][0]) +
              "," + str(model.initial_states.W.array[0][1]) + "...], var: " +
              str(np.mean(np.var(model.initial_states.W.array, axis=0))) +
              ", accs: " + str(acc_loss) + " + " + str(acc_init_loss))

        likelihood_per_epoch.append(
            np.float64(acc_loss.array + acc_init_loss.array))

        history_init_state_var[epoch] = np.mean(
            np.var(model.initial_states.W.array, axis=0))

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("epoch: " + str(epoch) + "\n")
            f.write("train MSE = " + str(error) + "\nmean estimated var: " +
                    str(mean_estimated_var))
            f.write("initial state var: " +
                    str(history_init_state_var[epoch]) + ", precision loss: " +
                    str(acc_loss) + ", variance loss: " + str(acc_init_loss) +
                    "\ninit states:\n")
            for i in range(num_classes):
                f.write("\t[" + str(model.initial_states.W[i][0]) + "," +
                        str(model.initial_states.W[i][1]) + "...]\n")

        if epoch % save_interval == 1 or epoch == params.epochs:
            # evaluate proactive generation
            res, resv, resm, u_h_history = model.generate(
                model.initial_states.W.array,
                num_timesteps_orig,
                add_variance_to_output=0,
                additional_output='activations',
                x_start=x_start)
            results = res

            plot_results(xp.copy(results),
                         num_timesteps_orig,
                         os.path.join(
                             save_dir, "proactive_epoch-" +
                             str(epoch).zfill(len(str(epochs)))),
                         params.num_io,
                         twoDim=True)

            for i in range(num_classes):
                generation_error = chainer.functions.mean_squared_error(
                    results[i, :], testing_data[i, :]).array.tolist()
                history_generation_error_proactive[i].append(generation_error)
                with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
                    f.write("pattern generation error (proactive): " +
                            str(generation_error) + "\n")

            # evaluate reactive generation
            res, resv, resm, pe, wpe, u_h_history, respost = model.generate(
                model.initial_states.W.array,
                num_timesteps_orig,
                external_input=xp.asarray(
                    testing_data[0::num_samples_per_class, :]),
                additional_output='activations',
                x_start=x_start)
            results = res

            plot_results(xp.copy(results),
                         num_timesteps_orig,
                         os.path.join(
                             save_dir, "reactive_epoch-" +
                             str(epoch).zfill(len(str(epochs)))),
                         params.num_io,
                         twoDim=True)

            for i in range(test_batch_size):
                generation_error = chainer.functions.mean_squared_error(
                    results[i, :], testing_data[i, :]).array.tolist()
                history_generation_error_reactive[i].append(generation_error)
                with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
                    f.write("pattern generation error (reactive): " +
                            str(generation_error) + "\n")

        if epoch % save_model_interval == 1 or epoch == params.epochs:
            save_network(save_dir,
                         params,
                         model,
                         model_filename="network-epoch-" +
                         str(epoch).zfill(len(str(epochs))))
            np.save(os.path.join(save_dir, "history_init_state_var"),
                    np.array(history_init_state_var))
            np.save(
                os.path.join(save_dir, "history_generation_error_proactive"),
                np.array(history_generation_error_proactive))
            np.save(
                os.path.join(save_dir, "history_generation_error_reactive"),
                np.array(history_generation_error_reactive))
            np.save(os.path.join(save_dir, "history_training_error"),
                    np.array(history_training_error))
            np.save(
                os.path.join(save_dir, "history_training_variance_estimation"),
                np.array(history_training_variance_estimation))

            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.plot(np.arange(0, len(history_init_state_var)),
                    history_init_state_var)
            plt.title("init state variance")
            fig.savefig(os.path.join(save_dir, "init-state-var"))
            plt.close()

            fig = plt.figure()
            ax = fig.add_subplot(121)
            for i in range(num_classes):
                ax.plot(
                    np.arange(0, len(history_generation_error_proactive[i])) *
                    save_interval, history_generation_error_proactive[i])
            ax = fig.add_subplot(122)
            for i in range(num_classes):
                ax.plot(
                    np.arange(0, len(history_generation_error_reactive[i])) *
                    save_interval,
                    history_generation_error_reactive[i],
                    label=str(i))
            plt.title("generation error (proactive / reactive)")
            plt.legend()
            fig.savefig(os.path.join(save_dir, "generation-error"))
            plt.close()

            plt.figure()
            plt.plot(np.arange(len(all_std_diffs)),
                     all_std_diffs,
                     'bo',
                     label='std diff')
            plt.plot(np.arange(len(all_mean_diffs)),
                     all_mean_diffs,
                     'ro',
                     label='mean diff')
            plt.legend()
            plt.savefig(os.path.join(save_dir, 'convergence-condition.png'))
            plt.close()

        history_initial_states.append(model.initial_states.W.array.copy())

        # if no epoch number is decided, stop when error is below a threshold
        if not epochs:
            if error < 0.01:
                break

    save_network(save_dir, params, model, model_filename="network-final")

    return model.initial_states, history_initial_states, results, resm, save_dir
Esempio n. 2
0
if __name__ == '__main__':
    # Hyperparameters
    num_epochs = 10  # number of epochs
    batch_size = 500  # minibatch size
    learing_rate = 0.001  # learning rate (NOTE(review): variable name has a typo, kept as-is)

    # Load the MNIST dataset: each split is a (images, labels) pair
    train, test = load_Mnist()
    x_train, c_train = train
    x_test, c_test = test
    num_train = len(x_train)
    num_test = len(x_test)

    # Model and optimizer (using chainer built-ins)
    model = ConvNet()
    optimizer = optimizers.Adam(learing_rate)
    optimizer.setup(model)

    # Move the model to GPU when a non-negative GPU id was requested
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Training loop: iterate over the training set in minibatches
    loss_log = []
    for epoch in range(num_epochs):
        for i in range(0, num_train, batch_size):
            x_batch = xp.asarray(x_train[i:i + batch_size])  # one minibatch slice [i, i+batch_size)
            c_batch = xp.asarray(c_train[i:i + batch_size])
            y_batch = model(x_batch)

            # Loss computation
            # NOTE(review): snippet is truncated here — the loss, backward
            # pass and optimizer update are missing from the visible code.
Esempio n. 3
0
    model.to_gpu()


def forward(x_data, y_data, train=True):
    """Run the two-conv CNN forward pass on one minibatch.

    Wraps the raw arrays in Variables, applies conv1/conv2 each followed
    by ReLU and 2x2 max-pooling, a dropout-regularized hidden layer l1,
    and the output layer l2. Returns the softmax cross-entropy loss when
    *train* is True, otherwise the classification accuracy.
    """
    x = chainer.Variable(x_data)
    t = chainer.Variable(y_data)
    pooled = F.max_pooling_2d(F.relu(model.conv1(x)), 2)
    pooled = F.max_pooling_2d(F.relu(model.conv2(pooled)), 2)
    hidden = F.dropout(F.relu(model.l1(pooled)), train=train)
    y = model.l2(hidden)
    if not train:
        return F.accuracy(y, t)
    return F.softmax_cross_entropy(y, t)


# Optimizer: Adam with default hyperparameters, attached to the global model
optimizer = optimizers.Adam()
optimizer.setup(model)

# Log files for per-epoch test accuracy and training loss
# NOTE(review): these handles are never closed in the visible snippet
fp1 = open("accuracy.txt", "w")
fp2 = open("loss.txt", "w")

fp1.write("epoch\ttest_accuracy\n")
fp2.write("epoch\ttrain_loss\n")

# Training loop
# NOTE(review): this is Python 2 code (print statement below);
# time.clock() was deprecated and removed in Python 3.8
start_time = time.clock()
for epoch in range(1, n_epoch + 1):
    print "epoch: %d" % epoch

    # Random permutation of the N training samples for this epoch
    perm = np.random.permutation(N)
    sum_loss = 0
    # NOTE(review): the loop body is truncated here in the visible code
def main():
    """Train a chainer-chemistry regressor on a CSV dataset of SMILES.

    Parses command-line arguments, preprocesses the dataset with the
    method-specific preprocessor, optionally standardizes the labels,
    trains a Regressor with Adam, and pickles the trained model.
    Raises ValueError when no target label is specified.
    """
    # Parse the arguments.
    args = parse_arguments()

    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # Regression targets must be float32 arrays, not raw CSV strings.
        return numpy.asarray(label_list, dtype=numpy.float32)

    # Apply a preprocessor to the dataset.
    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParser(preprocessor,
                           postprocess_label=postprocess_label,
                           labels=labels,
                           smiles_col='SMILES')
    dataset = parser.parse(args.datafile)['dataset']

    # Scale the label values, if necessary.
    # The labels are the last element of the dataset's internal tuple.
    if args.scale == 'standardize':
        scaler = StandardScaler()
        scaler.fit(dataset.get_datasets()[-1])
    else:
        scaler = None

    # Split the dataset into training and validation.
    # The validation part is discarded here; only training is performed.
    train_data_size = int(len(dataset) * args.train_data_ratio)
    train, _ = split_dataset_random(dataset, train_data_size, args.seed)

    # Set up the predictor.
    predictor = set_up_predictor(args.method,
                                 args.unit_num,
                                 args.conv_layers,
                                 class_num,
                                 label_scaler=scaler)

    # Set up the iterator.
    train_iter = SerialIterator(train, args.batchsize)

    # Set up the regressor: MSE loss, MAE/RMSE reported as metrics.
    device = args.gpu
    metrics_fun = {'mae': F.mean_absolute_error, 'rmse': rmse}
    regressor = Regressor(predictor,
                          lossfun=F.mean_squared_error,
                          metrics_fun=metrics_fun,
                          device=device)

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(regressor)

    # Set up the updater. concat_mols batches the molecular graph inputs.
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=device,
                                       converter=concat_mols)

    # Set up the trainer: snapshot once at the end, log/print per epoch.
    print('Training...')
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(
        E.PrintReport(
            ['epoch', 'main/loss', 'main/mae', 'main/rmse', 'elapsed_time']))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(args.out, args.model_filename)
    print('Saving the trained model to {}...'.format(model_path))
    regressor.save_pickle(model_path, protocol=args.protocol)
Esempio n. 5
0
def main():
  """Train a 3D-CNN on voxelized pose data loaded from HDF5 files.

  Builds the input voxel grid x and target array y according to the
  module-level `orientation` / `output` settings, shuffles and splits
  the data, trains with Adam via a chainer Trainer, and saves the model.
  Relies on module-level globals: n_data, div, orientation, output,
  gpu, batchsize, max_epoch, args.
  """
  t0 = time.time()

  # 95/5 train/validation split of the n_data samples
  train_num = int(n_data*0.95)
  test_num = n_data - train_num

  # Voxel grid dimensions: single channel, div^3 cells
  channel = 1
  axis_x = div
  axis_y = div
  axis_z = div
  output_pos = 3
  output_ori = 4 if orientation == "quaternion" else 3
  class_num = 2

  x = np.zeros((n_data, channel, axis_z, axis_y, axis_x), dtype='float32')
  # Pick the network variant and target width for the chosen orientation
  # representation: quaternion (3 pos + 4 quat), euler (3+3), or
  # rotation matrix (3 + 9).
  if orientation == "quaternion":
    import network_1 as network
    y = np.zeros((n_data,7), dtype='float32')
  elif orientation == 'euler':
    import network_euler as network
    y = np.zeros((n_data,6), dtype='float32')
  elif orientation == 'rotation_matrix':
    import network_matrix as network
    y = np.zeros((n_data,12), dtype='float32')
  # NOTE(review): if `orientation` matches none of the above and
  # `output` != 'classification', `y` (and `network`) stay undefined
  # and the code below raises NameError — confirm valid settings.

  if output == 'classification':
    import network_matrix_class as network
    label = np.zeros((n_data), dtype='int32')

  print("load dataset")
  infh = h5py.File('./datasets/hv7_52000.hdf5', 'r')
  infh.keys()  # no-op; result discarded (presumably leftover debug)
  if output == 'classification':
    # Second dataset supplies the negative/second class examples
    infh2 = h5py.File('./datasets/paper_hv6_58000.hdf5', 'r')
    infh2.keys()  # no-op; result discarded

  for n in tqdm(range(0,n_data)):
      ## load dataset from hdf
      # For regression all samples come from infh; for classification the
      # first half is class 0 (infh) and the second half class 1 (infh2).
      # NOTE(review): Dataset.value is deprecated h5py API (use ds[()]).
      if(output == 'regression' or n < n_data/2):
        voxel_data = infh["data_"+str(n+1)]['voxel'].value
        tf_data = infh["data_"+str(n+1)]['pose'].value
        if output == 'classification':
          label[n] = np.array(0, dtype='int32')
      else:
        voxel_data = infh2["data_"+str(n+1)]['voxel'].value
        tf_data = infh2["data_"+str(n+1)]['pose'].value
        if output == 'classification':
          label[n] = np.array(1, dtype='int32')

      ## transform or normalize orientation
      x[n,channel-1] = voxel_data.reshape(axis_x, axis_y, axis_z)
      if orientation == "euler":
        # Normalize euler angles roughly into [-1, 1] (divide by ~pi)
        tf_data[3:6] = tf_data[3:6]/3.14
        y[n] = tf_data[0:6]
      elif orientation == "rotation_matrix":
        # Position first, then the flattened 3x3 rotation matrix
        y[n][0:3] = tf_data[0:3]
        tf_data = euler_matrix(tf_data[3], tf_data[4], tf_data[5], 'sxyz')
        y[n][3:12] = tf_data[0:3,0:3].reshape(9)

  print(x.shape)
  print("y.shape: {}".format(y.shape))

  #### visualize voxel data
  ##    point_x = []
  ##    point_y = []
  ##    point_z = []
  ##    fig = plt.figure()
  ##    ax = fig.add_subplot(111, projection='3d')
  ##    ax.set_xlabel("x")
  ##    ax.set_ylabel("y")
  ##    ax.set_zlabel("z")
  ##
  ##    for m in range(channel):
  ##        for i in range(axis_z):
  ##            for j in range(axis_y):
  ##                for k in range(axis_x):
  ##                    if(voxel_data[(div*div*i) + (div*j) + (k)] == 1):
  ##                        x[n, m, i, j, k] = 1
  ##                        point_x.append(k)
  ##                        point_y.append(j)
  ##                        point_z.append(i)
  ##
  ##    ax.scatter(point_x, point_y, point_z)
  ##    plt.show()

  ##for a in range(4):
  ##p = Process(target=make_voxel, args=(infh,))
  ##p.start()
  ##p.join()

  print("finish loading datasets")

  t1 = time.time()
  elapsed_time1 = t1-t0
  print("データ読み込み時間:{}".format(elapsed_time1))

  nn = network.CNN()
  if gpu >= 0:
      nn.to_gpu(0)

  # Shuffle samples jointly with their targets (and labels, if present),
  # then split into train/validation TupleDatasets.
  if output == 'classification':
    p = list(zip(x, y, label))
    random.shuffle(p)
    x, y, label = zip(*p)
    train = TupleDataset(x[:train_num], y[:train_num], label[:train_num])
    val = TupleDataset(x[train_num:],y[train_num:], label[train_num:])
  else:
    p = list(zip(x, y))
    random.shuffle(p)
    x, y = zip(*p)
    train = TupleDataset(x[:train_num], y[:train_num])
    val = TupleDataset(x[train_num:],y[train_num:])

  train_iter = iterators.SerialIterator(train, batchsize)
  val_iter = iterators.SerialIterator(val, batchsize, False, False)

  optimizer = optimizers.Adam()
  optimizer.setup(nn)

  updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu)
  trainer = training.Trainer(updater, (max_epoch, 'epoch'))

  trainer.extend(extensions.LogReport())
  trainer.extend(extensions.Evaluator(val_iter, nn, device=gpu))
  trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
  trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
  trainer.extend(extensions.dump_graph('main/loss'))

  ##trigger = training.triggers.MaxValueTrigger('validation/main/loss', trigger=(1, 'epoch'))
  ##trainer.extend(extensions.snapshot_object(nn, filename='result/new_best.model'), trigger=trigger)
  trainer.run()

  # NOTE(review): uses args.gpu here but the bare global `gpu` above —
  # confirm these are meant to be the same value.
  if args.gpu >= 0:
      nn.to_cpu()

  t2 = time.time()
  elapsed_time2 = t2-t1
  print("データ読み込み時間:{}".format(elapsed_time1))
  print("学習時間:{}".format(elapsed_time2))

  serializers.save_npz('result/new.model', nn)
Esempio n. 6
0
def main():
    """Train a word2vec model (skip-gram or CBOW) on the PTB corpus.

    The output layer is selectable between hierarchical softmax
    (``hsm``), negative sampling (``ns``), and a plain softmax cross
    entropy (``original``).  After training, the embeddings are written
    to ``word2vec.model`` in the textual word2vec format (a header line
    ``<vocab> <dim>`` followed by one ``word v1 v2 ...`` line per word).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit',
                        '-u',
                        default=100,
                        type=int,
                        help='number of units')
    parser.add_argument('--window',
                        '-w',
                        default=5,
                        type=int,
                        help='window size')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model',
                        '-m',
                        choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size',
                        default=5,
                        type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type',
                        '-o',
                        choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Select the GPU device once up front.  (An earlier revision repeated
    # this setup a second time after the parameter printout; the duplicate
    # call was redundant and has been removed.)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Load the dataset (arrays of word ids) and collect unigram counts
    # over both the train and validation splits.
    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    # Vocabulary size is derived from the full corpus, before any --test
    # truncation, so id -> word lookups below stay valid.
    n_vocab = max(train) + 1

    if args.test:
        train = train[:100]
        val = val[:100]

    vocab = chainer.datasets.get_ptb_words_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    # Choose the output (loss) layer.  HSM and NS weights start at zero.
    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model architecture.
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Iterators yield (context window, center word) batches.
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter,
                             model,
                             converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the learned embeddings in textual word2vec format.
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Esempio n. 7
0
def train(args):
    """Train a semi-supervised CNN classifier and log/snapshot results.

    Runs ``args.num_epochs`` epochs combining a supervised loss on
    labeled batches with a weighted unsupervised loss on unlabeled
    batches (``loss_l + args.beta * loss_ul``).  Periodically evaluates
    test accuracy and saves the model, optimizer, and a text log under
    ``args.log_dir``.
    """
    np.random.seed(args.seed)
    train_l, train_ul, test = load_dataset(args.data_dir,
                                           valid=args.validation,
                                           dataset_seed=args.dataset_seed)
    print("N_train_labeled:{}, N_train_unlabeled:{}".format(
        train_l.N, train_ul.N))
    enc = CNN(n_outputs=args.n_categories,
              dropout_rate=args.dropout_rate,
              top_bn=args.top_bn)
    if args.gpu > -1:
        chainer.cuda.get_device(args.gpu).use()
        enc.to_gpu()
    # Optionally resume from previously saved network weights.
    if args.net != '':
        serializers.load_npz(args.net, enc)

    optimizer = optimizers.Adam(alpha=args.lr, beta1=args.mom1)
    optimizer.setup(enc)
    # Optionally resume the optimizer state as well.
    if args.opt != '':
        serializers.load_npz(args.opt, optimizer)

    # Learning-rate / momentum schedule: constant until
    # ``epoch_decay_start``; afterwards alpha decays linearly towards 0
    # and beta1 switches to ``args.mom2``.
    alpha_plan = [args.lr] * args.num_epochs
    beta1_plan = [args.mom1] * args.num_epochs
    for i in range(args.epoch_decay_start, args.num_epochs):
        alpha_plan[i] = float(args.num_epochs - i) / (
            args.num_epochs - args.epoch_decay_start) * args.lr
        beta1_plan[i] = args.mom2

    # Per-epoch statistics, written to log.txt on each snapshot.
    accs_test = np.zeros(args.num_epochs)
    cl_losses = np.zeros(args.num_epochs)
    ul_losses = np.zeros(args.num_epochs)
    mkdir_p(args.log_dir)
    for epoch in range(args.num_epochs):
        with chainer.using_config('train', True):
            optimizer.alpha = alpha_plan[epoch]
            optimizer.beta1 = beta1_plan[epoch]
            sum_loss_l = 0
            sum_loss_ul = 0
            for it in range(args.num_iter_per_epoch):
                start = time.time()
                x1, t = train_l.get(args.batchsize,
                                    gpu=args.gpu,
                                    aug_trans=args.aug_trans,
                                    aug_flip=args.aug_flip)
                loss_l = loss_labeled(enc, Variable(x1), Variable(t))
                x_u1, _ = train_ul.get(args.batchsize_ul,
                                       gpu=args.gpu,
                                       aug_trans=args.aug_trans,
                                       aug_flip=args.aug_flip)
                loss_ul = loss_unlabeled(enc, Variable(x_u1), args)
                # Total objective: supervised loss plus the weighted
                # unsupervised regularizer.
                beta = args.beta
                loss_total = loss_l + beta * loss_ul
                enc.cleargrads()
                loss_total.backward()
                optimizer.update()
                sum_loss_l += loss_l.data
                sum_loss_ul += loss_ul.data
                end = time.time()
                print(
                    "Epoch: {} Iter: {} time_batch: {} beta: {} loss_l: {} loss_ul: {}"
                    .format(epoch, it, end - start, beta, loss_l.data,
                            loss_ul.data))
            cl_losses[epoch] = sum_loss_l / args.num_iter_per_epoch
            ul_losses[epoch] = sum_loss_ul / args.num_iter_per_epoch

        # Evaluate on the full test split every ``eval_freq`` epochs,
        # batching to fit in memory.
        if (epoch + 1) % args.eval_freq == 0:
            with chainer.using_config('train', False):
                acc_test_sum = 0
                test_x, test_t = test.get()
                N_test = test_x.shape[0]
                for i in range(0, N_test, args.batchsize_eval):
                    x = test_x[i:i + args.batchsize_eval]
                    t = test_t[i:i + args.batchsize_eval]
                    if args.gpu > -1:
                        x, t = cuda.to_gpu(x, device=args.gpu), cuda.to_gpu(
                            t, device=args.gpu)
                    _, acc = loss_test(enc, Variable(x), Variable(t))
                    acc_test_sum += acc * x.shape[0]
                accs_test[epoch] = acc_test_sum / N_test
                # NOTE(review): ``end - start`` here reuses the timestamps
                # of the *last training batch*, not the evaluation time --
                # confirm whether that is intended.
                print(
                    "Epoch:{}, classification loss:{}, unlabeled loss:{}, time:{}"
                    .format(epoch, cl_losses[epoch], ul_losses[epoch],
                            end - start))
                print("test acc:{}".format(accs_test[epoch]))

        sys.stdout.flush()
        if (epoch + 1) % args.snapshot_freq == 0:
            # Save stats and model
            np.savetxt(os.path.join(args.log_dir, 'log.txt'),
                       np.concatenate([
                           np.array([['acc', 'cl_loss', 'ul_loss']]),
                           np.transpose([accs_test, cl_losses, ul_losses])
                       ], 0),
                       fmt='%s')
            serializers.save_npz(
                os.path.join(args.log_dir, 'trained_model_ep{}'.format(epoch)),
                enc)
            serializers.save_npz(
                os.path.join(args.log_dir, 'optimizer_ep{}'.format(epoch)),
                optimizer)

    # Save final stats and model
    np.savetxt(os.path.join(args.log_dir, 'log.txt'),
               np.concatenate([
                   np.array([['acc', 'cl_loss', 'ul_loss']]),
                   np.transpose([accs_test, cl_losses, ul_losses])
               ], 0),
               fmt='%s')
    serializers.save_npz(os.path.join(args.log_dir, 'trained_model_final'),
                         enc)
    serializers.save_npz(os.path.join(args.log_dir, 'optimizer_final'),
                         optimizer)
Esempio n. 8
0
def main():
    """Train a multitask graph-convolution classifier on Tox21.

    Parses command-line options selecting the graph-conv method, target
    label(s), iterator type, and training hyper-parameters; trains with
    a sigmoid cross-entropy loss, optionally evaluating ROC-AUC; then
    writes ``config.json`` and the pickled classifier to ``args.out``.
    """
    # Supported preprocessing/network list
    method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn']
    label_names = D.get_tox21_label_names()
    iterator_type = ['serial', 'balanced']

    parser = argparse.ArgumentParser(
        description='Multitask Learning with Tox21.')
    parser.add_argument('--method',
                        '-m',
                        type=str,
                        choices=method_list,
                        default='nfp',
                        help='graph convolution model to use '
                        'as a predictor.')
    parser.add_argument('--label',
                        '-l',
                        type=str,
                        choices=label_names,
                        default='',
                        help='target label for logistic '
                        'regression. Use all labels if this option '
                        'is not specified.')
    parser.add_argument('--iterator-type',
                        type=str,
                        choices=iterator_type,
                        default='serial',
                        help='iterator type. If `balanced` '
                        'is specified, data is sampled to take same number of'
                        'positive/negative labels during training.')
    parser.add_argument('--eval-mode',
                        type=int,
                        default=1,
                        help='Evaluation mode.'
                        '0: only binary_accuracy is calculated.'
                        '1: binary_accuracy and ROC-AUC score is calculated')
    parser.add_argument('--conv-layers',
                        '-c',
                        type=int,
                        default=4,
                        help='number of convolution layers')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=32,
                        help='batch size')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code in CPU.')
    parser.add_argument('--out',
                        '-o',
                        type=str,
                        default='result',
                        help='path to output directory')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10,
                        help='number of epochs')
    parser.add_argument('--unit-num',
                        '-u',
                        type=int,
                        default=16,
                        help='number of units in one layer of the model')
    parser.add_argument('--resume',
                        '-r',
                        type=str,
                        default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol',
                        type=int,
                        default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename',
                        type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data',
                        type=int,
                        default=-1,
                        help='Number of data to be parsed from parser.'
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    # Resolve target labels: a single label string means single-task
    # (class_num == 1); no label means all Tox21 tasks.
    method = args.method
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        class_num = len(label_names)

    # Dataset preparation
    train, val, _ = data.load_dataset(method, labels, num_data=args.num_data)

    # Network
    predictor_ = predictor.build_predictor(method, args.unit_num,
                                           args.conv_layers, class_num)

    # The balanced iterator resamples so positive/negative labels appear
    # equally often; it only supports single-label classification.
    iterator_type = args.iterator_type
    if iterator_type == 'serial':
        train_iter = I.SerialIterator(train, args.batchsize)
    elif iterator_type == 'balanced':
        if class_num > 1:
            raise ValueError('BalancedSerialIterator can be used with only one'
                             'label classification, please specify label to'
                             'be predicted by --label option.')
        train_iter = BalancedSerialIterator(train,
                                            args.batchsize,
                                            train.features[:, -1],
                                            ignore_labels=-1)
        train_iter.show_label_stats()
    else:
        raise ValueError('Invalid iterator type {}'.format(iterator_type))
    val_iter = I.SerialIterator(val,
                                args.batchsize,
                                repeat=False,
                                shuffle=False)

    classifier = Classifier(predictor_,
                            lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy,
                            device=args.gpu)

    optimizer = O.Adam()
    optimizer.setup(classifier)

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       converter=concat_mols)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        E.Evaluator(val_iter,
                    classifier,
                    device=args.gpu,
                    converter=concat_mols))
    trainer.extend(E.LogReport())

    # eval_mode 1 additionally computes ROC-AUC on both the training and
    # validation sets each epoch; mode 0 reports accuracy only.
    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
                'validation/main/accuracy', 'elapsed_time'
            ]))
    elif eval_mode == 1:
        train_eval_iter = I.SerialIterator(train,
                                           args.batchsize,
                                           repeat=False,
                                           shuffle=False)
        trainer.extend(
            ROCAUCEvaluator(train_eval_iter,
                            classifier,
                            eval_func=predictor_,
                            device=args.gpu,
                            converter=concat_mols,
                            name='train',
                            pos_labels=1,
                            ignore_labels=-1,
                            raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # instead extension name `val` is used.
        trainer.extend(
            ROCAUCEvaluator(val_iter,
                            classifier,
                            eval_func=predictor_,
                            device=args.gpu,
                            converter=concat_mols,
                            name='val',
                            pos_labels=1,
                            ignore_labels=-1))
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc',
                'validation/main/loss', 'validation/main/accuracy',
                'val/main/roc_auc', 'elapsed_time'
            ]))
    else:
        raise ValueError('Invalid accfun_mode {}'.format(eval_mode))
    trainer.extend(E.ProgressBar(update_interval=10))
    # Snapshot every ``frequency`` epochs; -1 means once at the end.
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Persist the run configuration alongside the pickled model so the
    # predictor can be rebuilt at inference time.
    config = {
        'method': args.method,
        'conv_layers': args.conv_layers,
        'unit_num': args.unit_num,
        'labels': args.label
    }

    with open(os.path.join(args.out, 'config.json'), 'w') as o:
        o.write(json.dumps(config))

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
Esempio n. 9
0
def main():
    """Train a seq2seq conversation model and track BLEU/WER on a held-out set.

    Builds (or loads) the corpus dictionary, converts posts/comments to
    padded id matrices, trains with teacher forcing, evaluates BLEU and
    WER each epoch, snapshots the model every 10 epochs, applies a
    simple early-stopping heuristic, and pickles the loss/score curves.

    Relies on module-level configuration: ``args``, ``batchsize``,
    ``testsize``, ``n_epoch``, ``data_file``, ``feature_num``,
    ``hidden_num``, and the array module ``xp``.
    """

    ###########################
    #### create dictionary ####
    ###########################

    # Reuse a previously built dictionary when available; otherwise build
    # it from the raw data file and save it for the next run.
    if os.path.exists('./data/corpus/dictionary.dict'):
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=None, batch_size=batchsize)
        else:
            corpus = ConvCorpus(file_path=None, batch_size=batchsize)
        corpus.load(load_dir='./data/corpus/')
    else:
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=data_file, batch_size=batchsize)
        else:
            corpus = ConvCorpus(file_path=data_file, batch_size=batchsize)
        corpus.save(save_dir='./data/corpus/')
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))

    ######################
    #### create model ####
    ######################

    model = Seq2Seq(len(corpus.dic.token2id),
                    feature_num=feature_num,
                    hidden_num=hidden_num,
                    batch_size=batchsize,
                    gpu_flg=args.gpu)
    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))
    # optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    ##########################
    #### create ID corpus ####
    ##########################

    input_mat = []
    output_mat = []
    max_input_ren = max_output_ren = 0

    for input_text, output_text in zip(corpus.posts, corpus.cmnts):

        # convert to list
        input_text.reverse()  # encode words in a reverse order
        input_text.insert(0, corpus.dic.token2id["<eos>"])
        output_text.append(corpus.dic.token2id["<eos>"])

        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))

        input_mat.append(input_text)
        output_mat.append(output_text)

    # padding: inputs are left-padded (they are fed reversed), outputs
    # are right-padded, so every row has the same length.
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])

    # create batch matrix (transposed: rows are timesteps, columns samples)
    input_mat = np.array(input_mat, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T

    # separate corpus into Train and Test
    perm = np.random.permutation(len(corpus.posts))
    test_input_mat = input_mat[:, perm[0:0 + testsize]]
    test_output_mat = output_mat[:, perm[0:0 + testsize]]
    train_input_mat = input_mat[:, perm[testsize:]]
    train_output_mat = output_mat[:, perm[testsize:]]

    # Pre-compute BLEU references from the test targets.
    # BUGFIX: the original used ``w_id is not -1`` -- an identity check
    # against an int literal, which is implementation-defined; use ``!=``.
    # NOTE(review): padding uses token2id['<pad>'], so filtering out -1
    # may be a no-op here -- confirm the intended pad id.
    list_of_references = []
    for text_ndarray in test_output_mat.T:
        reference = text_ndarray.tolist()
        references = [[w_id for w_id in reference if w_id != -1]]
        list_of_references.append(references)

    #############################
    #### train seq2seq model ####
    #############################

    accum_loss = 0
    train_loss_data = []
    test_loss_data = []
    bleu_score_data = []
    wer_score_data = []
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = test_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.posts) - testsize)

        # for training: teacher forcing -- the decoder always receives
        # the gold previous word.
        for i in range(0, len(corpus.posts) - testsize, batchsize):

            # select batch data
            input_batch = train_input_mat[:, perm[i:i + batchsize]]
            output_batch = train_output_mat[:, perm[i:i + batchsize]]

            # Encode a sentence
            model.initialize()  # initialize cell
            model.encode(input_batch,
                         train=True)  # encode (output: hidden Variable)

            # Decode from encoded context
            end_batch = xp.array(
                [corpus.dic.token2id["<start>"] for _ in range(batchsize)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch,
                                             first_words,
                                             train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss

            # learn model
            model.cleargrads()  # initialize all grad to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0

        # for testing: feed back the model's own argmax predictions.
        # NOTE(review): decode is still called with train=True here --
        # confirm whether evaluation should use train=False.
        list_of_hypotheses = []
        for i in range(0, testsize, batchsize):

            # select test batch data
            input_batch = test_input_mat[:, i:i + batchsize]
            output_batch = test_output_mat[:, i:i + batchsize]

            # Encode a sentence
            model.initialize()  # initialize cell
            model.encode(input_batch,
                         train=True)  # encode (output: hidden Variable)

            # Decode from encoded context
            end_batch = xp.array(
                [corpus.dic.token2id["<start>"] for _ in range(batchsize)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch,
                                             first_words,
                                             train=True)
            next_ids = xp.argmax(predict_mat.data, axis=1)
            test_loss += loss
            if args.gpu >= 0:
                hypotheses = [cuda.to_cpu(next_ids)]
            else:
                hypotheses = [next_ids]
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = xp.argmax(predict_mat.data, axis=1)
                test_loss += loss
                if args.gpu >= 0:
                    hypotheses.append(cuda.to_cpu(next_ids))
                else:
                    hypotheses.append(next_ids)

            # collect hypotheses for calculating BLEU score
            # (BUGFIX: ``!=`` instead of ``is not`` -- see note above)
            hypotheses = np.array(hypotheses).T
            for hypothesis in hypotheses:
                text_list = hypothesis.tolist()
                list_of_hypotheses.append(
                    [w_id for w_id in text_list if w_id != -1])

        # calculate BLEU score from test (develop) data
        bleu_score = nltk.translate.bleu_score.corpus_bleu(list_of_references,
                                                           list_of_hypotheses,
                                                           weights=(0.25, 0.25,
                                                                    0.25,
                                                                    0.25))
        bleu_score_data.append(bleu_score)
        print('Epoch: ', num, 'BLEU SCORE: ', bleu_score)

        # calculate WER score from test (develop) data
        wer_score = 0
        for index, references in enumerate(list_of_references):
            wer_score += wer(references[0], list_of_hypotheses[index])
        wer_score /= len(list_of_references)
        wer_score_data.append(wer_score)
        print('Epoch: ', num, 'WER SCORE: ', wer_score)

        # save model and optimizer
        if (epoch + 1) % 10 == 0:
            print('-----', epoch + 1, ' times -----')
            print('save the model and optimizer')
            serializers.save_hdf5('data/' + str(epoch) + '.model', model)
            serializers.save_hdf5('data/' + str(epoch) + '.state', optimizer)

        # display the on-going status
        print('Epoch: ', num, 'Train loss: {:.2f}'.format(total_loss),
              'Test loss: {:.2f}'.format(float(test_loss.data)))
        train_loss_data.append(float(total_loss / batch_num))
        test_loss_data.append(float(test_loss.data))

        # Early stopping: if the test loss rose in more than 8 of the last
        # 10 epochs, assume over-fitting and stop.
        check_loss = test_loss_data[-10:]  # check out the last 10 loss data
        end_flg = [
            j for j in range(len(check_loss) - 1)
            if check_loss[j] < check_loss[j + 1]
        ]
        if len(end_flg) > 8:
            print('Probably it is over-fitting. So stop to learn...')
            break

    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
    with open('./data/loss_test_data.pkl', 'wb') as f:
        pickle.dump(test_loss_data, f)
    with open('./data/bleu_score_data.pkl', 'wb') as f:
        pickle.dump(bleu_score_data, f)
    with open('./data/wer_score_data.pkl', 'wb') as f:
        pickle.dump(wer_score_data, f)
 def create(self):
     """Build an Adam optimizer with alpha=0.05.

     For float16 parameters the default epsilon underflows, so a larger
     ``eps`` of 1e-6 is supplied in that case.
     """
     extra = {'eps': 1e-6} if self.dtype == numpy.float16 else {}
     return optimizers.Adam(0.05, **extra)
Esempio n. 11
0
    def _test_abc(self, use_lstm, discrete=True, steps=1000000,
                  require_success=True, gpu=-1):
        """Train a REINFORCE agent on the toy ABC env and verify it.

        Builds an LSTM-based or feed-forward policy (softmax for discrete
        action spaces, Gaussian otherwise), trains it with evaluation
        until ``steps`` or a successful score, then runs 5 deterministic
        test episodes and (if ``require_success``) asserts total reward 1.
        """

        def make_env(process_idx, test):
            size = 2
            # NOTE(review): partially_observable reads ``self.use_lstm``
            # while the model choice below uses the ``use_lstm`` parameter
            # -- confirm these are always set consistently.
            return ABC(size=size, discrete=discrete, episodic=True,
                       partially_observable=self.use_lstm,
                       deterministic=test)

        sample_env = make_env(0, False)
        action_space = sample_env.action_space
        obs_space = sample_env.observation_space

        # Identity feature extractor.
        def phi(x):
            return x

        n_hidden_channels = 20
        n_hidden_layers = 1
        nonlinearity = F.leaky_relu
        # Four policy variants: {LSTM, feed-forward} x {discrete, continuous}.
        if use_lstm:
            if discrete:
                model = chainerrl.links.Sequence(
                    L.LSTM(obs_space.low.size, n_hidden_channels,
                           forget_bias_init=1),
                    policies.FCSoftmaxPolicy(
                        n_hidden_channels, action_space.n,
                        n_hidden_channels=n_hidden_channels,
                        n_hidden_layers=n_hidden_layers,
                        nonlinearity=nonlinearity),
                )
            else:
                model = chainerrl.links.Sequence(
                    L.LSTM(obs_space.low.size, n_hidden_channels,
                           forget_bias_init=1),
                    policies.FCGaussianPolicy(
                        n_hidden_channels, action_space.low.size,
                        n_hidden_channels=n_hidden_channels,
                        n_hidden_layers=n_hidden_layers,
                        bound_mean=True,
                        min_action=action_space.low,
                        max_action=action_space.high,
                        nonlinearity=nonlinearity,
                    )
                )
        else:
            if discrete:
                model = policies.FCSoftmaxPolicy(
                    obs_space.low.size, action_space.n,
                    n_hidden_channels=n_hidden_channels,
                    n_hidden_layers=n_hidden_layers,
                    nonlinearity=nonlinearity)
            else:
                model = policies.FCGaussianPolicy(
                    obs_space.low.size, action_space.low.size,
                    n_hidden_channels=n_hidden_channels,
                    n_hidden_layers=n_hidden_layers,
                    bound_mean=True,
                    min_action=action_space.low,
                    max_action=action_space.high,
                    nonlinearity=nonlinearity,
                )

        if gpu >= 0:
            chainer.cuda.get_device(gpu).use()
            model.to_gpu()

        opt = optimizers.Adam()
        opt.setup(model)
        # Entropy regularization coefficient.
        beta = 1e-2
        agent = chainerrl.agents.REINFORCE(
            model, opt,
            beta=beta,
            phi=phi,
            batchsize=self.batchsize,
            backward_separately=self.backward_separately,
            act_deterministically=True,
        )

        # Train until ``steps`` or until the evaluation score reaches 1.
        chainerrl.experiments.train_agent_with_evaluation(
            agent=agent,
            env=make_env(0, False),
            eval_env=make_env(0, True),
            outdir=self.outdir,
            steps=steps,
            max_episode_len=2,
            eval_interval=500,
            eval_n_runs=5,
            successful_score=1)

        # Test
        env = make_env(0, True)
        n_test_runs = 5

        for _ in range(n_test_runs):
            total_r = 0
            obs = env.reset()
            done = False
            reward = 0.0

            while not done:
                action = agent.act(obs)
                print('state:', obs, 'action:', action)
                obs, reward, done, _ = env.step(action)
                total_r += reward
            if require_success:
                self.assertAlmostEqual(total_r, 1)
            agent.stop_episode()
Esempio n. 12
0
    # Resume from a saved model if one was specified.
    start_epoch = 0
    if args.model != -1:  # a saved model exists
        serializers.load_hdf5('epoch_' + str(args.model) + '.model', model)
        start_epoch = args.model

    if args.gpus >= 0:  # GPU setup
        cuda.get_device_from_id(args.gpus).use()
        model.to_gpu(args.gpus)
        xp = cuda.cupy

    # Choose the optimization algorithm.
    # NOTE(review): if args.optimize matches none of these choices,
    # ``optm`` is unbound and optm.setup() below raises NameError --
    # confirm argparse restricts the value with ``choices``.
    if args.optimize == 'sgd':
        optm = optimizers.SGD()
    elif args.optimize == 'adagrad':
        optm = optimizers.AdaGrad()
    elif args.optimize == 'adam':
        optm = optimizers.Adam()
    elif args.optimize == 'adadelta':
        optm = optimizers.AdaDelta()
    elif args.optimize == 'rmsprop':
        optm = optimizers.RMSprop()
    optm.setup(model)
    optm.add_hook(chainer.optimizer.WeightDecay(0.0001))
    optm.add_hook(chainer.optimizer.GradientClipping(5))

    start_time = time.time()
    cur_time = start_time

    print('Training start...')
    for epoch in range(start_epoch, args.epoch):
        train_batch_loss = []
        dev_batch_loss = []
def main():
    """Train or evaluate a Soft Actor-Critic agent on a Gym MuJoCo env.

    Builds a squashed-Gaussian policy and twin Q-functions, wraps them in
    chainerrl's SoftActorCritic agent, and either runs evaluation only
    (``--demo``) or trains with periodic evaluation in (possibly parallel)
    environments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--outdir',
                        type=str,
                        default='results',
                        help='Directory path to save output files.'
                        ' If it does not exist, it will be created.')
    parser.add_argument('--env',
                        type=str,
                        default='Hopper-v2',
                        help='OpenAI Gym MuJoCo env to perform algorithm on.')
    parser.add_argument('--num-envs',
                        type=int,
                        default=1,
                        help='Number of envs run in parallel.')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--load',
                        type=str,
                        default='',
                        help='Directory to load agent from.')
    parser.add_argument('--steps',
                        type=int,
                        default=10**6,
                        help='Total number of timesteps to train the agent.')
    parser.add_argument('--eval-n-runs',
                        type=int,
                        default=10,
                        help='Number of episodes run for each evaluation.')
    parser.add_argument('--eval-interval',
                        type=int,
                        default=5000,
                        help='Interval in timesteps between evaluations.')
    parser.add_argument('--replay-start-size',
                        type=int,
                        default=10000,
                        help='Minimum replay buffer size before ' +
                        'performing gradient updates.')
    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        help='Minibatch size')
    parser.add_argument('--render',
                        action='store_true',
                        help='Render env states in a GUI window.')
    parser.add_argument('--demo',
                        action='store_true',
                        help='Just run evaluation, not training.')
    parser.add_argument('--monitor',
                        action='store_true',
                        help='Wrap env with gym.wrappers.Monitor.')
    parser.add_argument('--log-interval',
                        type=int,
                        default=1000,
                        help='Interval in timesteps between outputting log'
                        ' messages during training')
    parser.add_argument('--logger-level',
                        type=int,
                        default=logging.INFO,
                        help='Level of the root logger.')
    parser.add_argument('--policy-output-scale',
                        type=float,
                        default=1.,
                        help='Weight initialization scale of policy output.')
    parser.add_argument('--debug', action='store_true', help='Debug mode.')
    args = parser.parse_args()

    logging.basicConfig(level=args.logger_level)

    if args.debug:
        chainer.set_debug(True)

    args.outdir = experiments.prepare_output_dir(args,
                                                 args.outdir,
                                                 argv=sys.argv)
    print('Output files are saved in {}'.format(args.outdir))

    # Set a random seed used in ChainerRL
    misc.set_random_seed(args.seed, gpus=(args.gpu, ))

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    def make_env(process_idx, test):
        """Create one (optionally wrapped) env for the given subprocess."""
        env = gym.make(args.env)
        # Unwrap TimeLimit wrapper
        assert isinstance(env, gym.wrappers.TimeLimit)
        env = env.env
        # Use different random seeds for train and test envs
        process_seed = int(process_seeds[process_idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        # Normalize action space to [-1, 1]^n
        env = chainerrl.wrappers.NormalizeActionSpace(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    def make_batch_env(test):
        # One subprocess env per index; each gets its own seed via make_env.
        return chainerrl.envs.MultiprocessVectorEnv([
            functools.partial(make_env, idx, test)
            for idx in range(args.num_envs)
        ])

    sample_env = make_env(process_idx=0, test=False)
    timestep_limit = sample_env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = sample_env.observation_space
    action_space = sample_env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    action_size = action_space.low.size

    winit = chainer.initializers.GlorotUniform()
    winit_policy_output = chainer.initializers.GlorotUniform(
        args.policy_output_scale)

    def squashed_diagonal_gaussian_head(x):
        # The last layer emits mean and log-scale for each action dimension.
        assert x.shape[-1] == action_size * 2
        mean, log_scale = F.split_axis(x, 2, axis=1)
        # Clip log-scale for numerical stability before exponentiation.
        log_scale = F.clip(log_scale, -20, 2)
        var = F.exp(log_scale * 2)
        return chainerrl.distribution.SquashedGaussianDistribution(mean,
                                                                   var=var)

    policy = chainer.Sequential(
        L.Linear(None, 256, initialW=winit),
        F.relu,
        L.Linear(None, 256, initialW=winit),
        F.relu,
        L.Linear(None, action_size * 2, initialW=winit_policy_output),
        squashed_diagonal_gaussian_head,
    )
    policy_optimizer = optimizers.Adam(3e-4).setup(policy)

    def make_q_func_with_optimizer():
        """Build one state-action value network with its own optimizer."""
        q_func = chainer.Sequential(
            concat_obs_and_action,
            L.Linear(None, 256, initialW=winit),
            F.relu,
            L.Linear(None, 256, initialW=winit),
            F.relu,
            L.Linear(None, 1, initialW=winit),
        )
        q_func_optimizer = optimizers.Adam(3e-4).setup(q_func)
        return q_func, q_func_optimizer

    # SAC uses twin Q-functions to reduce overestimation bias.
    q_func1, q_func1_optimizer = make_q_func_with_optimizer()
    q_func2, q_func2_optimizer = make_q_func_with_optimizer()

    # Draw the computational graph and save it in the output directory.
    fake_obs = chainer.Variable(policy.xp.zeros_like(obs_space.low,
                                                     dtype=np.float32)[None],
                                name='observation')
    fake_action = chainer.Variable(policy.xp.zeros_like(
        action_space.low, dtype=np.float32)[None],
                                   name='action')
    chainerrl.misc.draw_computational_graph([policy(fake_obs)],
                                            os.path.join(
                                                args.outdir, 'policy'))
    chainerrl.misc.draw_computational_graph([q_func1(fake_obs, fake_action)],
                                            os.path.join(
                                                args.outdir, 'q_func1'))
    chainerrl.misc.draw_computational_graph([q_func2(fake_obs, fake_action)],
                                            os.path.join(
                                                args.outdir, 'q_func2'))

    rbuf = replay_buffer.ReplayBuffer(10**6)

    def burnin_action_func():
        """Select random actions until model is updated one or more times."""
        return np.random.uniform(action_space.low,
                                 action_space.high).astype(np.float32)

    # Hyperparameters in http://arxiv.org/abs/1802.09477
    agent = chainerrl.agents.SoftActorCritic(
        policy,
        q_func1,
        q_func2,
        policy_optimizer,
        q_func1_optimizer,
        q_func2_optimizer,
        rbuf,
        gamma=0.99,
        replay_start_size=args.replay_start_size,
        gpu=args.gpu,
        minibatch_size=args.batch_size,
        burnin_action_func=burnin_action_func,
        entropy_target=-action_size,
        temperature_optimizer=chainer.optimizers.Adam(3e-4),
    )

    if len(args.load) > 0:
        agent.load(args.load)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=make_batch_env(test=True),
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            max_episode_len=timestep_limit,
        )
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:
        experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=make_batch_env(test=False),
            eval_env=make_batch_env(test=True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            max_episode_len=timestep_limit,
        )
Esempio n. 14
0
def main():
    """Train or evaluate a DQN (or NAF, for continuous actions) agent.

    Parses command-line options, constructs the environment, Q-function,
    explorer and replay buffer, then either evaluates a loaded agent
    (``--demo``) or trains with periodic evaluation.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('--outdir',
                        type=str,
                        default='results',
                        help='Directory path to save output files.'
                        ' If it does not exist, it will be created.')
    parser.add_argument('--env', type=str, default='Pendulum-v0')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--final-exploration-steps', type=int, default=10**4)
    parser.add_argument('--start-epsilon', type=float, default=1.0)
    parser.add_argument('--end-epsilon', type=float, default=0.1)
    parser.add_argument('--noisy-net-sigma', type=float, default=None)
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--steps', type=int, default=10**5)
    parser.add_argument('--prioritized-replay', action='store_true')
    parser.add_argument('--episodic-replay', action='store_true')
    parser.add_argument('--replay-start-size', type=int, default=1000)
    parser.add_argument('--target-update-interval', type=int, default=10**2)
    parser.add_argument('--target-update-method', type=str, default='hard')
    parser.add_argument('--soft-update-tau', type=float, default=1e-2)
    parser.add_argument('--update-interval', type=int, default=1)
    parser.add_argument('--eval-n-runs', type=int, default=100)
    parser.add_argument('--eval-interval', type=int, default=10**4)
    parser.add_argument('--n-hidden-channels', type=int, default=100)
    parser.add_argument('--n-hidden-layers', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--minibatch-size', type=int, default=None)
    parser.add_argument('--render-train', action='store_true')
    parser.add_argument('--render-eval', action='store_true')
    parser.add_argument('--monitor', action='store_true')
    parser.add_argument('--reward-scale-factor', type=float, default=1e-3)
    args = parser.parse_args()

    # Set a random seed used in ChainerRL
    misc.set_random_seed(args.seed, gpus=(args.gpu, ))

    args.outdir = experiments.prepare_output_dir(args,
                                                 args.outdir,
                                                 argv=sys.argv)
    print('Output files are saved in {}'.format(args.outdir))

    def clip_action_filter(a):
        # Keep actions within the env's Box bounds.
        return np.clip(a, action_space.low, action_space.high)

    def make_env(test):
        env = gym.make(args.env)
        # Use different random seeds for train and test envs
        env_seed = 2**32 - 1 - args.seed if test else args.seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if isinstance(env.action_space, spaces.Box):
            misc.env_modifiers.make_action_filtered(env, clip_action_filter)
        if not test:
            # Scale rewards (and thus returns) to a reasonable range so that
            # training is easier
            env = chainerrl.wrappers.ScaleReward(env, args.reward_scale_factor)
        if ((args.render_eval and test) or (args.render_train and not test)):
            env = chainerrl.wrappers.Render(env)
        return env

    env = make_env(test=False)
    timestep_limit = env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = env.observation_space
    obs_size = obs_space.low.size
    action_space = env.action_space

    if isinstance(action_space, spaces.Box):
        action_size = action_space.low.size
        # Use NAF to apply DQN to continuous action spaces
        q_func = q_functions.FCQuadraticStateQFunction(
            obs_size,
            action_size,
            n_hidden_channels=args.n_hidden_channels,
            n_hidden_layers=args.n_hidden_layers,
            action_space=action_space)
        # Use the Ornstein-Uhlenbeck process for exploration
        ou_sigma = (action_space.high - action_space.low) * 0.2
        explorer = explorers.AdditiveOU(sigma=ou_sigma)
    else:
        n_actions = action_space.n
        q_func = q_functions.FCStateQFunctionWithDiscreteAction(
            obs_size,
            n_actions,
            n_hidden_channels=args.n_hidden_channels,
            n_hidden_layers=args.n_hidden_layers)
        # Use epsilon-greedy for exploration
        explorer = explorers.LinearDecayEpsilonGreedy(
            args.start_epsilon, args.end_epsilon, args.final_exploration_steps,
            action_space.sample)

    if args.noisy_net_sigma is not None:
        # BUGFIX: forward the requested noise scale; previously
        # args.noisy_net_sigma was parsed but never used, so its value
        # had no effect on the noisy layers.
        links.to_factorized_noisy(q_func, sigma_scale=args.noisy_net_sigma)
        # Turn off explorer
        explorer = explorers.Greedy()

    # Draw the computational graph and save it in the output directory.
    chainerrl.misc.draw_computational_graph(
        [q_func(np.zeros_like(obs_space.low, dtype=np.float32)[None])],
        os.path.join(args.outdir, 'model'))

    opt = optimizers.Adam()
    opt.setup(q_func)

    rbuf_capacity = 5 * 10**5
    if args.episodic_replay:
        if args.minibatch_size is None:
            args.minibatch_size = 4
        if args.prioritized_replay:
            # Anneal beta over the number of gradient updates.
            betasteps = (args.steps - args.replay_start_size) \
                // args.update_interval
            rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(
                rbuf_capacity, betasteps=betasteps)
        else:
            rbuf = replay_buffer.EpisodicReplayBuffer(rbuf_capacity)
    else:
        if args.minibatch_size is None:
            args.minibatch_size = 32
        if args.prioritized_replay:
            betasteps = (args.steps - args.replay_start_size) \
                // args.update_interval
            rbuf = replay_buffer.PrioritizedReplayBuffer(rbuf_capacity,
                                                         betasteps=betasteps)
        else:
            rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)

    agent = DQN(q_func,
                opt,
                rbuf,
                gpu=args.gpu,
                gamma=args.gamma,
                explorer=explorer,
                replay_start_size=args.replay_start_size,
                target_update_interval=args.target_update_interval,
                update_interval=args.update_interval,
                minibatch_size=args.minibatch_size,
                target_update_method=args.target_update_method,
                soft_update_tau=args.soft_update_tau,
                episodic_update=args.episodic_replay,
                episodic_update_len=16)

    if args.load:
        agent.load(args.load)

    eval_env = make_env(test=True)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:
        experiments.train_agent_with_evaluation(
            agent=agent,
            env=env,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            eval_interval=args.eval_interval,
            outdir=args.outdir,
            eval_env=eval_env,
            train_max_episode_len=timestep_limit)
Esempio n. 15
0
def main():
    """Train a pairwise molecular-interaction classifier.

    Parses SMILES-pair CSV data, builds a graph-convolution predictor,
    trains it with early stopping, periodic accuracy/ROC-AUC/PRC-AUC/F1
    evaluation, learning-rate scheduling and snapshots, then pickles the
    trained classifier.
    """
    # Parse the arguments.
    args = parse_arguments()
    # String flags arrive as 'True'/'False' text; convert to real booleans.
    augment = args.augment != 'False'
    multi_gpu = args.multi_gpu != 'False'
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    test = parser.parse(args.test_datafile)['dataset']

    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_test = test.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_test))

    # Hidden-layer sizes for the head network, given as a comma-separated list.
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args.net_hidden_dims.split(',')
        ])
    else:
        net_hidden_dims = ()
    fp_attention = bool(args.fp_attention)
    update_attention = bool(args.update_attention)
    weight_tying = args.weight_tying != 'False'
    attention_tying = args.attention_tying != 'False'
    fp_batch_normalization = args.fp_bn == 'True'
    layer_aggregator = None if args.layer_aggregator == '' else args.layer_aggregator
    context = args.context != 'False'
    output_activation = functions.relu if args.output_activation == 'relu' else None
    # NOTE(review): `weight_typing` is the kwarg name expected by
    # set_up_predictor; kept as-is to match that project API.
    predictor = set_up_predictor(
        method=args.method,
        fp_hidden_dim=args.fp_hidden_dim,
        fp_out_dim=args.fp_out_dim,
        conv_layers=args.conv_layers,
        concat_hidden=args.concat_hidden,
        layer_aggregator=layer_aggregator,
        fp_dropout_rate=args.fp_dropout_rate,
        fp_batch_normalization=fp_batch_normalization,
        net_hidden_dims=net_hidden_dims,
        class_num=class_num,
        sim_method=args.sim_method,
        fp_attention=fp_attention,
        weight_typing=weight_tying,
        attention_tying=attention_tying,
        update_attention=update_attention,
        fp_max_degree=args.fp_max_degree,
        context=context,
        context_layers=args.context_layers,
        context_dropout=args.context_dropout,
        message_function=args.message_function,
        readout_function=args.readout_function,
        num_timesteps=args.num_timesteps,
        num_output_hidden_layers=args.num_output_hidden_layers,
        output_hidden_dim=args.output_hidden_dim,
        output_activation=output_activation,
        symmetric=args.symmetric)

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(test,
                               args.batchsize,
                               repeat=False,
                               shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor,
                            lossfun=loss_func,
                            metrics_fun=metrics_fun,
                            device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices={
                                               'main': 0,
                                               'second': 1
                                           },
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    # NOTE: `patients` (sic) is chainer's actual EarlyStoppingTrigger kwarg.
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patients=10,
                                               max_trigger=(500, 'epoch'))
    out = os.path.join('output', args.out)
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        E.Evaluator(test_iter,
                    classifier,
                    device=args.gpu,
                    converter=concat_mols))

    # Separate non-shuffling iterator so train metrics are reproducible.
    train_eval_iter = SerialIterator(train,
                                     args.batchsize,
                                     repeat=False,
                                     shuffle=False)

    trainer.extend(
        AccuracyEvaluator(train_eval_iter,
                          classifier,
                          eval_func=predictor,
                          device=args.gpu,
                          converter=concat_mols,
                          name='train_acc',
                          pos_labels=1,
                          ignore_labels=-1,
                          raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        AccuracyEvaluator(test_iter,
                          classifier,
                          eval_func=predictor,
                          device=args.gpu,
                          converter=concat_mols,
                          name='val_acc',
                          pos_labels=1,
                          ignore_labels=-1))

    trainer.extend(
        ROCAUCEvaluator(train_eval_iter,
                        classifier,
                        eval_func=predictor,
                        device=args.gpu,
                        converter=concat_mols,
                        name='train_roc',
                        pos_labels=1,
                        ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        ROCAUCEvaluator(test_iter,
                        classifier,
                        eval_func=predictor,
                        device=args.gpu,
                        converter=concat_mols,
                        name='val_roc',
                        pos_labels=1,
                        ignore_labels=-1))

    trainer.extend(
        PRCAUCEvaluator(train_eval_iter,
                        classifier,
                        eval_func=predictor,
                        device=args.gpu,
                        converter=concat_mols,
                        name='train_prc',
                        pos_labels=1,
                        ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        PRCAUCEvaluator(test_iter,
                        classifier,
                        eval_func=predictor,
                        device=args.gpu,
                        converter=concat_mols,
                        name='val_prc',
                        pos_labels=1,
                        ignore_labels=-1))

    trainer.extend(
        F1Evaluator(train_eval_iter,
                    classifier,
                    eval_func=predictor,
                    device=args.gpu,
                    converter=concat_mols,
                    name='train_f',
                    pos_labels=1,
                    ignore_labels=-1,
                    raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        F1Evaluator(test_iter,
                    classifier,
                    eval_func=predictor,
                    device=args.gpu,
                    converter=concat_mols,
                    name='val_f',
                    pos_labels=1,
                    ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss',
        'train_acc/main/accuracy',
        'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss',
        'val_acc/main/accuracy',
        'val_roc/main/roc_auc',
        'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(
        E.PlotReport(['main/loss', 'validation/main/loss'],
                     'epoch',
                     file_name='loss.png'))
    trainer.extend(
        E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'],
                     'epoch',
                     file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained model to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
# Report dataset sizes for the training and validation splits.
# NOTE(review): list_length / all_instance_list / Get_name_list / DRS / w /
# gpu_id / num_batch_instance are defined earlier in the file.
print(list_length)
all_valid_instance_list = Get_name_list(mode='Valid')
valid_list_length = len(all_valid_instance_list)
print(valid_list_length)

# initialize DRS networks
drs = DRS(w)

# Move the network to the GPU when device 0 is selected.
if (gpu_id == 0):
    drs.to_gpu(gpu_id)

# `index` walks through the shuffled training list across iterations;
# the loss lists accumulate per-iteration training/validation losses.
index = 0
loss_train_list = []
loss_valid_list = []

optimizer = optimizers.Adam(alpha=0.0001,beta1=0.9,beta2=0.999)
optimizer.setup(drs)

iterations = 30000
num_sample = 40 # (if =1, NaN appears)

# Start training
for i in range(iterations):

    # Draw the next mini-batch of training instances, reshuffling and
    # wrapping around once the whole list has been consumed.
    train_list = []
    for j in range(num_batch_instance):
        if (index == list_length):
            random.shuffle(all_instance_list) # shuffle
            index = 0
        train_list.append(all_instance_list[index])
        index += 1
Esempio n. 17
0
# Derive the number of iterations per epoch from dataset and batch sizes.
# NOTE(review): n_data, batchsize, n_epoch, args, image_size, FastStyleNet,
# VGG and gram_matrix are defined earlier in the file.
n_iter = n_data // batchsize
print(n_iter, 'iterations,', n_epoch, 'epochs')

# Build the style-transfer network and the fixed VGG16 feature extractor.
model = FastStyleNet()
vgg = VGG()
serializers.load_npz('vgg16.model', vgg)
if args.initmodel:
    print('load model from', args.initmodel)
    serializers.load_npz(args.initmodel, model)
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
    vgg.to_gpu()
xp = np if args.gpu < 0 else cuda.cupy

O = optimizers.Adam(alpha=args.lr)
O.setup(model)
if args.resume:
    print('load optimizer state from', args.resume)
    serializers.load_npz(args.resume, O)

# Load and preprocess the style image, tile it across the batch, and compute
# the style Gram matrices once up front (they are constant during training).
style = vgg.preprocess(np.asarray(Image.open(args.style_image).convert('RGB').resize((image_size,image_size)), dtype=np.float32))
style = xp.asarray(style, dtype=xp.float32)
style_b = xp.zeros((batchsize,) + style.shape, dtype=xp.float32)
for i in range(batchsize):
    style_b[i] = style
feature_s = vgg(Variable(style_b))
gram_s = [gram_matrix(y) for y in feature_s]

for epoch in range(n_epoch):
    print('epoch', epoch)
Esempio n. 18
0
def main():
    """Train a DQN (or Double DQN) agent on an OpenAI Gym environment.

    Parses command-line options, builds the environment and Q-network,
    then runs episodic training with an epsilon-greedy policy, experience
    replay and a periodically-synchronised target network.  Stops early
    once the environment's reward threshold (if any) is reached.
    """
    parser = argparse.ArgumentParser(description='Chainer example: DQN')
    parser.add_argument('--env',
                        type=str,
                        default='CartPole-v0',
                        help='Name of the OpenAI Gym environment')
    parser.add_argument('--batch-size',
                        '-b',
                        type=int,
                        default=64,
                        help='Number of transitions in each mini-batch')
    parser.add_argument('--episodes',
                        '-e',
                        type=int,
                        default=1000,
                        help='Number of episodes to run')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='dqn_result',
                        help='Directory to output the result')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=100,
                        help='Number of units')
    parser.add_argument('--target-type',
                        type=str,
                        default='dqn',
                        help='Target type',
                        choices=['dqn', 'double_dqn'])
    parser.add_argument('--reward-scale',
                        type=float,
                        default=1e-2,
                        help='Reward scale factor')
    parser.add_argument('--replay-start-size',
                        type=int,
                        default=500,
                        help=('Number of iterations after which replay is '
                              'started'))
    parser.add_argument('--iterations-to-decay-epsilon',
                        type=int,
                        default=5000,
                        help='Number of steps used to linearly decay epsilon')
    parser.add_argument('--min-epsilon',
                        type=float,
                        default=0.01,
                        help='Minimum value of epsilon')
    parser.add_argument('--target-update-freq',
                        type=int,
                        default=100,
                        help='Frequency of target network update')
    parser.add_argument('--record',
                        action='store_true',
                        default=True,
                        help='Record performance')
    parser.add_argument('--no-record', action='store_false', dest='record')
    args = parser.parse_args()

    # Initialize an environment
    env = gym.make(args.env)
    # DQN here assumes a continuous observation vector and discrete actions.
    assert isinstance(env.observation_space, gym.spaces.Box)
    assert isinstance(env.action_space, gym.spaces.Discrete)
    obs_size = env.observation_space.low.size
    n_actions = env.action_space.n
    if args.record:
        env = gym.wrappers.Monitor(env, args.out, force=True)
    reward_threshold = env.spec.reward_threshold
    if reward_threshold is not None:
        print('{} defines "solving" as getting average reward of {} over 100 '
              'consecutive trials.'.format(args.env, reward_threshold))
    else:
        print('{} is an unsolved environment, which means it does not have a '
              'specified reward threshold at which it\'s considered '
              'solved.'.format(args.env))

    # Initialize variables
    D = collections.deque(maxlen=10**6)  # Replay buffer
    Rs = collections.deque(maxlen=100)  # History of returns
    iteration = 0  # global step counter across all episodes

    # Initialize a model and its optimizer
    Q = QFunction(obs_size, n_actions, n_units=args.unit)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        Q.to_gpu(args.gpu)
    target_Q = copy.deepcopy(Q)
    opt = optimizers.Adam(eps=1e-2)
    opt.setup(Q)

    for episode in range(args.episodes):

        obs = env.reset()
        done = False
        R = 0.0  # Return (sum of rewards obtained in an episode)
        timestep = 0

        # NOTE(review): env.spec.timestep_limit is removed in newer gym
        # releases (replaced by max_episode_steps) -- confirm gym version.
        while not done and timestep < env.spec.timestep_limit:

            # Epsilon is linearly decayed once replay has started; before
            # that the policy is fully random (epsilon = 1).
            epsilon = 1.0 if len(D) < args.replay_start_size else \
                max(args.min_epsilon,
                    np.interp(
                        iteration,
                        [0, args.iterations_to_decay_epsilon],
                        [1.0, args.min_epsilon]))

            # Select an action epsilon-greedily
            if np.random.rand() < epsilon:
                action = env.action_space.sample()
            else:
                action = get_greedy_action(Q, obs)

            # Execute an action
            new_obs, reward, done, _ = env.step(action)
            R += reward

            # Store a transition
            D.append((obs, action, reward * args.reward_scale, done, new_obs))
            obs = new_obs

            # Sample a random minibatch of transitions and replay
            if len(D) >= args.replay_start_size:
                sample_indices = random.sample(range(len(D)), args.batch_size)
                samples = [D[i] for i in sample_indices]
                update(Q, target_Q, opt, samples, target_type=args.target_type)

            # Update the target network
            if iteration % args.target_update_freq == 0:
                target_Q = copy.deepcopy(Q)

            iteration += 1
            timestep += 1

        Rs.append(R)
        average_R = np.mean(Rs)
        print('episode: {} iteration: {} R: {} average_R: {}'.format(
            episode, iteration, R, average_R))

        # Early stop once the 100-episode moving average solves the task.
        if reward_threshold is not None and average_R >= reward_threshold:
            print('Solved {} by getting average reward of '
                  '{} >= {} over 100 consecutive episodes.'.format(
                      args.env, average_R, reward_threshold))
            break
Esempio n. 19
0
def set_optimizer(model, alpha, beta):
    """Return an Adam optimizer (learning rate *alpha*, beta1 *beta*) set up on *model*."""
    adam = optimizers.Adam(alpha=alpha, beta1=beta)
    adam.setup(model)
    return adam
Esempio n. 20
0
def train_dcgan_labeled(gen, dis, epoch0=0):
    """Train a domain-transfer GAN (Python 2 code).

    gen maps images to (reconstruction, latent); dis classifies images into
    three classes: 0 = generated from source, 1 = generated from target,
    2 = real target.  Uses module-level globals: xp, nz, n_epoch, n_train,
    batchsize, s_dataset, t_dataset, alpha, beta, gamma,
    image_save_interval, out_image_dir, out_model_dir, fs.

    epoch0 -- epoch index to resume counting from.
    """
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    # Fixed visualisation noise; the last 50 rows are refreshed per save.
    zvis = (xp.random.uniform(-1, 1, (100, nz), dtype=np.float32))

    for epoch in xrange(epoch0, n_epoch):
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)

        for i in xrange(0, n_train, batchsize):
            # discriminator
            # 0: from s_dataset
            # 1: from s_dataset encode decode
            # 2: from s_dataset encode decode

            # Load a minibatch from each domain, with random horizontal
            # flips, normalised to [-1, 1].
            sx = np.zeros((batchsize, 3, 96, 96), dtype=np.float32)
            tx = np.zeros((batchsize, 3, 96, 96), dtype=np.float32)
            for j in range(batchsize):
                try:
                    s_rnd = np.random.randint(len(s_dataset))
                    t_rnd = np.random.randint(len(t_dataset))
                    s_rnd2 = np.random.randint(2)
                    t_rnd2 = np.random.randint(2)

                    s_img = np.asarray(
                        Image.open(StringIO(
                            s_dataset[s_rnd])).convert('RGB')).astype(
                                np.float32).transpose(2, 0, 1)
                    # BUG FIX: the target image must come from t_dataset
                    # (the original indexed s_dataset with t_rnd, which is
                    # the wrong domain and can be out of range).
                    t_img = np.asarray(
                        Image.open(StringIO(
                            t_dataset[t_rnd])).convert('RGB')).astype(
                                np.float32).transpose(2, 0, 1)
                    if s_rnd2 == 0:
                        sx[j, :, :, :] = (s_img[:, :, ::-1] - 128.0) / 128.0
                    else:
                        sx[j, :, :, :] = (s_img[:, :, :] - 128.0) / 128.0
                    if t_rnd2 == 0:
                        tx[j, :, :, :] = (t_img[:, :, ::-1] - 128.0) / 128.0
                    else:
                        tx[j, :, :, :] = (t_img[:, :, :] - 128.0) / 128.0

                except Exception:  # narrowed from bare except: keep Ctrl-C working
                    print 'read image error occured', fs[t_rnd]

            # Generator losses: target identity, latent constancy and a
            # total-variation smoothness term.
            sx = Variable(cuda.to_gpu(sx))
            tx = Variable(cuda.to_gpu(tx))
            sx2, sz = gen(sx)
            tx2, tz = gen(tx)
            sx3, sz2 = gen(sx2)
            L_tid = F.mean_squared_error(tx, tx2)
            L_const = F.mean_squared_error(sz, sz2)
            # NOTE(review): the parenthesisation here applies ** 0.5 only to
            # the tx2 term -- looks unintended, but kept as-is (can't
            # confirm the intended TV formula from this file).
            L_tv = (((sx2[:, 1:] - sx2)**2 + (sx2[:, :, 1:] - sx2)**2) +
                    ((tx2[:, 1:] - tx2)**2 +
                     (tx2[:, :, 1:] - tx2)**2)**0.5) / float(batchsize)

            # Discriminator: classify generated-from-source as 0,
            # generated-from-target as 1, real target as 2.
            yl_sx2 = dis(sx2)
            yl_tx2 = dis(tx2)
            yl_tx = dis(tx)
            L_dis = F.softmax_cross_entropy(
                yl_sx2, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis += F.softmax_cross_entropy(
                yl_tx2, Variable(xp.ones(batchsize, dtype=np.int32)))
            L_dis += F.softmax_cross_entropy(
                yl_tx, Variable(xp.ones(batchsize, dtype=np.int32) * 2))
            # BUG FIX: the adversarial generator loss must use the
            # discriminator logits (yl_sx2 / yl_tx2), not the generated
            # images themselves -- the original passed sx2 / tx2, whose
            # shape does not match the (batchsize,) label vector.
            L_gang = (
                F.softmax_cross_entropy(
                    yl_sx2, Variable(xp.ones(batchsize, dtype=np.int32) * 2)) +
                F.softmax_cross_entropy(
                    yl_tx2, Variable(xp.ones(batchsize, dtype=np.int32) * 2)))

            L_gen = L_gang + alpha * L_const + beta * L_tid + gamma * L_tv

            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()

            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()

            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()

            if i % image_save_interval == 0:
                # Dump a 10x10 grid of generated samples.
                pylab.rcParams['figure.figsize'] = (16.0, 16.0)
                pylab.clf()
                vissize = 100
                z = zvis
                z[50:, :] = (xp.random.uniform(-1,
                                               1, (50, nz),
                                               dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                x = x.data.get()
                for i_ in range(100):
                    tmp = ((np.vectorize(clip_img)(x[i_, :, :, :]) + 1) /
                           2).transpose(1, 2, 0)
                    pylab.subplot(10, 10, i_ + 1)
                    pylab.imshow(tmp)
                    pylab.axis('off')
                pylab.savefig('%s/vis_%d_%d.png' % (out_image_dir, epoch, i))

        # Checkpoint models and optimizer states every epoch.
        serializers.save_hdf5(
            "%s/dcgan_model_dis_%d.h5" % (out_model_dir, epoch), dis)
        serializers.save_hdf5(
            "%s/dcgan_model_gen_%d.h5" % (out_model_dir, epoch), gen)
        serializers.save_hdf5(
            "%s/dcgan_state_dis_%d.h5" % (out_model_dir, epoch), o_dis)
        serializers.save_hdf5(
            "%s/dcgan_state_gen_%d.h5" % (out_model_dir, epoch), o_gen)
        print 'epoch end', epoch, sum_l_gen / n_train, sum_l_dis / n_train
Esempio n. 21
0
def main():
    """Fine-tune a Seq2Seq conversation model on the 'fine' corpus.

    Loads (or builds) the Japanese conversation corpus, restores weights
    from a pre-trained 'rough' model, trains for n_epoch epochs, and
    checkpoints the model/optimizer every 5 epochs.  Relies on module
    globals: batchsize, data_file, feature_num, hidden_num, n_epoch,
    testsize, args, xp.
    """
    ###########################
    #### create dictionary ####
    ###########################

    if os.path.exists('./data/corpus/dictionary.dict'):
        corpus = JaConvCorpus(file_path=None,
                              batch_size=batchsize,
                              size_filter=True)
        corpus.load(load_dir='./data/corpus/')
    else:
        corpus = JaConvCorpus(file_path=data_file,
                              batch_size=batchsize,
                              size_filter=True)
        corpus.save(save_dir='./data/corpus/')
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))

    ##################################
    #### create model (copy data) ####
    ##################################
    rough_model = './data/199_rough.model'
    model = Seq2Seq(len(corpus.dic.token2id),
                    feature_num=feature_num,
                    hidden_num=hidden_num,
                    batch_size=batchsize,
                    gpu_flg=args.gpu)
    serializers.load_hdf5(rough_model, model)
    if args.gpu >= 0:
        model.to_gpu()

    ##########################
    #### create ID corpus ####
    ##########################

    input_mat = []
    output_mat = []
    max_input_ren = max_output_ren = 0

    for input_text, output_text in zip(corpus.fine_posts, corpus.fine_cmnts):

        # convert to list
        input_text.reverse()  # encode words in a reverse order
        input_text.insert(0, corpus.dic.token2id["<eos>"])
        output_text.append(corpus.dic.token2id["<eos>"])

        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))

        input_mat.append(input_text)
        output_mat.append(output_text)

    # padding: inputs are left-padded (they are reversed), outputs
    # right-padded, up to the longest sentence in the corpus.
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])

    # create batch matrix (time-major: rows are timesteps)
    input_mat = np.array(input_mat, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T

    # separate corpus into Train and Test
    train_input_mat = input_mat
    train_output_mat = output_mat

    #############################
    #### train seq2seq model ####
    #############################

    accum_loss = 0
    train_loss_data = []
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.fine_posts) - testsize)

        # initialize optimizer
        # NOTE(review): re-created every epoch, which resets Adam's moment
        # estimates -- kept as in the original; confirm this is intended.
        optimizer = optimizers.Adam(alpha=0.001)
        optimizer.setup(model)
        # optimizer.add_hook(chainer.optimizer.GradientClipping(5))
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

        # for training
        for i in range(0, len(corpus.fine_posts) - testsize, batchsize):

            # select batch data
            input_batch = train_input_mat[:, perm[i:i + batchsize]]
            output_batch = train_output_mat[:, perm[i:i + batchsize]]

            # Encode a sentence
            model.initialize()  # initialize cell
            model.encode(input_batch,
                         train=True)  # encode (output: hidden Variable)

            # Decode from encoded context, feeding the gold previous word
            # (teacher forcing) and accumulating the loss over timesteps.
            end_batch = xp.array(
                [corpus.dic.token2id["<start>"] for _ in range(batchsize)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch,
                                             first_words,
                                             train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss

            # learn model
            model.cleargrads()  # initialize all grad to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0
            # BUG FIX: batch_num was printed but never incremented.
            batch_num += 1

        # BUG FIX: record the epoch loss; the original never appended to
        # train_loss_data, so an empty list was pickled at the end.
        train_loss_data.append(total_loss)

        # save model and optimizer
        if (epoch + 1) % 5 == 0:
            print('-----', epoch + 1, ' times -----')
            print('save the model and optimizer')
            serializers.save_hdf5('data/' + str(epoch) + '_fine.model', model)
            serializers.save_hdf5('data/' + str(epoch) + '_fine.state',
                                  optimizer)

    # save loss data
    with open('./data/fine_loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
Esempio n. 22
0
# Neural LDA training setup (example truncated inside the batch loop).
# `bow`, `corpus`, `vocab`, `n_docs` come from code above this fragment.
n_vocab = bow.shape[1]
# Number of dimensions in a single word vector
n_units = 256
# number of topics
n_topics = 20
batchsize = 128
counts = corpus.keys_counts[:n_vocab]
# Get the string representation for every compact key
words = corpus.word_list(vocab)[:n_vocab]

model = LDA(n_docs, n_topics, n_units, n_vocab)
if os.path.exists('lda.hdf5'):
    print("Reloading from saved")
    serializers.load_hdf5("lda.hdf5", model)
model.to_gpu()
optimizer = O.Adam()
optimizer.setup(model)

j = 0
# Fraction of the corpus covered by one minibatch (used to scale losses).
fraction = batchsize * 1.0 / bow.shape[0]
for epoch in range(50000000):
    # Periodically print the current top words per topic for inspection.
    if epoch % 100 == 0:
        p = cuda.to_cpu(model.proportions.W.data).copy()
        f = cuda.to_cpu(model.factors.W.data).copy()
        w = cuda.to_cpu(model.embedding.W.data).copy()
        d = prepare_topics(p, f, w, words)
        print_top_words_per_topic(d)
    for (ids, batch) in utils.chunks(batchsize, np.arange(bow.shape[0]), bow):
        t0 = time.time()
        model.cleargrads()
        rec, ld = model.forward(ids, batch)
Esempio n. 23
0
def train_dcgan_labeled(gen, dis, epoch0=0):
    """Train a DCGAN generator/discriminator pair.

    dis outputs 2-class logits: label 0 = generated, 1 = real (for the
    discriminator loss; the generator is trained to make dis output 0...
    see the per-loss comments below).  Uses module-level globals: xp, nz,
    n_epoch, n_train, batchsize, dataset, image_save_interval,
    out_image_dir, out_model_dir, fs.

    epoch0 -- epoch index to resume counting from.
    """
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    # Visualisation noise; note the save branch below mutates rows 50: of
    # this array in place each time, so rows :50 stay fixed across epochs.
    zvis = (xp.random.uniform(-1, 1, (100, nz), dtype=np.float32))

    for epoch in range(epoch0, n_epoch):
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)

        for i in range(0, n_train, batchsize):
            # discriminator
            # 0: from dataset
            # 1: from noise

            # Load a real-image minibatch with random horizontal flips,
            # normalised to [-1, 1].  Broken images are skipped (the row
            # stays zero) after logging.
            x2 = np.zeros((batchsize, 3, 96, 96), dtype=np.float32)
            for j in range(batchsize):
                try:
                    rnd = np.random.randint(len(dataset))
                    rnd2 = np.random.randint(2)

                    img = np.asarray(
                        Image.open(StringIO(
                            dataset[rnd])).convert('RGB')).astype(
                                np.float32).transpose(2, 0, 1)
                    if rnd2 == 0:
                        x2[j, :, :, :] = (img[:, :, ::-1] - 128.0) / 128.0
                    else:
                        x2[j, :, :, :] = (img[:, :, :] - 128.0) / 128.0
                except:
                    print('read image error occured', fs[rnd])

            # Generator: wants dis to label its samples 0; discriminator:
            # wants to label them 1 (and real images 0).
            z = Variable(
                xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32))
            x = gen(z)
            yl = dis(x)
            L_gen = F.softmax_cross_entropy(
                yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(
                yl, Variable(xp.ones(batchsize, dtype=np.int32)))

            # Discriminator loss on the real minibatch.
            x2 = Variable(cuda.to_gpu(x2))
            yl2 = dis(x2)
            L_dis += F.softmax_cross_entropy(
                yl2, Variable(xp.zeros(batchsize, dtype=np.int32)))

            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()

            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()

            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()

            if i % image_save_interval == 0:
                # Dump a 10x10 grid of generated samples.
                pylab.rcParams['figure.figsize'] = (16.0, 16.0)
                pylab.clf()
                vissize = 100
                z = zvis
                z[50:, :] = (xp.random.uniform(-1,
                                               1, (50, nz),
                                               dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                x = x.data.get()
                for i_ in range(100):
                    tmp = ((np.vectorize(clip_img)(x[i_, :, :, :]) + 1) /
                           2).transpose(1, 2, 0)
                    pylab.subplot(10, 10, i_ + 1)
                    pylab.imshow(tmp)
                    pylab.axis('off')
                pylab.savefig('%s/vis_%d_%d.png' % (out_image_dir, epoch, i))

        # Checkpoint models and optimizer states every epoch.
        serializers.save_hdf5(
            "%s/dcgan_model_dis_%d.h5" % (out_model_dir, epoch), dis)
        serializers.save_hdf5(
            "%s/dcgan_model_gen_%d.h5" % (out_model_dir, epoch), gen)
        serializers.save_hdf5(
            "%s/dcgan_state_dis_%d.h5" % (out_model_dir, epoch), o_dis)
        serializers.save_hdf5(
            "%s/dcgan_state_gen_%d.h5" % (out_model_dir, epoch), o_gen)
        print('epoch end', epoch, sum_l_gen / n_train, sum_l_dis / n_train)
Esempio n. 24
0
 def __init__(self, NN, lr, w_decay):
     """Wrap network *NN* with an Adam optimizer.

     NN      -- the chainer model to train
     lr      -- Adam learning rate (alpha)
     w_decay -- weight-decay rate added as an optimizer hook
     """
     self.model = NN
     self.optimizer = optimizers.Adam(alpha=lr)
     self.optimizer.setup(self.model)
     self.optimizer.add_hook(chainer.optimizer.WeightDecay(rate=w_decay))
Esempio n. 25
0
        h1 = F.dropout(F.relu(self.l1(x)), ratio=self.dr)
        h2 = F.dropout(F.relu(self.l2(h1)), ratio=self.dr)
        return self.l3(h2)


model = Network()

gpu_id = -1  # set to -1 to run on CPU, >= 0 to select that GPU

if gpu_id >= 0:
    # BUG FIX: the original called model.to_cpu(gpu_id) here, which is
    # wrong on both counts: a GPU id means the model must move TO the GPU,
    # and to_cpu takes no device argument.
    model.to_gpu(gpu_id)

from chainer import optimizers

# Choose the optimization method
optimizer = optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, \
                            eps=1e-08, eta=1.0, weight_decay_rate=0, amsgrad=False)
optimizer.setup(model)

from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu

max_epoch = 10

# Per-iteration metrics collected during training/evaluation below.
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

# Per-epoch metrics collected during training/evaluation below.
train_loss_list = []
train_acc_list = []
test_loss_list = []
Esempio n. 26
0
def train():
    """Train a progressive-growing WGAN-GP with multi-node ChainerMN.

    Parses command-line options, builds generator/discriminator at the
    requested growth depth, wraps their Adam optimizers in multi-node
    optimizers, runs the trainer, and (on rank 0 only) saves sample
    images, snapshots and final models under ./results.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--dir', type=str, default='./train_images/')
    parser.add_argument('--gen', type=str, default=None)
    parser.add_argument('--dis', type=str, default=None)
    parser.add_argument('--optg', type=str, default=None)
    parser.add_argument('--optd', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=3)
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--beta1', type=float, default=0)
    parser.add_argument('--beta2', type=float, default=0.99)
    parser.add_argument('--batch', '-b', type=int, default=16)
    parser.add_argument('--depth', '-d', type=int, default=0)
    parser.add_argument('--alpha', type=float, default=0)
    parser.add_argument('--delta', type=float, default=0.00005)
    parser.add_argument('--out', '-o', type=str, default='img/')
    parser.add_argument('--num', '-n', type=int, default=10)
    parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator')
    args = parser.parse_args()

    train = dataset.ImageDataset(directory=args.dir, depth=args.depth)
    train_iter = iterators.MultiprocessIterator(train, batch_size=args.batch, repeat=True, shuffle=True, n_processes=14)

    # Create the communicator and select this process's device *before*
    # building the models.  (BUG FIX: the original also called
    # gen.to_gpu()/dis.to_gpu() here, before `gen` and `dis` existed,
    # which raised NameError whenever --gpu >= 0.)
    if args.gpu >= 0:
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
        cuda.get_device_from_id(device).use()
    else:
        comm = chainermn.create_communicator('naive')
        device = -1

    gen = network.Generator(depth=args.depth)
    if args.gen is not None:
        print('loading generator model from ' + args.gen)
        serializers.load_npz(args.gen, gen)

    dis = network.Discriminator(depth=args.depth)
    if args.dis is not None:
        print('loading discriminator model from ' + args.dis)
        serializers.load_npz(args.dis, dis)

    # Now that the models exist, move them to the selected device.
    if args.gpu >= 0:
        gen.to_gpu()
        dis.to_gpu()

    opt_g = optimizers.Adam(alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    opt_g = chainermn.create_multi_node_optimizer(opt_g, comm, double_buffering=False)
    opt_g.setup(gen)
    if args.optg is not None:
        print('loading generator optimizer from ' + args.optg)
        serializers.load_npz(args.optg, opt_g)

    opt_d = optimizers.Adam(alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    opt_d = chainermn.create_multi_node_optimizer(opt_d, comm, double_buffering=False)
    opt_d.setup(dis)
    if args.optd is not None:
        print('loading discriminator optimizer from ' + args.optd)
        serializers.load_npz(args.optd, opt_d)

    updater = WganGpUpdater(alpha=args.alpha,
                            delta=args.delta,
                            models=(gen, dis),
                            iterator={'main': train_iter},
                            optimizer={'gen': opt_g, 'dis': opt_d},
                            device=device)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    # Rank 0 dumps a few real samples for visual comparison.
    if comm.rank == 0:
        out_dir = args.out+'depth'+str(args.depth)
        if os.path.isdir(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)
        for i in range(args.num):
            img = train.get_example(i)
            filename = os.path.join(out_dir, 'real_{}.png'.format(i))
            utils.save_image(img, filename)

    def output_image(gen, depth, out, num):
        # Trainer extension: save `num` generated images once per epoch.
        @chainer.training.make_extension()
        def make_image(trainer):
            z = gen.z(num)
            x = gen(z, alpha=trainer.updater.alpha)
            x = chainer.cuda.to_cpu(x.data)

            for i in range(args.num):
                img = x[i].copy()
                filename = os.path.join(out, '{}_{}.png'.format(trainer.updater.epoch, i))
                utils.save_image(img, filename)

        return make_image

    # Reporting/snapshot extensions run on rank 0 only.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(extensions.PrintReport(['epoch', 'gen_loss', 'loss_d', 'loss_l', 'loss_dr', 'dis_loss', 'alpha']))
        trainer.extend(extensions.snapshot_object(gen, 'gen'), trigger=(10, 'epoch'))
        trainer.extend(extensions.snapshot_object(dis, 'dis'), trigger=(10, 'epoch'))
        trainer.extend(extensions.snapshot_object(opt_g, 'opt_g'), trigger=(10, 'epoch'))
        trainer.extend(extensions.snapshot_object(opt_d, 'opt_d'), trigger=(10, 'epoch'))
        trainer.extend(output_image(gen, args.depth, out_dir, args.num), trigger=(1, 'epoch'))
        trainer.extend(extensions.ProgressBar(update_interval=1))

    trainer.run()

    # Final save (rank 0 only).
    if comm.rank == 0:
        modelname = './results/gen'+str(args.depth)
        print( 'saving generator model to ' + modelname )
        serializers.save_npz(modelname, gen)

        modelname = './results/dis'+str(args.depth)
        print( 'saving discriminator model to ' + modelname )
        serializers.save_npz(modelname, dis)

        optname = './results/opt_g'
        print( 'saving generator optimizer to ' + optname )
        serializers.save_npz(optname, opt_g)

        optname = './results/opt_d'
        print( 'saving generator optimizer to ' + optname )
        serializers.save_npz(optname, opt_d)
def main():
    """Train a pose-estimation CNN on the COCO keypoints dataset.

    Relies on module globals: set_args, Network2, GradientScaling,
    constants, CocoDataLoader, Net_Updater.
    """
    # parse command-line arguments
    args = set_args()

    # build the model
    model = Network2.MyNet()

    # GPU setup
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # set up the optimizer
    #optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate).setup(model)
    optimizer = optimizers.Adam(alpha=1e-4, beta1=0.9, beta2=0.999, eps=1e-08)
    optimizer.setup(model)
    #optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))
    # Pretrained VGG-style layers get their gradients scaled down to 1/4.
    layer_names = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3_CPM',
        'conv4_4_CPM'
    ]
    optimizer.add_hook(GradientScaling(layer_names, 1 / 4))

    # Explicitly re-enable updates on the base convolutional layers.
    layer_names = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2'
    ]
    for layer_name in layer_names:
        optimizer.target[layer_name].enable_update()

    # load the dataset
    # CocoDataLoader reuses the existing loader implementation
    coco_train = COCO(
        os.path.join(constants['data_add'],
                     'annotations/person_keypoints_train2017.json'))
    coco_val = COCO(
        os.path.join(constants['data_add'],
                     'annotations/person_keypoints_val2017.json'))
    train = CocoDataLoader(coco_train, model.insize, mode='train')
    val = CocoDataLoader(coco_val, model.insize, mode='val', n_samples=100)

    # set up the iterators
    train_iter = chainer.iterators.SerialIterator(train,
                                                  args.batchsize,
                                                  shuffle=True)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)

    # set up the updater and trainer
    updater = Net_Updater(train_iter, model, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # trainer extensions: evaluation, logging, plots, progress bar
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))

    # start training
    trainer.run()

    # save the trained model (moved back to CPU first)
    model.to_cpu()
    serializers.save_npz('trained_model', model)
Esempio n. 28
0
def main():
    """Train a convolutional autoencoder (CAE) on unlabeled CIFAR-10.

    Parses command-line options, optionally restores model/optimizer
    weights, trains with a Chainer trainer, and saves the results under
    ./results.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--batch', '-b', type=int, default=32)
    parser.add_argument('--noplot', dest='plot', action='store_false')
    args = parser.parse_args()

    # Set up a neural network to train.
    train_x, test_x = get_cifar10(withlabel=False, ndim=3)

    train = LoadDataset(train_x)
    test = LoadDataset(test_x)

    train_iter = iterators.SerialIterator(train,
                                          batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(test,
                                         batch_size=args.batch,
                                         repeat=False,
                                         shuffle=False)

    # Define model (3 input channels -> 3 output channels)
    model = network.CAE(3, 3)

    # Load weight
    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        # BUG FIX: use the GPU id the user asked for; the original always
        # selected device 0 regardless of --gpu.
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Define optimizer
    opt = optimizers.Adam(alpha=args.lr)
    opt.setup(model)

    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, opt)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, opt, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))

    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)

    optname = "./results/opt"
    print("saving opt to " + optname)
    serializers.save_npz(optname, opt)
Esempio n. 29
0
def main():
    """Train an image-captioning model (CNN encoder + RNN language model) on MSCOCO.

    Parses command-line options, loads the (pre-downloaded) MSCOCO dataset,
    builds an ``ImageCaptionModel``, and runs a Chainer ``Trainer`` with
    periodic validation, logging, plotting, and model snapshots.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out',
                        type=str,
                        default='result',
                        help='Output directory')
    parser.add_argument('--mscoco-root',
                        type=str,
                        default='data',
                        help='MSCOCO dataset root directory')
    parser.add_argument('--max-iters',
                        type=int,
                        default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio',
                        type=float,
                        default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity',
                        type=int,
                        default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter',
                        type=int,
                        default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter',
                        type=int,
                        default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter',
                        type=int,
                        default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn',
                        type=str,
                        default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length',
                        type=int,
                        default=30,
                        help='Maximum caption length when using LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has been downloaded
    # already using e.g. the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to address overfitting and see how well your
    # model generalizes to yet unseen data. However, since the number of these
    # samples in MSCOCO is quite large (~200k) and thus require time to
    # evaluate, you may choose to use only a fraction of the available samples
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(vocab_size,
                              dropout_ratio=args.dropout_ratio,
                              rnn=args.rnn)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(train,
                                                args.batch_size,
                                                shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(val,
                                                      args.batch_size,
                                                      repeat=False,
                                                      shuffle=False,
                                                      shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on if
        # we are using LSTM layers or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            # Plain LSTM needs fixed-length (padded/truncated) captions.
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            # NStepLSTM accepts variable-length sequences; no alignment needed.
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(batch,
                                  device,
                                  max_caption_length=max_caption_length)

    updater = training.updater.StandardUpdater(train_iter,
                                               optimizer=optimizer,
                                               device=args.gpu,
                                               converter=converter)

    trainer = training.Trainer(updater,
                               out=args.out,
                               stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(extensions.Evaluator(val_iter,
                                        target=model,
                                        converter=converter,
                                        device=args.gpu),
                   trigger=(args.val_iter, 'iteration'))
    trainer.extend(
        extensions.LogReport(['main/loss', 'validation/main/loss'],
                             trigger=(args.log_iter, 'iteration')))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              trigger=(args.log_iter, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'elapsed_time', 'epoch', 'iteration', 'main/loss',
        'validation/main/loss'
    ]),
                   trigger=(args.log_iter, 'iteration'))

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(extensions.snapshot_object(model,
                                              'model_{.updater.iteration}'),
                   trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
Esempio n. 30
0
def main():
    """Train, evaluate, or run inference with a VGG-based segmentation model.

    Modes (mutually exclusive, chosen by flags):
      * ``--test``   evaluate the model on the test split and print metrics.
      * ``--image``  composite a random subject onto a random background,
                     run the predictor, and write input/ideal/output PNGs.
      * default      train with a Chainer ``Trainer`` and save ``model.npz``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of examples in epoch')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--model', '-m', default='',
                        help='Load model')
    parser.add_argument('--path0', '-p0', default='./DATA/*/*.png',
                        help='path for images used subject')
    parser.add_argument('--path1', '-p1', default='./DATA/*/*.JPEG',
                        help='path for images used back')
    parser.add_argument('--test', '-t', action='store_true',
                        help='evaluation only')
    parser.add_argument('--image', action='store_true',
                        help='put image for test')

    args = parser.parse_args()

    train_dataset = DataSet(420, args.path0, args.path1)
    test_dataset = DataSet(50, args.path0, args.path1)

    model = Loss_Link(VGG())

    # BUGFIX: choose the array module explicitly; `xp` was previously
    # undefined in this function, so the --image branch raised NameError.
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = optimizers.Adam()
    optimizer.setup(model)
    # Freeze the pretrained VGG base; only the added layers are trained.
    model.predictor.base.disable_update()

    if args.model:
        serializers.load_npz(args.model, model)

    train_iter = iterators.SerialIterator(
        train_dataset, batch_size=args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(
        test_dataset,  batch_size=args.batchsize, repeat=False)

    if args.test:
        # Run the evaluator once and print every reported metric.
        eva = training.extensions.Evaluator(
            test_iter, model, device=args.gpu)()
        for key in eva:
            print(key + ":" + str(eva[key]))
    elif args.image:
        if not os.path.exists(args.out):
            os.mkdir(args.out)
        IMG_PATHS = [args.path0, args.path1]
        data = []
        base_n = len(glob.glob(IMG_PATHS[1]))
        img_n = len(glob.glob(IMG_PATHS[0]))
        # Load subjects at 256x256 (index 0) and backgrounds at 512x512 (index 1).
        for i, IMG_PATH in enumerate(IMG_PATHS):
            data.append([])
            for path in glob.glob(IMG_PATH):
                img_ = cv2.imread(path)
                if i == 0:
                    img = cv2.resize(img_, (256, 256))
                else:
                    img = cv2.resize(img_, (512, 512))
                data[i].append(img)

        # Paste a random subject at a random offset onto a random background.
        offset_x = random.randint(0, 255)
        offset_y = random.randint(0, 255)
        base = copy.deepcopy(data[1][random.randint(0, base_n - 1)])
        base[offset_x:offset_x + 256, offset_y:offset_y +
             256, :] = copy.deepcopy(data[0][random.randint(0, img_n - 1)])

        cv2.imwrite(args.out + "/input_image.png",
                    cv2.resize(base, (224, 224)))
        # Ground-truth mask: ones where the subject was pasted.
        mask = np.zeros((512, 512))
        mask[offset_x:offset_x + 256, offset_y:offset_y + 256] =\
            np.ones((256, 256))
        cv2.imwrite(args.out + "/ideal_image.png",
                    cv2.resize(np.array(mask * 255).
                               astype("uint8"), (224, 224)))
        # Predict on the normalized 224x224 composite; threshold at 0.7.
        pred = model.predictor(xp.array([cv2.resize(base, (224, 224)).
                                         transpose(2, 0, 1) / 255]).
                               astype('float32')).array[0] > 0.7
        cv2.imwrite(args.out + "/output_image.png",
                    np.array(pred * 255).reshape(224, 224).astype("uint8"))
    else:
        # BUGFIX: pass device=args.gpu so batches land on the same device as
        # the model (previously GPU training fed CPU arrays to a GPU model).
        updater = training.StandardUpdater(
            train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=args.out)

        trainer.extend(training.extensions.Evaluator(
            test_iter, model, device=args.gpu),
            trigger=(1, 'epoch'))

        trainer.extend(training.extensions.LogReport(
            trigger=(1, 'epoch')))
        trainer.extend(training.extensions.PrintReport(
            entries=['iteration', 'main/loss',
                     'main/accuracy', 'elapsed_time']),
            trigger=(1, 'epoch'))
        trainer.extend(training.extensions.snapshot(),
                       trigger=(1, 'epoch'))
        if args.resume:
            # strict=False tolerates snapshots taken with a different
            # extension configuration.
            serializers.load_npz(args.resume, trainer, strict=False)
        trainer.run()
        serializers.save_npz('model.npz', model)