def test_resumed_trigger_sparse_call(self):
        trainer = testing.get_trainer_with_mock_updater(
            stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
        accumulated = False
        with tempfile.NamedTemporaryFile(delete=False) as f:
            trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
            for expected, finished in zip(self.expected[:self.resume],
                                          self.finished[:self.resume]):
                trainer.updater.update()
                accumulated = accumulated or expected
                if random.randrange(2):
                    self.assertEqual(trigger(trainer), accumulated)
                    self.assertEqual(trigger.finished, finished)
                    accumulated = False
            serializers.save_npz(f.name, trigger)

            trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
            serializers.load_npz(f.name, trigger)
            for expected, finished in zip(self.expected[self.resume:],
                                          self.finished[self.resume:]):
                trainer.updater.update()
                accumulated = accumulated or expected
                if random.randrange(2):
                    self.assertEqual(trigger(trainer), accumulated)
                    self.assertEqual(trigger.finished, finished)
                    accumulated = False
Example 2
def main():
  xdata, ydata, zdata, ids, vocabrary = reader.load_master_data('tabelog_final_s')

  allx, ally, allz = reader.load_train_data(ids, xdata, ydata, zdata, batch_size, steps, vocab_size, out_size)

  train_x_data, test_x_data, train_y_data, test_y_data, train_z_data, test_z_data = reader.split_data(allx, ally, allz)

  for epoch in range(20):
    print('epoch %d' % epoch)

    for i in range(len(train_z_data)):
      loss = train_for(i, train_z_data, train_y_data)
      if i % 10 == 0:
          gc.collect()

      # accuracy(test_x_data, test_y_data)
      if i % 50 == 0:
        # Save the model and the optimizer
        print('epoch done')
        print('save the model')
        serializers.save_npz(('data/chainer_%d_%d.model' % (epoch, i)), rnn)
        print('save the optimizer')
        serializers.save_npz(('data/chainer_%d_%d.state' % (epoch, i)), optimizer)

import tensorflow as tf
import numpy as np

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('mode', 'train', 'train or console')
Example 3
    def _test_trigger(self, trigger, key, accuracies, expected,
                      resume=None, save=None):
        trainer = testing.get_trainer_with_mock_updater(
            stop_trigger=(len(accuracies), 'iteration'),
            iter_per_epoch=self.iter_per_epoch)
        updater = trainer.updater

        def _serialize_updater(serializer):
            updater.iteration = serializer('iteration', updater.iteration)
            updater.epoch = serializer('epoch', updater.epoch)
            updater.is_new_epoch = serializer(
                'is_new_epoch', updater.is_new_epoch)
        trainer.updater.serialize = _serialize_updater

        def set_observation(t):
            t.observation = {key: accuracies[t.updater.iteration-1]}
        trainer.extend(set_observation, name='set_observation',
                       trigger=(1, 'iteration'), priority=2)

        invoked_iterations = []

        def record(t):
            invoked_iterations.append(t.updater.iteration)
        trainer.extend(record, name='record', trigger=trigger, priority=1)

        if resume is not None:
            serializers.load_npz(resume, trainer)

        trainer.run()
        self.assertEqual(invoked_iterations, expected)

        if save is not None:
            serializers.save_npz(save, trainer)
Example 4
    def saveInfo(self, model, optimizer, epoch, outputFolder, saveEach):
        if epoch % saveEach == 0:
            if not os.path.exists(outputFolder):
                os.makedirs(outputFolder)
            bname = outputFolder + '/' + model.getName() + '_' + str(epoch)
            serializers.save_npz(bname + '.model', model)
            serializers.save_npz(bname + '.state', optimizer)
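
For reference, a minimal sketch (not part of the example above) of the matching load step, assuming the same path layout and a model that exposes getName() as saveInfo does; serializers.load_npz fills parameters into objects that have already been constructed:

from chainer import serializers

def load_info(model, optimizer, epoch, output_folder):
    # Hypothetical counterpart to saveInfo above; the paths mirror the ones it writes.
    bname = output_folder + '/' + model.getName() + '_' + str(epoch)
    serializers.load_npz(bname + '.model', model)
    serializers.load_npz(bname + '.state', optimizer)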
Example 5
    def train(self, epoch=10, batch_size=32, gpu=False):
        if gpu:
            cuda.check_cuda_available()
        xp = cuda.cupy if gpu else np

        self.batch_size = batch_size

        label_types = ['none', 'tap', 'up', 'down', 'right', 'left']

        self.model = Alex(len(label_types))
        optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
        optimizer.setup(self.model)

        if gpu:
            self.model.to_gpu()

        training_data = TrainingData(IMAGE_ROOT, NOTE_ROOT, VIDEO_ROOT, SONG_LIST_PATH)
        self.x_train, self.x_test, self.y_train, self.y_test = training_data.get_train_data(label_types)
        data_size = self.x_train.shape[0]

        for ep in range(epoch):
            print('epoch {0}/{1}: (learning rate={2})'.format(ep + 1, epoch, optimizer.lr))
            indexes = np.random.permutation(data_size)
            for i in range(0, data_size, self.batch_size):
                x_batch = self.x_train[indexes[i:i + self.batch_size]]
                y_batch = self.y_train[indexes[i:i + self.batch_size]]
                x = chainer.Variable(x_batch)
                t = chainer.Variable(y_batch)
                optimizer.update(self.model, x, t)
                print("loss: {0}".format(self.model.loss.data))

            serializers.save_npz(MODEL_PATH, self.model)
            optimizer.lr *= 0.97
Example 6
def save_states(stage_cnt, joint_idx, epoch_cnt, model, optimizer, train_losses,
                test_losses):
    ''' Save model, optimizer, and losses

    If latest loss is the best, best model will be saved.
    '''

    modif = create_modifier(stage_cnt, joint_idx)

    # Save latest model
    filename = settings.RESUME_MODEL % modif
    logger.info('Save model to %s', filename)
    convenient.mkdir_to_save(filename)
    serializers.save_npz(filename, model)

    # Save latest optimizer
    filename = settings.RESUME_OPTIMIZER % modif
    logger.info('Save optimizer to %s', filename)
    convenient.mkdir_to_save(filename)
    serializers.save_npz(filename, optimizer)

    # Save latest loss history
    filename = settings.RESUME_LOSS % modif
    logger.info('Save loss history to %s', filename)
    convenient.mkdir_to_save(filename)
    np.savez(filename, train=train_losses, test=test_losses)

    # Save best model (check current loss)
    if epoch_cnt == 0 or np.min(test_losses[:-1]) > test_losses[-1]:
        save_best_model(stage_cnt, joint_idx, model)
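
A small sketch (not from the example) of reading that loss history back with numpy; 'loss_history.npz' is a placeholder for the settings.RESUME_LOSS path used above:

import numpy as np

losses = np.load('loss_history.npz')  # stands in for settings.RESUME_LOSS % modif
train_losses = losses['train']
test_losses = losses['test']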
Example 7
File: nnw.py Project: takatori/room
 def save(self):
     # Save the model and the optimizer
     print('save the model')
     serializers.save_npz(self.metadata_path + self.appliance + '.model', self.model)
     
     print('save the optimizer')
     serializers.save_npz(self.metadata_path + self.appliance + '.state', self.optimizer)
Example 8
def caffe_to_chainermodel(model, caffe_prototxt, caffemodel_path,
                          chainermodel_path):
    os.chdir(osp.dirname(caffe_prototxt))
    net = caffe.Net(caffe_prototxt, caffemodel_path, caffe.TEST)

    for name, param in net.params.iteritems():
        try:
            layer = getattr(model, name)
        except AttributeError:
            print('Skipping caffe layer: %s' % name)
            continue

        has_bias = True
        if len(param) == 1:
            has_bias = False

        print('{0}:'.format(name))
        # weight
        print('  - W: %s %s' % (param[0].data.shape, layer.W.data.shape))
        assert param[0].data.shape == layer.W.data.shape
        layer.W.data = param[0].data
        # bias
        if has_bias:
            print('  - b: %s %s' % (param[1].data.shape, layer.b.data.shape))
            assert param[1].data.shape == layer.b.data.shape
            layer.b.data = param[1].data
    S.save_npz(chainermodel_path, model)
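
The converted file can then be loaded back through the same serializer module; a one-line sketch under the assumption that the same architecture is rebuilt first (build_model is a hypothetical constructor, 'converted.npz' a placeholder for chainermodel_path):

from chainer import serializers as S

model = build_model()  # hypothetical: same model class the Caffe weights were copied into
S.load_npz('converted.npz', model)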
Example 9
def saveModelAndOptimizer():
    """Save the model and the optimizer."""
    testFileIni.set("curEpoch", curEpoch)  # record the number of epochs completed so far
    with modelLock:
        print('save the model')
        serializers.save_npz(modelFile, dnn.model)
        print('save the optimizer')
        serializers.save_npz(stateFile, dnn.optimizer)
Example 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('caffemodel')
    parser.add_argument('output')
    args = parser.parse_args()

    model = SSDCaffeFunction(args.caffemodel)
    serializers.save_npz(args.output, model)
Example 11
def save_param(out_dir, epoch, storage):
    serializers.save_npz(
        str(out_dir/model_name(epoch)),
        storage.model
    )
    serializers.save_npz(
        str(out_dir/optimizer_name(epoch)),
        storage.optimizer
    )
Example 12
    def _write_classifier(self, classifier):
        with open(os.path.join(self.directory, "model.name"), "w") as model_file:
            model_file.write(classifier._model.__class__.__name__ + "\n")

        # Saving optimizer state
        serializers.save_npz(os.path.join(self.directory, "model.opt"), classifier._opt)
      
        # Saving classifier specification
        with open(os.path.join(self.directory, "model.state"), "w") as state_file:
            self._write_specification(classifier, state_file)
Example 13
    def _write_model(self, model):
        directory = self.directory
        # Saving model specification
        with open(os.path.join(directory, "model.spec"), "w") as spec_file:
            self._write_specification(model, spec_file)
        with open(os.path.join(directory, "model.src_vocab"), "w") as src_voc_file:
            self._write_vocabulary(model._src_voc, src_voc_file)
        with open(os.path.join(directory, "model.trg_vocab"), "w") as trg_voc_file:
            self._write_vocabulary(model._trg_voc, trg_voc_file)

        serializers.save_npz(os.path.join(directory, "model.weight"), model)
Example 14
    def agent_message(self, inMessage):
        if inMessage.startswith("freeze learning"):
            self.policyFrozen = True
            return "message understood, policy frozen"

        if inMessage.startswith("unfreeze learning"):
            self.policyFrozen = False
            return "message understood, policy unfrozen"

        if inMessage.startswith("save model"):
            serializers.save_npz('resume.model', self.DN.model) # save current model
            np.savez('stored_D012.npz', D0=self.DN.D[0], D1=self.DN.D[1], D2=self.DN.D[2])
            np.savez('stored_D34.npz', D3=self.DN.D[3], D4=self.DN.D[4])
            return "message understood, model saved"
Example 15
    def DNN(self, x_train, y_train, x_test, y_test, seed):
        np.random.seed(seed)
        dnn = Deep()
        dnn.compute_accuracy = False

        if args.gpu >= 0:
            dnn.to_gpu()

        optimizer = optimizers.Adam()
        optimizer.setup(dnn)

        end_counter = 0
        min_loss = 100
        final_epoch = 0
        final_pred = xp.empty([x_test.shape[0], 1], dtype=xp.float32)

        x_train, y_train = resample(x_train, y_train, n_samples=x_train.shape[0])
        for epoch in range(n_epoch):
            indexes = np.random.permutation(x_train.shape[0])
            for i in range(0, x_train.shape[0], batchsize):
                x_train_dnn = Variable(x_train[indexes[i : i + batchsize]])
                y_train_dnn = Variable(y_train[indexes[i : i + batchsize]])
                dnn.zerograds()
                loss = F.mean_squared_error(dnn(x_train_dnn), y_train_dnn)
                loss.backward()
                optimizer.update()
            end_counter += 1
        
            #evaluation
            if epoch % evaluation == 0:
                y_pred = dnn(Variable(x_test, volatile='on'))
                loss = F.mean_squared_error(y_pred, Variable(y_test, volatile='on'))

                if min_loss > loss.data:
                    min_loss = loss.data
                    print "epoch{}".format(epoch)
                    print "Current minimum loss is {}".format(min_loss)
                    serializers.save_npz('network/DNN{}.model'.format(seed), dnn)
                    final_epoch = epoch
                    final_pred = y_pred
                    end_counter = 0

            if end_counter > end_counter_max:
                f = open("network/final_epoch.txt", "a")
                f.write("DNN{}:{}".format(seed, final_epoch) + "\n")
                f.close()
                break     

        return final_pred.data, min_loss    
Example 16
 def save(self, prefix, encdec, epoch):
     settings = {
         "model":       self.model(),
         "train_file":  self.train_file(),
         "embed":       self.embed_size(),
         "hidden":      self.hidden_size(),
         "minbatch":    self.batch_size(),
         "lr":          self.lr(),
         "epoch":       epoch
     }
     prefix = prefix + "_".join([k+":"+str(v) for k,v in settings.items()]).replace("/", "-").replace(".", "-")
     self.corpus.save(prefix + '.vocab')
     self.serialize(prefix + '.conf', settings)
     serializers.save_npz(prefix + '.weights', encdec)
     return prefix
Example 17
    def test_resumed_trigger(self):
        trainer = testing.get_trainer_with_mock_updater(
            stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
        with tempfile.NamedTemporaryFile(delete=False) as f:
            trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
            for expected in self.expected[:self.resume]:
                trainer.updater.update()
                self.assertEqual(trigger(trainer), expected)
            serializers.save_npz(f.name, trigger)

            trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
            serializers.load_npz(f.name, trigger)
            for expected in self.expected[self.resume:]:
                trainer.updater.update()
                self.assertEqual(trigger(trainer), expected)
Example 18
def test_standard_scaler_serialize(tmpdir, data, indices):
    x, expect_x_scaled = data
    scaler = StandardScaler()
    scaler.fit(x, indices=indices)

    scaler_filepath = os.path.join(str(tmpdir), 'scaler.npz')
    serializers.save_npz(scaler_filepath, scaler)

    scaler2 = StandardScaler()
    serializers.load_npz(scaler_filepath, scaler2)

    # print('scaler2 attribs:', scaler2.mean, scaler2.std, scaler2.indices)
    assert numpy.allclose(scaler.mean, scaler2.mean)
    assert numpy.allclose(scaler.std, scaler2.std)
    assert scaler.indices == scaler2.indices
Example 19
 def progress_func(epoch, loss, accuracy, valid_loss, valid_accuracy, test_loss, test_accuracy):
     print('epoch: {} done'.format(epoch))
     print('train mean loss={}, accuracy={}'.format(loss, accuracy))
     if valid_loss is not None and valid_accuracy is not None:
         print('valid mean loss={}, accuracy={}'.format(valid_loss, valid_accuracy))
     if test_loss is not None and test_accuracy is not None:
         print('test mean loss={}, accuracy={}'.format(test_loss, test_accuracy))
     if valid_accuracy < progress_state['valid_accuracy']:
         serializers.save_npz(args.output, net)
         progress_state['valid_accuracy'] = valid_accuracy
         progress_state['test_accuracy'] = test_accuracy
     if epoch % args.save_iter == 0:
         base, ext = os.path.splitext(args.output)
         serializers.save_npz('{0}_{1:04d}{2}'.format(base, epoch, ext), net)
     if args.lr_decay_iter > 0 and epoch % args.lr_decay_iter == 0:
         optimizer.alpha *= args.lr_decay_ratio
Example 20
def run_training(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)
    
    if args.epoch is not None:
        start = args.epoch + 1
        storage = load(out_dir, args.epoch)
        sentences = itertools.islice(sentences, start, None)
    else:
        start = 0
        storage = init(args)        
        if (out_dir/meta_name).exists():
            if input('Overwrite? [y/N]: ').strip().lower() != 'y':
                exit(1)
        with (out_dir/meta_name).open('wb') as f:
            np.save(f, [storage])
        
    batchsize = 5000
    for i, sentence in enumerate(sentences, start):
        if i % batchsize == 0:
            print()
            serializers.save_npz(
                str(out_dir/model_name(i)),
                storage.model
            )
            serializers.save_npz(
                str(out_dir/optimizer_name(i)),
                storage.optimizer
            )
        else:
            print(
                util.progress(
                    'batch {}'.format(i // batchsize),
                    (i % batchsize) / batchsize, 100),
                end=''
            )
        train(storage.model,
              storage.optimizer,
              generate_data(sentence),
              generate_label(sentence),
              generate_attr(
                  sentence,
                  storage.mappings
              )
        )
Example 21
def test_flow_scaler_serialize(tmpdir):
    x = numpy.random.uniform(50, 100, size=100).astype(numpy.float32)
    scaler = FlowScaler(5)
    scaler.fit(x)
    x_scaled = scaler.transform(x)

    scaler_filepath = os.path.join(str(tmpdir), 'scaler.npz')
    serializers.save_npz(scaler_filepath, scaler)

    scaler2 = FlowScaler(5)
    serializers.load_npz(scaler_filepath, scaler2)
    x_scaled2 = scaler2.transform(x)

    assert numpy.allclose(scaler.W1.array, scaler2.W1.array)
    assert numpy.allclose(scaler.b1.array, scaler2.b1.array)
    assert numpy.allclose(scaler.W2.array, scaler2.W2.array)
    assert numpy.allclose(scaler.b2.array, scaler2.b2.array)
    assert numpy.allclose(x_scaled, x_scaled2)
Example 22
    def test_elapsed_time_serialization(self):
        self.trainer.run()
        serialized_time = self.trainer.elapsed_time

        tempdir = tempfile.mkdtemp()
        try:
            path = os.path.join(tempdir, 'trainer.npz')
            serializers.save_npz(path, self.trainer)

            trainer = _get_mocked_trainer((20, 'iteration'))
            serializers.load_npz(path, trainer)

            trainer.run()

            self.assertGreater(trainer.elapsed_time, serialized_time)

        finally:
            shutil.rmtree(tempdir)
Example 23
    def test_resumed_trigger(self):
        trainer = testing.get_trainer_with_mock_updater(
            stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
        with tempfile.NamedTemporaryFile(delete=False) as f:
            trigger = training.triggers.OnceTrigger(self.call_on_resume)
            for expected, finished in zip(self.resumed_expected[:self.resume],
                                          self.resumed_finished[:self.resume]):
                trainer.updater.update()
                self.assertEqual(trigger.finished, finished)
                self.assertEqual(trigger(trainer), expected)
            serializers.save_npz(f.name, trigger)

            trigger = training.triggers.OnceTrigger(self.call_on_resume)
            serializers.load_npz(f.name, trigger)
            for expected, finished in zip(self.resumed_expected[self.resume:],
                                          self.resumed_finished[self.resume:]):
                trainer.updater.update()
                self.assertEqual(trigger.finished, finished)
                self.assertEqual(trigger(trainer), expected)
Example 24
def train(epoch=10, batch_size=32, gpu=False):
    if gpu:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu else np

    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, image_property=IMAGE_PROP)

    # make mean image
    if not os.path.isfile(MEAN_IMAGE_FILE):
        print("make mean image...")
        td.make_mean_image(MEAN_IMAGE_FILE)
    else:
        td.mean_image_file = MEAN_IMAGE_FILE

    # train model
    label_def = LabelingMachine.read_label_def(LABEL_DEF_FILE)
    model = alex.Alex(len(label_def))
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    epoch = epoch
    batch_size = batch_size

    print("Now our model is {0} classification task.".format(len(label_def)))
    print("begin training the model. epoch:{0} batch size:{1}.".format(epoch, batch_size))

    if gpu:
        model.to_gpu()

    for i in range(epoch):
        print("epoch {0}/{1}: (learning rate={2})".format(i + 1, epoch, optimizer.lr))
        td.shuffle(overwrite=True)

        for x_batch, y_batch in td.generate_batches(batch_size):
            x = chainer.Variable(xp.asarray(x_batch))
            t = chainer.Variable(xp.asarray(y_batch))

            optimizer.update(model, x, t)
            print("loss: {0}, accuracy: {1}".format(float(model.loss.data), float(model.accuracy.data)))

        serializers.save_npz(MODEL_FILE, model)
        optimizer.lr *= 0.97
Example 25
def train():
    model = L.Classifier(net.MyChain())
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    dataset = animeface.load_dataset()
    N = int(len(dataset) * train_rate)
    N_test = len(dataset) - N

    for epoch in range(n_epoch):
        print "epoch {0}".format(epoch)

        random.shuffle(dataset)
        data = np.array([x[0] for x in dataset], np.float32)
        target = np.array([x[1] for x in dataset], np.int32)

        x_train, x_test = np.split(data, [N])
        y_train, y_test = np.split(target, [N])

        indexes = np.random.permutation(N)
        sum_loss, sum_accuracy = 0, 0
        for i in range(0, N, batchsize):
            x = Variable(x_train[indexes[i: i + batchsize]])
            t = Variable(y_train[indexes[i: i + batchsize]])
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.data) * batchsize
            sum_accuracy += float(model.accuracy.data) * batchsize
        print "train loss={0}, accuracy={1}".format(sum_loss/N, sum_accuracy/N)

        sum_loss, sum_accuracy = 0, 0
        for i in range(0, N_test, batchsize):
            x = Variable(x_test[i: i + batchsize])
            t = Variable(y_test[i: i + batchsize])
            loss = model(x, t)
            sum_loss += float(loss.data) * batchsize
            sum_accuracy += float(model.accuracy.data) * batchsize
        print "test loss={0}, accuracy={1}".format(
            sum_loss/N_test, sum_accuracy/N_test)

    serializers.save_npz("animeface.model", model)
Example 26
    def check_serialization(self, backend_config):
        with utils.tempdir() as root:
            filename = os.path.join(root, 'tmp.npz')

            layer1 = self.layer.copy('copy')
            hook1 = copy.deepcopy(self.hook)
            layer1.add_hook(hook1)

            layer1.to_device(backend_config.device)
            x = backend_config.get_array(self.x)
            with backend_config:
                layer1(x)
                with chainer.using_config('train', False):
                    y1 = layer1(x)
            serializers.save_npz(filename, layer1)

            layer2 = self.layer.copy('copy')
            hook2 = copy.deepcopy(self.hook)
            layer2.add_hook(hook2)

            # Test that loading does not raise.
            msg = None
            try:
                serializers.load_npz(filename, layer2)
            except Exception as e:
                msg = e
            assert msg is None

            with chainer.using_config('train', False):
                y2 = layer2(self.x.copy())

            # Test attributes are the same.
            orig_weight = _cpu._to_cpu(
                getattr(layer1, hook1.weight_name).array)
            orig_vector = _cpu._to_cpu(getattr(layer1, hook1.vector_name))
            numpy.testing.assert_array_equal(
                orig_weight, getattr(layer2, hook2.weight_name).array)
            numpy.testing.assert_array_equal(
                orig_vector, getattr(layer2, hook2.vector_name))
            testing.assert_allclose(y1.array, y2.array)
Example 27
def train(args, model):

    # setup optimizer
    opt = optimizers.SGD()  # stochastic gradient descent
    opt.setup(model)        # initialization

    for i in range(args.epoch):
        src_generator = text_generator(args.source)
        trg_generator = text_generator(args.target)

        total_loss = 0.0
        for src_sentence, trg_sentence in zip(src_generator, trg_generator):
            opt.zero_grads()
            loss = forward(model, src_sentence, trg_sentence, True)
            total_loss += loss.data
            loss.backward()            # backpropagate the error
            opt.clip_grads(10)         # clip gradients larger than 10
            opt.update()               # update the parameters
        print("epoch: %3d, loss: %f" % (i, total_loss))

    # save
    serializers.save_npz("model", model)
Example 28
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            serializers.save_npz(args.out, model)
            serializers.save_npz(args.outstate, optimizer)
            model.train = False
            continue

        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)

        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph', file=sys.stderr)
                graph_generated = True
        else:
            model(x, t)

        res_q.put((float(model.loss.data), float(model.accuracy.data)))
        del x, t
Example 29
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gpu', type=int, required=True)
    args = parser.parse_args()

    gpu = args.gpu

    output_nc = 3
    nz = 8

    E = E_ResNet(
        input_nc=output_nc,
        output_nc=nz,
        ndf=64,
        n_blocks=5,
        norm_layer='instance',
        nl_layer='lrelu',
        vaeLike=True,
    )
    G = G_Unet_add_all(
        input_nc=1,
        output_nc=output_nc,
        nz=nz,
        num_downs=8,
        ngf=64,
        norm_layer='instance',
        nl_layer='relu',
        use_dropout=True,
        upsample='basic',
    )
    D = D_NLayersMulti(
        input_nc=output_nc,
        ndf=64,
        n_layers=3,
        norm_layer='instance',
        use_sigmoid=False,
        num_D=2,
    )
    D2 = D_NLayersMulti(
        input_nc=output_nc,
        ndf=64,
        n_layers=3,
        norm_layer='instance',
        use_sigmoid=False,
        num_D=2,
    )

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        E.to_gpu()
        G.to_gpu()
        D.to_gpu()
        D2.to_gpu()

    lr = 0.0002
    beta1 = 0.5
    beta2 = 0.999

    optimizer_E = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_E.setup(E)

    optimizer_G = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_G.setup(G)

    optimizer_D = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D.setup(D)

    optimizer_D2 = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D2.setup(D2)

    batch_size = 2
    dataset = BerkeleyPix2PixDataset('edges2shoes', split='train')
    dataset = chainer.datasets.TransformDataset(dataset, BicycleGANTransform())
    iterator = chainer.iterators.SerialIterator(dataset, batch_size=batch_size)

    epoch_count = 1
    niter = 30
    niter_decay = 30

    def lambda_rule(epoch):
        lr_l = 1.0 - (max(0, epoch + 1 + epoch_count - niter) /
                      float(niter_decay + 1))
        return lr_l

    out_dir = osp.join('logs',
                       datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    if not osp.exists(out_dir):
        os.makedirs(out_dir)

    with open(osp.join(out_dir, 'log.csv'), 'w') as f:
        f.write(','.join([
            'epoch',
            'iteration',
            'loss_D',
            'loss_D2',
            'loss_G',
            'loss_G_GAN',
            'loss_G_GAN2',
            'loss_G_L1',
            'loss_kl',
            'loss_z_L1',
        ]))
        f.write('\n')

    max_epoch = niter + niter_decay - epoch_count
    dataset_size = len(dataset)
    for epoch in range(epoch_count, niter + niter_decay + 1):
        t_start = time.time()

        for iteration in range(dataset_size // batch_size):
            batch = next(iterator)
            if len(batch) != batch_size:
                continue

            img_A, img_B = zip(*batch)
            img_A = np.asarray(img_A)[:, 0:1, :, :]
            img_B = np.asarray(img_B)

            assert batch_size == 2
            assert len(img_A) == 2
            assert len(img_B) == 2
            real_A_encoded = img_A[0:1]
            real_A_random = img_A[1:2]
            real_B_encoded = img_B[0:1]
            real_B_random = img_B[1:2]
            if gpu >= 0:
                real_A_encoded = cuda.to_gpu(real_A_encoded)
                real_A_random = cuda.to_gpu(real_A_random)
                real_B_encoded = cuda.to_gpu(real_B_encoded)
                real_B_random = cuda.to_gpu(real_B_random)
            real_A_encoded = chainer.Variable(real_A_encoded)
            real_A_random = chainer.Variable(real_A_random)
            real_B_encoded = chainer.Variable(real_B_encoded)
            real_B_random = chainer.Variable(real_B_random)

            # update D
            # -----------------------------------------------------------------
            # forward {{
            mu, logvar = E(real_B_encoded)
            std = F.exp(logvar * 0.5)
            eps = get_z_random(std.shape[0], std.shape[1])
            z_encoded = (eps * std) + mu

            z_random = get_z_random(real_A_random.shape[0], std.shape[1])

            fake_B_encoded = G(real_A_encoded, z_encoded)

            # generate fake_B_random
            fake_B_random = G(real_A_encoded, z_random)

            fake_data_encoded = fake_B_encoded
            fake_data_random = fake_B_random
            real_data_encoded = real_B_encoded
            real_data_random = real_B_random

            lambda_z = 0.5

            mu2, logvar2 = E(fake_B_random)
            # std2 = F.exp(logvar2 * 0.5)
            # eps2 = get_z_random(std2.shape[0], std2.shape[1])
            # z_predict = (eps2 * std2) + mu2

            # }} forward

            # update D1
            lambda_GAN = 1.0
            lambda_GAN2 = 1.0
            if lambda_GAN > 0:
                D.cleargrads()
                loss_D, losses_D = backward_D(D, real_data_encoded,
                                              fake_data_encoded)
                optimizer_D.update()

            # update D2
            if lambda_GAN2 > 0:
                D2.cleargrads()
                loss_D2, losses_D2 = backward_D(D2, real_data_random,
                                                fake_data_random)
                optimizer_D2.update()

            # update G
            # -----------------------------------------------------------------
            E.cleargrads()
            G.cleargrads()
            loss_G, loss_G_GAN, loss_G_GAN2, loss_G_L1, loss_kl = backward_EG(
                fake_data_encoded, fake_data_random, fake_B_encoded,
                real_B_encoded, D, D2, lambda_GAN, lambda_GAN2, mu, logvar)
            optimizer_G.update()
            optimizer_E.update()

            # update G only
            if lambda_z > 0.0:
                G.cleargrads()
                E.cleargrads()
                loss_z_L1 = backward_G_alone(lambda_z, mu2, z_random)
                optimizer_G.update()

            if iteration % (100 // batch_size) != 0:
                continue

            # log
            # -----------------------------------------------------------------
            time_per_iter1 = ((time.time() - t_start) / (iteration + 1) /
                              batch_size)

            if hasattr(loss_D, 'array'):
                loss_D = float(loss_D.array)
            if hasattr(loss_D2, 'array'):
                loss_D2 = float(loss_D2.array)
            if hasattr(loss_G, 'array'):
                loss_G = float(loss_G.array)
            if hasattr(loss_G_GAN, 'array'):
                loss_G_GAN = float(loss_G_GAN.array)
            if hasattr(loss_G_GAN2, 'array'):
                loss_G_GAN2 = float(loss_G_GAN2.array)
            if hasattr(loss_G_L1, 'array'):
                loss_G_L1 = float(loss_G_L1.array)
            if hasattr(loss_kl, 'array'):
                loss_kl = float(loss_kl.array)
            if hasattr(loss_z_L1, 'array'):
                loss_z_L1 = float(loss_z_L1.array)

            print('-' * 79)
            print('Epoch: {:d}/{:d} ({:.1%}), '
                  'Iteration: {:d}/{:d} ({:.1%}), Time: {:f}'.format(
                      epoch, max_epoch, 1. * epoch / max_epoch,
                      batch_size * iteration, dataset_size,
                      1. * batch_size * iteration / dataset_size,
                      time_per_iter1))

            print('D: {:.2f}'.format(loss_D), 'D2: {:.2f}'.format(loss_D2),
                  'G: {:.2f}'.format(loss_G),
                  'G_GAN: {:.2f}'.format(loss_G_GAN),
                  'G_GAN2: {:.2f}'.format(loss_G_GAN2),
                  'G_L1: {:.2f}'.format(loss_G_L1),
                  'kl: {:.2f}'.format(loss_kl),
                  'z_L1: {:.2f}'.format(loss_z_L1))

            with open(osp.join(out_dir, 'log.csv'), 'a') as f:
                f.write(','.join(
                    map(str, [
                        epoch,
                        ((epoch - 1) * dataset_size) + iteration * batch_size,
                        loss_D,
                        loss_D2,
                        loss_G,
                        loss_G_GAN,
                        loss_G_GAN2,
                        loss_G_L1,
                        loss_kl,
                        loss_z_L1,
                    ])))
                f.write('\n')

            # visualize
            # -------------------------------------------------------------------------
            real_A_encoded = real_A_encoded.array[0].transpose(1, 2, 0)
            real_A_encoded = np.repeat(real_A_encoded, 3, axis=2)
            real_A_encoded = cuda.to_cpu(real_A_encoded)
            real_B_encoded = real_B_encoded.array[0].transpose(1, 2, 0)
            real_B_encoded = cuda.to_cpu(real_B_encoded)
            real_A_random = real_A_random.array[0].transpose(1, 2, 0)
            real_A_random = np.repeat(real_A_random, 3, axis=2)
            real_A_random = cuda.to_cpu(real_A_random)
            real_B_random = real_B_random.array[0].transpose(1, 2, 0)
            real_B_random = cuda.to_cpu(real_B_random)
            fake_B_encoded = fake_B_encoded.array[0].transpose(1, 2, 0)
            fake_B_encoded = cuda.to_cpu(fake_B_encoded)
            fake_B_random = fake_B_random.array[0].transpose(1, 2, 0)
            fake_B_random = cuda.to_cpu(fake_B_random)
            viz = np.vstack([
                np.hstack([real_A_encoded, real_B_encoded]),
                np.hstack([real_A_random, real_B_random]),
                np.hstack([fake_B_encoded, fake_B_random])
            ])
            skimage.io.imsave(osp.join(out_dir, '{:08}.jpg'.format(epoch)),
                              viz)

        S.save_npz(osp.join(out_dir, '{:08}_E.npz'.format(epoch)), E)
        S.save_npz(osp.join(out_dir, '{:08}_G.npz'.format(epoch)), G)
        S.save_npz(osp.join(out_dir, '{:08}_D.npz'.format(epoch)), D)
        S.save_npz(osp.join(out_dir, '{:08}_D2.npz'.format(epoch)), D2)

        # update learning rate
        # -------------------------------------------------------------------------
        lr_new = lambda_rule(epoch)
        optimizer_E.alpha *= lr_new
        optimizer_G.alpha *= lr_new
        optimizer_D.alpha *= lr_new
        optimizer_D2.alpha *= lr_new
Example 30
        #train discriminator
        L_dis = -1 * (d_entropy1(real_y) - d_entropy2(real_y) +
                      d_entropy2(fake_y))  #Equation (7) upper
        o_dis.zero_grads()
        L_dis.backward()
        o_dis.update()

        #train generator
        L_gen = -d_entropy1(fake_y) + d_entropy2(fake_y)  #Equation (7) lower

        o_gen.zero_grads()
        L_gen.backward()
        o_gen.update()

        sum_l_dis += L_dis.data
        sum_l_gen += L_gen.data

    error[epoch - 1, :] = [epoch, sum_l_dis, sum_l_gen]

    print('dis_loss', sum_l_dis, sum_l_gen, sum_l_dis + sum_l_gen)
#   print('loss',sum_l_gen)

np.savetxt('train_error.csv',
           error,
           delimiter=',',
           header='epoch,dis_loss,gen_loss')
# Save the model and the optimizer
print('save the model')
serializers.save_npz('catgan_gen.model', gen)
serializers.save_npz('catgan_dis.model', dis)
Example 31
        gen_f_model.cleargrads()
        gen_g_model.cleargrads()

        loss_gen.backward()
        loss_gen.unchain_backward()

        gen_f_opt.update()
        gen_g_opt.update()

        sum_dis_y_loss += loss_dis_y.data.get()
        sum_dis_x_loss += loss_dis_x.data.get()
        sum_gen_loss += loss_gen.data.get()

        if epoch % interval == 0 and batch == 0:
            serializers.save_npz('xy.model', gen_g_model)
            serializers.save_npz('yx.model', gen_f_model)

            for i in range(Ntest):
                black = (x_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype(
                    np.uint8)
                pylab.subplot(2, Ntest, 2 * i + 1)
                pylab.imshow(black)
                pylab.axis('off')
                pylab.savefig(image_xy + '/output_xy_%d.png' % epoch)

                x = Variable(cuda.to_gpu(x_test[i]))
                x = x.reshape(1, channels, width, height)
                with chainer.using_config('train', False):
                    x_y = gen_g_model(x)
                x_y = x_y.data.get()
Example 32
        feature = vgg(xc)
        feature_hat = vgg(y)

        L_feat = lambda_f * F.mean_squared_error(
            Variable(feature[2].data),
            feature_hat[2])  # compute for only the output of layer conv3_3

        L_style = Variable(xp.zeros((), dtype=np.float32))
        for f, f_hat, g_s in zip(feature, feature_hat, gram_s):
            L_style += lambda_s * F.mean_squared_error(gram_matrix(f_hat),
                                                       Variable(g_s.data))

        L_tv = lambda_tv * total_variation_regularization(y)
        L = L_feat + L_style + L_tv

        print '(epoch {}) batch {}/{}... training loss is...{}'.format(
            epoch, i, n_iter, L.data)

        L.backward()
        O.update()

        if args.checkpoint > 0 and i % args.checkpoint == 0:
            serializers.save_npz(
                'models/{}_{}_{}.model'.format(output, epoch, i), model)

    print 'save "style.model"'
    serializers.save_npz('models/{}_{}.model'.format(output, epoch), model)

serializers.save_npz('models/{}.model'.format(output), model)
Example 33
def train():
    print('import data....')
    data = data_import()
    print('success!')
    data_x = []
    data_y = []
    for i in range(len(data) - INPUT_SIZE - OUTPUT_SIZE):
        data_x.append(data[i:i + INPUT_SIZE])
        data_y.append(data[i + INPUT_SIZE:i + INPUT_SIZE + OUTPUT_SIZE])

    data_x = np.array(data_x).astype("float32")
    data_y = np.array(data_y).astype("float32")

    X = data_x[0:len(data_x) - TESTDATA_SIZE]
    y = data_y[0:len(data_y) - TESTDATA_SIZE]
    test_x = data_x[len(data_x) - TESTDATA_SIZE:]
    test_y = data_y[len(data_y) - TESTDATA_SIZE:]

    mean = test_x.mean(axis=1)
    std = test_x.std(axis=1)
    test_x = (test_x - mean.reshape(TESTDATA_SIZE, 1)) / std.reshape(
        TESTDATA_SIZE, 1)
    test_y = (test_y - mean.reshape(TESTDATA_SIZE, 1)) / std.reshape(
        TESTDATA_SIZE, 1)
    model = predict_model(INPUT_SIZE, OUTPUT_SIZE, 32)
    try:
        serializers.load_npz("predict.model", model)
        print("loaded")
    except Exception:
        print("couldn't load")

    opt = chainer.optimizers.SGD(0.01)
    opt.setup(model)
    num_batches = int(X.shape[0] / BATCH_SIZE)
    for epoch in range(NUM_EPOCH):
        perm = np.random.permutation(X.shape[0])
        for index in range(num_batches):
            X_batch = X[perm[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]]
            Y_batch = y[perm[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]]
            #return X_batch
            mean = X_batch.mean(axis=1)
            std = X_batch.std(axis=1)

            X_batch = (X_batch - mean.reshape(BATCH_SIZE, 1)) / std.reshape(
                BATCH_SIZE, 1) + np.random.normal(
                    0, 0.5, X_batch.shape).astype(np.float32)
            Y_batch = (Y_batch - mean.reshape(BATCH_SIZE, 1)) / std.reshape(
                BATCH_SIZE, 1) + np.random.normal(
                    0, 0.5, Y_batch.shape).astype(np.float32)

            yl = model(X_batch)
            loss = F.mean_squared_error(yl, Y_batch)

            model.cleargrads()
            loss.backward()
            opt.update()

            chainer.config.train = False
            test_loss = F.mean_squared_error(model(test_x), test_y)
            chainer.config.train = True
            print("epoch:%d batch:%d/%d loss:%f test_loss:%f" %
                  (epoch, index, num_batches, loss.data, test_loss.data))

        serializers.save_npz('predict.model', model)
Example 34
def main():
  args = parse_args()
  XP.set_library(args)
  date=time.localtime()[:6]
  D=[]
  for i in date:
    D.append(str(i))
  D="_".join(D)

  save_path=args.save_path
  if os.path.exists(save_path)==False:
    os.mkdir(save_path)

  if args.model_path!=None:
    print("continue existed model!! load recipe of {}".format(args.model_path))
    with open(args.model_path+'/recipe.json','r') as f:
      recipe=json.load(f)
    vae_enc=recipe["network"]["IM"]["vae_enc"]
    vae_z=recipe["network"]["IM"]["vae_z"]
    vae_dec=recipe["network"]["IM"]["vae_dec"]
    Read_patch=recipe["network"]["IM"]["Read_patch"]
    Write_patch=recipe["network"]["IM"]["Write_patch"]    
    times=recipe["network"]["IM"]["times"] 
    alpha=recipe["network"]["IM"]["KLcoefficient"]    
    
    batchsize=recipe["setting"]["batchsize"]
    maxepoch=args.maxepoch
    weightdecay=recipe["setting"]["weightdecay"]
    grad_clip=recipe["setting"]["grad_clip"]
    cur_epoch=recipe["setting"]["cur_epoch"]+1
    ini_lr=recipe["setting"]["initial_learningrate"]
    cur_lr=recipe["setting"]["cur_lr"]
    
    with open(args.save_path+"/trainloss.json",'r') as f:
      trainloss_dic=json.load(f)
    with open(args.save_path+"/valloss.json",'r') as f:
      valloss_dic=json.load(f)

  else:
    vae_enc=args.vae_enc
    vae_z=args.vae_z
    vae_dec=args.vae_dec
    Read_patch=args.Read_patch
    Write_patch=args.Write_patch
    times=args.times
    alpha=args.alpha
    batchsize=args.batchsize
    maxepoch=args.maxepoch
    weightdecay=args.weightdecay
    grad_clip=5
    cur_epoch=0
    ini_lr=args.lr
    cur_lr=ini_lr
    trainloss_dic={}
    valloss_dic={}

  print('this experiment started at :{}'.format(D))
  print('***Experiment settings***')
  print('[IM]vae encoder hidden size :{}'.format(vae_enc))
  print('[IM]vae hidden layer size :{}'.format(vae_z))
  print('[IM]vae decoder hidden layer size :{}'.format(vae_dec)) 
  print('[IM]Read patch size :{}'.format(Read_patch))
  print('[IM]Write patch size :{}'.format(Write_patch))
  print('[IM]sequence length:{}'.format(times)) 
  print('max epoch :{}'.format(maxepoch))
  print('mini batch size :{}'.format(batchsize))
  print('initial learning rate :{}'.format(cur_lr))
  print('weight decay :{}'.format(weightdecay))
  print("optimization by :{}".format("Adam"))
  print("VAE KL coefficient:",alpha)
  print('*************************') 

  vae = VAE_bernoulli_attention(vae_enc,vae_z,vae_dec,Read_patch,Write_patch,28,28,1)
  opt = optimizers.Adam(alpha = cur_lr)
  opt.use_cleargrads()
  opt.setup(vae)
  if args.model_path!=None:
    print('loading model ...')
    serializers.load_npz(args.model_path + '/VAEweights', vae)
    serializers.load_npz(args.model_path + '/optimizer', opt)
  else:
    print('making [[new]] model ...')
    for param in vae.params():
      data = param.data
      data[:] = np.random.uniform(-0.1, 0.1, data.shape)
  opt.add_hook(optimizer.GradientClipping(grad_clip))
  opt.add_hook(optimizer.WeightDecay(weightdecay))  

  if args.gpu >= 0 :
    vae.to_gpu()

  mnist = MNIST(binarize=True)
  train_size = mnist.train_size
  test_size = mnist.test_size

  eps = 1e-8
  for epoch in range(cur_epoch+1, maxepoch+1):
    print('\nepoch {}'.format(epoch))
    LX = 0.0
    LZ = 0.0
    counter = 0
    for iter_,(img_array,label_array) in enumerate(mnist.gen_train(batchsize,Random=True)):
        B = img_array.shape[0]
        Lz = XP.fzeros(())
        vae.reset(img_array)
        
        #first to T-1 step
        for j in range(times-1):
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i
        #last step
        j+=1
        y,kl =vae.free_energy_onestep()
        Lz_i = alpha*kl
        Lz += Lz_i
        Lx = Bernoulli_nll_wesp(vae.x,y,eps)
        
        LZ += Lz.data
        LX += Lx.data
        
        loss = (Lx+Lz)/B
        loss.backward() #if True, all intermediate variables are kept.
        opt.update()

        counter += B
        sys.stdout.write('\rnow training ...  epoch {}, {}/{}  '.format(epoch,counter,train_size))
        sys.stdout.flush()
        if (iter_+1) % 100 == 0:
          print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(counter) % (Lx.data/B,Lz.data/B))

    print("\nsave fig...")
    img_array = cuda.to_cpu(y.data)
    im_array = img_array.reshape(batchsize*28, 28)
    img = im_array[:28*5]
    """
    plt.clf()
    plt.imshow(img, cmap=cm.gray)
    plt.colorbar(orientation='horizontal')
    plt.savefig(save_path+"/"+"img{}.png".format(epoch))
    """
    save_img(img, save_path+"/{}.png".format(str(epoch).zfill(3)))

    trace(save_path+"/trainloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/train_size,LZ/train_size,(LX+LZ)/train_size))            	
    trainloss_dic[str(epoch).zfill(3)]={
                    "Lx":float(LX/train_size),
                    "Lz":float(LZ/train_size),
                    "Lx+Lz":float((LX+LZ)/train_size)}
    with open(save_path+"/trainloss.json",'w') as f:
        json.dump(trainloss_dic,f,indent=4)   

    print('save model ...')
    prefix = save_path+"/"+str(epoch).zfill(3)
    if os.path.exists(prefix)==False:
        os.mkdir(prefix)        
    serializers.save_npz(prefix + '/VAEweights', vae) 
    serializers.save_npz(prefix + '/optimizer', opt)
    print('save recipe...')
    recipe_dic = {
    "date":D,
    "setting":{
        "maxepoch":maxepoch,
        "batchsize":batchsize,
        "weightdecay":weightdecay,
        "grad_clip":grad_clip,
        "opt":"Adam",
        "initial_learningrate":ini_lr,
        "cur_epoch":epoch,
        "cur_lr":cur_lr},
    "network":{
        "IM":{
            "x_size":784,
            "vae_enc":vae_enc,
            "vae_z":vae_z,
            "vae_dec":vae_dec,
            "Read_patch":Read_patch,
            "Write_patch":Write_patch,
            "times":times,
            "KLcoefficient":alpha},
            },
            }
    with open(prefix+'/recipe.json','w') as f:
      json.dump(recipe_dic,f,indent=4)
           
    if epoch % 1 == 0:
        print("\nvalidation step")
        LX = 0.0
        LZ = 0.0
        counter = 0
        for iter,(img_array,label_array) in enumerate(mnist.gen_test(batchsize)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            
            #first to T-1 step
            for j in range(times-1):
                y,kl = vae.free_energy_onestep()
                Lz_i = alpha*kl
                Lz += Lz_i           
            #last step
            j+=1
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i  
            Lx = Bernoulli_nll_wesp(vae.x,y,eps)

            LZ += Lz.data
            LX += Lx.data

            counter += B
            sys.stdout.write('\rnow testing ...  epoch {}, {}/{}  '.format(epoch,counter,test_size))
            sys.stdout.flush()
        print("")
        trace(save_path+"/valloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/test_size,LZ/test_size,(LX+LZ)/test_size))                  		
        valloss_dic[str(epoch).zfill(3)]={
                        "Lx":float(LX/test_size),
                        "Lz":float(LZ/test_size),
                        "Lx+Lz":float((LX+LZ)/test_size)}
        with open(save_path+"/valloss.json",'w') as f:
            json.dump(valloss_dic,f,indent=4)
Example 35
def main():
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=10,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=50,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1024,
                        help='number of units')
    parser.add_argument('--type_unit',
                        '-t',
                        choices={'lstm', 'gru'},
                        help='type of recurrent unit')
    parser.add_argument('--layer',
                        '-l',
                        type=int,
                        default=3,
                        help='number of layers')
    parser.add_argument(
        '--min-source-sentence',
        type=int,
        default=2,  # for calculation of ngram 2
        help='minimum length of source sentence')
    parser.add_argument('--max-source-sentence',
                        type=int,
                        default=500,
                        help='maximum length of source sentence')
    parser.add_argument(
        '--min-target-sentence',
        type=int,
        default=2,  # for calculation of ngram 2
        help='minimum length of target sentence')
    parser.add_argument('--max-target-sentence',
                        type=int,
                        default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        help='number of iteration to show log')
    parser.add_argument('--validation-interval',
                        type=int,
                        default=1000,
                        help='number of iterations to evaluate the model '
                        'with validation dataset')
    parser.add_argument('--word_dropout', '-w', type=float, default=0.0)
    parser.add_argument('--denoising_rate', '-d', type=float, default=0.0)
    parser.add_argument('--n_latent', type=int, default=100)
    parser.add_argument('--n_embed',
                        type=int,
                        default=512,
                        help='length of embedding')

    args = parser.parse_args()

    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)
    train_data = [
        (s, t) for s, t in six.moves.zip(train_source, train_target)
        if args.min_source_sentence <= len(s) <= args.max_source_sentence
        and args.min_target_sentence <= len(t) <= args.max_target_sentence
    ]
    train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio([t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit,
                    args.n_embed, args.n_latent, args.type_unit,
                    args.word_dropout, args.denoising_rate)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/rec', 'main/lat', 'main/perp',
        'bleu', 'p', 'r', 'f', 'p1', 'r1', 'f1', 'elapsed_time'
    ]),
                   trigger=(args.log_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}.npz'),
                   trigger=(5, 'epoch'))

    if args.validation_source and args.validation_target:
        test_source = load_data(source_ids, args.validation_source)
        test_target = load_data(target_ids, args.validation_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate(trainer):
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]

            #source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            #print('#  source : ' + source_sentence)
            print('#  result : ' + result_sentence)
            print('#  expect : ' + target_sentence)

        trainer.extend(translate,
                       trigger=(args.validation_interval, 'iteration'))

        # @chainer.training.make_extension()
        # def generate(trainer):
        #     results = model.generate(5)
        #     for i, result in enumerate(results):
        #         print('#  result {}: {}'.format(i+1, ' '.join([source_words[x] for x in result])))

        # trainer.extend(
        #     generate, trigger=(args.validation_interval, 'iteration'))

        # trainer.extend(
        #     CalculateBleu(
        #         model, test_data, 'bleu', device=args.gpu),
        #     trigger=(args.validation_interval, 'iteration'))
        trainer.extend(CalculateBleuRouge(
            model,
            test_data, ['bleu', 'p', 'r', 'f', 'p1', 'r1', 'f1'],
            device=args.gpu),
                       trigger=(args.validation_interval, 'iteration'))

    @chainer.training.make_extension()
    def fit_C(trainer):
        if model.C < 0.5 and updater.epoch > 5:
            #if model.C < 0.5:
            model.C += 0.001
        print('epoch: {}, C: {},'.format(updater.epoch, model.C))

    trainer.extend(fit_C, trigger=(1000, 'iteration'))
    #trainer.extend(fit_C, trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, model)

    print('start training')
    trainer.run()
    print('complete training')

    with open('result/args.txt', 'w') as f:
        args_dict = {}
        for i in dir(args):
            if i.startswith('_'): continue
            args_dict[str(i)] = getattr(args, i)
        json.dump(args_dict,
                  f,
                  ensure_ascii=False,
                  indent=4,
                  sort_keys=True,
                  separators=(',', ': '))

    serializers.save_npz('result/model.npz', model)
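A small companion sketch (not part of the original script; the helper name load_saved_args is an assumption): since the hyperparameters above are written to result/args.txt with json.dump, they can be read back as a plain dict before rebuilding the model and calling serializers.load_npz('result/model.npz', model).

import json


def load_saved_args(path='result/args.txt'):
    # args.txt was written with json.dump, so json.load recovers the dict
    with open(path) as f:
        return json.load(f)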
Esempio n. 36
0
 def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc, test_loss,
                   test_acc, test_time):
     error = 100 * (1 - acc)
     print('epoch {} done'.format(epoch))
     print('train loss: {} error: {}'.format(loss, error))
     if valid_loss is not None:
         valid_error = 100 * (1 - valid_acc)
         print('valid loss: {} error: {}'.format(valid_loss, valid_error))
     else:
         valid_error = None
     if test_loss is not None:
         test_error = 100 * (1 - test_acc)
         print('test  loss: {} error: {}'.format(test_loss, test_error))
         print('test time: {}s'.format(test_time))
     else:
         test_error = None
     if valid_loss is not None and valid_error < state['best_valid_error']:
         save_path = os.path.join('model', '{}.model'.format(model_prefix))
         serializers.save_npz(save_path, n)
         save_path = os.path.join('model', '{}.state'.format(model_prefix))
         serializers.save_npz(save_path, o)
         state['best_valid_error'] = valid_error
         state['best_test_error'] = test_error
     elif valid_loss is None:
         save_path = os.path.join('model', '{}.model'.format(model_prefix))
         serializers.save_npz(save_path, n)
         save_path = os.path.join('model', '{}.state'.format(model_prefix))
         serializers.save_npz(save_path, o)
         state['best_test_error'] = test_error
     if args.save_epoch > 0 and (epoch + 1) % args.save_epoch == 0:
         save_path = os.path.join(
             'model', '{}_{}.model'.format(model_prefix, epoch + 1))
         serializers.save_npz(save_path, n)
         save_path = os.path.join(
             'model', '{}_{}.state'.format(model_prefix, epoch + 1))
         serializers.save_npz(save_path, o)
     clock = time.clock()
     print('elapsed time: {}'.format(clock - state['clock']))
     state['clock'] = clock
     with open(log_file_path, 'a') as f:
         f.write('{},{},{},{},{},{},{}\n'.format(epoch, loss, error,
                                                 valid_loss, valid_error,
                                                 test_loss, test_error))
Esempio n. 37
0
 def save_model(self, model_dir):
     serializers.save_npz(model_dir + "model.npz", self.model)
Esempio n. 38
0
def save_params(file_stem, net, trainer):
    save_npz(file=file_stem + '.npz', obj=net)
    save_npz(file=file_stem + '.states', obj=trainer)
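A hedged companion sketch (load_params is a hypothetical helper that simply mirrors the naming of save_params above): the two files can be restored with chainer.serializers.load_npz into a network and trainer of the same structure.

from chainer.serializers import load_npz


def load_params(file_stem, net, trainer):
    # restore the link parameters and the trainer state written by save_params
    load_npz(file=file_stem + '.npz', obj=net)
    load_npz(file=file_stem + '.states', obj=trainer)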
Esempio n. 39
0
def train_dcgan_labeled(gen, dis, epoch0=0):
    #o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    #o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen = optimizers.Adam(alpha=0.0001, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0001, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    # o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    # o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.000005))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.000005))

    stop_flag_dis = False
    stop_flag_gen = False

    for epoch in xrange(epoch0, n_epoch):
        perm = np.random.permutation(n_train)
        #sum_l_dis = np.float32(0)
        #sum_l_gen = np.float32(0)
        sum_l_dis = []
        sum_l_gen = []

        accum_dis = 0.
        accum_gen = 0.
        prev_time = time.time()
        dis_result = []  # 1 if dis beats gen, 0 otherwise

        for i in xrange(0, n_train, batchsize):
            # discriminator
            # 0: from dataset
            # 1: from noise
            n_ins = len(perm[i:i + batchsize])

            emb_ids = xp.asarray(
                sum([dataset[j] for j in perm[i:i + batchsize]], [])).astype(np.int32)

            x2 = F.reshape(Variable(embed(Variable(emb_ids)).data),
                           (n_ins, 1, max_sent, 512))
            # is a plain reshape enough here, or should we concat instead?

            # train generator
            z = Variable(
                xp.random.uniform(-1, 1, (n_ins, nz), dtype=np.float32))
            x = gen(z)
            # x = fill_eos_after_first_eos.
            yl = dis(x)

            L_gen = F.softmax_cross_entropy(
                yl, Variable(xp.zeros(n_ins, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(
                yl, Variable(xp.ones(n_ins, dtype=np.int32)))
            #if not stop_flag_gen: L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(n_ins, dtype=np.int32)))
            #if not stop_flag_dis: L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(n_ins, dtype=np.int32)))

            dis_result.extend(
                [1. if t == 1 else 0. for t in xp.argmax(yl.data, axis=1)])

            # train discriminator
            # if not stop_flag_dis:
            yl2 = dis(x2 + dis.xp.random.normal(0.,
                                                dis.random_std, x2.data.shape))
            L_dis += F.softmax_cross_entropy(yl2,
                                             Variable(xp.zeros(n_ins, dtype=np.int32)))

            # if not stop_flag_gen:
            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()
            sum_l_gen.append(L_gen.data.get())
            accum_gen += L_gen.data.get()

            # if not stop_flag_dis:
            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()
            sum_l_dis.append(L_dis.data.get())
            accum_dis += L_dis.data.get()

            # print "backward done"
            if i % result_interval == 0:
                per = len(dis_result) * 1. / (time.time() - prev_time)
                prev_time = time.time()
                print i, "\tdis-train:", not stop_flag_dis, "\tgen-train:", not stop_flag_gen, "noise:", dis.random_std, "\t(%.3lfi/s)" % per, datetime.today().strftime("%Y/%m/%d %H:%M:%S")
                print i, "\tLoss dis:", accum_dis / 100 / batchsize, "\tgen:", accum_gen / 100 / batchsize
                WPdis = np.mean(dis_result)
                print i, "\tWP dis:gen =", WPdis, ":", 1 - WPdis

                if (epoch >= 1 or i >= 50000):
                    if WPdis >= 0.8:
                        stop_flag_dis = True
                        stop_flag_gen = False
                        if dis.random_std < dis.max_std:
                            dis.random_std *= 2.0
                    elif WPdis <= 0.2:
                        stop_flag_dis = False
                        stop_flag_gen = True
                        dis.random_std *= 0.5
                    else:
                        stop_flag_dis = False
                        stop_flag_gen = False
                        dis.random_std *= 0.9

                accum_gen = 0.
                accum_dis = 0.
                dis_result = []

            if i % image_save_interval == 0:
                z = (xp.random.uniform(-1, 1, (10, nz), dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                print "make sentences"
                for j, sent_seq in enumerate(make_sentences(x)):
                    sent = []
                    for t in sent_seq:
                        """
                        if t == EOS_str:
                            sent.append("<EOS>."+str(len(sent)))
                            break
                        """
                        if t == EOS_str:
                            t = t.replace(EOS_str, "_")
                        sent.append(t)
                    print "\t", j, " ".join(sent)

        serializers.save_npz("%s/dcgan_model_dis_%d.npz" %
                             (out_model_dir, epoch), dis)
        serializers.save_npz("%s/dcgan_model_gen_%d.npz" %
                             (out_model_dir, epoch), gen)
        serializers.save_npz("%s/dcgan_state_dis_%d.npz" %
                             (out_model_dir, epoch), o_dis)
        serializers.save_npz("%s/dcgan_state_gen_%d.npz" %
                             (out_model_dir, epoch), o_gen)
        # print 'epoch end', epoch, sum_l_gen/n_train, sum_l_dis/n_train
        print 'epoch end', epoch, "dis:", sum(sum_l_dis) / len(sum_l_dis) / batchsize, "gen:", sum(sum_l_gen) / len(sum_l_gen) / batchsize
        dis.max_std *= 0.5
Esempio n. 40
0
def Train():
    # Creat data generator
    batch_tuples, history = {}, {}
    for dataset in args.dataset.split('+'):
        batch_tuples.update({dataset: []})
        for image_size in args.scales_tr:
            iterator = MultiprocessIterator(DataChef.GetExample(
                datasets[dataset]['train'], True, dataset, image_size),
                                            args.minibatch,
                                            n_prefetch=2,
                                            n_processes=args.nb_processes,
                                            shared_mem=20000000,
                                            repeat=True,
                                            shuffle=True)
            batch_tuples[dataset].append(iterator)
        # Keep the log in history
        if dataset in ['LIP', 'MSCOCO', 'PASCAL_SBD']:
            history.update({
                dataset: {
                    'loss': [],
                    'miou': [],
                    'pixel_accuracy': [],
                    'mean_class_accuracy': []
                }
            })
        elif dataset in ['WIDER', 'BAPD']:
            history.update(
                {dataset: {
                    'loss': [],
                    'prediction': [],
                    'groundtruth': []
                }})
    # Random input image size (change it after every x minibatch)
    batch_tuple_indx = np.random.choice(range(len(args.scales_tr)),
                                        args.max_iter // 10)
    batch_tuple_indx = list(np.repeat(batch_tuple_indx, 10))
    # Train
    start_time = time.time()
    for iterk in range(args.checkpoint, len(batch_tuple_indx)):
        # Get a minibatch while sequentially rotating between datasets
        for dataset in args.dataset.split('+'):
            dataBatch = batch_tuples[dataset][batch_tuple_indx[iterk]].next()
            dataBatch = list(zip(*dataBatch))
            # Prepare batch data
            IMG = np.array_split(np.array(dataBatch[0]), len(Model), axis=0)
            LBL = np.array_split(np.array(dataBatch[1]), len(Model), axis=0)
            # Forward
            for device_id, img, lbl in zip(range(len(Model)), IMG, LBL):
                Model[device_id](img, lbl, dataset, train=True)
            # Aggregate reporters from all GPUs
            reporters = []
            for i in range(len(Model)):
                reporters.append(Model[i].reporter)
                Model[i].reporter = {}  # clear reporter
            # History
            for reporter in reporters:
                for k in reporter[dataset].keys():
                    history[dataset][k].append(reporter[dataset][k])
            # Accumulate grads
            for i in range(1, len(Model)):
                Model[0].addgrads(Model[i])
            # Update
            opt.update()
            # Update params of other models
            for i in range(1, len(Model)):
                Model[i].copyparams(Model[0])
        # Report
        if (iterk + 1) % args.report_interval == 0:
            DataChef.Report(history,
                            args.report_interval * len(args.GPUs), (iterk + 1),
                            time.time() - start_time,
                            split='train')
        # Saving the model
        if (iterk + 1) % args.save_interval == 0 or (
                iterk + 1) == len(batch_tuple_indx):
            serializers.save_hdf5(
                '%s/checkpoints/%s_iter_%d_%s.chainermodel' %
                (args.project_folder, args.dataset, iterk + 1, args.suffix),
                Model[0])
            serializers.save_npz(
                '%s/checkpoints/%s_iter_%d_%s.chaineropt' %
                (args.project_folder, args.dataset, iterk + 1, args.suffix),
                opt)
        # Evaluation
        if (iterk + 1) % args.eval_interval == 0:
            Evaluation(splits=args.eval_split)
        # Decrease learning rate (poly in 10 steps)
        if (iterk + 1) % int(args.max_iter / 10) == 0:
            decay_rate = (
                1.0 -
                float(iterk) / args.max_iter)**args.optimizer['lr_decay_power']
            # Learning rate of fresh layers
            opt.lr *= decay_rate
            # Learning rate of pretrained layers
            for name, param in opt.target.namedparams():
                if name.startswith('/predictor/'):
                    param.update_rule.hyperparam.lr *= decay_rate
Esempio n. 41
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)

            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
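A self-contained round-trip sketch (illustrative only; TinyMLP and the mlp_demo.* file names are stand-ins, not the train_mnist.MLP used above): it shows the same save/load pattern, i.e. saving the model and optimizer with save_npz and restoring them into freshly built objects of the same shape with load_npz.

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import serializers


class TinyMLP(chainer.Chain):
    def __init__(self, n_units, n_out):
        super(TinyMLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(784, n_units)
            self.l2 = L.Linear(n_units, n_out)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))


model = L.Classifier(TinyMLP(100, 10))
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

# one dummy update so the optimizer state exists before it is serialized
x = np.random.rand(5, 784).astype(np.float32)
t = np.random.randint(0, 10, size=5).astype(np.int32)
optimizer.update(model, x, t)

serializers.save_npz('mlp_demo.model', model)      # parameters
serializers.save_npz('mlp_demo.state', optimizer)  # optimizer state

# rebuild identically shaped objects and load the snapshots back
resumed_model = L.Classifier(TinyMLP(100, 10))
resumed_optimizer = chainer.optimizers.Adam()
resumed_optimizer.setup(resumed_model)
serializers.load_npz('mlp_demo.model', resumed_model)
serializers.load_npz('mlp_demo.state', resumed_optimizer)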
Esempio n. 42
0
 def save(self, fname="data/cnn.model"):
     serializers.save_npz(fname, self)
Esempio n. 43
0
def convert_gl2ch(dst_net, dst_params_file_path, dst_params, dst_param_keys,
                  src_params, src_param_keys, ext_src_param_keys,
                  ext_src_param_keys2, src_model):

    dst_param_keys = [key.replace('/W', '/weight') for key in dst_param_keys]
    dst_param_keys = [
        key.replace('/post_activ/', '/stageN/post_activ/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/final_block/', '/stageN/final_block/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/stem1_unit/', '/stage0/stem1_unit/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/stem2_unit/', '/stage0/stem2_unit/')
        for key in dst_param_keys
    ]

    src_param_keys.sort()
    src_param_keys.sort(key=lambda var: [
        '{:10}'.format(int(x)) if x.isdigit() else x
        for x in re.findall(r'[^0-9]|[0-9]+', var)
    ])

    dst_param_keys.sort()
    dst_param_keys.sort(key=lambda var: [
        '{:10}'.format(int(x)) if x.isdigit() else x
        for x in re.findall(r'[^0-9]|[0-9]+', var)
    ])

    dst_param_keys = [key.replace('/weight', '/W') for key in dst_param_keys]
    dst_param_keys = [
        key.replace('/stageN/post_activ/', '/post_activ/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/stageN/final_block/', '/final_block/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/stage0/stem1_unit/', '/stem1_unit/')
        for key in dst_param_keys
    ]
    dst_param_keys = [
        key.replace('/stage0/stem2_unit/', '/stem2_unit/')
        for key in dst_param_keys
    ]

    ext2_src_param_keys = [
        key for key in src_param_keys if key.endswith(".beta")
    ]
    ext2_dst_param_keys = [
        key for key in dst_param_keys if key.endswith("/beta")
    ]
    ext3_src_param_keys = {
        ".".join(v.split(".")[:-1]): i
        for i, v in enumerate(ext2_src_param_keys)
    }
    ext3_dst_param_keys = list(
        map(lambda x: x.split('/')[1:-1], ext2_dst_param_keys))

    for i, src_key in enumerate(ext_src_param_keys):
        src_key1 = src_key.split(".")[-1]
        src_key2 = ".".join(src_key.split(".")[:-1])
        dst_ind = ext3_src_param_keys[src_key2]
        dst_path = ext3_dst_param_keys[dst_ind]
        obj = dst_net
        for j, sub_path in enumerate(dst_path):
            obj = getattr(obj, sub_path)
        if src_key1 == 'running_mean':
            assert (obj.avg_mean.shape == src_params[src_key].shape), \
                "src_key={}, dst_path={}, src_shape={}, obj.avg_mean.shape={}".format(
                    src_key, dst_path, src_params[src_key].shape, obj.avg_mean.shape)
            obj.avg_mean = src_params[src_key]._data[0].asnumpy()
        elif src_key1 == 'running_var':
            assert (obj.avg_var.shape == src_params[src_key].shape)
            obj.avg_var = src_params[src_key]._data[0].asnumpy()

    if src_model in ["condensenet74_c4_g4", "condensenet74_c8_g8"]:
        assert (dst_net.output.fc.index.shape ==
                src_params["output.1.index"].shape)
        dst_net.output.fc.index = src_params["output.1.index"]._data[
            0].asnumpy().astype(np.int32)
        ext_src_param_keys2.remove("output.1.index")

        ext2_src_param_keys = [
            key for key in src_param_keys if key.endswith(".conv1.conv.weight")
        ]
        ext2_dst_param_keys = [
            key for key in dst_param_keys if key.endswith("/conv1/conv/W")
        ]
        ext3_src_param_keys = {
            ".".join(v.split(".")[:-2]): i
            for i, v in enumerate(ext2_src_param_keys)
        }
        ext3_dst_param_keys = list(
            map(lambda x: x.split('/')[1:-2], ext2_dst_param_keys))

        for i, src_key in enumerate(ext_src_param_keys2):
            src_key2 = ".".join(src_key.split(".")[:-1])
            dst_ind = ext3_src_param_keys[src_key2]
            dst_path = ext3_dst_param_keys[dst_ind]
            obj = dst_net
            for j, sub_path in enumerate(dst_path):
                obj = getattr(obj, sub_path)
            assert (obj.index.shape == src_params[src_key].shape), \
                "src_key={}, dst_path={}, src_shape={}, obj.index.shape={}".format(
                    src_key, dst_path, src_params[src_key].shape, obj.index.shape)
            obj.index = src_params[src_key]._data[0].asnumpy().astype(np.int32)

    for i, (src_key, dst_key) in enumerate(zip(src_param_keys,
                                               dst_param_keys)):
        assert (dst_params[dst_key].array.shape == src_params[src_key].shape), \
            "src_key={}, dst_key={}, src_shape={}, dst_shape={}".format(
                src_key, dst_key, src_params[src_key].shape, dst_params[dst_key].array.shape)
        dst_params[dst_key].array = src_params[src_key]._data[0].asnumpy()

    from chainer.serializers import save_npz
    save_npz(file=dst_params_file_path, obj=dst_net)
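A small verification sketch (an assumption, not part of the original converter): save_npz writes a standard NumPy .npz archive keyed by parameter paths, so the file written above can be spot-checked with NumPy alone, without rebuilding the Chainer network.

import numpy as np


def list_saved_params(npz_path):
    # print every parameter path and shape stored in the .npz file
    with np.load(npz_path) as saved:
        for name in sorted(saved.files):
            print(name, saved[name].shape)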
Esempio n. 44
0
def save(model, optimizer, vocab, save_name, args):
    serializers.save_npz(save_name+"model", copy.deepcopy(model).to_cpu())
    serializers.save_npz(save_name+"optimizer", optimizer)
    json.dump(vocab, open(save_name+"vocab.json", "w"))
    print('save', save_name)
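A hedged companion sketch (the load helper below is hypothetical and simply mirrors the file names used by save() above): because the model was copied to the CPU before saving, the snapshot can be loaded into a freshly built CPU model and only then moved to the GPU; the vocabulary comes back from the JSON file.

import json

from chainer import serializers


def load(model, optimizer, save_name, gpu=-1):
    # mirror image of save(): restore parameters, optimizer state and vocabulary
    serializers.load_npz(save_name + "model", model)
    serializers.load_npz(save_name + "optimizer", optimizer)
    with open(save_name + "vocab.json") as f:
        vocab = json.load(f)
    if gpu >= 0:
        model.to_gpu(gpu)
    return vocab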
Esempio n. 45
0
def train(epochs, iterations, batchsize, modeldir, extension, time_width,
          mel_bins, sampling_rate, g_learning_rate, d_learning_rate, beta1,
          beta2, identity_epoch, adv_type, residual_flag, data_path):

    # Dataset Definition
    dataloader = DatasetLoader(data_path)

    # Model & Optimizer Definition
    generator = GeneratorWithCIN(adv_type=adv_type)
    generator.to_gpu()
    gen_opt = set_optimizer(generator, g_learning_rate, beta1, beta2)

    discriminator = Discriminator()
    discriminator.to_gpu()
    dis_opt = set_optimizer(discriminator, d_learning_rate, beta1, beta2)

    # Loss Function Definition
    lossfunc = StarGANVC2LossFunction()

    for epoch in range(epochs):
        sum_dis_loss = 0
        sum_gen_loss = 0
        for batch in range(0, iterations, batchsize):
            x_sp, x_label, y_sp, y_label = dataloader.train(batchsize)

            if adv_type == 'sat':
                y_fake = generator(x_sp, F.concat([y_label, x_label]))
            elif adv_type == 'orig':
                y_fake = generator(x_sp, y_label)
            else:
                raise AttributeError

            y_fake.unchain_backward()

            if adv_type == 'sat':
                advloss_dis_real, advloss_dis_fake = lossfunc.dis_loss(
                    discriminator, y_fake, x_sp, F.concat([y_label, x_label]),
                    F.concat([x_label, y_label]), residual_flag)
            elif adv_type == 'orig':
                advloss_dis_real, advloss_dis_fake = lossfunc.dis_loss(
                    discriminator, y_fake, x_sp, y_label, x_label,
                    residual_flag)
            else:
                raise AttributeError

            dis_loss = advloss_dis_real + advloss_dis_fake
            discriminator.cleargrads()
            dis_loss.backward()
            dis_opt.update()
            dis_loss.unchain_backward()

            if adv_type == 'sat':
                y_fake = generator(x_sp, F.concat([y_label, x_label]))
                x_fake = generator(y_fake, F.concat([x_label, y_label]))
                x_identity = generator(x_sp, F.concat([x_label, x_label]))
                advloss_gen_fake, cycle_loss = lossfunc.gen_loss(
                    discriminator, y_fake, x_fake, x_sp,
                    F.concat([y_label, x_label]), residual_flag)
            elif adv_type == 'orig':
                y_fake = generator(x_sp, y_label)
                x_fake = generator(y_fake, x_label)
                x_identity = generator(x_sp, x_label)
                advloss_gen_fake, cycle_loss = lossfunc.gen_loss(
                    discriminator, y_fake, x_fake, x_sp, y_label,
                    residual_flag)
            else:
                raise AttributeError

            if epoch < identity_epoch:
                identity_loss = lossfunc.identity_loss(x_identity, x_sp)
            else:
                identity_loss = call_zeros(advloss_dis_fake)

            gen_loss = advloss_gen_fake + cycle_loss + identity_loss
            generator.cleargrads()
            gen_loss.backward()
            gen_opt.update()
            gen_loss.unchain_backward()

            sum_dis_loss += dis_loss.data
            sum_gen_loss += gen_loss.data

            if batch == 0:
                serializers.save_npz(f"{modeldir}/generator_{epoch}.model",
                                     generator)

        print(f"epoch: {epoch}")
        print(
            f"dis loss: {sum_dis_loss / iterations} gen loss: {sum_gen_loss / iterations}"
        )
Esempio n. 46
0
        test_losses = []
        test_accuracies = []
        while True:
            test_batch = test_iter.next()
            text_test, target_test = concat_examples(test_batch)

            # Forward the test data
            prediction_test = model(text_test)

            # Calculate the loss
            loss_test = F.softmax_cross_entropy(prediction_test, target_test)
            test_losses.append(loss_test.data)

            # Calculate the accuracy
            accuracy = F.accuracy(prediction_test, target_test)
            test_accuracies.append(accuracy.data)

            if test_iter.is_new_epoch:
                test_iter.epoch = 0
                test_iter.current_position = 0
                test_iter.is_new_epoch = False
                test_iter._pushed_position = None
                break

        print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
            np.mean(test_losses), np.mean(test_accuracies)))

serializers.save_npz('./neural_network/my_mnist.model', model)
Esempio n. 47
0
        model.cleargrads()  # reset the internal gradients before learning
        loss.backward()
        opt.update()

    # skip the test
    #if (i + 1) % test_interval != 0: continue
    '''
    Test
     - feed one example at a time (batch size = 1)
     - no padding
    '''
    epoch_accu = 0
    for b in I.SerialIterator(test, 1, repeat=False, shuffle=False):
        # arrange the data
        enc, dec = zip(*b)
        # forward pass
        ts = model(enc, dec[:-1])
        # compute the accuracy
        accu = sum(F.accuracy(t, w) for t, w in zip(ts, dec[1:]))
        epoch_accu += accu.data / (len(test) * len(ts))
        #print(" ".join(id_word[F.argmax(t).data] for t in ts))
    ''' Output '''
    message = '{:>3} | {:>8.5f} | {:>6.1%}'.format(i + 1, epoch_loss,
                                                   epoch_accu)
    print(message)
    with open(dir + '/log.txt', 'a') as f:
        f.write(message + '\n')

# save the model
S.save_npz(dir + '/model/epoch_' + str(i + 1) + '.npz', model)
Esempio n. 48
0
def batch_train_loop(bucket_fname, num_epochs,
                     batch_size=10, num_buckets=NUM_BUCKETS,
                     num_training=2,
                     bucket_width=BUCKET_WIDTH, log_mode="a", last_epoch_id=0):

    # Set up log file for loss
    log_train_fil = open(log_train_fil_name, mode=log_mode)
    log_train_csv = csv.writer(log_train_fil, lineterminator="\n")

    log_dev_fil = open(log_dev_fil_name, mode=log_mode)
    log_dev_csv = csv.writer(log_dev_fil, lineterminator="\n")

    # initialize perplexity on dev set
    # save model when new epoch value is lower than previous
    pplx = float("inf")
    bleu_score = 0

    sys.stderr.flush()

    for epoch in range(num_epochs):
        train_count = 0
        with tqdm(total=num_training) as pbar:
            sys.stderr.flush()
            loss_per_epoch = 0
            out_str = "epoch={0:d}, iter={1:d}, loss={2:.4f}, mean loss={3:.4f}, bucket={4:d}".format(
                            epoch+1, 0, 0, 0,0)
            pbar.set_description(out_str)

            for buck_indx in range(num_buckets):
                bucket_data = pickle.load(open(bucket_data_fname.format(buck_indx+1), "rb"))
                buck_pad_lim = (buck_indx+1) * bucket_width

                for i in range(0, len(bucket_data), batch_size):
                    if train_count >= num_training:
                        break
                    next_batch_end = min(batch_size, (num_training-train_count))
                    # print("current batch")
                    # print(bucket_data[i:i+next_batch_end])
                    # print("bucket limit", buck_pad_lim)
                    curr_len = len(bucket_data[i:i+next_batch_end])

                    loss = model.encode_decode_train_batch(bucket_data[i:i+next_batch_end], buck_pad_lim, buck_pad_lim)
                    train_count += curr_len

                    # set up for backprop
                    model.cleargrads()
                    loss.backward()
                    # update parameters
                    optimizer.update()
                    # store loss value for display
                    loss_val = float(loss.data)
                    loss_per_epoch += loss_val

                    it = (epoch * NUM_TRAINING_SENTENCES) + curr_len

                    out_str = "epoch={0:d}, iter={1:d}, loss={2:.4f}, mean loss={3:.4f}, bucket={4:d}".format(
                               epoch+1, it, loss_val, (loss_per_epoch / (i+1)), (buck_indx+1))
                    pbar.set_description(out_str)
                    pbar.update(curr_len)

                    # log every 10 batches
                    if i % 10 == 0:
                        log_train_csv.writerow([it, loss_val])

                if train_count >= num_training:
                    break

        print("finished training on {0:d} sentences".format(num_training))
        print("{0:s}".format("-"*50))
        print("computing perplexity")
        # pplx_new = compute_dev_pplx()
        pplx_new = compute_pplx(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)

        if pplx_new > pplx:
            print("perplexity went up during training, breaking out of loop")
            break
        
        if (epoch+1) % ITERS_TO_SAVE == 0:
            print("Saving model")
            serializers.save_npz(model_fil.replace(".model", "_{0:d}.model".format(last_epoch_id+epoch+1)), model)
            print("Finished saving model")

        pplx = pplx_new
        print(log_train_fil_name)
        print(log_dev_fil_name)
        print(model_fil.replace(".model", "_{0:d}.model".format(epoch+1)))

        if (epoch+1) % ITERS_TO_SAVE == 0:
            bleu_score = compute_bleu(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)

        # log pplx and bleu score
        log_dev_csv.writerow([(last_epoch_id+epoch+1), pplx_new, bleu_score])
        log_train_fil.flush()
        log_dev_fil.flush()
    print("Simple predictions (╯°□°)╯︵ ┻━┻")
    print("training set predictions")
    _ = predict(s=0, num=2, plot=False)
    print("Simple predictions (╯°□°)╯︵ ┻━┻")
    print("dev set predictions")
    _ = predict(s=NUM_TRAINING_SENTENCES, num=3, plot=False)
    # print("{0:s}".format("-"*50))
    # compute_bleu(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)
    # print("{0:s}".format("-"*50))

    print("Final saving model")
    serializers.save_npz(model_fil, model)
    print("Finished saving model")

    # close log file
    log_train_fil.close()
    log_dev_fil.close()
    print(log_train_fil_name)
    print(log_dev_fil_name)
    print(model_fil)
        x -= 120 # subtract mean
        xc = Variable(x.copy(), volatile=True)
        x = Variable(x)

        y = model(x)

        feature = vgg(xc)
        feature_hat = vgg(y)

        L_feat = lambda_f * F.mean_squared_error(Variable(feature[2].data), feature_hat[2]) # compute for only the output of layer conv3_3

        L_style = Variable(xp.zeros((), dtype=np.float32))
        for f, f_hat, g_s in zip(feature, feature_hat, gram_s):
            L_style += lambda_s * F.mean_squared_error(gram_matrix(f_hat), Variable(g_s.data))

        L_tv = lambda_tv * total_variation_regularization(y)
        L = L_feat + L_style + L_tv

        print '(epoch {}) batch {}/{}... training loss is...{}'.format(epoch, i, n_iter, L.data)

        L.backward()
        O.update()

        if args.checkpoint > 0 and i % args.checkpoint == 0:
            serializers.save_npz('models/style_{}_{}.model'.format(epoch, i), model)

    print 'save "style.model"'
    serializers.save_npz('models/style_{}.model'.format(epoch), model)

serializers.save_npz('models/style.model', model)
Esempio n. 50
0
    exec(txt)

    # load convolution weight (Convolution2D.W is out_ch * in_ch * filter size; reshape it to (out_ch, in_ch, 3, 3))
    txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
        i + 1, offset, offset +
        (out_ch * in_ch * ksize * ksize), out_ch, in_ch, ksize, ksize)
    offset += (out_ch * in_ch * ksize * ksize)
    exec(txt)
    print(i + 1, offset)

# load the last convolution weight (only the Bias and Convolution2D are loaded)
in_ch = 1024
out_ch = last_out
ksize = 1

txt = "yolov2.bias%d.b.data = dat[%d:%d]" % (i + 2, offset, offset + out_ch)
offset += out_ch
exec(txt)

txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
    i + 2, offset, offset +
    (out_ch * in_ch * ksize * ksize), out_ch, in_ch, ksize, ksize)
offset += out_ch * in_ch * ksize * ksize
exec(txt)
print(i + 2, offset)

print("save weights file to yolov2_darknet_hdf5.model")
serializers.save_hdf5("yolov2_darknet_hdf5.model", yolov2)
print("save weights file to yolov2_darknet.model")
serializers.save_npz("yolov2_darknet.model", yolov2)
Esempio n. 51
0
        return self.l2(h3)


# -- set up the model and optimizer --
model = MS()
optimizer = optimizers.Adam()
optimizer.setup(model)

# -- training --
iterator = iterators.SerialIterator(train_data, 100)
updater = training.StandardUpdater(iterator, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'))
trainer.extend(extensions.ProgressBar())
trainer.run()

# -- save the model --
serializers.save_npz("ms_classification.npz", model)

# -- test --
correct = 0
for i in range(len(test_data)):
    x = Variable(np.array([test_data[i][0]], dtype=np.float32))
    t = test_data[i][1]
    y = model.predict(x)
    maxIndex = np.argmax(y.data)
    if (maxIndex == t):
        correct += 1

# -- accuracy --
print("Correct:", correct, "Total:", len(test_data), "Accuracy:",
      correct / len(test_data) * 100, "%")
Esempio n. 52
0
 def save_model(self, outputfile):
     serializers.save_npz(outputfile, self.model)
Esempio n. 53
0
	Nepoch = 400
	Probloss_val = xp.asarray(np.zeros(Nepoch))
	Probloss_train = xp.asarray(np.zeros(Nepoch))
	start_at = time.time()
 	print "Starting training..."
	with cupy.cuda.Device(gpu_id):
		epoch = 0
		period_start_at = time.time()
		bi = 0
		curr_epoch = 0
		MER_val = np.ones(Nepoch)
		while True:
			#monitor objective value
			if bi % size_epoch == 0:
				if curr_epoch % monitor_frequency == 0 or curr_epoch == (Nepoch-1):
					serializers.save_npz(savedir + '/model_%d.model' % curr_epoch, model) # save model every epoch
					MER_val[curr_epoch] = objective_function(model, x_val, y_val, n_per_sample_val, z_monitor_all_val)
					now = time.time()
					tput = float(size_epoch*monitor_frequency*batchsize) / (now-period_start_at)
					tpassed = now-start_at
					print "   %.1fs Epoch %d, batch %d, Probloss on Validation Set %.4f, %.2f S/s" % \
						(tpassed, curr_epoch, bi, MER_val[curr_epoch],tput)
					# Reset
					period_start_at = time.time()
				
				curr_epoch += 1
				if curr_epoch >= Nepoch:
					print("we're stopping")
					break
			bi += 1  # Batch index
			indexes = np.sort(np.random.choice(N, batchsize, replace=False))
Esempio n. 54
0
def main(use_gpu=-1):
    start_time = time.clock()
    # select processing unit
    if use_gpu >= 0:
        import cupy as cp
        xp = cp
        chainer.cuda.get_device(use_gpu).use()
    else:
        xp = np
    # paths set
    training_dataset_path = './samples/sample_dataset/mnist/mnist_training.csv'
    validation_dataset_path = './samples/sample_dataset/mnist/mnist_test.csv'
    image_path = './samples/sample_dataset/mnist'
    # setup network
    model = BinaryConnectMnistLeNet()
    if use_gpu >= 0:
        model.to_gpu()
    optimizer = chainer.optimizers.Adam(alpha=0.001,
                                        beta1=0.9,
                                        beta2=0.999,
                                        eps=10**(-8),
                                        weight_decay_rate=0)
    optimizer.setup(model)

    # setup dataset(training)
    train_image_list, train_image_label_list = load_dataset(
        training_dataset_path, image_path)

    # setup dataset(validation)
    validation_image_list, validation_image_label_list = load_dataset(
        validation_dataset_path, image_path)

    epoch = 100
    batchsize = 64
    accuracy_train_list, accuracy_val_list = [], []

    # learning
    for ep in range(0, epoch):
        print('epoch', ep + 1)
        # before learning, we have to shuffle the training data so that the
        # network sees the examples in a different order each epoch.
        zipped_train_list = list(zip(train_image_list, train_image_label_list))
        random.shuffle(zipped_train_list)
        learn_image_list, learn_label_list = zip(*zipped_train_list)
        learn_image_list = xp.array(list(learn_image_list))
        learn_label_list = xp.array(list(learn_label_list))
        batch_times = 0
        accuracy_train = 0
        for b in range(0, len(learn_image_list), batchsize):
            model.cleargrads()
            x = chainer.Variable(
                xp.asarray(learn_image_list[b:b + batchsize]).astype(
                    xp.float32))
            y = chainer.Variable(
                xp.asarray(learn_label_list[b:b + batchsize]).astype(xp.int32))
            h = model(x)
            # CategoricalCrossEntropy doesn't exist in Chainer, so softmax_cross_entropy is used instead.
            loss = F.softmax_cross_entropy(h, y)
            accuracy_train += F.accuracy(h, y).data
            batch_times += 1
            loss.backward()
            optimizer.update()

        accuracy_train_list.append(1 - (accuracy_train / batch_times))

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x_valid = chainer.Variable(
                xp.asarray(validation_image_list).astype(xp.float32))
            y_valid_acc = chainer.Variable(
                xp.asarray(validation_image_label_list).astype(xp.int32))
            h_valid = model(x_valid)
            accuracy_val = F.accuracy(h_valid, y_valid_acc)

        accuracy_val_list.append(1 - accuracy_val.data)

    serializers.save_npz('./models/binary_connect_mnist_LeNet', model)
    print("Time to finish learning:" + str(time.clock() - start_time))
    # draw accuracy graph

    axis_x = np.arange(0, epoch, 1)
    y0 = accuracy_train_list
    y1 = accuracy_val_list
    plt.plot(axis_x, y0, label='train')
    plt.plot(axis_x, y1, label='validation')
    plt.title('Learning Curve', fontsize=20)
    plt.xlabel('epoch', fontsize=16)
    plt.ylabel('Error rate')
    plt.tick_params(labelsize=14)
    plt.grid(True)
    plt.legend(loc='upper right')
    plt.show()
Esempio n. 55
0
    loss = F.mean_squared_error(y, t)  # mean squared error

    loss.backward()              # backpropagation
    optimizer.update()           # optimization step

    # show intermediate results
    if epoch % 1000 == 0:
        # compute the loss and accuracy
        loss_val = loss.data

        print('epoch:', epoch)
        print('x:\n', x.data)
        print('t:\n', t.data)
        print('y:\n', y.data)

        print('train mean loss={}'.format(loss_val))  # training loss, accuracy
        print(' - - - - - - - - - ')

    # stop once epoch reaches n_epoch
    if epoch >= n_epoch:
        break

    epoch += 1

# save the model and the optimizer
print('save the model')
serializers.save_npz('xor_mlp.model', model)
print('save the optimizer')
serializers.save_npz('xor_mlp.state', optimizer)
Esempio n. 56
0
    itr_test = 0
    sum_test_loss1 = 0
    sum_test_loss2 = 0
    sum_test_loss = 0
    sum_test_accuracy1 = 0
    sum_test_accuracy2 = 0
    for i in range(0, len(test_data) - args.testbatchsize, args.testbatchsize):
        x1, x2, t1, t2, z = mini_batch(test_data[i:i + args.testbatchsize])
        with chainer.no_backprop_mode():
            with chainer.using_config('train', False):
                y1, y2 = model(x1, x2)
        itr_test += 1
        loss1 = F.mean(F.softmax_cross_entropy(y1, t1, reduce='no') * z)
        loss2 = F.sigmoid_cross_entropy(y2, t2)
        loss = loss1 + loss2
        sum_test_loss1 += loss1.data
        sum_test_loss2 += loss2.data
        sum_test_loss += loss.data
        sum_test_accuracy1 += F.accuracy(y1, t1).data
        sum_test_accuracy2 += F.binary_accuracy(y2, t2).data
    logging.info(
        'epoch = {}, iteration = {}, train loss avr = {}, test_loss = {}, {}, {}, test accuracy = {}, {}'
        .format(optimizer.epoch + 1, optimizer.t, sum_loss_epoch / itr_epoch,
                sum_test_loss1 / itr_test, sum_test_loss2 / itr_test,
                sum_test_loss / itr_test, sum_test_accuracy1 / itr_test,
                sum_test_accuracy2 / itr_test))

    optimizer.new_epoch()

print('save the model')
serializers.save_npz(args.model, model)
print('save the optimizer')
serializers.save_npz(args.state, optimizer)
Esempio n. 57
0
            i + 1, perp, throuput))
        cur_at = now
        cur_log_perp.fill(0)

    if (i + 1) % jump == 0:
        epoch += 1
        print('evaluate')
        now = time.time()
        perp = evaluate(valid_data)
        print('epoch {} validation perplexity: {:.2f}'.format(epoch, perp))
        cur_at += time.time() - now  # skip time of evaluation

        # Save the model and the optimizer
        print('save the model')
        strtime = datetime.now().strftime('%Y%m%d%H%M%S')
        serializers.save_npz('jsai2016ptb_dialogue_%s.model' % (strtime),
                             model)
        serializers.save_npz('jsai2016ptb_dialogueQ_%s.model' % (strtime),
                             modelQ)
        serializers.save_npz('jsai2016ptb_dialogueA_%s.model' % (strtime),
                             modelA)
        print('save the optimizer')
        serializers.save_npz('jsai2016ptb_dialogue_%s.state' % (strtime),
                             optimizer)

        if epoch >= 6:
            optimizer.lr /= 1.2
            optimizerQ.lr /= 1.2
            optimizerA.lr /= 1.2
            print('learning rate =', optimizer.lr)

    sys.stdout.flush()
Esempio n. 58
0
def pretraining():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--batchsize', type=int, default=256)
    args = parser.parse_args()

    xp = np
    gpu_id = args.gpu
    seed = args.seed
    train, _ = mnist.get_mnist()
    train, _ = convert.concat_examples(train, device=gpu_id)
    batchsize = args.batchsize
    model = StackedDenoisingAutoEncoder(input_dim=train.shape[1])
    if chainer.cuda.available and args.gpu >= 0:
        xp = cp
        model.to_gpu(gpu_id)
    xp.random.seed(seed)

    # Layer-Wise Pretrain
    print("Layer-Wise Pretrain")
    for i, dae in enumerate(model.children()):
        print("Layer {}".format(i + 1))
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        clf = L.Classifier(dae, lossfun=mean_squared_error)
        clf.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            clf.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(clf)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater, (50000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()
        train = dae.encode(train).data

    # Finetuning
    print("fine tuning")
    with chainer.using_config("train", False):
        train, _ = mnist.get_mnist()
        train, _ = convert.concat_examples(train, device=gpu_id)
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        model = L.Classifier(model, lossfun=mean_squared_error)
        model.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            model.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(model)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater, (100000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()

    outfile = "StackedDenoisingAutoEncoder-seed{}.model".format(seed)
    serializers.save_npz(outfile, model.predictor)
Esempio n. 59
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen',
                        '-l',
                        type=int,
                        default=35,
                        help='Number of words in each mini-batch '
                        '(= length of truncated BPTT)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        # Evaluation routine to be used for validation and test.
        model.predictor.train = False
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        evaluator.predictor.train = False  # dropout does nothing
        sum_perp = 0
        data_count = 0
        for batch in copy.copy(iter):
            x, t = convert.concat_examples(batch, args.gpu)
            loss = evaluator(x, t)
            sum_perp += loss.data
            data_count += 1
        model.predictor.train = True
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each iteration.
        for i in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            batch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the device
            # self.converter does this job
            # (it is chainer.dataset.concat_examples by default)
            x, t = convert.concat_examples(batch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            count += 1

        sum_perp += loss.data
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(evaluate(model,
                                                              val_iter)))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
Esempio n. 60
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=1000, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    if 0 <= args.gpu:
        cuda.get_device_from_id(args.gpu).use()

    # create the VAE object defined in net
    textVae = net.VAE(600, args.dimz, 300, 100)
    chainer.serializers.load_npz("birds_txt.npz", textVae)
    if 0 <= args.gpu:
        textVae.to_gpu()  # move the model to the GPU

    #model = netN.VAE(textVae, n_latent=10, ch1=5000, ch2=10000, ch3=16384)
    model = net_img.VAE(1, 20, 64, textVae)
    #chainer.serializers.load_npz("mymodel_img.npz", model)
    if 0 <= args.gpu:
        model.to_gpu()  # move the model to the GPU
    # optimizer (for updating the parameters)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # load the model (npz is the NumPy format)
    """
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)
    """

    traint = np.load('birds_txt.npy')

    traini = np.load('birds_img.npy')

    #traini = traini.reshape((len(traini), 1, 128, 128))

    train = tuple_dataset.TupleDataset(traint, traini)

    train, test = train_test_split(train, test_size=0.2, random_state=50)


#------------------ dataset set-up via iterators -----------------------------------
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
#---------------------------------------------------------------
    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # used in the training with 'loss_func' option
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        device=args.gpu, loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu,
                                        eval_func=model.get_loss_func(k=10)))
    # trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    # trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # run the trainer
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(128, 128))
        fig.savefig(filename)


    serializers.save_npz("birds_all.npz", model)