def test_resumed_trigger_sparse_call(self):
    trainer = testing.get_trainer_with_mock_updater(
        stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
    accumulated = False
    with tempfile.NamedTemporaryFile(delete=False) as f:
        trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
        for expected, finished in zip(self.expected[:self.resume],
                                      self.finished[:self.resume]):
            trainer.updater.update()
            accumulated = accumulated or expected
            if random.randrange(2):
                self.assertEqual(trigger(trainer), accumulated)
                self.assertEqual(trigger.finished, finished)
                accumulated = False
        serializers.save_npz(f.name, trigger)

        trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
        serializers.load_npz(f.name, trigger)
        for expected, finished in zip(self.expected[self.resume:],
                                      self.finished[self.resume:]):
            trainer.updater.update()
            accumulated = accumulated or expected
            if random.randrange(2):
                self.assertEqual(trigger(trainer), accumulated)
                self.assertEqual(trigger.finished, finished)
                accumulated = False
def main():
    xdata, ydata, zdata, ids, vocabulary = reader.load_master_data('tabelog_final_s')
    allx, ally, allz = reader.load_train_data(
        ids, xdata, ydata, zdata, batch_size, steps, vocab_size, out_size)
    train_x_data, test_x_data, train_y_data, test_y_data, train_z_data, test_z_data = \
        reader.split_data(allx, ally, allz)
    for epoch in range(20):
        print('epoch %d' % epoch)
        for i in range(len(train_z_data)):
            loss = train_for(i, train_z_data, train_y_data)
            if i % 10 == 0:
                gc.collect()
                # accuracy(test_x_data, test_y_data)
            if i % 50 == 0:
                # Save the model and the optimizer
                print('save the model')
                serializers.save_npz('data/chainer_%d_%d.model' % (epoch, i), rnn)
                print('save the optimizer')
                serializers.save_npz('data/chainer_%d_%d.state' % (epoch, i), optimizer)


import tensorflow as tf
import numpy as np

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('mode', 'train', 'train or console')
def _test_trigger(self, trigger, key, accuracies, expected,
                  resume=None, save=None):
    trainer = testing.get_trainer_with_mock_updater(
        stop_trigger=(len(accuracies), 'iteration'),
        iter_per_epoch=self.iter_per_epoch)
    updater = trainer.updater

    def _serialize_updater(serializer):
        updater.iteration = serializer('iteration', updater.iteration)
        updater.epoch = serializer('epoch', updater.epoch)
        updater.is_new_epoch = serializer(
            'is_new_epoch', updater.is_new_epoch)
    trainer.updater.serialize = _serialize_updater

    def set_observation(t):
        t.observation = {key: accuracies[t.updater.iteration - 1]}
    trainer.extend(set_observation, name='set_observation',
                   trigger=(1, 'iteration'), priority=2)

    invoked_iterations = []

    def record(t):
        invoked_iterations.append(t.updater.iteration)
    trainer.extend(record, name='record', trigger=trigger, priority=1)

    if resume is not None:
        serializers.load_npz(resume, trainer)

    trainer.run()
    self.assertEqual(invoked_iterations, expected)

    if save is not None:
        serializers.save_npz(save, trainer)
def saveInfo(self, model, optimizer, epoch, outputFolder, saveEach):
    if epoch % saveEach == 0:
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
        bname = outputFolder + '/' + model.getName() + '_' + str(epoch)
        serializers.save_npz(bname + '.model', model)
        serializers.save_npz(bname + '.state', optimizer)
def train(self, epoch=10, batch_size=32, gpu=False):
    if gpu:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu else np
    self.batch_size = batch_size

    label_types = ['none', 'tap', 'up', 'down', 'right', 'left']
    self.model = Alex(len(label_types))
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(self.model)
    if gpu:
        self.model.to_gpu()

    training_data = TrainingData(IMAGE_ROOT, NOTE_ROOT, VIDEO_ROOT,
                                 SONG_LIST_PATH)
    self.x_train, self.x_test, self.y_train, self.y_test = \
        training_data.get_train_data(label_types)
    data_size = self.x_train.shape[0]

    for ep in range(epoch):
        print('epoch {0}/{1}: (learning rate={2})'.format(
            ep + 1, epoch, optimizer.lr))
        indexes = np.random.permutation(data_size)
        for i in range(0, data_size, self.batch_size):
            x_batch = self.x_train[indexes[i:i + self.batch_size]]
            y_batch = self.y_train[indexes[i:i + self.batch_size]]
            # Move the minibatch to the device the model lives on.
            x = chainer.Variable(xp.asarray(x_batch))
            t = chainer.Variable(xp.asarray(y_batch))
            optimizer.update(self.model, x, t)
            print("loss: {0}".format(self.model.loss.data))
        serializers.save_npz(MODEL_PATH, self.model)
        optimizer.lr *= 0.97
def save_states(stage_cnt, joint_idx, epoch_cnt, model, optimizer,
                train_losses, test_losses):
    ''' Save the model, optimizer, and losses.
    If the latest loss is the best so far, the best model is also saved. '''
    modif = create_modifier(stage_cnt, joint_idx)

    # Save latest model
    filename = settings.RESUME_MODEL % modif
    logger.info('Save model to %s', filename)
    convenient.mkdir_to_save(filename)
    serializers.save_npz(filename, model)

    # Save latest optimizer
    filename = settings.RESUME_OPTIMIZER % modif
    logger.info('Save optimizer to %s', filename)
    convenient.mkdir_to_save(filename)
    serializers.save_npz(filename, optimizer)

    # Save latest loss history
    filename = settings.RESUME_LOSS % modif
    logger.info('Save loss history to %s', filename)
    convenient.mkdir_to_save(filename)
    np.savez(filename, train=train_losses, test=test_losses)

    # Save best model (check current loss)
    if epoch_cnt == 0 or np.min(test_losses[:-1]) > test_losses[-1]:
        save_best_model(stage_cnt, joint_idx, model)
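# The save_best_model helper is referenced above but not defined in this
# snippet. A minimal sketch of what such a helper might look like, assuming
# a settings.BEST_MODEL path template analogous to settings.RESUME_MODEL
# (the template name and exact behavior are assumptions, not the original):
def save_best_model(stage_cnt, joint_idx, model):
    modif = create_modifier(stage_cnt, joint_idx)
    filename = settings.BEST_MODEL % modif  # hypothetical path template
    logger.info('Save best model to %s', filename)
    convenient.mkdir_to_save(filename)
    serializers.save_npz(filename, model)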
def save(self):
    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz(self.metadata_path + self.appliance + '.model',
                         self.model)
    print('save the optimizer')
    serializers.save_npz(self.metadata_path + self.appliance + '.state',
                         self.optimizer)
def caffe_to_chainermodel(model, caffe_prototxt, caffemodel_path,
                          chainermodel_path):
    os.chdir(osp.dirname(caffe_prototxt))
    net = caffe.Net(caffe_prototxt, caffemodel_path, caffe.TEST)

    for name, param in net.params.iteritems():
        try:
            layer = getattr(model, name)
        except AttributeError:
            print('Skipping caffe layer: %s' % name)
            continue

        has_bias = True
        if len(param) == 1:
            has_bias = False
        print('{0}:'.format(name))
        # weight
        print(' - W: %s %s' % (param[0].data.shape, layer.W.data.shape))
        assert param[0].data.shape == layer.W.data.shape
        layer.W.data = param[0].data
        # bias
        if has_bias:
            print(' - b: %s %s' % (param[1].data.shape, layer.b.data.shape))
            assert param[1].data.shape == layer.b.data.shape
            layer.b.data = param[1].data
    S.save_npz(chainermodel_path, model)
def saveModelAndOptimizer():
    """Save the model and the optimizer."""
    testFileIni.set("curEpoch", curEpoch)  # save the number of completed epochs
    with modelLock:
        print('save the model')
        serializers.save_npz(modelFile, dnn.model)
        print('save the optimizer')
        serializers.save_npz(stateFile, dnn.optimizer)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('caffemodel')
    parser.add_argument('output')
    args = parser.parse_args()

    model = SSDCaffeFunction(args.caffemodel)
    serializers.save_npz(args.output, model)
def save_param(out_dir, epoch, storage):
    serializers.save_npz(
        str(out_dir / model_name(epoch)),
        storage.model
    )
    serializers.save_npz(
        str(out_dir / optimizer_name(epoch)),
        storage.optimizer
    )
def _write_classifier(self, classifier):
    with open(os.path.join(self.directory, "model.name"), "w") as model_file:
        model_file.write(classifier._model.__class__.__name__ + "\n")
    # Saving optimizer state
    serializers.save_npz(os.path.join(self.directory, "model.opt"),
                         classifier._opt)
    # Saving classifier specification
    with open(os.path.join(self.directory, "model.state"), "w") as state_file:
        self._write_specification(classifier, state_file)
def _write_model(self, model):
    directory = self.directory
    # Saving model specification
    with open(os.path.join(directory, "model.spec"), "w") as spec_file:
        self._write_specification(model, spec_file)
    with open(os.path.join(directory, "model.src_vocab"), "w") as src_voc_file:
        self._write_vocabulary(model._src_voc, src_voc_file)
    with open(os.path.join(directory, "model.trg_vocab"), "w") as trg_voc_file:
        self._write_vocabulary(model._trg_voc, trg_voc_file)
    serializers.save_npz(os.path.join(directory, "model.weight"), model)
def agent_message(self, inMessage):
    if inMessage.startswith("freeze learning"):
        self.policyFrozen = True
        return "message understood, policy frozen"

    if inMessage.startswith("unfreeze learning"):
        self.policyFrozen = False
        return "message understood, policy unfrozen"

    if inMessage.startswith("save model"):
        serializers.save_npz('resume.model', self.DN.model)  # save current model
        np.savez('stored_D012.npz',
                 D0=self.DN.D[0], D1=self.DN.D[1], D2=self.DN.D[2])
        np.savez('stored_D34.npz', D3=self.DN.D[3], D4=self.DN.D[4])
        return "message understood, model saved"
def DNN(self, x_train, y_train, x_test, y_test, seed):
    np.random.seed(seed)
    dnn = Deep()
    dnn.compute_accuracy = False
    if args.gpu >= 0:
        dnn.to_gpu()
    optimizer = optimizers.Adam()
    optimizer.setup(dnn)

    end_counter = 0
    min_loss = 100
    final_epoch = 0
    final_pred = xp.empty([x_test.shape[0], 1], dtype=xp.float32)
    x_train, y_train = resample(x_train, y_train, n_samples=x_train.shape[0])

    for epoch in range(n_epoch):
        indexes = np.random.permutation(x_train.shape[0])
        for i in range(0, x_train.shape[0], batchsize):
            x_train_dnn = Variable(x_train[indexes[i:i + batchsize]])
            y_train_dnn = Variable(y_train[indexes[i:i + batchsize]])
            dnn.zerograds()
            loss = F.mean_squared_error(dnn(x_train_dnn), y_train_dnn)
            loss.backward()
            optimizer.update()
        end_counter += 1

        # evaluation
        if epoch % evaluation == 0:
            y_pred = dnn(Variable(x_test, volatile='on'))
            loss = F.mean_squared_error(y_pred, Variable(y_test, volatile='on'))
            if min_loss > loss.data:
                min_loss = loss.data
                print "epoch{}".format(epoch)
                print "Current minimum loss is {}".format(min_loss)
                serializers.save_npz('network/DNN{}.model'.format(seed), dnn)
                final_epoch = epoch
                final_pred = y_pred
                end_counter = 0

        if end_counter > end_counter_max:
            f = open("network/final_epoch.txt", "a")
            f.write("DNN{}:{}".format(seed, final_epoch) + "\n")
            f.close()
            break

    return final_pred.data, min_loss
def save(self, prefix, encdec, epoch):
    settings = {
        "model": self.model(),
        "train_file": self.train_file(),
        "embed": self.embed_size(),
        "hidden": self.hidden_size(),
        "minbatch": self.batch_size(),
        "lr": self.lr(),
        "epoch": epoch
    }
    prefix = prefix + "_".join(
        [k + ":" + str(v) for k, v in settings.items()]
    ).replace("/", "-").replace(".", "-")
    self.corpus.save(prefix + '.vocab')
    self.serialize(prefix + '.conf', settings)
    serializers.save_npz(prefix + '.weights', encdec)
    return prefix
def test_resumed_trigger(self):
    trainer = testing.get_trainer_with_mock_updater(
        stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
        for expected in self.expected[:self.resume]:
            trainer.updater.update()
            self.assertEqual(trigger(trainer), expected)
        serializers.save_npz(f.name, trigger)

        trigger = training.triggers.ManualScheduleTrigger(*self.schedule)
        serializers.load_npz(f.name, trigger)
        for expected in self.expected[self.resume:]:
            trainer.updater.update()
            self.assertEqual(trigger(trainer), expected)
def test_standard_scaler_serialize(tmpdir, data, indices):
    x, expect_x_scaled = data
    scaler = StandardScaler()
    scaler.fit(x, indices=indices)

    scaler_filepath = os.path.join(str(tmpdir), 'scaler.npz')
    serializers.save_npz(scaler_filepath, scaler)

    scaler2 = StandardScaler()
    serializers.load_npz(scaler_filepath, scaler2)

    # print('scaler2 attribs:', scaler2.mean, scaler2.std, scaler2.indices)
    assert numpy.allclose(scaler.mean, scaler2.mean)
    assert numpy.allclose(scaler.std, scaler2.std)
    assert scaler.indices == scaler2.indices
def progress_func(epoch, loss, accuracy, valid_loss, valid_accuracy,
                  test_loss, test_accuracy):
    print 'epoch: {} done'.format(epoch)
    print('train mean loss={}, accuracy={}'.format(loss, accuracy))
    if valid_loss is not None and valid_accuracy is not None:
        print('valid mean loss={}, accuracy={}'.format(valid_loss,
                                                       valid_accuracy))
    if test_loss is not None and test_accuracy is not None:
        print('test mean loss={}, accuracy={}'.format(test_loss,
                                                      test_accuracy))
    # Save the net when validation accuracy improves on the best so far.
    if valid_accuracy > progress_state['valid_accuracy']:
        serializers.save_npz(args.output, net)
        progress_state['valid_accuracy'] = valid_accuracy
        progress_state['test_accuracy'] = test_accuracy
    if epoch % args.save_iter == 0:
        base, ext = os.path.splitext(args.output)
        serializers.save_npz('{0}_{1:04d}{2}'.format(base, epoch, ext), net)
    if args.lr_decay_iter > 0 and epoch % args.lr_decay_iter == 0:
        optimizer.alpha *= args.lr_decay_ratio
def run_training(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)

    if args.epoch is not None:
        start = args.epoch + 1
        storage = load(out_dir, args.epoch)
        sentences = itertools.islice(sentences, start, None)
    else:
        start = 0
        storage = init(args)
        if (out_dir / meta_name).exists():
            if input('Overwrite? [y/N]: ').strip().lower() != 'y':
                exit(1)
        with (out_dir / meta_name).open('wb') as f:
            np.save(f, [storage])

    batchsize = 5000
    for i, sentence in enumerate(sentences, start):
        if i % batchsize == 0:
            print()
            serializers.save_npz(
                str(out_dir / model_name(i)),
                storage.model
            )
            serializers.save_npz(
                str(out_dir / optimizer_name(i)),
                storage.optimizer
            )
        else:
            print(
                util.progress(
                    'batch {}'.format(i // batchsize),
                    (i % batchsize) / batchsize, 100),
                end=''
            )
        train(storage.model, storage.optimizer,
              generate_data(sentence),
              generate_label(sentence),
              generate_attr(
                  sentence, storage.mappings
              ))
def test_flow_scaler_serialize(tmpdir):
    x = numpy.random.uniform(50, 100, size=100).astype(numpy.float32)
    scaler = FlowScaler(5)
    scaler.fit(x)
    x_scaled = scaler.transform(x)

    scaler_filepath = os.path.join(str(tmpdir), 'scaler.npz')
    serializers.save_npz(scaler_filepath, scaler)

    scaler2 = FlowScaler(5)
    serializers.load_npz(scaler_filepath, scaler2)
    x_scaled2 = scaler2.transform(x)

    assert numpy.allclose(scaler.W1.array, scaler2.W1.array)
    assert numpy.allclose(scaler.b1.array, scaler2.b1.array)
    assert numpy.allclose(scaler.W2.array, scaler2.W2.array)
    assert numpy.allclose(scaler.b2.array, scaler2.b2.array)
    assert numpy.allclose(x_scaled, x_scaled2)
def test_elapsed_time_serialization(self):
    self.trainer.run()
    serialized_time = self.trainer.elapsed_time

    tempdir = tempfile.mkdtemp()
    try:
        path = os.path.join(tempdir, 'trainer.npz')
        serializers.save_npz(path, self.trainer)

        trainer = _get_mocked_trainer((20, 'iteration'))
        serializers.load_npz(path, trainer)

        trainer.run()
        self.assertGreater(trainer.elapsed_time, serialized_time)
    finally:
        shutil.rmtree(tempdir)
def test_resumed_trigger(self):
    trainer = testing.get_trainer_with_mock_updater(
        stop_trigger=None, iter_per_epoch=self.iter_per_epoch)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        trigger = training.triggers.OnceTrigger(self.call_on_resume)
        for expected, finished in zip(self.resumed_expected[:self.resume],
                                      self.resumed_finished[:self.resume]):
            trainer.updater.update()
            self.assertEqual(trigger.finished, finished)
            self.assertEqual(trigger(trainer), expected)
        serializers.save_npz(f.name, trigger)

        trigger = training.triggers.OnceTrigger(self.call_on_resume)
        serializers.load_npz(f.name, trigger)
        for expected, finished in zip(self.resumed_expected[self.resume:],
                                      self.resumed_finished[self.resume:]):
            trainer.updater.update()
            self.assertEqual(trigger.finished, finished)
            self.assertEqual(trigger(trainer), expected)
def train(epoch=10, batch_size=32, gpu=False):
    if gpu:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu else np

    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT,
                      image_property=IMAGE_PROP)

    # make mean image
    if not os.path.isfile(MEAN_IMAGE_FILE):
        print("make mean image...")
        td.make_mean_image(MEAN_IMAGE_FILE)
    else:
        td.mean_image_file = MEAN_IMAGE_FILE

    # train model
    label_def = LabelingMachine.read_label_def(LABEL_DEF_FILE)
    model = alex.Alex(len(label_def))
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    print("Now our model is a {0}-class classification task.".format(
        len(label_def)))
    print("begin training the model. epoch:{0} batch size:{1}.".format(
        epoch, batch_size))

    if gpu:
        model.to_gpu()

    for i in range(epoch):
        print("epoch {0}/{1}: (learning rate={2})".format(
            i + 1, epoch, optimizer.lr))
        td.shuffle(overwrite=True)

        for x_batch, y_batch in td.generate_batches(batch_size):
            x = chainer.Variable(xp.asarray(x_batch))
            t = chainer.Variable(xp.asarray(y_batch))
            optimizer.update(model, x, t)
            print("loss: {0}, accuracy: {1}".format(
                float(model.loss.data), float(model.accuracy.data)))

        serializers.save_npz(MODEL_FILE, model)
        optimizer.lr *= 0.97
def train():
    model = L.Classifier(net.MyChain())
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    dataset = animeface.load_dataset()
    N = int(len(dataset) * train_rate)
    N_test = len(dataset) - N

    for epoch in range(n_epoch):
        print "epoch {0}".format(epoch)

        random.shuffle(dataset)
        data = np.array([x[0] for x in dataset], np.float32)
        target = np.array([x[1] for x in dataset], np.int32)
        x_train, x_test = np.split(data, [N])
        y_train, y_test = np.split(target, [N])

        indexes = np.random.permutation(N)
        sum_loss, sum_accuracy = 0, 0
        for i in range(0, N, batchsize):
            x = Variable(x_train[indexes[i: i + batchsize]])
            t = Variable(y_train[indexes[i: i + batchsize]])
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.data) * batchsize
            sum_accuracy += float(model.accuracy.data) * batchsize
        print "train loss={0}, accuracy={1}".format(
            sum_loss / N, sum_accuracy / N)

        sum_loss, sum_accuracy = 0, 0
        for i in range(0, N_test, batchsize):
            x = Variable(x_test[i: i + batchsize])
            t = Variable(y_test[i: i + batchsize])
            loss = model(x, t)
            sum_loss += float(loss.data) * batchsize
            sum_accuracy += float(model.accuracy.data) * batchsize
        print "test loss={0}, accuracy={1}".format(
            sum_loss / N_test, sum_accuracy / N_test)

    serializers.save_npz("animeface.model", model)
def check_serialization(self, backend_config):
    with utils.tempdir() as root:
        filename = os.path.join(root, 'tmp.npz')

        layer1 = self.layer.copy('copy')
        hook1 = copy.deepcopy(self.hook)
        layer1.add_hook(hook1)

        layer1.to_device(backend_config.device)
        x = backend_config.get_array(self.x)
        with backend_config:
            layer1(x)
            with chainer.using_config('train', False):
                y1 = layer1(x)
        serializers.save_npz(filename, layer1)

        layer2 = self.layer.copy('copy')
        hook2 = copy.deepcopy(self.hook)
        layer2.add_hook(hook2)

        # Test that loading succeeds.
        msg = None
        try:
            serializers.load_npz(filename, layer2)
        except Exception as e:
            msg = e
        assert msg is None

        with chainer.using_config('train', False):
            y2 = layer2(self.x.copy())

        # Test that the attributes are the same.
        orig_weight = _cpu._to_cpu(
            getattr(layer1, hook1.weight_name).array)
        orig_vector = _cpu._to_cpu(getattr(layer1, hook1.vector_name))
        numpy.testing.assert_array_equal(
            orig_weight, getattr(layer2, hook2.weight_name).array)
        numpy.testing.assert_array_equal(
            orig_vector, getattr(layer2, hook2.vector_name))
        testing.assert_allclose(y1.array, y2.array)
def train(args, model):
    # setup optimizer
    opt = optimizers.SGD()  # stochastic gradient descent
    opt.setup(model)        # initialize

    for i in range(args.epoch):
        src_generator = text_generator(args.source)
        trg_generator = text_generator(args.target)

        total_loss = 0.0
        for src_sentence, trg_sentence in zip(src_generator, trg_generator):
            opt.zero_grads()
            loss = forward(model, src_sentence, trg_sentence, True)
            total_loss += loss.data
            loss.backward()     # backpropagation
            opt.clip_grads(10)  # clip gradients larger than 10
            opt.update()        # update parameters
        print("epoch: %3d, loss: %f" % (i, total_loss))

    # save
    serializers.save_npz("model", model)
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            serializers.save_npz(args.out, model)
            serializers.save_npz(args.outstate, optimizer)
            model.train = False
            continue

        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)

        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph', file=sys.stderr)
                graph_generated = True
        else:
            model(x, t)

        res_q.put((float(model.loss.data), float(model.accuracy.data)))
        del x, t
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gpu', type=int, required=True)
    args = parser.parse_args()

    gpu = args.gpu

    output_nc = 3
    nz = 8
    E = E_ResNet(
        input_nc=output_nc,
        output_nc=nz,
        ndf=64,
        n_blocks=5,
        norm_layer='instance',
        nl_layer='lrelu',
        vaeLike=True,
    )
    G = G_Unet_add_all(
        input_nc=1,
        output_nc=output_nc,
        nz=nz,
        num_downs=8,
        ngf=64,
        norm_layer='instance',
        nl_layer='relu',
        use_dropout=True,
        upsample='basic',
    )
    D = D_NLayersMulti(
        input_nc=output_nc,
        ndf=64,
        n_layers=3,
        norm_layer='instance',
        use_sigmoid=False,
        num_D=2,
    )
    D2 = D_NLayersMulti(
        input_nc=output_nc,
        ndf=64,
        n_layers=3,
        norm_layer='instance',
        use_sigmoid=False,
        num_D=2,
    )

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        E.to_gpu()
        G.to_gpu()
        D.to_gpu()
        D2.to_gpu()

    lr = 0.0002
    beta1 = 0.5
    beta2 = 0.999
    optimizer_E = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_E.setup(E)
    optimizer_G = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_G.setup(G)
    optimizer_D = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D.setup(D)
    optimizer_D2 = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D2.setup(D2)

    batch_size = 2
    dataset = BerkeleyPix2PixDataset('edges2shoes', split='train')
    dataset = chainer.datasets.TransformDataset(dataset, BicycleGANTransform())
    iterator = chainer.iterators.SerialIterator(dataset, batch_size=batch_size)

    epoch_count = 1
    niter = 30
    niter_decay = 30

    def lambda_rule(epoch):
        lr_l = 1.0 - (max(0, epoch + 1 + epoch_count - niter) /
                      float(niter_decay + 1))
        return lr_l

    out_dir = osp.join('logs',
                       datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    if not osp.exists(out_dir):
        os.makedirs(out_dir)
    with open(osp.join(out_dir, 'log.csv'), 'w') as f:
        f.write(','.join([
            'epoch',
            'iteration',
            'loss_D',
            'loss_D2',
            'loss_G',
            'loss_G_GAN',
            'loss_G_GAN2',
            'loss_G_L1',
            'loss_kl',
            'loss_z_L1',
        ]))
        f.write('\n')

    max_epoch = niter + niter_decay - epoch_count
    dataset_size = len(dataset)
    for epoch in range(epoch_count, niter + niter_decay + 1):
        t_start = time.time()
        for iteration in range(dataset_size // batch_size):
            batch = next(iterator)
            if len(batch) != batch_size:
                continue

            img_A, img_B = zip(*batch)
            img_A = np.asarray(img_A)[:, 0:1, :, :]
            img_B = np.asarray(img_B)

            assert batch_size == 2
            assert len(img_A) == 2
            assert len(img_B) == 2
            real_A_encoded = img_A[0:1]
            real_A_random = img_A[1:2]
            real_B_encoded = img_B[0:1]
            real_B_random = img_B[1:2]
            if gpu >= 0:
                real_A_encoded = cuda.to_gpu(real_A_encoded)
                real_A_random = cuda.to_gpu(real_A_random)
                real_B_encoded = cuda.to_gpu(real_B_encoded)
                real_B_random = cuda.to_gpu(real_B_random)
            real_A_encoded = chainer.Variable(real_A_encoded)
            real_A_random = chainer.Variable(real_A_random)
            real_B_encoded = chainer.Variable(real_B_encoded)
            real_B_random = chainer.Variable(real_B_random)

            # update D
            # -----------------------------------------------------------------
            # forward {{
            mu, logvar = E(real_B_encoded)
            std = F.exp(logvar * 0.5)
            eps = get_z_random(std.shape[0], std.shape[1])
            z_encoded = (eps * std) + mu

            z_random = get_z_random(real_A_random.shape[0], std.shape[1])

            fake_B_encoded = G(real_A_encoded, z_encoded)

            # generate fake_B_random
            fake_B_random = G(real_A_encoded, z_random)

            fake_data_encoded = fake_B_encoded
            fake_data_random = fake_B_random
            real_data_encoded = real_B_encoded
            real_data_random = real_B_random

            lambda_z = 0.5
            mu2, logvar2 = E(fake_B_random)
            # std2 = F.exp(logvar2 * 0.5)
            # eps2 = get_z_random(std2.shape[0], std2.shape[1])
            # z_predict = (eps2 * std2) + mu2
            # }} forward

            # update D1
            lambda_GAN = 1.0
            lambda_GAN2 = 1.0
            if lambda_GAN > 0:
                D.cleargrads()
                loss_D, losses_D = backward_D(D, real_data_encoded,
                                              fake_data_encoded)
                optimizer_D.update()

            # update D2
            if lambda_GAN2 > 0:
                D2.cleargrads()
                loss_D2, losses_D2 = backward_D(D2, real_data_random,
                                                fake_data_random)
                optimizer_D2.update()

            # update G
            # -----------------------------------------------------------------
            E.cleargrads()
            G.cleargrads()
            loss_G, loss_G_GAN, loss_G_GAN2, loss_G_L1, loss_kl = backward_EG(
                fake_data_encoded, fake_data_random, fake_B_encoded,
                real_B_encoded, D, D2, lambda_GAN, lambda_GAN2, mu, logvar)
            optimizer_G.update()
            optimizer_E.update()

            # update G only
            if lambda_z > 0.0:
                G.cleargrads()
                E.cleargrads()
                loss_z_L1 = backward_G_alone(lambda_z, mu2, z_random)
                optimizer_G.update()

            if iteration % (100 // batch_size) != 0:
                continue

            # log
            # -----------------------------------------------------------------
            time_per_iter1 = ((time.time() - t_start) /
                              (iteration + 1) / batch_size)

            if hasattr(loss_D, 'array'):
                loss_D = float(loss_D.array)
            if hasattr(loss_D2, 'array'):
                loss_D2 = float(loss_D2.array)
            if hasattr(loss_G, 'array'):
                loss_G = float(loss_G.array)
            if hasattr(loss_G_GAN, 'array'):
                loss_G_GAN = float(loss_G_GAN.array)
            if hasattr(loss_G_GAN2, 'array'):
                loss_G_GAN2 = float(loss_G_GAN2.array)
            if hasattr(loss_G_L1, 'array'):
                loss_G_L1 = float(loss_G_L1.array)
            if hasattr(loss_kl, 'array'):
                loss_kl = float(loss_kl.array)
            if hasattr(loss_z_L1, 'array'):
                loss_z_L1 = float(loss_z_L1.array)

            print('-' * 79)
            print('Epoch: {:d}/{:d} ({:.1%}), '
                  'Iteration: {:d}/{:d} ({:.1%}), Time: {:f}'.format(
                      epoch, max_epoch, 1. * epoch / max_epoch,
                      batch_size * iteration, dataset_size,
                      1. * batch_size * iteration / dataset_size,
                      time_per_iter1))
            print('D: {:.2f}'.format(loss_D),
                  'D2: {:.2f}'.format(loss_D2),
                  'G: {:.2f}'.format(loss_G),
                  'G_GAN: {:.2f}'.format(loss_G_GAN),
                  'G_GAN2: {:.2f}'.format(loss_G_GAN2),
                  'G_L1: {:.2f}'.format(loss_G_L1),
                  'kl: {:.2f}'.format(loss_kl),
                  'z_L1: {:.2f}'.format(loss_z_L1))

            with open(osp.join(out_dir, 'log.csv'), 'a') as f:
                f.write(','.join(map(str, [
                    epoch,
                    ((epoch - 1) * dataset_size) + iteration * batch_size,
                    loss_D,
                    loss_D2,
                    loss_G,
                    loss_G_GAN,
                    loss_G_GAN2,
                    loss_G_L1,
                    loss_kl,
                    loss_z_L1,
                ])))
                f.write('\n')

        # visualize
        # ---------------------------------------------------------------------
        real_A_encoded = real_A_encoded.array[0].transpose(1, 2, 0)
        real_A_encoded = np.repeat(real_A_encoded, 3, axis=2)
        real_A_encoded = cuda.to_cpu(real_A_encoded)
        real_B_encoded = real_B_encoded.array[0].transpose(1, 2, 0)
        real_B_encoded = cuda.to_cpu(real_B_encoded)
        real_A_random = real_A_random.array[0].transpose(1, 2, 0)
        real_A_random = np.repeat(real_A_random, 3, axis=2)
        real_A_random = cuda.to_cpu(real_A_random)
        real_B_random = real_B_random.array[0].transpose(1, 2, 0)
        real_B_random = cuda.to_cpu(real_B_random)
        fake_B_encoded = fake_B_encoded.array[0].transpose(1, 2, 0)
        fake_B_encoded = cuda.to_cpu(fake_B_encoded)
        fake_B_random = fake_B_random.array[0].transpose(1, 2, 0)
        fake_B_random = cuda.to_cpu(fake_B_random)

        viz = np.vstack([
            np.hstack([real_A_encoded, real_B_encoded]),
            np.hstack([real_A_random, real_B_random]),
            np.hstack([fake_B_encoded, fake_B_random]),
        ])
        skimage.io.imsave(osp.join(out_dir, '{:08}.jpg'.format(epoch)), viz)

        S.save_npz(osp.join(out_dir, '{:08}_E.npz'.format(epoch)), E)
        S.save_npz(osp.join(out_dir, '{:08}_G.npz'.format(epoch)), G)
        S.save_npz(osp.join(out_dir, '{:08}_D.npz'.format(epoch)), D)
        S.save_npz(osp.join(out_dir, '{:08}_D2.npz'.format(epoch)), D2)

        # update learning rate
        # ---------------------------------------------------------------------
        lr_new = lambda_rule(epoch)
        optimizer_E.alpha *= lr_new
        optimizer_G.alpha *= lr_new
        optimizer_D.alpha *= lr_new
        optimizer_D2.alpha *= lr_new
# train discriminator
L_dis = -1 * (d_entropy1(real_y) - d_entropy2(real_y)
              + d_entropy2(fake_y))  # Equation (7) upper
o_dis.zero_grads()
L_dis.backward()
o_dis.update()

# train generator
L_gen = -d_entropy1(fake_y) + d_entropy2(fake_y)  # Equation (7) lower
o_gen.zero_grads()
L_gen.backward()
o_gen.update()

sum_l_dis += L_dis.data
sum_l_gen += L_gen.data

error[epoch - 1, :] = [epoch, sum_l_dis, sum_l_gen]
print('dis_loss', sum_l_dis, sum_l_gen, sum_l_dis + sum_l_gen)
# print('loss', sum_l_gen)
np.savetxt('train_error.csv', error, delimiter=',',
           header='epoch,dis_loss,gen_loss')

# Save the model and the optimizer
print('save the model')
serializers.save_npz('catgan_gen.model', gen)
serializers.save_npz('catgan_dis.model', dis)
gen_f_model.cleargrads()
gen_g_model.cleargrads()
loss_gen.backward()
loss_gen.unchain_backward()
gen_f_opt.update()
gen_g_opt.update()

sum_dis_y_loss += loss_dis_y.data.get()
sum_dis_x_loss += loss_dis_x.data.get()
sum_gen_loss += loss_gen.data.get()

if epoch % interval == 0 and batch == 0:
    serializers.save_npz('xy.model', gen_g_model)
    serializers.save_npz('yx.model', gen_f_model)
    for i in range(Ntest):
        black = (x_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype(
            np.uint8)
        pylab.subplot(2, Ntest, 2 * i + 1)
        pylab.imshow(black)
        pylab.axis('off')
        pylab.savefig(image_xy + '/output_xy_%d.png' % epoch)

        x = Variable(cuda.to_gpu(x_test[i]))
        x = x.reshape(1, channels, width, height)
        with chainer.using_config('train', False):
            x_y = gen_g_model(x)
        x_y = x_y.data.get()
feature = vgg(xc)
feature_hat = vgg(y)

# compute the feature loss for only the output of layer conv3_3
L_feat = lambda_f * F.mean_squared_error(
    Variable(feature[2].data), feature_hat[2])

L_style = Variable(xp.zeros((), dtype=np.float32))
for f, f_hat, g_s in zip(feature, feature_hat, gram_s):
    L_style += lambda_s * F.mean_squared_error(gram_matrix(f_hat),
                                               Variable(g_s.data))

L_tv = lambda_tv * total_variation_regularization(y)
L = L_feat + L_style + L_tv

print '(epoch {}) batch {}/{}... training loss is...{}'.format(
    epoch, i, n_iter, L.data)

L.backward()
O.update()

if args.checkpoint > 0 and i % args.checkpoint == 0:
    serializers.save_npz(
        'models/{}_{}_{}.model'.format(output, epoch, i), model)

print 'save "style.model"'
serializers.save_npz('models/{}_{}.model'.format(output, epoch), model)
serializers.save_npz('models/{}.model'.format(output), model)
def train():
    print('import data....')
    data = data_import()
    print('success!')

    data_x = []
    data_y = []
    for i in range(len(data) - INPUT_SIZE - OUTPUT_SIZE):
        data_x.append(data[i:i + INPUT_SIZE])
        data_y.append(data[i + INPUT_SIZE:i + INPUT_SIZE + OUTPUT_SIZE])
    data_x = np.array(data_x).astype("float32")
    data_y = np.array(data_y).astype("float32")

    X = data_x[0:len(data_x) - TESTDATA_SIZE]
    y = data_y[0:len(data_y) - TESTDATA_SIZE]
    test_x = data_x[len(data_x) - TESTDATA_SIZE:]
    test_y = data_y[len(data_y) - TESTDATA_SIZE:]

    mean = test_x.mean(axis=1)
    std = test_x.std(axis=1)
    test_x = (test_x - mean.reshape(TESTDATA_SIZE, 1)) / std.reshape(
        TESTDATA_SIZE, 1)
    test_y = (test_y - mean.reshape(TESTDATA_SIZE, 1)) / std.reshape(
        TESTDATA_SIZE, 1)

    model = predict_model(INPUT_SIZE, OUTPUT_SIZE, 32)
    try:
        serializers.load_npz("predict.model", model)
        print("loaded")
    except:
        print("couldn't load")

    opt = chainer.optimizers.SGD(0.01)
    opt.setup(model)

    num_batches = int(X.shape[0] / BATCH_SIZE)
    for epoch in range(NUM_EPOCH):
        perm = np.random.permutation(X.shape[0])
        for index in range(num_batches):
            X_batch = X[perm[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]]
            Y_batch = y[perm[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]]
            # return X_batch
            mean = X_batch.mean(axis=1)
            std = X_batch.std(axis=1)
            X_batch = (X_batch - mean.reshape(BATCH_SIZE, 1)) / std.reshape(
                BATCH_SIZE, 1) + np.random.normal(
                    0, 0.5, X_batch.shape).astype(np.float32)
            Y_batch = (Y_batch - mean.reshape(BATCH_SIZE, 1)) / std.reshape(
                BATCH_SIZE, 1) + np.random.normal(
                    0, 0.5, Y_batch.shape).astype(np.float32)

            yl = model(X_batch)
            loss = F.mean_squared_error(yl, Y_batch)
            model.cleargrads()
            loss.backward()
            opt.update()

            chainer.config.train = False
            test_loss = F.mean_squared_error(model(test_x), test_y)
            chainer.config.train = True
            print("epoch:%d batch:%d/%d loss:%f test_loss:%f" %
                  (epoch, index, num_batches, loss.data, test_loss.data))
        serializers.save_npz('predict.model', model)
def main():
    args = parse_args()
    XP.set_library(args)

    date = time.localtime()[:6]
    D = []
    for i in date:
        D.append(str(i))
    D = "_".join(D)

    save_path = args.save_path
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    if args.model_path is not None:
        print("continuing an existing model!! loading recipe of {}".format(
            args.model_path))
        with open(args.model_path + '/recipe.json', 'r') as f:
            recipe = json.load(f)
        vae_enc = recipe["network"]["IM"]["vae_enc"]
        vae_z = recipe["network"]["IM"]["vae_z"]
        vae_dec = recipe["network"]["IM"]["vae_dec"]
        Read_patch = recipe["network"]["IM"]["Read_patch"]
        Write_patch = recipe["network"]["IM"]["Write_patch"]
        times = recipe["network"]["IM"]["times"]
        alpha = recipe["network"]["IM"]["KLcoefficient"]
        batchsize = recipe["setting"]["batchsize"]
        maxepoch = args.maxepoch
        weightdecay = recipe["setting"]["weightdecay"]
        grad_clip = recipe["setting"]["grad_clip"]
        cur_epoch = recipe["setting"]["cur_epoch"] + 1
        ini_lr = recipe["setting"]["initial_learningrate"]
        cur_lr = recipe["setting"]["cur_lr"]
        with open(args.save_path + "/trainloss.json", 'r') as f:
            trainloss_dic = json.load(f)
        with open(args.save_path + "/valloss.json", 'r') as f:
            valloss_dic = json.load(f)
    else:
        vae_enc = args.vae_enc
        vae_z = args.vae_z
        vae_dec = args.vae_dec
        Read_patch = args.Read_patch
        Write_patch = args.Write_patch
        times = args.times
        alpha = args.alpha
        batchsize = args.batchsize
        maxepoch = args.maxepoch
        weightdecay = args.weightdecay
        grad_clip = 5
        cur_epoch = 0
        ini_lr = args.lr
        cur_lr = ini_lr
        trainloss_dic = {}
        valloss_dic = {}

    print('this experiment started at :{}'.format(D))
    print('***Experiment settings***')
    print('[IM]vae encoder hidden size :{}'.format(vae_enc))
    print('[IM]vae hidden layer size :{}'.format(vae_z))
    print('[IM]vae decoder hidden layer size :{}'.format(vae_dec))
    print('[IM]Read patch size :{}'.format(Read_patch))
    print('[IM]Write patch size :{}'.format(Write_patch))
    print('[IM]sequence length:{}'.format(times))
    print('max epoch :{}'.format(maxepoch))
    print('mini batch size :{}'.format(batchsize))
    print('initial learning rate :{}'.format(cur_lr))
    print('weight decay :{}'.format(weightdecay))
    print("optimization by :{}".format("Adam"))
    print("VAE KL coefficient:", alpha)
    print('*************************')

    vae = VAE_bernoulli_attention(vae_enc, vae_z, vae_dec,
                                  Read_patch, Write_patch, 28, 28, 1)
    opt = optimizers.Adam(alpha=cur_lr)
    opt.use_cleargrads()
    opt.setup(vae)

    if args.model_path is not None:
        print('loading model ...')
        serializers.load_npz(args.model_path + '/VAEweights', vae)
        serializers.load_npz(args.model_path + '/optimizer', opt)
    else:
        print('making [[new]] model ...')
        for param in vae.params():
            data = param.data
            data[:] = np.random.uniform(-0.1, 0.1, data.shape)

    opt.add_hook(optimizer.GradientClipping(grad_clip))
    opt.add_hook(optimizer.WeightDecay(weightdecay))

    if args.gpu >= 0:
        vae.to_gpu()

    mnist = MNIST(binarize=True)
    train_size = mnist.train_size
    test_size = mnist.test_size
    eps = 1e-8

    for epoch in range(cur_epoch + 1, maxepoch + 1):
        print('\nepoch {}'.format(epoch))
        LX = 0.0
        LZ = 0.0
        counter = 0
        for iter_, (img_array, label_array) in enumerate(
                mnist.gen_train(batchsize, Random=True)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            # first to T-1 step
            for j in range(times - 1):
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
            # last step
            j += 1
            y, kl = vae.free_energy_onestep()
            Lz_i = alpha * kl
            Lz += Lz_i
            Lx = Bernoulli_nll_wesp(vae.x, y, eps)
            LZ += Lz.data
            LX += Lx.data
            loss = (Lx + Lz) / B
            loss.backward()  # if True, all intermediate variables are kept.
            opt.update()
            counter += B
            sys.stdout.write('\rnow training ... epoch {}, {}/{} '.format(
                epoch, counter, train_size))
            sys.stdout.flush()
            if (iter_ + 1) % 100 == 0:
                print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(
                    counter) % (Lx.data / B, Lz.data / B))

        print("\nsave fig...")
        img_array = cuda.to_cpu(y.data)
        im_array = img_array.reshape(batchsize * 28, 28)
        img = im_array[:28 * 5]
        """
        plt.clf()
        plt.imshow(img, cmap=cm.gray)
        plt.colorbar(orientation='horizontal')
        plt.savefig(save_path + "/" + "img{}.png".format(epoch))
        """
        save_img(img, save_path + "/{}.png".format(str(epoch).zfill(3)))

        trace(save_path + "/trainloss.txt",
              "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(
                  epoch, LX / train_size, LZ / train_size,
                  (LX + LZ) / train_size))
        trainloss_dic[str(epoch).zfill(3)] = {
            "Lx": float(LX / train_size),
            "Lz": float(LZ / train_size),
            "Lx+Lz": float((LX + LZ) / train_size)}
        with open(save_path + "/trainloss.json", 'w') as f:
            json.dump(trainloss_dic, f, indent=4)

        print('save model ...')
        prefix = save_path + "/" + str(epoch).zfill(3)
        if not os.path.exists(prefix):
            os.mkdir(prefix)
        serializers.save_npz(prefix + '/VAEweights', vae)
        serializers.save_npz(prefix + '/optimizer', opt)

        print('save recipe...')
        recipe_dic = {
            "date": D,
            "setting": {
                "maxepoch": maxepoch,
                "batchsize": batchsize,
                "weightdecay": weightdecay,
                "grad_clip": grad_clip,
                "opt": "Adam",
                "initial_learningrate": ini_lr,
                "cur_epoch": epoch,
                "cur_lr": cur_lr},
            "network": {
                "IM": {
                    "x_size": 784,
                    "vae_enc": vae_enc,
                    "vae_z": vae_z,
                    "vae_dec": vae_dec,
                    "Read_patch": Read_patch,
                    "Write_patch": Write_patch,
                    "times": times,
                    "KLcoefficient": alpha},
            },
        }
        with open(prefix + '/recipe.json', 'w') as f:
            json.dump(recipe_dic, f, indent=4)

        if epoch % 1 == 0:
            print("\nvalidation step")
            LX = 0.0
            LZ = 0.0
            counter = 0
            for iter, (img_array, label_array) in enumerate(
                    mnist.gen_test(batchsize)):
                B = img_array.shape[0]
                Lz = XP.fzeros(())
                vae.reset(img_array)
                # first to T-1 step
                for j in range(times - 1):
                    y, kl = vae.free_energy_onestep()
                    Lz_i = alpha * kl
                    Lz += Lz_i
                # last step
                j += 1
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
                Lx = Bernoulli_nll_wesp(vae.x, y, eps)
                LZ += Lz.data
                LX += Lx.data
                counter += B
                sys.stdout.write('\rnow testing ... epoch {}, {}/{} '.format(
                    epoch, counter, test_size))
                sys.stdout.flush()
            print("")
            trace(save_path + "/valloss.txt",
                  "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(
                      epoch, LX / test_size, LZ / test_size,
                      (LX + LZ) / test_size))
            valloss_dic[str(epoch).zfill(3)] = {
                "Lx": float(LX / test_size),
                "Lz": float(LZ / test_size),
                "Lx+Lz": float((LX + LZ) / test_size)}
            with open(save_path + "/valloss.json", 'w') as f:
                json.dump(valloss_dic, f, indent=4)
def main():
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='number of units')
    parser.add_argument('--type_unit', '-t', choices={'lstm', 'gru'},
                        help='type of recurrent unit')
    parser.add_argument('--layer', '-l', type=int, default=3,
                        help='number of layers')
    parser.add_argument('--min-source-sentence', type=int,
                        default=2,  # for calculation of 2-grams
                        help='minimum length of source sentence')
    parser.add_argument('--max-source-sentence', type=int, default=500,
                        help='maximum length of source sentence')
    parser.add_argument('--min-target-sentence', type=int,
                        default=2,  # for calculation of 2-grams
                        help='minimum length of target sentence')
    parser.add_argument('--max-target-sentence', type=int, default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--log-interval', type=int, default=200,
                        help='number of iteration to show log')
    parser.add_argument('--validation-interval', type=int, default=1000,
                        help='number of iteration to evaluate the model '
                             'with validation dataset')
    parser.add_argument('--word_dropout', '-w', type=float, default=0.0)
    parser.add_argument('--denoising_rate', '-d', type=float, default=0.0)
    parser.add_argument('--n_latent', type=int, default=100)
    parser.add_argument('--n_embed', type=int, default=512,
                        help='length of embedding')
    args = parser.parse_args()

    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)
    train_data = [
        (s, t)
        for s, t in six.moves.zip(train_source, train_target)
        if args.min_source_sentence <= len(s) <= args.max_source_sentence
        and args.min_source_sentence <= len(t) <= args.max_source_sentence
    ]
    train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio([t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit,
                    args.n_embed, args.n_latent, args.type_unit,
                    args.word_dropout, args.denoising_rate)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/rec', 'main/lat',
        'main/perp', 'bleu', 'p', 'r', 'f', 'p1', 'r1', 'f1', 'elapsed_time'
    ]), trigger=(args.log_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}.npz'), trigger=(5, 'epoch'))

    if args.validation_source and args.validation_target:
        test_source = load_data(source_ids, args.validation_source)
        test_target = load_data(target_ids, args.validation_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate(trainer):
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]

            # source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            # print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)

        trainer.extend(
            translate, trigger=(args.validation_interval, 'iteration'))

        # @chainer.training.make_extension()
        # def generate(trainer):
        #     results = model.generate(5)
        #     for i, result in enumerate(results):
        #         print('# result {}: {}'.format(
        #             i + 1, ' '.join([source_words[x] for x in result])))
        # trainer.extend(
        #     generate, trigger=(args.validation_interval, 'iteration'))

        # trainer.extend(
        #     CalculateBleu(
        #         model, test_data, 'bleu', device=args.gpu),
        #     trigger=(args.validation_interval, 'iteration'))
        trainer.extend(CalculateBleuRouge(
            model, test_data, ['bleu', 'p', 'r', 'f', 'p1', 'r1', 'f1'],
            device=args.gpu),
            trigger=(args.validation_interval, 'iteration'))

    @chainer.training.make_extension()
    def fit_C(trainer):
        if model.C < 0.5 and updater.epoch > 5:
            # if model.C < 0.5:
            model.C += 0.001
            print('epoch: {}, C: {},'.format(updater.epoch, model.C))

    trainer.extend(fit_C, trigger=(1000, 'iteration'))
    # trainer.extend(fit_C, trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, model)

    print('start training')
    trainer.run()
    print('complete training')

    with open('result/args.txt', 'w') as f:
        args_dict = {}
        for i in dir(args):
            if '_' in i[0]:
                continue
            args_dict[str(i)] = getattr(args, i)
        json.dump(args_dict, f, ensure_ascii=False, indent=4,
                  sort_keys=True, separators=(',', ': '))

    serializers.save_npz('result/model.npz', model)
def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc, test_loss,
                  test_acc, test_time):
    error = 100 * (1 - acc)
    print('epoch {} done'.format(epoch))
    print('train loss: {} error: {}'.format(loss, error))
    if valid_loss is not None:
        valid_error = 100 * (1 - valid_acc)
        print('valid loss: {} error: {}'.format(valid_loss, valid_error))
    else:
        valid_error = None
    if test_loss is not None:
        test_error = 100 * (1 - test_acc)
        print('test loss: {} error: {}'.format(test_loss, test_error))
        print('test time: {}s'.format(test_time))
    else:
        test_error = None

    if valid_loss is not None and valid_error < state['best_valid_error']:
        save_path = os.path.join('model', '{}.model'.format(model_prefix))
        serializers.save_npz(save_path, n)
        save_path = os.path.join('model', '{}.state'.format(model_prefix))
        serializers.save_npz(save_path, o)
        state['best_valid_error'] = valid_error
        state['best_test_error'] = test_error
    elif valid_loss is None:
        save_path = os.path.join('model', '{}.model'.format(model_prefix))
        serializers.save_npz(save_path, n)
        save_path = os.path.join('model', '{}.state'.format(model_prefix))
        serializers.save_npz(save_path, o)
        state['best_test_error'] = test_error

    if args.save_epoch > 0 and (epoch + 1) % args.save_epoch == 0:
        save_path = os.path.join(
            'model', '{}_{}.model'.format(model_prefix, epoch + 1))
        serializers.save_npz(save_path, n)
        save_path = os.path.join(
            'model', '{}_{}.state'.format(model_prefix, epoch + 1))
        serializers.save_npz(save_path, o)

    clock = time.clock()
    print('elapsed time: {}'.format(clock - state['clock']))
    state['clock'] = clock

    with open(log_file_path, 'a') as f:
        f.write('{},{},{},{},{},{},{}\n'.format(
            epoch, loss, error, valid_loss, valid_error, test_loss,
            test_error))
def save_model(self, model_dir):
    serializers.save_npz(model_dir + "model.npz", self.model)
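# Companion sketch to save_model above (an assumption, not part of the
# original source): serializers.load_npz restores parameters in place, so
# the caller must first construct a model with the same architecture as
# the one that was saved.
def load_model(self, model_dir):
    # assumes self.model was built with the same shapes as when saved
    serializers.load_npz(model_dir + "model.npz", self.model)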
def save_params(file_stem, net, trainer):
    save_npz(file=file_stem + '.npz', obj=net)
    save_npz(file=file_stem + '.states', obj=trainer)
def train_dcgan_labeled(gen, dis, epoch0=0):
    # o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    # o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen = optimizers.Adam(alpha=0.0001, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0001, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    # o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    # o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.000005))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.000005))

    stop_flag_dis = False
    stop_flag_gen = False

    for epoch in xrange(epoch0, n_epoch):
        perm = np.random.permutation(n_train)
        # sum_l_dis = np.float32(0)
        # sum_l_gen = np.float32(0)
        sum_l_dis = []
        sum_l_gen = []
        accum_dis = 0.
        accum_gen = 0.
        prev_time = time.time()
        dis_result = []  # 1 if dis win gen, 0 otherwise

        for i in xrange(0, n_train, batchsize):
            # discriminator
            # 0: from dataset
            # 1: from noise
            n_ins = len(perm[i:i + batchsize])
            emb_ids = xp.asarray(
                sum([dataset[j] for j in perm[i:i + batchsize]],
                    [])).astype(np.int32)
            x2 = F.reshape(Variable(embed(Variable(emb_ids)).data),
                           (n_ins, 1, max_sent, 512))
            # can this be done with reshape, or does it need concat?

            # train generator
            z = Variable(
                xp.random.uniform(-1, 1, (n_ins, nz), dtype=np.float32))
            x = gen(z)
            # x = fill_eos_after_first_eos.
            yl = dis(x)
            L_gen = F.softmax_cross_entropy(
                yl, Variable(xp.zeros(n_ins, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(
                yl, Variable(xp.ones(n_ins, dtype=np.int32)))
            # if not stop_flag_gen: L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(n_ins, dtype=np.int32)))
            # if not stop_flag_dis: L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(n_ins, dtype=np.int32)))
            dis_result.extend(
                [1. if t == 1 else 0. for t in xp.argmax(yl.data, axis=1)])

            # train discriminator
            # if not stop_flag_dis:
            yl2 = dis(x2 + dis.xp.random.normal(0., dis.random_std,
                                                x2.data.shape))
            L_dis += F.softmax_cross_entropy(
                yl2, Variable(xp.zeros(n_ins, dtype=np.int32)))

            # if not stop_flag_gen:
            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()
            sum_l_gen.append(L_gen.data.get())
            accum_gen += L_gen.data.get()

            # if not stop_flag_dis:
            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()
            sum_l_dis.append(L_dis.data.get())
            accum_dis += L_dis.data.get()
            # print "backward done"

            if i % result_interval == 0:
                per = len(dis_result) * 1. / (time.time() - prev_time)
                prev_time = time.time()
                print i, "\tdis-train:", not stop_flag_dis, \
                    "\tgen-train:", not stop_flag_gen, \
                    "noise:", dis.random_std, "\t(%.3lfi/s)" % per, \
                    datetime.today().strftime("%Y/%m/%d %H:%M:%S")
                print i, "\tLoss dis:", accum_dis / 100 / batchsize, \
                    "\tgen:", accum_gen / 100 / batchsize
                WPdis = np.mean(dis_result)
                print i, "\tWP dis:gen =", WPdis, ":", 1 - WPdis
                if (epoch >= 1 or i >= 50000):
                    if WPdis >= 0.8:
                        stop_flag_dis = True
                        stop_flag_gen = False
                        if dis.random_std < dis.max_std:
                            dis.random_std *= 2.0
                    elif WPdis <= 0.2:
                        stop_flag_dis = False
                        stop_flag_gen = True
                        dis.random_std *= 0.5
                    else:
                        stop_flag_dis = False
                        stop_flag_gen = False
                        dis.random_std *= 0.9
                accum_gen = 0.
                accum_dis = 0.
                dis_result = []

            if i % image_save_interval == 0:
                z = (xp.random.uniform(-1, 1, (10, nz), dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                print "make sentences"
                for j, sent_seq in enumerate(make_sentences(x)):
                    sent = []
                    for t in sent_seq:
                        """
                        if t == EOS_str:
                            sent.append("<EOS>." + str(len(sent)))
                            break
                        """
                        if t == EOS_str:
                            t = t.replace(EOS_str, "_")
                        sent.append(t)
                    print "\t", j, " ".join(sent)
                serializers.save_npz("%s/dcgan_model_dis_%d.npz" %
                                     (out_model_dir, epoch), dis)
                serializers.save_npz("%s/dcgan_model_gen_%d.npz" %
                                     (out_model_dir, epoch), gen)
                serializers.save_npz("%s/dcgan_state_dis_%d.npz" %
                                     (out_model_dir, epoch), o_dis)
                serializers.save_npz("%s/dcgan_state_gen_%d.npz" %
                                     (out_model_dir, epoch), o_gen)

        # print 'epoch end', epoch, sum_l_gen/n_train, sum_l_dis/n_train
        print 'epoch end', epoch, \
            "dis:", sum(sum_l_dis) / len(sum_l_dis) / batchsize, \
            "gen:", sum(sum_l_gen) / len(sum_l_gen) / batchsize
        dis.max_std *= 0.5
def Train():
    # Create data generator
    batch_tuples, history = {}, {}
    for dataset in args.dataset.split('+'):
        batch_tuples.update({dataset: []})
        for image_size in args.scales_tr:
            iterator = MultiprocessIterator(
                DataChef.GetExample(datasets[dataset]['train'], True,
                                    dataset, image_size),
                args.minibatch, n_prefetch=2, n_processes=args.nb_processes,
                shared_mem=20000000, repeat=True, shuffle=True)
            batch_tuples[dataset].append(iterator)

        # Keep the log in history
        if dataset in ['LIP', 'MSCOCO', 'PASCAL_SBD']:
            history.update({
                dataset: {
                    'loss': [],
                    'miou': [],
                    'pixel_accuracy': [],
                    'mean_class_accuracy': []
                }
            })
        elif dataset in ['WIDER', 'BAPD']:
            history.update(
                {dataset: {
                    'loss': [],
                    'prediction': [],
                    'groundtruth': []
                }})

    # Random input image size (change it after every x minibatches)
    batch_tuple_indx = np.random.choice(range(len(args.scales_tr)),
                                        args.max_iter / 10)
    batch_tuple_indx = list(np.repeat(batch_tuple_indx, 10))

    # Train
    start_time = time.time()
    for iterk in range(args.checkpoint, len(batch_tuple_indx)):
        # Get a minibatch while sequentially rotating between datasets
        for dataset in args.dataset.split('+'):
            dataBatch = batch_tuples[dataset][batch_tuple_indx[iterk]].next()
            dataBatch = zip(*dataBatch)

            # Prepare batch data
            IMG = np.array_split(np.array(dataBatch[0]), len(Model), axis=0)
            LBL = np.array_split(np.array(dataBatch[1]), len(Model), axis=0)

            # Forward
            for device_id, img, lbl in zip(range(len(Model)), IMG, LBL):
                Model[device_id](img, lbl, dataset, train=True)

            # Aggregate reporters from all GPUs
            reporters = []
            for i in range(len(Model)):
                reporters.append(Model[i].reporter)
                Model[i].reporter = {}  # clear reporter

            # History
            for reporter in reporters:
                for k in reporter[dataset].keys():
                    history[dataset][k].append(reporter[dataset][k])

            # Accumulate grads
            for i in range(1, len(Model)):
                Model[0].addgrads(Model[i])

            # Update
            opt.update()

            # Update params of other models
            for i in range(1, len(Model)):
                Model[i].copyparams(Model[0])

        # Report
        if (iterk + 1) % args.report_interval == 0:
            DataChef.Report(history, args.report_interval * len(args.GPUs),
                            (iterk + 1), time.time() - start_time,
                            split='train')

        # Saving the model
        if (iterk + 1) % args.save_interval == 0 or \
                (iterk + 1) == len(batch_tuple_indx):
            serializers.save_hdf5(
                '%s/checkpoints/%s_iter_%d_%s.chainermodel' %
                (args.project_folder, args.dataset, iterk + 1, args.suffix),
                Model[0])
            serializers.save_npz(
                '%s/checkpoints/%s_iter_%d_%s.chaineropt' %
                (args.project_folder, args.dataset, iterk + 1, args.suffix),
                opt)

        # Evaluation
        if (iterk + 1) % args.eval_interval == 0:
            Evaluation(splits=args.eval_split)

        # Decrease learning rate (poly in 10 steps)
        if (iterk + 1) % int(args.max_iter / 10) == 0:
            decay_rate = (1.0 - float(iterk) /
                          args.max_iter) ** args.optimizer['lr_decay_power']
            # Learning rate of fresh layers
            opt.lr *= decay_rate
            # Learning rate of pretrained layers
            for name, param in opt.target.namedparams():
                if name.startswith('/predictor/'):
                    param.update_rule.hyperparam.lr *= decay_rate
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)

            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
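# The example above parses a --resume flag but never uses it. A minimal,
# hedged sketch of how a resume step could be wired in before the training
# loop; the file names follow the save calls at the end of main(), but this
# is an assumption, not the original example's behavior:
def resume_from_npz(model, optimizer,
                    model_path='mlp.model', state_path='mlp.state'):
    # load_npz restores parameters and optimizer state in place; both
    # objects must be set up exactly as they were when they were saved.
    serializers.load_npz(model_path, model)
    serializers.load_npz(state_path, optimizer)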
def save(self, fname="data/cnn.model"):
    serializers.save_npz(fname, self)
def convert_gl2ch(dst_net, dst_params_file_path, dst_params, dst_param_keys,
                  src_params, src_param_keys, ext_src_param_keys,
                  ext_src_param_keys2, src_model):
    dst_param_keys = [key.replace('/W', '/weight') for key in dst_param_keys]
    dst_param_keys = [key.replace('/post_activ/', '/stageN/post_activ/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/final_block/', '/stageN/final_block/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/stem1_unit/', '/stage0/stem1_unit/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/stem2_unit/', '/stage0/stem2_unit/')
                      for key in dst_param_keys]

    src_param_keys.sort()
    src_param_keys.sort(key=lambda var: [
        '{:10}'.format(int(x)) if x.isdigit() else x
        for x in re.findall(r'[^0-9]|[0-9]+', var)])

    dst_param_keys.sort()
    dst_param_keys.sort(key=lambda var: [
        '{:10}'.format(int(x)) if x.isdigit() else x
        for x in re.findall(r'[^0-9]|[0-9]+', var)])

    dst_param_keys = [key.replace('/weight', '/W') for key in dst_param_keys]
    dst_param_keys = [key.replace('/stageN/post_activ/', '/post_activ/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/stageN/final_block/', '/final_block/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/stage0/stem1_unit/', '/stem1_unit/')
                      for key in dst_param_keys]
    dst_param_keys = [key.replace('/stage0/stem2_unit/', '/stem2_unit/')
                      for key in dst_param_keys]

    ext2_src_param_keys = [key for key in src_param_keys
                           if key.endswith(".beta")]
    ext2_dst_param_keys = [key for key in dst_param_keys
                           if key.endswith("/beta")]
    ext3_src_param_keys = {".".join(v.split(".")[:-1]): i
                           for i, v in enumerate(ext2_src_param_keys)}
    ext3_dst_param_keys = list(map(lambda x: x.split('/')[1:-1],
                                   ext2_dst_param_keys))

    for i, src_key in enumerate(ext_src_param_keys):
        src_key1 = src_key.split(".")[-1]
        src_key2 = ".".join(src_key.split(".")[:-1])
        dst_ind = ext3_src_param_keys[src_key2]
        dst_path = ext3_dst_param_keys[dst_ind]
        obj = dst_net
        for j, sub_path in enumerate(dst_path):
            obj = getattr(obj, sub_path)
        if src_key1 == 'running_mean':
            assert (obj.avg_mean.shape == src_params[src_key].shape), \
                "src_key={}, dst_path={}, src_shape={}, obj.avg_mean.shape={}".format(
                    src_key, dst_path, src_params[src_key].shape,
                    obj.avg_mean.shape)
            obj.avg_mean = src_params[src_key]._data[0].asnumpy()
        elif src_key1 == 'running_var':
            assert (obj.avg_var.shape == src_params[src_key].shape)
            obj.avg_var = src_params[src_key]._data[0].asnumpy()

    if src_model in ["condensenet74_c4_g4", "condensenet74_c8_g8"]:
        assert (dst_net.output.fc.index.shape ==
                src_params["output.1.index"].shape)
        dst_net.output.fc.index = \
            src_params["output.1.index"]._data[0].asnumpy().astype(np.int32)
        ext_src_param_keys2.remove("output.1.index")

        ext2_src_param_keys = [key for key in src_param_keys
                               if key.endswith(".conv1.conv.weight")]
        ext2_dst_param_keys = [key for key in dst_param_keys
                               if key.endswith("/conv1/conv/W")]
        ext3_src_param_keys = {".".join(v.split(".")[:-2]): i
                               for i, v in enumerate(ext2_src_param_keys)}
        ext3_dst_param_keys = list(map(lambda x: x.split('/')[1:-2],
                                       ext2_dst_param_keys))

        for i, src_key in enumerate(ext_src_param_keys2):
            src_key2 = ".".join(src_key.split(".")[:-1])
            dst_ind = ext3_src_param_keys[src_key2]
            dst_path = ext3_dst_param_keys[dst_ind]
            obj = dst_net
            for j, sub_path in enumerate(dst_path):
                obj = getattr(obj, sub_path)
            assert (obj.index.shape == src_params[src_key].shape), \
                "src_key={}, dst_path={}, src_shape={}, obj.index.shape={}".format(
                    src_key, dst_path, src_params[src_key].shape,
                    obj.index.shape)
            obj.index = src_params[src_key]._data[0].asnumpy().astype(np.int32)

    for i, (src_key, dst_key) in enumerate(zip(src_param_keys,
                                               dst_param_keys)):
        assert (dst_params[dst_key].array.shape == src_params[src_key].shape), \
            "src_key={}, dst_key={}, src_shape={}, dst_shape={}".format(
                src_key, dst_key, src_params[src_key].shape,
                dst_params[dst_key].array.shape)
        dst_params[dst_key].array = src_params[src_key]._data[0].asnumpy()

    from chainer.serializers import save_npz
    save_npz(file=dst_params_file_path, obj=dst_net)
def save(model, optimizer, vocab, save_name, args):
    serializers.save_npz(save_name + "model", copy.deepcopy(model).to_cpu())
    serializers.save_npz(save_name + "optimizer", optimizer)
    with open(save_name + "vocab.json", "w") as f:
        json.dump(vocab, f)
    print('save', save_name)
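# Aside — a sketch under assumptions: because the snapshot above is deep-copied
# to the CPU before saving, a loader that wants to keep training on the GPU has
# to move the restored parameters back ("Model" and save_name are stand-ins).
from chainer import serializers

model = Model()  # stand-in: same class as the saved model
serializers.load_npz(save_name + "model", model)
model.to_gpu()  # the snapshot was saved on the CPU; move it back for GPU use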
def train(epochs, iterations, batchsize, modeldir, extension, time_width,
          mel_bins, sampling_rate, g_learning_rate, d_learning_rate,
          beta1, beta2, identity_epoch, adv_type, residual_flag, data_path):
    # Dataset Definition
    dataloader = DatasetLoader(data_path)

    # Model & Optimizer Definition
    generator = GeneratorWithCIN(adv_type=adv_type)
    generator.to_gpu()
    gen_opt = set_optimizer(generator, g_learning_rate, beta1, beta2)

    discriminator = Discriminator()
    discriminator.to_gpu()
    dis_opt = set_optimizer(discriminator, d_learning_rate, beta1, beta2)

    # Loss Function Definition
    lossfunc = StarGANVC2LossFunction()

    for epoch in range(epochs):
        sum_dis_loss = 0
        sum_gen_loss = 0
        for batch in range(0, iterations, batchsize):
            x_sp, x_label, y_sp, y_label = dataloader.train(batchsize)

            # discriminator update
            if adv_type == 'sat':
                y_fake = generator(x_sp, F.concat([y_label, x_label]))
            elif adv_type == 'orig':
                y_fake = generator(x_sp, y_label)
            else:
                raise AttributeError
            y_fake.unchain_backward()

            if adv_type == 'sat':
                advloss_dis_real, advloss_dis_fake = lossfunc.dis_loss(
                    discriminator, y_fake, x_sp,
                    F.concat([y_label, x_label]),
                    F.concat([x_label, y_label]),
                    residual_flag)
            elif adv_type == 'orig':
                advloss_dis_real, advloss_dis_fake = lossfunc.dis_loss(
                    discriminator, y_fake, x_sp, y_label, x_label,
                    residual_flag)
            else:
                raise AttributeError

            dis_loss = advloss_dis_real + advloss_dis_fake

            discriminator.cleargrads()
            dis_loss.backward()
            dis_opt.update()
            dis_loss.unchain_backward()

            # generator update
            if adv_type == 'sat':
                y_fake = generator(x_sp, F.concat([y_label, x_label]))
                x_fake = generator(y_fake, F.concat([x_label, y_label]))
                x_identity = generator(x_sp, F.concat([x_label, x_label]))
                advloss_gen_fake, cycle_loss = lossfunc.gen_loss(
                    discriminator, y_fake, x_fake, x_sp,
                    F.concat([y_label, x_label]), residual_flag)
            elif adv_type == 'orig':
                y_fake = generator(x_sp, y_label)
                x_fake = generator(y_fake, x_label)
                x_identity = generator(x_sp, x_label)
                advloss_gen_fake, cycle_loss = lossfunc.gen_loss(
                    discriminator, y_fake, x_fake, x_sp, y_label,
                    residual_flag)
            else:
                raise AttributeError

            if epoch < identity_epoch:
                identity_loss = lossfunc.identity_loss(x_identity, x_sp)
            else:
                identity_loss = call_zeros(advloss_dis_fake)

            gen_loss = advloss_gen_fake + cycle_loss + identity_loss

            generator.cleargrads()
            gen_loss.backward()
            gen_opt.update()
            gen_loss.unchain_backward()

            sum_dis_loss += dis_loss.data
            sum_gen_loss += gen_loss.data

            if batch == 0:
                serializers.save_npz(f"{modeldir}/generator_{epoch}.model", generator)

        print(f"epoch: {epoch}")
        print(f"dis loss: {sum_dis_loss / iterations} gen loss: {sum_gen_loss / iterations}")
    end='')
test_losses = []
test_accuracies = []
while True:
    test_batch = test_iter.next()
    text_test, target_test = concat_examples(test_batch)

    # Forward the test data
    prediction_test = model(text_test)

    # Calculate the loss
    loss_test = F.softmax_cross_entropy(prediction_test, target_test)
    test_losses.append(loss_test.data)

    # Calculate the accuracy
    accuracy = F.accuracy(prediction_test, target_test)
    test_accuracies.append(accuracy.data)

    if test_iter.is_new_epoch:
        test_iter.epoch = 0
        test_iter.current_position = 0
        test_iter.is_new_epoch = False
        test_iter._pushed_position = None
        break

print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
    np.mean(test_losses), np.mean(test_accuracies)))

serializers.save_npz('./neural_network/my_mnist.model', model)
model.cleargrads()  # flatten (reset) the internal gradients before learning
loss.backward()
opt.update()

# skip the test
#if (i + 1) % test_interval != 0: continue

'''
Test
- feed the data one example at a time (batch size = 1)
- no padding
'''
epoch_accu = 0
for b in I.SerialIterator(test, 1, repeat=False, shuffle=False):
    # reshape the data
    enc, dec = zip(*b)
    # forward pass
    ts = model(enc, dec[:-1])
    # compute the accuracy
    accu = sum(F.accuracy(t, w) for t, w in zip(ts, dec[1:]))
    epoch_accu += accu.data / (len(test) * len(ts))
    #print(" ".join(id_word[F.argmax(t).data] for t in ts))

''' Output '''
message = '{:>3} | {:>8.5f} | {:>6.1%}'.format(i + 1, epoch_loss, epoch_accu)
print(message)
with open(dir + '/log.txt', 'a') as f:
    f.write(message + '\n')

# save the model
S.save_npz(dir + '/model/epoch_' + str(i + 1) + '.npz', model)
def batch_train_loop(bucket_fname, num_epochs, batch_size=10,
                     num_buckets=NUM_BUCKETS, num_training=2,
                     bucket_width=BUCKET_WIDTH, log_mode="a", last_epoch_id=0):
    # Set up log files for loss
    log_train_fil = open(log_train_fil_name, mode=log_mode)
    log_train_csv = csv.writer(log_train_fil, lineterminator="\n")
    log_dev_fil = open(log_dev_fil_name, mode=log_mode)
    log_dev_csv = csv.writer(log_dev_fil, lineterminator="\n")

    # initialize perplexity on dev set;
    # save the model when the new epoch value is lower than the previous one
    pplx = float("inf")
    bleu_score = 0
    sys.stderr.flush()

    for epoch in range(num_epochs):
        train_count = 0
        with tqdm(total=num_training) as pbar:
            sys.stderr.flush()
            loss_per_epoch = 0
            out_str = "epoch={0:d}, iter={1:d}, loss={2:.4f}, mean loss={3:.4f}, bucket={4:d}".format(
                epoch + 1, 0, 0, 0, 0)
            pbar.set_description(out_str)

            for buck_indx in range(num_buckets):
                bucket_data = pickle.load(open(bucket_data_fname.format(buck_indx + 1), "rb"))
                buck_pad_lim = (buck_indx + 1) * bucket_width
                for i in range(0, len(bucket_data), batch_size):
                    if train_count >= num_training:
                        break
                    next_batch_end = min(batch_size, (num_training - train_count))
                    # print("current batch")
                    # print(bucket_data[i:i+next_batch_end])
                    # print("bucket limit", buck_pad_lim)
                    curr_len = len(bucket_data[i:i + next_batch_end])
                    loss = model.encode_decode_train_batch(
                        bucket_data[i:i + next_batch_end], buck_pad_lim, buck_pad_lim)
                    train_count += curr_len

                    # set up for backprop
                    model.cleargrads()
                    loss.backward()
                    # update parameters
                    optimizer.update()

                    # store loss value for display
                    loss_val = float(loss.data)
                    loss_per_epoch += loss_val
                    it = (epoch * NUM_TRAINING_SENTENCES) + curr_len
                    out_str = "epoch={0:d}, iter={1:d}, loss={2:.4f}, mean loss={3:.4f}, bucket={4:d}".format(
                        epoch + 1, it, loss_val, (loss_per_epoch / (i + 1)), (buck_indx + 1))
                    pbar.set_description(out_str)
                    pbar.update(curr_len)

                    # log every 10 batches
                    if i % 10 == 0:
                        log_train_csv.writerow([it, loss_val])
                if train_count >= num_training:
                    break

        print("finished training on {0:d} sentences".format(num_training))
        print("{0:s}".format("-" * 50))
        print("computing perplexity")
        # pplx_new = compute_dev_pplx()
        pplx_new = compute_pplx(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)
        if pplx_new > pplx:
            print("perplexity went up during training, breaking out of loop")
            break
        if (epoch + 1) % ITERS_TO_SAVE == 0:
            print("Saving model")
            serializers.save_npz(
                model_fil.replace(".model", "_{0:d}.model".format(last_epoch_id + epoch + 1)), model)
            print("Finished saving model")
        pplx = pplx_new
        print(log_train_fil_name)
        print(log_dev_fil_name)
        print(model_fil.replace(".model", "_{0:d}.model".format(epoch + 1)))
        if (epoch + 1) % ITERS_TO_SAVE == 0:
            bleu_score = compute_bleu(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)
        # log pplx and bleu score
        log_dev_csv.writerow([(last_epoch_id + epoch + 1), pplx_new, bleu_score])
        log_train_fil.flush()
        log_dev_fil.flush()

        print("Simple predictions (╯°□°)╯︵ ┻━┻")
        print("training set predictions")
        _ = predict(s=0, num=2, plot=False)
        print("Simple predictions (╯°□°)╯︵ ┻━┻")
        print("dev set predictions")
        _ = predict(s=NUM_TRAINING_SENTENCES, num=3, plot=False)
        # print("{0:s}".format("-"*50))
        # compute_bleu(dev_fname["fr"], dev_fname["en"], NUM_MINI_DEV_SENTENCES)
        # print("{0:s}".format("-"*50))

    print("Final saving model")
    serializers.save_npz(model_fil, model)
    print("Finished saving model")

    # close log files
    log_train_fil.close()
    log_dev_fil.close()
    print(log_train_fil_name)
    print(log_dev_fil_name)
    print(model_fil)
x -= 120  # subtract the mean
xc = Variable(x.copy(), volatile=True)
x = Variable(x)
y = model(x)
feature = vgg(xc)
feature_hat = vgg(y)

# content loss is computed only on the output of layer conv3_3
L_feat = lambda_f * F.mean_squared_error(Variable(feature[2].data), feature_hat[2])

L_style = Variable(xp.zeros((), dtype=np.float32))
for f, f_hat, g_s in zip(feature, feature_hat, gram_s):
    L_style += lambda_s * F.mean_squared_error(gram_matrix(f_hat), Variable(g_s.data))

L_tv = lambda_tv * total_variation_regularization(y)
L = L_feat + L_style + L_tv

print '(epoch {}) batch {}/{}... training loss is...{}'.format(epoch, i, n_iter, L.data)

L.backward()
O.update()

if args.checkpoint > 0 and i % args.checkpoint == 0:
    serializers.save_npz('models/style_{}_{}.model'.format(epoch, i), model)

print 'save "style.model"'
serializers.save_npz('models/style_{}.model'.format(epoch), model)
serializers.save_npz('models/style.model', model)
exec(txt)

# load convolution weights (Convolution2D.W is out_ch * in_ch * filter size;
# reshape it to (out_ch, in_ch, 3, 3))
txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
    i + 1, offset, offset + (out_ch * in_ch * ksize * ksize),
    out_ch, in_ch, ksize, ksize)
offset += (out_ch * in_ch * ksize * ksize)
exec(txt)
print(i + 1, offset)

# load the last convolution weights (only the Bias and Convolution2D are loaded)
in_ch = 1024
out_ch = last_out
ksize = 1

txt = "yolov2.bias%d.b.data = dat[%d:%d]" % (i + 2, offset, offset + out_ch)
offset += out_ch
exec(txt)

txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
    i + 2, offset, offset + (out_ch * in_ch * ksize * ksize),
    out_ch, in_ch, ksize, ksize)
offset += out_ch * in_ch * ksize * ksize
exec(txt)
print(i + 2, offset)

print("save weights file to yolov2_darknet_hdf5.model")
serializers.save_hdf5("yolov2_darknet_hdf5.model", yolov2)
print("save weights file to yolov2_darknet.model")
serializers.save_npz("yolov2_darknet.model", yolov2)
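# Aside — a minimal sketch: the two copies written above are read back with the
# symmetric chainer serializers; the HDF5 variant additionally requires h5py.
serializers.load_hdf5("yolov2_darknet_hdf5.model", yolov2)  # in-place restore
serializers.load_npz("yolov2_darknet.model", yolov2)        # same, NPZ format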
        return self.l2(h3)

# -- set up the model and the optimizer --
model = MS()
optimizer = optimizers.Adam()
optimizer.setup(model)

# -- training --
iterator = iterators.SerialIterator(train_data, 100)
updater = training.StandardUpdater(iterator, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'))
trainer.extend(extensions.ProgressBar())
trainer.run()

# -- save the model --
serializers.save_npz("ms_classification.npz", model)

# -- test --
correct = 0
for i in range(len(test_data)):
    x = Variable(np.array([test_data[i][0]], dtype=np.float32))
    t = test_data[i][1]
    y = model.predict(x)
    maxIndex = np.argmax(y.data)
    if maxIndex == t:
        correct += 1

# -- accuracy --
print("Correct:", correct, "Total:", len(test_data),
      "Accuracy:", correct / len(test_data) * 100, "%")
def save_model(self, outputfile):
    serializers.save_npz(outputfile, self.model)
Nepoch = 400
Probloss_val = xp.asarray(np.zeros(Nepoch))
Probloss_train = xp.asarray(np.zeros(Nepoch))

start_at = time.time()
print("Starting training...")
with cupy.cuda.Device(gpu_id):
    epoch = 0
    period_start_at = time.time()
    bi = 0
    curr_epoch = 0
    MER_val = np.ones(Nepoch)
    while True:
        # monitor the objective value
        if bi % size_epoch == 0:
            if curr_epoch % monitor_frequency == 0 or curr_epoch == (Nepoch - 1):
                # save model every epoch
                serializers.save_npz(savedir + '/model_%d.model' % curr_epoch, model)
                MER_val[curr_epoch] = objective_function(
                    model, x_val, y_val, n_per_sample_val, z_monitor_all_val)
                now = time.time()
                tput = float(size_epoch * monitor_frequency * batchsize) / (now - period_start_at)
                tpassed = now - start_at
                print(" %.1fs Epoch %d, batch %d, Probloss on Validation Set %.4f, %.2f S/s" %
                      (tpassed, curr_epoch, bi, MER_val[curr_epoch], tput))
                # Reset
                period_start_at = time.time()
            curr_epoch += 1
            if curr_epoch >= Nepoch:
                print("we're stopping")
                break
        bi += 1  # Batch index
        indexes = np.sort(np.random.choice(N, batchsize, replace=False))
def main(use_gpu=-1):
    start_time = time.time()  # time.clock() was removed in Python 3.8

    # select the processing unit
    if use_gpu >= 0:
        import cupy as cp
        xp = cp
        chainer.cuda.get_device(use_gpu).use()
    else:
        xp = np

    # paths
    training_dataset_path = './samples/sample_dataset/mnist/mnist_training.csv'
    validation_dataset_path = './samples/sample_dataset/mnist/mnist_test.csv'
    image_path = './samples/sample_dataset/mnist'

    # set up the network
    model = BinaryConnectMnistLeNet()
    if use_gpu >= 0:
        model.to_gpu()
    optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999,
                                        eps=10**(-8), weight_decay_rate=0)
    optimizer.setup(model)

    # set up the dataset (training)
    train_image_list, train_image_label_list = load_dataset(
        training_dataset_path, image_path)
    # set up the dataset (validation)
    validation_image_list, validation_image_label_list = load_dataset(
        validation_dataset_path, image_path)

    epoch = 100
    batchsize = 64
    accuracy_train_list, accuracy_val_list = [], []

    # learning
    for ep in range(0, epoch):
        print('epoch', ep + 1)
        # before learning, we have to shuffle the training data because we want
        # the network to see a different order in each epoch
        zipped_train_list = list(zip(train_image_list, train_image_label_list))
        random.shuffle(zipped_train_list)
        learn_image_list, learn_label_list = zip(*zipped_train_list)
        learn_image_list = xp.array(list(learn_image_list))
        learn_label_list = xp.array(list(learn_label_list))

        batch_times = 0
        accuracy_train = 0
        for b in range(0, len(learn_image_list), batchsize):
            model.cleargrads()
            x = chainer.Variable(
                xp.asarray(learn_image_list[b:b + batchsize]).astype(xp.float32))
            y = chainer.Variable(
                xp.asarray(learn_label_list[b:b + batchsize]).astype(xp.int32))
            h = model(x)
            # CategoricalCrossEntropy doesn't exist in chainer, so
            # softmax_cross_entropy is used instead
            loss = F.softmax_cross_entropy(h, y)
            accuracy_train += F.accuracy(h, y).data
            batch_times += 1
            loss.backward()
            optimizer.update()
        accuracy_train_list.append(1 - (accuracy_train / batch_times))

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x_valid = chainer.Variable(
                xp.asarray(validation_image_list).astype(xp.float32))
            y_valid_acc = chainer.Variable(
                xp.asarray(validation_image_label_list).astype(xp.int32))
            h_valid = model(x_valid)
            accuracy_val = F.accuracy(h_valid, y_valid_acc)
            accuracy_val_list.append(1 - accuracy_val.data)

    serializers.save_npz('./models/binary_connect_mnist_LeNet', model)
    print("Time to finish learning:" + str(time.time() - start_time))

    # draw the accuracy graph
    axis_x = np.arange(0, epoch, 1)
    y0 = accuracy_train_list
    y1 = accuracy_val_list
    plt.plot(axis_x, y0, label='train')
    plt.plot(axis_x, y1, label='validation')
    plt.title('Learning Curve', fontsize=20)
    plt.xlabel('epoch', fontsize=16)
    plt.ylabel('Error rate')
    plt.tick_params(labelsize=14)
    plt.grid(True)
    plt.legend(loc='upper right')
    plt.show()
loss = F.mean_squared_error(y, t)  # mean squared error
loss.backward()  # backpropagation
optimizer.update()  # optimization step

# show intermediate results
if epoch % 1000 == 0:
    # compute the loss and the accuracy
    loss_val = loss.data
    print('epoch:', epoch)
    print('x:\n', x.data)
    print('t:\n', t.data)
    print('y:\n', y.data)
    print('train mean loss={}'.format(loss_val))  # training loss, accuracy
    print(' - - - - - - - - - ')

# finish once epoch reaches n_epoch
if epoch >= n_epoch:
    break
epoch += 1

# save the model and the optimizer
print('save the model')
serializers.save_npz('xor_mlp.model', model)
print('save the optimizer')
serializers.save_npz('xor_mlp.state', optimizer)
itr_test = 0
sum_test_loss = 0
sum_test_loss1 = 0
sum_test_loss2 = 0
sum_test_accuracy1 = 0
sum_test_accuracy2 = 0
for i in range(0, len(test_data) - args.testbatchsize, args.testbatchsize):
    x1, x2, t1, t2, z = mini_batch(test_data[i:i + args.testbatchsize])
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            y1, y2 = model(x1, x2)

        itr_test += 1
        loss1 = F.mean(F.softmax_cross_entropy(y1, t1, reduce='no') * z)
        loss2 = F.sigmoid_cross_entropy(y2, t2)
        loss = loss1 + loss2
        sum_test_loss1 += loss1.data
        sum_test_loss2 += loss2.data
        sum_test_loss += loss.data
        sum_test_accuracy1 += F.accuracy(y1, t1).data
        sum_test_accuracy2 += F.binary_accuracy(y2, t2).data

logging.info(
    'epoch = {}, iteration = {}, train loss avr = {}, test_loss = {}, {}, {}, test accuracy = {}, {}'
    .format(optimizer.epoch + 1, optimizer.t, sum_loss_epoch / itr_epoch,
            sum_test_loss1 / itr_test, sum_test_loss2 / itr_test,
            sum_test_loss / itr_test, sum_test_accuracy1 / itr_test,
            sum_test_accuracy2 / itr_test))
optimizer.new_epoch()

print('save the model')
serializers.save_npz(args.model, model)
print('save the optimizer')
serializers.save_npz(args.state, optimizer)
        i + 1, perp, throuput))
    cur_at = now
    cur_log_perp.fill(0)

if (i + 1) % jump == 0:
    epoch += 1
    print('evaluate')
    now = time.time()
    perp = evaluate(valid_data)
    print('epoch {} validation perplexity: {:.2f}'.format(epoch, perp))
    cur_at += time.time() - now  # skip time of evaluation

    # Save the model and the optimizer
    print('save the model')
    strtime = datetime.now().strftime('%Y%m%d%H%M%S')
    serializers.save_npz('jsai2016ptb_dialogue_%s.model' % strtime, model)
    serializers.save_npz('jsai2016ptb_dialogueQ_%s.model' % strtime, modelQ)
    serializers.save_npz('jsai2016ptb_dialogueA_%s.model' % strtime, modelA)
    print('save the optimizer')
    serializers.save_npz('jsai2016ptb_dialogue_%s.state' % strtime, optimizer)

    if epoch >= 6:
        optimizer.lr /= 1.2
        optimizerQ.lr /= 1.2
        optimizerA.lr /= 1.2
        print('learning rate =', optimizer.lr)

sys.stdout.flush()
def pretraining():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--batchsize', type=int, default=256)
    args = parser.parse_args()

    xp = np
    gpu_id = args.gpu
    seed = args.seed
    train, _ = mnist.get_mnist()
    train, _ = convert.concat_examples(train, device=gpu_id)
    batchsize = args.batchsize

    model = StackedDenoisingAutoEncoder(input_dim=train.shape[1])
    if chainer.cuda.available and args.gpu >= 0:
        xp = cp
        model.to_gpu(gpu_id)
    xp.random.seed(seed)

    # Layer-Wise Pretrain
    print("Layer-Wise Pretrain")
    for i, dae in enumerate(model.children()):
        print("Layer {}".format(i + 1))
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        clf = L.Classifier(dae, lossfun=mean_squared_error)
        clf.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            clf.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(clf)
        updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
        trainer = training.Trainer(updater, (50000, "iteration"), out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()
        train = dae.encode(train).data

    # Finetuning
    print("fine tuning")
    with chainer.using_config("train", False):
        train, _ = mnist.get_mnist()
        train, _ = convert.concat_examples(train, device=gpu_id)
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        model = L.Classifier(model, lossfun=mean_squared_error)
        model.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            model.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(model)
        updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
        trainer = training.Trainer(updater, (100000, "iteration"), out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()

    outfile = "StackedDenoisingAutoEncoder-seed{}.model".format(seed)
    serializers.save_npz(outfile, model.predictor)
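# Aside — a minimal sketch: since only model.predictor is saved above, the file
# holds the bare StackedDenoisingAutoEncoder parameters, so it is restored into
# that class directly rather than into the L.Classifier wrapper (input_dim=784
# is assumed here, matching flattened MNIST).
sda = StackedDenoisingAutoEncoder(input_dim=784)
serializers.load_npz("StackedDenoisingAutoEncoder-seed0.model", sda)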
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        # Evaluation routine to be used for validation and test.
        model.predictor.train = False
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        evaluator.predictor.train = False  # dropout does nothing
        sum_perp = 0
        data_count = 0
        for batch in copy.copy(iter):
            x, t = convert.concat_examples(batch, args.gpu)
            loss = evaluator(x, t)
            sum_perp += loss.data
            data_count += 1
        model.predictor.train = True
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each iteration.
        for i in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            batch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the device
            # self.converter does this job
            # (it is chainer.dataset.concat_examples by default)
            x, t = convert.concat_examples(batch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            count += 1

        sum_perp += loss.data
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(evaluate(model, val_iter)))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
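# Aside — a sketch reusing names from the example above: training can later be
# resumed by rebuilding the same model and optimizer and loading both snapshots
# before re-entering the loop.
rnn = train_ptb.RNNForLM(n_vocab, args.unit)
model = L.Classifier(rnn)
optimizer = chainer.optimizers.SGD(lr=1.0)
optimizer.setup(model)
serializers.load_npz('rnnlm.model', model)      # restore the parameters
serializers.load_npz('rnnlm.state', optimizer)  # restore the optimizer state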
def main():
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=1000, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    if 0 <= args.gpu:
        cuda.get_device_from_id(args.gpu).use()

    # create the VAE object defined in net
    textVae = net.VAE(600, args.dimz, 300, 100)
    chainer.serializers.load_npz("birds_txt.npz", textVae)
    if 0 <= args.gpu:
        textVae.to_gpu()  # move to the GPU

    #model = netN.VAE(textVae, n_latent=10, ch1=5000, ch2=10000, ch3=16384)
    model = net_img.VAE(1, 20, 64, textVae)
    #chainer.serializers.load_npz("mymodel_img.npz", model)
    if 0 <= args.gpu:
        model.to_gpu()  # move to the GPU

    # optimizer (for parameter updates)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # load the model (npz is the NumPy format)
    """
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)
    """

    traint = np.load('birds_txt.npy')
    traini = np.load('birds_img.npy')
    #traini = traini.reshape((len(traini), 1, 128, 128))
    train = tuple_dataset.TupleDataset(traint, traini)
    train, test = train_test_split(train, test_size=0.2, random_state=50)

    # ---- set up the dataset iterators ----
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # used in the training with 'loss_func' option
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu, loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu,
                                        eval_func=model.get_loss_func(k=10)))
    # trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    # trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # run the trainer
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(128, 128))
        fig.savefig(filename)

    serializers.save_npz("birds_all.npz", model)