def main():
    train, test = chainer.datasets.get_mnist()

    def forward(x, t, model):
        y, l = model(x)
        if model.c:
            y, l = Lmt(t)(y, l)
        t = np.eye(10)[t].astype(np.float32)  # one-hot encode the labels for MSE
        loss = mse(y, t)
        return loss

    model = MLP(c=0.05)
    optimizer = Opt()
    optimizer.setup(model)
    for epoch in range(5):
        for batch in SerialIterator(train, 60, repeat=False):
            x, t = format(batch)
            optimizer.update(forward, x, t, model)
        tx, tt = format(test)
        print("epoch {}: accuracy: {:.3f}".format(epoch + 1, model.accuracy(tx, tt)))

    fgsm = FGSM(model)
    for eta in [0.01, 0.02, 0.05, 0.1]:
        cnt = 0
        fail = 0
        for i in np.random.randint(0, 10000, 100):
            res = fgsm.attack(test[i][0], test[i][1], eta=eta)
            if res != -1:  # -1: the clean sample was already misclassified
                cnt += 1
                if not res:  # False: the attack flipped the prediction
                    fail += 1
        print("c: {:.3f}, eta: {:.3f}, attacked: {:.3f}".format(model.c, eta, fail / cnt))

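# FGSM is used above but not defined in this snippet. Below is a minimal
# sketch of the fast gradient sign method under the conventions the loop
# assumes: `model(x)` returns a (prediction, auxiliary) pair as in
# `forward` above, and `attack` returns -1 when the clean sample is
# already misclassified, otherwise True/False for whether the model is
# still correct after the attack. The class body itself is an assumption,
# not the original implementation.
class FGSM:
    def __init__(self, model):
        self.model = model

    def attack(self, x, t, eta=0.1):
        x = chainer.Variable(x.reshape(1, -1).astype(np.float32))
        y, _ = self.model(x)
        if int(F.argmax(y, axis=1).data[0]) != t:
            return -1  # clean sample already misclassified; skip it
        loss = F.softmax_cross_entropy(y, np.array([t], dtype=np.int32))
        self.model.cleargrads()
        x.cleargrad()
        loss.backward()
        # One signed-gradient step in the direction that increases the loss.
        x_adv = chainer.Variable(x.data + eta * np.sign(x.grad))
        y_adv, _ = self.model(x_adv)
        # True: model still correct; False: the attack flipped the prediction.
        return int(F.argmax(y_adv, axis=1).data[0]) == t
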
class Train:
    def __init__(self):
        with open("data.pickle", "rb") as f:
            self.data = pickle.load(f)
        self.model = Model()
        self.model.to_gpu()
        self.optimizer = Adam()
        self.optimizer.setup(self.model)
        self.executor = ThreadPoolExecutor(8)
        # Prefetch the first pair of batches in a background thread.
        self.hoge = self.executor.submit(self.data.next, 2, 2)

    def load(self):
        # Wait for the in-flight prefetch, then immediately start the next
        # one so data loading overlaps with the forward/backward pass.
        d = self.hoge.result()
        self.hoge = self.executor.submit(self.data.next, 2, 2)
        return d

    def training(self):
        for i in range(1000000000000000):  # effectively: run until interrupted
            a = self.batch()
            if i % 100 == 0:
                print(f"{i} loss:{a}")

    def batch(self):
        a, b = self.load()
        self.model.cleargrads()
        # Run the four forward passes in worker threads.
        y = tuple(self.executor.map(self.model, a + b))
        # Same-class pairs pull together (label 1); cross-class pairs push
        # apart (label 0).
        loss = F.contrastive(y[0], y[1], [1]) + \
            F.contrastive(y[2], y[3], [1]) + \
            F.contrastive(y[0], y[2], [0]) + \
            F.contrastive(y[1], y[3], [0])
        loss.backward()
        self.optimizer.update()
        return loss.data.get()

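# The load()/submit() pair above implements double buffering: while the
# GPU runs the current step, the executor is already fetching the next
# batch. A stripped-down, self-contained sketch of the same pattern (the
# function below is illustrative and not part of the original code):
from concurrent.futures import ThreadPoolExecutor

def prefetching_batches(fetch, n_batches):
    """Yield batches while the next one is fetched in a background thread."""
    executor = ThreadPoolExecutor(1)
    future = executor.submit(fetch)
    for _ in range(n_batches):
        batch = future.result()          # wait for the in-flight fetch
        future = executor.submit(fetch)  # immediately start the next one
        yield batch
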
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--data_dir", type=str, default="./datasets")
    parser.add_argument("--data_list", type=str, default="train.txt")
    parser.add_argument("--n_class", type=int, default=5)
    parser.add_argument("--n_steps", type=int, default=100)
    parser.add_argument("--snapshot_dir", type=str, default="./snapshots")
    parser.add_argument("--save_steps", type=int, default=50)
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    model = RefineResNet(n_class=args.n_class)
    if args.model is not None:
        serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    # optimizer = MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-5), "hook_wd")

    train_dataset = ImageDataset(args.data_dir, args.data_list, crop_size=(320, 320))
    train_iterator = MultiprocessIterator(train_dataset, batch_size=args.batch_size,
                                          repeat=True, shuffle=True)

    step = 0
    for zipped_batch in train_iterator:
        step += 1
        x = Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        y = Variable(xp.array([zipped[1] for zipped in zipped_batch], dtype=xp.int32))
        # Keep the prediction as a Variable: converting it to a raw array
        # (as in the original `xp.array(model(x).data)`) detaches it from
        # the computational graph, so no gradient would reach the model.
        pred = model(x)
        loss = F.softmax_cross_entropy(pred, y)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        print("Step: {}, Loss: {}".format(step, loss.data))
        if step % args.save_steps == 0:
            serializers.save_npz(
                os.path.join(args.snapshot_dir, "model_{}.npz".format(step)), model)
        if step >= args.n_steps:
            break

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=200,
                        help='Number of characters in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--file', default="enwik8",
                        help='path to text file for training')
    parser.add_argument('--unit', '-u', type=int, default=2800,
                        help='Number of LSTM units')
    parser.add_argument('--embd', type=int, default=400,
                        help='Number of embedding units')
    parser.add_argument('--hdrop', type=float, default=0.2,
                        help='hidden state dropout (variational)')
    parser.add_argument('--edrop', type=float, default=0.5,
                        help='embedding dropout')
    args = parser.parse_args()

    nembd = args.embd
    # number of training iterations per model save, log write, and
    # validation-set evaluation
    interval = 100
    pdrop = args.hdrop
    pdrope = args.edrop
    # initial learning rate
    alpha0 = .001
    # inverse of linear decay rate towards 0
    dec_it = 12 * 9000
    # minimum learning rate
    alpha_min = .00007
    # first ntrain characters of the dataset will be used for training
    ntrain = 90000000
    seqlen = args.bproplen
    nbatch = args.batchsize
    filename = args.file

    text, mapping = get_char(filename)
    sequence = np.array(text).astype(np.int32)
    itrain = sequence[0:ntrain]        # inputs
    ttrain = sequence[1:ntrain + 1]    # targets: inputs shifted by one
    fullseql = int(ntrain / nbatch)
    itrain = itrain.reshape(nbatch, fullseql)
    ttrain = ttrain.reshape(nbatch, fullseql)

    # doesn't use the full validation set
    nval = 500000
    ival = sequence[ntrain:ntrain + nval]
    tval = sequence[ntrain + 1:ntrain + nval + 1]
    ival = ival.reshape(ival.shape[0] // 1000, 1000)
    tval = tval.reshape(tval.shape[0] // 1000, 1000)
    # test = sequence[ntrain+nval:ntrain+nval+ntest]

    nvocab = max(sequence) + 1  # train is just an array of integers
    print('#vocab =', nvocab)

    # Prepare an RNNLM model
    rnn = RNNForLM(nvocab, args.unit, args.embd)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = Adam(alpha=alpha0)
    optimizer.setup(model)

    resultdir = args.out
    print('starting')
    nepoch = args.epoch
    start = 0
    loss_sum = 0
    if not os.path.isdir(resultdir):
        os.mkdir(resultdir)

    vloss = test(rnn, ival, tval)
    vloss = 1.4427 * vloss  # nats -> bits (1/ln 2 ≈ 1.4427)
    f = open(os.path.join(resultdir, 'log'), 'w')
    outstring = "Initial validation loss (bits/char): " + str(vloss) + '\n'
    f.write(outstring)
    f.close()

    i = 0
    epoch_num = 0
    it_num = 0
    while True:
        # Get the result of the forward pass.
        fin = start + seqlen
        if fin > itrain.shape[1]:
            # reached the end of the training data: wrap around
            start = 0
            fin = start + seqlen
            epoch_num = epoch_num + 1
            if epoch_num == nepoch:
                break
        inputs = itrain[:, start:fin]
        targets = ttrain[:, start:fin]
        start = fin
        inputs = Variable(inputs)
        targets = Variable(targets)
        targets.to_gpu()
        inputs.to_gpu()
        it_num += 1
        loss = 0
        rnn.applyWN()

        # make the hidden-state dropout mask (variational: one mask is
        # shared across all timesteps of the sequence)
        mask = cp.zeros((inputs.shape[0], args.unit), dtype=cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0], args.unit) > pdrop)
        mask[ind] = 1 / (1 - pdrop)
        # make the embedding dropout mask
        mask2 = cp.zeros((inputs.shape[0], nembd), dtype=cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0], nembd) > pdrope)
        mask2[ind] = 1 / (1 - pdrope)

        for j in range(seqlen):
            output = rnn(inputs[:, j], mask, mask2)
            loss = loss + F.softmax_cross_entropy(output, targets[:, j])
        loss = loss / seqlen

        # Zero all gradients before updating them.
        rnn.cleargrads()
        loss_sum += loss.data

        # Calculate and update all gradients.
        loss.backward()

        # Use the optimizer to move all parameters of the network
        # to values which will reduce the loss.
        optimizer.update()

        # decay the learning rate linearly
        optimizer.alpha = alpha0 * (dec_it - it_num) / float(dec_it)
        # prevent the learning rate from going below the minimum
        if optimizer.alpha < alpha_min:
            optimizer.alpha = alpha_min

        # truncate BPTT at the mini-batch boundary
        loss.unchain_backward()

        if (i + 1) % interval == 0:
            rnn.reset_state()
            vloss = test(rnn, ival, tval)
            # convert from nats to bits
            vloss = 1.4427 * vloss
            loss_sum = 1.4427 * loss_sum / interval
            serializers.save_npz(os.path.join(resultdir, 'model'), rnn)
            outstring = ("Training iteration: " + str(i + 1) +
                         " Training loss (bits/char): " + str(loss_sum) +
                         " Validation loss (bits/char): " + str(vloss) + '\n')
            f = open(os.path.join(resultdir, 'log'), 'a')
            f.write(outstring)
            f.close()
            print("Training iteration: " + str(i + 1))
            print('training loss: ' + str(loss_sum))
            print('validation loss: ' + str(vloss))
            loss_sum = 0
        i += 1

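# `test` (and `get_char`) are called above but not defined in this
# snippet. A minimal sketch of what the validation helper could look
# like, assuming `rnn(x, mask, mask2)` as in the training loop, all-ones
# masks to disable dropout at evaluation time, and hypothetical
# `rnn.nunits` / `rnn.nembd` attributes for the layer sizes:
def test(rnn, ival, tval):
    rnn.reset_state()
    nbatch, nsteps = ival.shape
    ones_h = cp.ones((nbatch, rnn.nunits), dtype=cp.float32)  # no hidden dropout
    ones_e = cp.ones((nbatch, rnn.nembd), dtype=cp.float32)   # no embedding dropout
    loss = 0.0
    with chainer.no_backprop_mode():
        for j in range(nsteps):
            output = rnn(cp.asarray(ival[:, j]), ones_h, ones_e)
            loss += float(F.softmax_cross_entropy(output, cp.asarray(tval[:, j])).data)
    return loss / nsteps  # mean cross entropy in nats/char
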
for epoch in range(0, epochs):
    print('EPOCH: {}/{}'.format(epoch + 1, epochs))
    perm = np.random.permutation(num_data)  # random sampling

    # Iterate in mini-batch units.
    # Note: only the first 1000 permuted samples are visited per epoch.
    for idx in range(0, 1000, batch_size):
        # Slice out the input and output data.
        batch_x = Variable(train_x[perm[idx: idx + batch_size]])
        batch_y = Variable(train_y[perm[idx: idx + batch_size]])

        # Optimize the model.
        model.cleargrads()
        loss, accuracy = model(batch_x, batch_y)  # feed the network
        loss.backward()
        optimizer.update()

        now = time.time()
        print('{}/{}, train_loss = {}, accuracy = {}, time = {:.2f}'.format(
            idx, num_data, loss.data, accuracy.data, now - cur_at))
        average_loss.append(loss.data)
        accuracy_list.append(accuracy.data)
        cur_at = now

    # At the end of each epoch, feed a few sample inputs to check the output.
    for tmp in perm[1: 10]:
        # print('input -> {}'.format(''.join(x[tmp])))
        # print('output -> ', end='')
        test_x = Variable(train_x[tmp])
        for index in model.beam_search_predict(test_x):
            pass  # loop body truncated in the original snippet

class QNeuralNetwork(QModel):
    def __init__(self, model, target, device_id=-1,
                 learning_rate=0.00025, momentum=.9,
                 minibatch_size=32, update_interval=10000):

        assert isinstance(model, ChainerModel), \
            'model should inherit from ChainerModel'

        super(QNeuralNetwork, self).__init__(model.input_shape,
                                             model.output_shape)

        self._gpu_device = None
        self._loss_val = 0

        # Target model update method
        self._steps = 0
        self._target_update_interval = update_interval

        # Setup model and target network
        self._minibatch_size = minibatch_size
        self._model = model
        self._target = target
        self._target.copyparams(self._model)

        # If GPU, move to GPU memory
        if device_id >= 0:
            with cuda.get_device(device_id) as device:
                self._gpu_device = device
                self._model.to_gpu(device)
                self._target.to_gpu(device)

        # Setup optimizer
        self._optimizer = Adam(learning_rate, momentum, 0.999)
        self._optimizer.setup(self._model)

    def evaluate(self, environment, model=QModel.ACTION_VALUE_NETWORK):
        # Add a batch dimension if a single observation was passed.
        if check_rank(environment.shape, get_rank(self._input_shape)):
            environment = environment.reshape((1,) + environment.shape)

        # Move data if necessary
        if self._gpu_device is not None:
            environment = cuda.to_gpu(environment, self._gpu_device)

        if model == QModel.ACTION_VALUE_NETWORK:
            output = self._model(environment)
        else:
            output = self._target(environment)

        return cuda.to_cpu(output.data)

    def train(self, x, y, actions=None):
        actions = actions.astype(np.int32)
        batch_size = len(actions)

        if self._gpu_device is not None:
            x = cuda.to_gpu(x, self._gpu_device)
            y = cuda.to_gpu(y, self._gpu_device)
            actions = cuda.to_gpu(actions, self._gpu_device)

        q = self._model(x)
        # Q-values of the actions that were actually taken
        q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
        y = y.reshape(batch_size, 1)

        loss = F.sum(F.huber_loss(q_subset, y, 1.0))

        self._model.cleargrads()
        loss.backward()
        self._optimizer.update()

        self._loss_val = np.asscalar(cuda.to_cpu(loss.data))

        # Keeps track of the number of train() calls
        self._steps += 1
        if self._steps % self._target_update_interval == 0:
            # copy weights
            self._target.copyparams(self._model)

    @property
    def loss_val(self):
        return self._loss_val  # / self._minibatch_size

    def save(self, output_file):
        save_npz(output_file, self._model)

    def load(self, input_file):
        load_npz(input_file, self._model)
        # Copy parameters from model to target
        self._target.copyparams(self._model)

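# Hypothetical usage sketch; QModel, ChainerModel, and the concrete
# network are defined elsewhere in this project, and the shapes below are
# assumptions:
#
#   model = MyChainerModel(input_shape=(4, 84, 84), output_shape=6)
#   target = MyChainerModel(input_shape=(4, 84, 84), output_shape=6)
#   dqn = QNeuralNetwork(model, target, device_id=-1)
#   q_values = dqn.evaluate(np.zeros((4, 84, 84), dtype=np.float32))
#   dqn.train(states, td_targets, actions)  # one step on the Huber TD loss
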
def train(source_bpe, target_bpe, source_glove, target_glove, chunk_length,
          batch_size, warmup_steps, save_decimation, num_steps, gpu_id, out,
          log_level):
    if not os.path.exists(out):
        os.makedirs(out)

    ll = getattr(logging, log_level)
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(ll)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    file_handler = logging.FileHandler(filename=os.path.join(out, 'training.log'),
                                       mode='a')
    file_handler.setLevel(ll)
    file_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    logger.setLevel(ll)

    gpu_id = gpu_id if gpu_id is not None else -1
    device_name = '@intel64'
    if gpu_id >= 0:
        device_name = f'@cupy:{gpu_id}'

    with chainer.using_device(device_name):
        source_vocab = make_vocab(source_glove)
        target_vocab = make_vocab(target_glove)
        output_model_dim = target_vocab.embedding_size

        dataset = make_dataset(source_bpe, target_bpe, source_vocab,
                               target_vocab, chunk_length)
        iterator = MultithreadIterator(dataset, batch_size)
        state = TrainingState()
        model = Transformer(source_vocab, target_vocab)
        if gpu_id >= 0:  # guard: to_gpu(-1) would fail on CPU runs
            model.to_gpu(gpu_id)
        optimizer = Adam(beta1=0.99, beta2=0.98, eps=1e-9).setup(model)

        load_training(out, model, optimizer, state)

        try:
            for n, batch in enumerate(iterator):
                if n >= num_steps:
                    break

                if (n + 1) % save_decimation == 0:
                    save_training(out, model, optimizer, state)

                model.cleargrads()
                gc.collect()

                source, target = stack_nested(batch)
                if gpu_id >= 0:
                    source.token_ids.to_gpu(gpu_id)
                    source.masks.to_gpu(gpu_id)
                    target.token_ids.to_gpu(gpu_id)
                    target.masks.to_gpu(gpu_id)

                output_probs = model.train_forward(source.token_ids,
                                                   target.token_ids,
                                                   input_masks=source.masks,
                                                   output_masks=target.masks)

                # Per-token cross entropy, flattened to (batch * time,)
                unnormalized_loss = F.softmax_cross_entropy(
                    F.reshape(output_probs,
                              (output_probs.shape[0] * output_probs.shape[1],
                               output_probs.shape[2])),
                    F.reshape(target.token_ids,
                              (target.token_ids.shape[0] *
                               target.token_ids.shape[1], )),
                    reduce='no')
                # Average the loss over non-padding positions only.
                loss_mask = xp.reshape(
                    xp.logical_not(target.masks.array).astype(xp.float32),
                    (target.masks.shape[0] * target.masks.shape[1], ))
                loss = F.sum(unnormalized_loss * loss_mask) / F.sum(loss_mask)

                loss.backward()

                # Noam learning-rate schedule: linear warmup, then
                # inverse-square-root decay (requires state.step >= 1,
                # otherwise step ** -0.5 raises ZeroDivisionError).
                learning_rate = (output_model_dim ** -0.5) * min(
                    state.step ** -0.5, state.step * (warmup_steps ** -1.5))
                optimizer.alpha = learning_rate
                optimizer.update()

                logger.info(
                    f'time = {int(time.time())} | step = {state.step} | '
                    f'loss = {float(loss.array)} | lr = {learning_rate}')

                state.step += 1
        finally:
            save_training(out, model, optimizer, state)

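# The learning-rate expression above is the warmup schedule from
# "Attention Is All You Need": lr = d_model^-0.5 * min(step^-0.5,
# step * warmup^-1.5). A standalone sketch for reference (the function
# name is ours, not part of this codebase):
def noam_lr(step, d_model=512, warmup_steps=4000):
    """Linear warmup for `warmup_steps` steps, then ~1/sqrt(step) decay."""
    assert step >= 1, "the schedule is undefined at step 0"
    return (d_model ** -0.5) * min(step ** -0.5, step * warmup_steps ** -1.5)

# e.g. noam_lr(1) ≈ 1.7e-7; the peak is at step 4000, ≈ 7.0e-4.
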
def main():
    parser = argparse.ArgumentParser(description="LapSRN")
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--outdirname", type=str, default="./models")
    parser.add_argument("--scale", type=int, default=4)
    parser.add_argument("--batchsize", type=int, default=64)
    parser.add_argument("--epoch", type=int, default=100)
    parser.add_argument("--steps_per_epoch", type=int, default=128)
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=-1)
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dataset: {}'.format(args.dataset))
    print('# outdirname: {}'.format(args.outdirname))
    print('# scale: {}'.format(args.scale))
    print('# batchsize: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# steps_per_epoch: {}'.format(args.steps_per_epoch))
    print('# model: {}'.format(args.model))
    print('')

    OUTPUT_DIRECTORY = args.outdirname
    if not os.path.exists(OUTPUT_DIRECTORY):
        os.makedirs(OUTPUT_DIRECTORY)

    model = LapSRN()
    if args.model is not None:
        print("Loading model...")
        serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    optimizer.setup(model)

    print("loading dataset...")
    paths = glob.glob(args.dataset)
    train_dataset = ImageDataset(scale=args.scale, paths=paths,
                                 dtype=xp.float32, cropsize=96)
    iterator = MultiprocessIterator(train_dataset, batch_size=args.batchsize,
                                    repeat=True, shuffle=True)

    step = 0
    epoch = 0
    loss = 0
    print("training...")
    for zipped_batch in iterator:
        lr = chainer.Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        hr = chainer.Variable(xp.array([zipped[1] for zipped in zipped_batch]))
        sr = model(lr)
        # Compute the loss once and reuse it for both logging and the
        # update (the original recomputed it inside optimizer.update()).
        batch_loss = l1_charbonnier(sr, hr, model)
        model.cleargrads()
        batch_loss.backward()
        optimizer.update()
        loss += batch_loss.data
        # Increment before the modulo check so the first "epoch" also
        # spans steps_per_epoch batches.
        step += 1
        if step % args.steps_per_epoch == 0:
            loss /= args.steps_per_epoch
            print("Epoch: {}, Loss: {}, PSNR: {}".format(
                epoch, loss, PSNR(sr.data[0], hr.data[0])))
            chainer.serializers.save_npz(
                os.path.join(OUTPUT_DIRECTORY, "model_{}.npz".format(epoch)),
                model)
            epoch += 1
            loss = 0
        if epoch > args.epoch:
            break
    print("Done")

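# `l1_charbonnier` is called above but not defined in this snippet.
# LapSRN trains with the Charbonnier penalty, a differentiable variant of
# L1: rho(x) = sqrt(x^2 + eps^2). A minimal sketch; the `model` argument
# is accepted only to match the call sites above, and eps = 1e-3 follows
# the LapSRN paper:
def l1_charbonnier(sr, hr, model, eps=1e-3):
    diff = sr - hr
    return F.mean(F.sqrt(diff * diff + eps * eps))
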
def main():
    model = Siamese()
    print('model params: ', model.count_params())
    optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
    optimizer.setup(model)
    epochs = 1000
    batch_size = 64
    data_batch = np.zeros((batch_size, 2), dtype=np.float32)
    # sigmoid_cross_entropy expects int32 labels
    labels = np.zeros((batch_size, 1), dtype=np.int32)
    loss_list = []
    for e in range(epochs):
        # Build a synthetic batch of digit pairs; if the two digits come
        # out equal, nudge one of them so every pair has a strict order.
        for b in range(batch_size):
            x1 = randint(0, 9)
            x2 = randint(0, 9)
            if x1 == x2:
                lr = randint(0, 1)  # decide which will be bigger
                if x1 == 0:
                    if lr == 1:  # left
                        x1 += 1
                    else:
                        x2 += 1
                elif x1 == 9:
                    if lr == 1:
                        x2 -= 1
                    else:
                        x1 -= 1
                else:
                    if lr == 1:
                        x1 += 1
                    else:
                        x2 += 1
            data_batch[b] = [np.float32(x1), x2]
            # label 1 = left is larger, label 0 = right is larger
            if x1 > x2:
                labels[b] = 1
            else:
                labels[b] = 0
        with chainer.using_config('train', True):
            model.cleargrads()
            d1 = np.expand_dims(data_batch[:, 0], -1)
            d2 = np.expand_dims(data_batch[:, 1], -1)
            prediction = model(d1, d2)
            loss = sigmoid_cross_entropy(prediction, labels)
            loss.backward()
            optimizer.update()
            loss_list.append(float(loss.data))
        if (e + 1) % 100 == 0:
            cm = make_confusion_matrix(prediction, labels)
            print(e, cm, loss, 'W: ', model.fc1.W, 'b: ', model.fc1.b)

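# `make_confusion_matrix` is called above but not defined here. A minimal
# sketch, assuming `prediction` holds one logit per pair (positive logit
# = "left is larger", matching label 1):
def make_confusion_matrix(prediction, labels):
    pred = (prediction.data > 0).astype(np.int32).ravel()
    true = labels.ravel()
    cm = np.zeros((2, 2), dtype=np.int32)
    for t, p in zip(true, pred):
        cm[t, p] += 1  # rows: true label, columns: predicted label
    return cm
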
class AdamSet:
    """Bundles one Adam optimizer per sub-network of the GAN."""

    def __init__(self, alpha, beta1, beta2, conditional=False):
        self.conditional = conditional
        # The mapping networks train with a 100x smaller learning rate.
        self.mapper_optimizer = Adam(alpha / 100, beta1, beta2, eps=1e-08)
        self.synthesizer_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
        self.discriminator_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
        if conditional:
            self.generator_embedder_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
            self.discriminator_embedder_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
            self.condition_mapper_optimizer = Adam(alpha / 100, beta1, beta2, eps=1e-08)

    def __iter__(self):
        yield "mapper", self.mapper_optimizer
        yield "synthesizer", self.synthesizer_optimizer
        yield "discriminator", self.discriminator_optimizer
        if self.conditional:
            yield "generator_embedder", self.generator_embedder_optimizer
            yield "discriminator_embedder", self.discriminator_embedder_optimizer
            yield "condition_mapper", self.condition_mapper_optimizer

    def setup(self, generator, discriminator):
        self.mapper_optimizer.setup(generator.mapper)
        self.synthesizer_optimizer.setup(generator.synthesizer)
        self.discriminator_optimizer.setup(discriminator.main)
        if self.conditional:
            self.generator_embedder_optimizer.setup(generator.embedder)
            self.discriminator_embedder_optimizer.setup(discriminator.embedder)
            self.condition_mapper_optimizer.setup(discriminator.condition_mapper)

    def update_generator(self):
        self.mapper_optimizer.update()
        self.synthesizer_optimizer.update()
        if self.conditional:
            self.generator_embedder_optimizer.update()

    def update_discriminator(self):
        self.discriminator_optimizer.update()
        if self.conditional:
            self.discriminator_embedder_optimizer.update()
            self.condition_mapper_optimizer.update()

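# Hypothetical usage sketch; the generator/discriminator links and the
# loss computation live elsewhere in this project, and the hyperparameter
# values below are placeholders:
#
#   optimizers = AdamSet(alpha=0.002, beta1=0.0, beta2=0.99)
#   optimizers.setup(generator, discriminator)
#   ...
#   generator.cleargrads(); gen_loss.backward(); optimizers.update_generator()
#   discriminator.cleargrads(); dis_loss.backward(); optimizers.update_discriminator()
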
class RNN(object):
    def __init__(self, n_words, emb_size, n_hidden, n_classes, classes):
        # Chainer 1.x style model definition (FunctionSet / volatile API).
        self.model = chainer.FunctionSet(
            Emb=F.EmbedID(n_words, emb_size),
            W=F.Linear(emb_size, n_hidden),
            U=F.Linear(n_hidden, n_hidden),
            O=F.Linear(n_hidden, n_classes)
        )
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.emb_size = emb_size
        self.classes = classes
        # Python 2 dict API; use classes.items() on Python 3.
        self.classes_rev = {v: k for k, v in classes.iteritems()}

        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape) * 0.1

        self.optimizer = Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
        self.optimizer.setup(self.model)

    def forward_loss(self, mb_x, mb_y, train=True):
        mb_size = mb_x.shape[0]
        n_steps = mb_x.shape[1]

        loss = 0.0
        h = chainer.Variable(np.zeros((mb_size, self.n_hidden), dtype='float32'),
                             volatile=not train)
        y_hat = []
        for i in range(n_steps):
            x_i = chainer.Variable(mb_x[:, i], volatile=not train)
            y_i = chainer.Variable(mb_y[:, i], volatile=not train)

            # Simple Elman-style recurrence (no nonlinearity applied).
            h = self.model.W(self.model.Emb(x_i)) + self.model.U(h)
            out = self.model.O(h)

            curr_loss = F.softmax_cross_entropy(out, y_i)
            # Grab the softmax probabilities cached inside the loss node.
            y_hat.append(curr_loss.creator.y)

            loss += curr_loss * 1.0 / (n_steps * mb_size)

        y_hat = np.array(y_hat).swapaxes(0, 1)

        return loss, y_hat

    def learn(self, x, y):
        self.optimizer.zero_grads()
        loss, y_hat = self.forward_loss(x, y, train=True)
        loss.backward()
        self.optimizer.update()
        return loss.data

    def predict(self, x):
        # Dummy labels: only the predictions are used.
        _, y_hat = self.forward_loss(x, np.zeros(x.shape, dtype='int32'))
        return np.argmax(y_hat, axis=2)

    def predictions_to_text(self, y):
        return [self.classes_rev.get(i, '#EOS') for i in y]

    def eval(self, mb_x, mb_y):
        mb_y_hat = self.predict(mb_x)
        t = self.predictions_to_text
        y_true = mb_y.ravel()
        y_pred = mb_y_hat.ravel()
        valid = y_true != -1  # -1 marks padding; exclude it from the metrics
        acc = sklearn.metrics.accuracy_score(y_true[valid], y_pred[valid])
        prec = sklearn.metrics.precision_score(y_true[valid], y_pred[valid])
        recall = sklearn.metrics.recall_score(y_true[valid], y_pred[valid])
        report = sklearn.metrics.classification_report(t(y_true[valid]),
                                                       t(y_pred[valid]))
        return acc, prec, recall, report, mb_y_hat

img1 = Variable(img1)
img1.to_device(device)
# Treat the second image as a trainable parameter, initialized to noise.
img2 = L.Parameter(np.random.rand(*img1.shape).astype(np.float32))
img2.to_device(device)

optimizer = Adam(0.1)
optimizer.setup(img2)
device.use()

print(type(img1), type(img2()))
ssim_value = ssim_loss(img1, img2(), 11, 11)
print("Initial ssim:", ssim_value)

step = 1
# Gradient-ascend the SSIM of img2 against img1 until it passes 0.95.
while ssim_value.data < 0.95:
    optimizer.update(loss, img1, img2())
    ssim_value = -loss(img1, img2())
    ssim_value_s = "ssim: {}".format(ssim_value.array)
    print("ssim:", ssim_value)
    if args.is_plot:
        im = (img2.W.array[0].transpose(1, 2, 0).clip(0, 1) * 255).astype(np.uint8)
        plt.imshow(im)
        plt.text(0, -5, ssim_value_s)
        plt.show()
    step += 1

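# `loss` is called above but not defined in this snippet; since the loop
# maximizes SSIM by minimizing `loss`, it is presumably the negation of
# `ssim_loss`. A minimal sketch of that assumption:
def loss(a, b):
    return -ssim_loss(a, b, 11, 11)  # negate so minimizing raises SSIM
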
x = generator(z)
y1 = discriminator(x)

# Train Generator: class 0 = real, class 1 = fake. The generator wants
# its fakes classified as real; the discriminator wants them as fake.
loss_gen = F.softmax_cross_entropy(
    y1, Variable(xp.zeros(batch_size, dtype=np.int32)))
loss_dis = F.softmax_cross_entropy(
    y1, Variable(xp.ones(batch_size, dtype=np.int32)))

# Train Discriminator: real samples should be classified as real.
batch_x = Variable(train_x[perm[idx: idx + batch_size]])
y2 = discriminator(batch_x)
loss_dis += F.softmax_cross_entropy(
    y2, Variable(xp.zeros(batch_size, dtype=np.int32)))

# Optimize the generator
generator.cleargrads()
loss_gen.backward()
opt_gen.update()

# Optimize the discriminator
discriminator.cleargrads()
loss_dis.backward()
opt_dis.update()

now = time.time()
print('{}/{}, Gen_loss = {}, Dis_loss = {}, time = {:.2f}'.format(
    idx, n_train_data, loss_gen.data, loss_dis.data, now - cur_at))
gen_loss.append(loss_gen.data)
dis_loss.append(loss_dis.data)
cur_at = now

pickle.dump(generator, open('generator_snapshot.model', 'wb'))
pickle.dump(discriminator, open('discriminator_snapshot.model', 'wb'))