def train_loop():
    # Trainer
    graph_generated = False
    data = np.ndarray((args.batchsize, 3, 224, 224), dtype=np.float32)
    data.fill(33333)
    x = xp.asarray(data)
    label = np.ndarray((args.batchsize), dtype=np.int32)
    label.fill(1)
    y = xp.asarray(label)

    while True:
        optimizer.zero_grads()
        loss, accuracy = model.forward(x, y)
        loss.backward()
        optimizer.update()

        if not graph_generated:
            with open('graph.dot', 'w') as o:
                o.write(c.build_computational_graph((loss,), False).dump())
            with open('graph.wo_split.dot', 'w') as o:
                o.write(c.build_computational_graph((loss,), True).dump())
            print('generated graph')
            graph_generated = True

        del loss, accuracy  # x and y are reused on the next iteration
def write_graph(loss):
    with open("graph.dot", "w") as o:
        o.write(c.build_computational_graph((loss, )).dump())
    with open("graph.wo_split.dot", "w") as o:
        g = c.build_computational_graph((loss, ), remove_split=True)
        o.write(g.dump())
    print('graph generated')
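# --- Added usage sketch (not from the original source) ---
# A minimal call of the write_graph helper above. It assumes `c` is
# `chainer.computational_graph` and targets the older Chainer API in which
# build_computational_graph still accepted `remove_split`; the one-layer
# model below is purely illustrative.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
import chainer.computational_graph as c

toy_model = L.Linear(4, 2)  # hypothetical toy model
toy_loss = F.sum(toy_model(chainer.Variable(np.zeros((1, 4), dtype=np.float32))))
write_graph(toy_loss)  # writes graph.dot and graph.wo_split.dot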
def setUp(self):
    self.x = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.y = 2 * self.x
    self.x_splitter = self.x.splitter()
    self.x_clone = self.x_splitter.outputs[0]()
    self.f = self.y.creator
    self.g1 = c.build_computational_graph((self.y,), False)
    self.g2 = c.build_computational_graph((self.y,), True)
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            pickle.dump(model, open(folder + 'model', 'wb'), -1)
            train = False
            continue

        x = xp.asarray(inp[0])
        y = xp.asarray(inp[1])

        if train:
            optimizer.zero_grads()
            loss, accuracy = model.forward(x, y)
            loss.backward()
            optimizer.update()

            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(c.build_computational_graph((loss,), False).dump())
                with open('graph.wo_split.dot', 'w') as o:
                    o.write(c.build_computational_graph((loss,), True).dump())
                print('generated graph')
                graph_generated = True
        else:
            loss, accuracy = model.forward(x, y, train=False)
            if epoch_count % 2 == 0:
                print('save model')
                model.to_cpu()
                with open(folder + 'model_' + str(epoch_count), 'wb') as o:
                    pickle.dump(model, o)
                model.to_gpu()  # move the model back to the GPU again?
                optimizer.setup(model)

        res_q.put((float(loss.data), float(accuracy.data)))
        del loss, accuracy, x, y
def test_tail_node(self):
    edges = c.build_computational_graph((self.y,), False)
    self.assertEqual(len(edges), 4)
    self.assertTrue((self.x, self.x_splitter) in edges)
    self.assertTrue((self.x_splitter, self.x_clone) in edges)
    self.assertTrue((self.x_clone, self.f) in edges)
    self.assertTrue((self.f, self.y) in edges)
def train_loop(model, output_dir, xp, optimizer, res_q, data_q):
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':
            res_q.put('end')
            break
        elif inp == 'train':
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':
            res_q.put('val')
            model.train = False
            continue

        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)

        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph')
                graph_generated = True
        else:
            model(x, t)
            serializers.save_hdf5(output_dir + os.sep + 'model%04d' % inp[2], model)
            #serializers.save_hdf5(output_dir + os.sep + 'optimizer%04d' % inp[2], optimizer)

        res_q.put((float(model.loss.data), float(model.accuracy.data), inp[2]))
        del x, t
def draw_computational_graph(*args, **kwargs):
    """Draw computational graph.

    @param output: output ps file.
    """
    from chainer.computational_graph import build_computational_graph

    output = kwargs.pop('output')

    if len(args) > 2:
        variable_style = args[2]
    else:
        variable_style = kwargs.get(
            'variable_style',
            {'shape': 'octagon', 'fillcolor': '#E0E0E0', 'style': 'filled'},
        )
    kwargs['variable_style'] = variable_style

    if len(args) > 3:
        function_style = args[3]
    else:
        function_style = kwargs.get(
            'function_style',
            {'shape': 'record', 'fillcolor': '#6495ED', 'style': 'filled'},
        )
    kwargs['function_style'] = function_style

    dotfile = tempfile.mktemp()
    with open(dotfile, 'w') as f:
        f.write(build_computational_graph(*args, **kwargs).dump())
    ext = osp.splitext(output)[-1][1:]  # e.g. '.ps' -> 'ps'
    cmd = 'dot -T{0} {1} > {2}'.format(ext, dotfile, output)
    subprocess.call(cmd, shell=True)
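# --- Added usage sketch (not from the original source) ---
# Calling draw_computational_graph defined above: the positional argument is
# forwarded to build_computational_graph, and `output` selects the format by
# file extension, so the Graphviz `dot` binary must be on PATH. The toy model
# is an assumption for illustration only.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

toy = L.Linear(3, 3)  # hypothetical model
toy_loss = F.sum(toy(chainer.Variable(np.ones((1, 3), dtype=np.float32))))
draw_computational_graph([toy_loss], output='graph.ps')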
def learn_as_autoencoder(self, x_train, x_test=None):
    optimizer = self.optimizer
    train_size = x_train.shape[0]
    train_data_size = x_train.shape[1]
    #self.add_dummy_output_link(train_data_size)

    for epoch in six.moves.range(self.epoch):
        perm = np.random.permutation(train_size)
        train_loss = 0
        test_loss = None
        test_accuracy = 0
        for i in range(0, train_size, self.batch_size):
            x = Variable(x_train[perm[i:i + self.batch_size]])
            self.zerograds()
            loss = self.loss_function(self[0](x), x)
            loss.backward()
            self.optimizer.update()
            train_loss += loss.data * self.batch_size
        train_loss /= train_size

        # guard against the default x_test=None before calling len()
        if x_test is not None and len(x_test):
            x = Variable(x_test)
            test_loss = self.loss_function(self[0](x), x).data
        if test_loss is not None:
            print('Pre-training test loss: ' + str(test_loss))

    if self.visualize:
        import chainer.computational_graph as c
        g = c.build_computational_graph((loss,))
        with open('child_graph.dot', 'w') as o:
            o.write(g.dump())

    del self.optimizer
def test_backward(self):
    rpn_in_ch = 512
    rpn_out_ch = 512
    feat_stride = 16
    anchor_ratios = [0.5, 1, 2]
    anchor_scales = [8, 16, 32]
    num_classes = 21
    model = FasterRCNN(
        self.trunk, rpn_in_ch, rpn_out_ch, feat_stride,
        anchor_ratios, anchor_scales, num_classes)
    model.rpn_train, model.rcnn_train = self.train
    if self.device >= 0:
        model.to_gpu(self.device)
        self.x.to_gpu(self.device)
        self.im_info.to_gpu(self.device)
        self.gt_boxes.to_gpu(self.device)
        self.assertIs(model.xp, cp)
        self.assertIs(model.trunk.xp, cp)
    opt = optimizers.Adam()
    opt.setup(model)

    if model.rpn_train:
        st = time.time()
        rpn_loss = model(self.x, self.im_info, self.gt_boxes)
        model.cleargrads()
        rpn_loss.backward()
        opt.update()
        print('Backward rpn device:{}, ({}, train:{}): {} sec'.format(
            self.device, self.trunk.__name__, self.train, time.time() - st))
        rpn_cg = cg.build_computational_graph([rpn_loss])
        with open('tests/rpn_cg.dot', 'w') as fp:
            fp.write(rpn_cg.dump())
    elif model.rcnn_train:
        st = time.time()
        loss_rcnn = model(self.x, self.im_info, self.gt_boxes)
        model.cleargrads()
        loss_rcnn.backward()
        opt.update()
        print('Backward rcnn device:{}, ({}, train:{}): {} sec'.format(
            self.device, self.trunk.__name__, self.train, time.time() - st))
        loss_rcnn_cg = cg.build_computational_graph([loss_rcnn])
        with open('tests/loss_rcnn_cg.dot', 'w') as fp:
            fp.write(loss_rcnn_cg.dump())
def train_loop():
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            pickle.dump(model, open('model', 'wb'), -1)
            train = False
            continue

        x, y = inp
        if args.gpu >= 0:
            x = cuda.to_gpu(x)
            y = cuda.to_gpu(y)

        if train:
            optimizer.zero_grads()
            loss, accuracy = model.forward(x, y)
            loss.backward()
            optimizer.update()

            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(c.build_computational_graph((loss,), False).dump())
                with open('graph.wo_split.dot', 'w') as o:
                    o.write(c.build_computational_graph((loss,), True).dump())
                print('generated graph')
                graph_generated = True
        else:
            loss, accuracy = model.forward(x, y, train=False)

        res_q.put((float(cuda.to_cpu(loss.data)),
                   float(cuda.to_cpu(accuracy.data))))
        del loss, accuracy, x, y
def train_nnet(model, optimizer, train_data_resource, opts):
    if opts.gpu >= 0:
        cuda.check_cuda_available()
        model.to_gpu(opts.gpu)

    accum_loss = 0
    i = 0
    train_loss = 0
    prev_dev_loss = 100000
    prev_percentage = 0
    dump_graph = True

    for train_idx, x_batch, y_batch, epoch, percentage, eos in train_data_resource:
        if train_idx is None:
            # Done one epoch
            if opts.fname_dev:
                dev_loss, _, _ = evaluation(model, opts.fname_dev)
                if xp == cuda.cupy:
                    model.to_gpu()
                print(' dev loss: %.3f' % dev_loss, end='')
                if optimizer.lr < opts.lr_stop:
                    break
                if prev_dev_loss - dev_loss < opts.start_decay:
                    optimizer.lr *= opts.lr_decay
                    print('\n...reducing lr to %.6f' % optimizer.lr)
                prev_dev_loss = dev_loss
            print('')
            continue

        x = Variable(xp.asarray(x_batch))
        t = Variable(xp.asarray(y_batch))
        loss_i = model(x, t)
        accum_loss += loss_i

        if dump_graph:
            print('Dump graph')
            with open('graph.dot', 'w') as o:
                o.write(c.build_computational_graph((loss_i, )).dump())
            dump_graph = False

        if train_idx >= 1:
            train_loss = (train_loss * (train_idx - 1) + loss_i.data) / train_idx

        if eos and opts.forget_on_new_utt:
            model.predictor.forget_history()

        if eos or (i + 1) % opts.bprop_len == 0:
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()
            accum_loss = 0
            optimizer.update()
            i = 0

        if percentage != prev_percentage:
            prev_percentage = percentage
            print_stats(percentage, epoch, optimizer.lr, train_loss)
            sys.stdout.flush()

        i += 1
def setUp(self):
    self.x1 = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.x2 = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.y = self.x1 + self.x2
    self.f = self.y.creator
    self.variable_style = {'label': 'variable_0', 'shape': 'octagon',
                           'style': 'filled', 'fillcolor': '#E0E0E0'}
    self.function_style = {'label': 'function_0', 'shape': 'record',
                           'style': 'filled', 'fillcolor': '#6495ED'}
    self.g = c.build_computational_graph(
        (self.y,),
        variable_style=self.variable_style,
        function_style=self.function_style)
def dump_graph(trainer):
    var = trainer.observation[root_name]
    if not isinstance(var, variable.Variable):
        raise TypeError('root value is not a Variable')
    cg = computational_graph.build_computational_graph(
        [var],
        variable_style=variable_style,
        function_style=function_style
    ).dump()
    out_path = os.path.join(trainer.out, out_name)
    # TODO(beam2d): support outputting images by the dot command
    with open(out_path, 'w') as f:
        f.write(cg)
def __call__(self, trainer):
    try:
        var = trainer.observation[self._root_name]
        if not isinstance(var, variable.Variable):
            raise TypeError('root value is not a Variable')
        cg = computational_graph.build_computational_graph(
            [var],
            variable_style=self._variable_style,
            function_style=self._function_style
        ).dump()

        filename = os.path.join(trainer.out, self._filename)
        with open(filename, 'w') as f:
            f.write(cg)

        if is_graphviz_available():
            img_fn = os.path.splitext(self._filename)[0] + '.png'
            image_filename = os.path.join(trainer.out, img_fn)
            subprocess.check_call(
                ['dot', '-Tpng', filename, '-o', image_filename])
    finally:
        configuration.config.keep_graph_on_report = self._original_flag
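# --- Added note (not from the original source) ---
# Chainer ships this pattern as a built-in trainer extension, so a hand-rolled
# version like the one above can usually be replaced by a one-liner. This
# sketch assumes `trainer` is an existing chainer.training.Trainer whose
# reporter logs 'main/loss' (the usual Classifier/StandardUpdater setup).
from chainer.training import extensions

trainer.extend(extensions.dump_graph('main/loss', out_name='cg.dot'))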
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            serializers.save_npz(args.out, model)
            serializers.save_npz(args.outstate, optimizer)
            model.train = False
            continue

        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)

        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph', file=sys.stderr)
                graph_generated = True
        else:
            model(x, t)

        res_q.put((float(model.loss.data), float(model.accuracy.data)))
        del x, t
def backprop(self, t):
    x = Tensor.context

    self.optimizer.lr = Deel.optimizer_lr
    self.optimizer.zero_grads()
    loss, accuracy = self.func.getLoss(x.content, t.content)
    loss.backward()
    self.optimizer.update()

    if not self.graph_generated:
        #with open('graph.dot', 'w') as o:
        #    o.write(c.build_computational_graph((loss,), False).dump())
        with open('graph.wo_split.dot', 'w') as o:
            o.write(c.build_computational_graph((loss,), True).dump())
        print('generated graph')
        self.graph_generated = True

    return loss.data, accuracy.data
def dump_comp_graph(filename, vs):
    g = C.build_computational_graph(vs)
    with open(filename, 'w') as o:
        o.write(g.dump())
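# --- Added usage sketch (not from the original source) ---
# dump_comp_graph takes a list of output Variables; everything reachable from
# them through the autograd history ends up in the dot file.
import numpy as np
import chainer.functions as F
from chainer import Variable

a = Variable(np.ones((2, 2), dtype=np.float32))
b = F.exp(a) + a
dump_comp_graph('toy.dot', [b])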
        x_batch[i] = image
        y_batch[i] = label
        i += 1

        if i == args.batchsize:
            x_data = xp.asarray(x_batch)
            y_data = xp.asarray(y_batch)
            x = chainer.Variable(x_data, volatile=True)
            t = chainer.Variable(y_data, volatile=True)
            loss, accuracy = forward(x, t)

            import chainer.computational_graph as c
            with open('data/graph.dot', 'w') as o:
                o.write(c.build_computational_graph((loss,)).dump())

            accum_loss += float(loss.data) * args.batchsize
            accum_accuracy += float(accuracy.data) * args.batchsize
            del x, t, loss, accuracy

            count += args.batchsize
            print('{} / {}'.format(count, len(dataset)), end='\r', file=sys.stderr)
            sys.stderr.flush()
            i = 0

    print('mean loss: {}'.format(accum_loss / count))
    print('mean accuracy: {}'.format(accum_accuracy / count))
def update_core(self):
    xp = self.gen.xp
    self.map.cleargrads()
    self.gen.cleargrads()
    self.dis.cleargrads()

    opt_g_m = self.get_optimizer('map')
    opt_g_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    # z: latent | x: data | y: dis output
    # *_real/*_fake/*_perturbed: Variable
    # *_data: just data (xp array)

    stage = self.stage  # Need to retrieve the value since the next statement may change state (at the stage boundary)
    batch = self.get_iterator('main').next()
    batch_size = len(batch)

    lr_scale = get_lr_scale_factor(self.total_gpu, stage)

    x_real_data = self.get_x_real_data(batch, batch_size)
    z_fake_data = self.get_z_fake_data(batch_size)

    x_real = Variable(x_real_data)
    # Image.fromarray(convert_batch_images(x_real.data.get(), 4, 4)).save('no_downsized.png')
    x_real = downsize_real(x_real, stage)
    x_real = Variable(x_real.data)
    # Image.fromarray(convert_batch_images(x_real.data.get(), 4, 4)).save('downsized.png')
    image_size = x_real.shape[2]

    z_fake = Variable(z_fake_data)
    w_fake = self.map(z_fake)
    if self.style_mixing_rate > 0 and np.random.rand() < self.style_mixing_rate:
        z_fake2 = Variable(self.get_z_fake_data(batch_size))
        w_fake2 = self.map(z_fake2)
        x_fake = self.gen(w_fake, stage=stage, w2=w_fake2)
    else:
        x_fake = self.gen(w_fake, stage=stage)
    y_fake = self.dis(x_fake, stage=stage)

    loss_gen = loss_func_dcgan_gen(y_fake) * lr_scale

    if chainer.global_config.debug:
        g = c.build_computational_graph(loss_gen)
        with open('out_loss_gen', 'w') as o:
            o.write(g.dump())

    assert not xp.isnan(loss_gen.data)
    chainer.report({'loss_adv': loss_gen}, self.gen)
    loss_gen.backward()
    opt_g_m.update()
    opt_g_g.update()

    # keep smoothed generator if instructed to do so.
    if self.smoothed_gen is not None:
        # layers_in_use = self.gen.get_layers_in_use(stage=stage)
        soft_copy_param(self.smoothed_gen, self.gen, 1.0 - self.smoothing)
        soft_copy_param(self.smoothed_map, self.map, 1.0 - self.smoothing)

    z_fake_data = self.get_z_fake_data(batch_size)
    z_fake = Variable(z_fake_data)
    with chainer.using_config('enable_backprop', False):
        w_fake = self.map(z_fake)
        if self.style_mixing_rate > 0 and np.random.rand() < self.style_mixing_rate:
            z_fake2 = Variable(self.get_z_fake_data(batch_size))
            w_fake2 = self.map(z_fake2)
            x_fake = self.gen(w_fake, stage=stage, w2=w_fake2)
        else:
            x_fake = self.gen(w_fake, stage=stage)
    x_fake.unchain_backward()

    y_fake = self.dis(x_fake, stage=stage)
    y_real = self.dis(x_real, stage=stage)
    loss_adv = loss_func_dcgan_dis(y_fake, y_real)

    if self.lambda_gp > 0:
        x_perturbed = x_real
        y_perturbed = y_real
        # y_perturbed = self.dis(x_perturbed, stage=stage)
        grad_x_perturbed, = chainer.grad([y_perturbed], [x_perturbed],
                                         enable_double_backprop=True)
        grad_l2 = F.sqrt(F.sum(grad_x_perturbed ** 2, axis=(1, 2, 3)))
        loss_gp = self.lambda_gp * loss_l2(grad_l2, 0.0)
        chainer.report({'loss_gp': loss_gp}, self.dis)
    else:
        loss_gp = 0.

    loss_dis = (loss_adv + loss_gp) * lr_scale
    assert not xp.isnan(loss_dis.data)
    chainer.report({'loss_adv': loss_adv}, self.dis)
    self.dis.cleargrads()
    loss_dis.backward()
    opt_d.update()

    chainer.reporter.report({'stage': stage})
    chainer.reporter.report({'batch_size': batch_size})
    chainer.reporter.report({'image_size': image_size})
if __name__ == "__main__":
    import argparse
    import numpy as np
    import chainer.computational_graph as c
    import chainer.links as L
    from chainer.functions.loss.mean_squared_error import mean_squared_error

    parser = argparse.ArgumentParser(description='PredNet')
    parser.add_argument('--size', '-s', default='160,120',
                        help='Size of target images. width,height (pixels)')
    parser.add_argument('--channels', '-c', default='3,48,96,192',
                        help='Number of channels on each layer')
    args = parser.parse_args()

    args.size = args.size.split(',')
    for i in range(len(args.size)):
        args.size[i] = int(args.size[i])
    args.channels = args.channels.split(',')
    for i in range(len(args.channels)):
        args.channels[i] = int(args.channels[i])

    model = PredNet(args.size[0], args.size[1], args.channels)
    x_batch = np.ndarray((1, args.channels[0], args.size[1], args.size[0]),
                         dtype=np.float32)
    g = c.build_computational_graph(
        model(chainer.Variable(np.asarray(x_batch))))
    with open('network.dot', 'w') as o:
        o.write(g.dump())
def train(model=model, gpu=None, epoch=10, batch_size=128):
    train, test = chainer.datasets.cifar.get_cifar10()
    train_x = np.array([x[0] for x in train])
    train_t = np.array([x[1] for x in train])
    test_x = np.array([x[0] for x in test])
    test_t = np.array([x[1] for x in test])
    train_n = len(train_x)
    test_n = len(test_x)
    train_x = train_x.reshape(train_n, 3, 32, 32)
    test_x = test_x.reshape(test_n, 3, 32, 32)

    slack.s_print('here we go', channel='output')
    slack.s_print('train n: {}'.format(train_n), channel='output')
    slack.s_print('test n: {}'.format(test_n), channel='output')
    slack.s_print('epoch: {}'.format(epoch), channel='output')
    slack.s_print('batch size: {}'.format(batch_size), channel='output')
    slack.s_print('gpu: {}'.format(gpu), channel='output')

    train_x = np.subtract(train_x, np.mean(train_x, axis=0))
    test_x = np.subtract(test_x, np.mean(test_x, axis=0))

    optimizer = optimizers.SGD()
    if gpu:
        chainer.cuda.get_device(gpu).use()
        model.to_gpu()
        xp = chainer.cuda.cupy
    else:
        xp = np
    optimizer.setup(model)
    optimizer.lr = 0.1
    optimizer.add_hook(chainer.optimizer.WeightDecay(.05))
    optimizer.add_hook(chainer.optimizer.GradientClipping(.0005))

    train_log = Log()
    test_loss_log = Log()
    test_acc_log = Log()

    for i in tqdm(range(epoch)):
        order = np.random.permutation(train_n)
        train_iter_x = Iterator(train_x, batch_size, order=order)
        train_iter_t = Iterator(train_t, batch_size, order=order)
        sum_loss = 0
        if i % 10 == 0:
            optimizer.lr /= 2
        for x, t in tqdm(zip(train_iter_x, train_iter_t), total=train_n / batch_size):
            x_len = len(x)
            x = model.prepare_input(x, dtype=xp.float32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            model.cleargrads()
            loss, _ = model(x, t)
            loss.backward()
            optimizer.update()
            loss.to_cpu()
            sum_loss += loss.data * x_len
            del x
            del t
            if i == 0:
                with open('graph.dot', 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (loss, ), remove_split=True)
                    o.write(g.dump())
                print('graph generated')
            del loss
        train_log.add(sum_loss / train_n)

        order = np.random.permutation(test_n)
        test_iter_x = Iterator(test_x, batch_size, order=order)
        test_iter_t = Iterator(test_t, batch_size, order=order)
        sum_loss = 0
        sum_acc = 0
        for x, t in tqdm(zip(test_iter_x, test_iter_t), total=test_n / batch_size):
            x_len = len(x)
            x = model.prepare_input(x, dtype=xp.float32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            model.cleargrads()
            loss, acc = model(x, t)
            loss.to_cpu()
            acc.to_cpu()
            sum_loss += loss.data * x_len
            sum_acc += float(acc.data) * x_len
        slack.s_print('acc: {}'.format(sum_acc / test_n), channel='output')
        slack.s_print('loss: {}'.format(sum_loss / test_n), channel='output')
        test_loss_log.add(sum_loss / test_n)
        test_acc_log.add(sum_acc / test_n)

    train_log.save('train.log')
    train_log.save_graph('train.log.png')
    test_loss_log.save('test_loss.log')
    test_loss_log.save_graph('test_loss.log.png')
    test_acc_log.save('test_acc.log')
    test_acc_log.save_graph('test_acc.log.png')
    model.save_model()
optimizer = optimizers.Adam()
optimizer.setup(model)

n_win = args.window
bs = args.batchsize

for i in tqdm(range(args.epoch)):
    indexes = np.random.permutation(n_data)
    for index in range(n_win, n_data - n_win, bs):
        position = dataset[index:index + bs if index + bs < n_data else n_data]
        model.zerograds()
        loss = loss_calc(position)
        loss.backward()
        optimizer.update()

if args.gpu >= 0:
    w = cuda.to_cpu(model.embed.W.data)
else:
    w = model.embed.W.data

#with open('test_sss.model','wb') as fw:
#    pickle.dump(w,fw)

import chainer.computational_graph as cg
graph = cg.build_computational_graph((loss,), remove_split=True)
with open('./w2v_full.dot', 'w') as fw:
    fw.write(graph.dump())
generator = Generator(args.size, args.depth, args.levels, *args.channels)
discriminator = Discriminator(args.levels, args.channels[1], args.channels[0])

z = generator.generate_latents(args.batch)
#mix = gen.generate_latent(args.batch)
ws, i = generator(z)
y = discriminator(i)

gen_varstyle = {"fillcolor": "#5edbf1", "shape": "record", "style": "filled"}
gen_funstyle = {"fillcolor": "#ffa9e0", "shape": "record", "style": "filled"}
dis_varstyle = {"fillcolor": "#7a9fe6", "shape": "record", "style": "filled"}
dis_funstyle = {"fillcolor": "#fea21d", "shape": "record", "style": "filled"}

gen_graph = build_computational_graph([i], variable_style=gen_varstyle,
                                      function_style=gen_funstyle).dump()
i.unchain_backward()
dis_graph = build_computational_graph([y], variable_style=dis_varstyle,
                                      function_style=dis_funstyle).dump()
#print(f"D: {self.count_params()}")
#print(self.count_params())

mkdirs(args.dest)
gen_path = build_filepath(args.dest, "generator", "pdf", args.force)
graph_from_dot_data(gen_graph)[0].write_pdf(gen_path)
print(f"Saved: {gen_path}")
def main():
    parser = argparse.ArgumentParser(description='DCGAN')
    parser.add_argument('--batchsize', '-b', default=50, type=int)
    parser.add_argument('--epoch', '-e', default=1000, type=int)
    parser.add_argument('--gpu', '-g', default=0, type=int)
    parser.add_argument('--out', '-o', default='result')
    parser.add_argument('--n_hidden', '-n', default=100, type=int)
    parser.add_argument('--snapshot_interval', default=1000, type=int)
    args = parser.parse_args()

    print('=== DCGAN ===')

    # save the root directory
    root = os.getcwd()

    # create the output folder
    if not os.path.exists(args.out):
        print('** create result')
        os.mkdir(args.out)
    save_path = args.out + '/' + time.strftime('%y%m%d_%H%M%S', time.localtime())
    os.makedirs(save_path)
    os.chdir(save_path)

    # logger setup
    logger_names = ['losses', 'debug']
    loggers = {}
    for logger_name in logger_names:
        loggers[logger_name] = set_logger(logger_name, save_path)
    loggers['debug'].debug('# batchsize: {}'.format(args.batchsize))
    loggers['debug'].debug('# epoch: {}'.format(args.epoch))
    loggers['debug'].debug('# n_hidden: {}'.format(args.n_hidden))
    loggers['debug'].debug('')

    # set up the models for training
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    # cpu or gpu
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    def make_optimizer(model, alpha=0.002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))
        return optimizer

    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    # load the MNIST data
    # contents['datas'] -> (60000, 784)
    # contents['labels'] -> (60000,)
    with open(root + '/mnist.pkl', 'rb') as f:
        contents = pickle.load(f)
    loggers['debug'].debug('# data_size: {}'.format(contents['datas'].shape[0]))
    loggers['debug'].debug('')

    # create minibatches
    iterator_train = Iterator(contents, args.batchsize)

    # training loop
    iteration = 1
    for epoch in range(1, args.epoch + 1):
        start_time = time.time()
        print('# epoch gen/loss dis/loss')

        # minibatch training
        for yielded in iterator_train(shuffle=True):
            batch = yielded
            variables = {}

            # reshape the data: 784 -> 1ch, 28x28
            b_size = batch['datas'].shape[0]
            d = batch['datas'].reshape(b_size, 1, 28, 28)
            datas = chainer.Variable(xp.asarray(d, dtype=xp.float32))
            variables['datas'] = datas

            # train with the images and their corresponding labels
            labels = batch['labels']
            # make one-hot labels for the whole batch
            labels = xp.asarray(create_one_hot_label(10, labels)).reshape(
                b_size, 10, 1, 1)

            # clear gradients
            gen.cleargrads()
            dis.cleargrads()

            ## forward pass (DCGAN)
            # x_real = variables['datas'] / 255.
            x_real = variables['datas'] / 255.

            # discriminator output for a real input
            y_real = dis(x=x_real, label=labels)

            # make the input noise
            z = chainer.Variable(xp.asarray(gen.make_hidden(b_size)))
            # generator output for the noise input
            x_fake = gen(z=z, label=labels)
            # discriminator output for a fake input
            y_fake = dis(x=x_fake, label=labels)

            ## discriminator loss
            # we want the discriminator to output real (1) for real images;
            # L1 shrinks as real images are classified as real
            L1 = F.sum(F.softplus(-y_real)) / b_size
            # we want the discriminator to output fake (0) for fake images;
            # L2 shrinks as fakes are classified as fake
            L2 = F.sum(F.softplus(y_fake)) / b_size
            dis_loss = L1 + L2

            ## generator loss
            # push the discriminator output for fakes toward real (1);
            # the loss shrinks as fakes are classified as real
            gen_loss = F.sum(F.softplus(-y_fake)) / b_size

            loggers['losses'].debug('# epoch: {}, iteration{}'.format(epoch, iteration))
            loggers['losses'].debug('gen/loss: {} dis/loss: {}'.format(
                gen_loss.data, dis_loss.data))
            print('{} {} {}'.format(epoch, gen_loss.data, dis_loss.data))

            # backpropagation -> weight update
            dis_loss.backward()
            opt_dis.update()
            gen_loss.backward()
            opt_gen.update()

            if iteration % args.snapshot_interval == 0:
                out_generated_image(gen, 10, 10, 0, iteration, xp)
            iteration += 1

        passed_time = time.time() - start_time
        print('*** passed time in this epoch: {}[sec]'.format(passed_time))
        loggers['debug'].debug('# epoch: {}'.format(epoch))
        loggers['debug'].debug('# passed_time: {}[sec]'.format(passed_time))

    print('=== Save Model ====')
    gen.to_cpu()
    chainer.serializers.save_npz('./generator.npz', gen)
    dis.to_cpu()
    chainer.serializers.save_npz('./discriminator.npz', dis)

    # build the computational graph
    print('=== Draw Computational Graph ===')
    _val_style = {'shape': 'octagon', 'fillcolor': '#E0E0E0', 'style': 'filled'}
    _fanc_style = {'shape': 'record', 'fillcolor': '#6495ED', 'style': 'filled'}
    with open('computational_graph.dot', 'w') as o:
        g = build_computational_graph([gen_loss, dis_loss],
                                      variable_style=_val_style,
                                      function_style=_fanc_style)
        o.write(g.dump())
def CifarAnalysis(folderName=None, batchsize=1000, **kwd):
    id_gpu = 0

    OutStr = ""
    OutStr += 'GPU: {}\n'.format(id_gpu)
    OutStr += 'Minibatch-size: {}\n'.format(batchsize)
    OutStr += 'kwd: {}\n'.format(kwd)
    OutStr += ''
    print OutStr

    fOutput = None
    if folderName:
        if not os.path.exists(folderName):
            os.makedirs(folderName)
        fOutput = open(os.path.join(folderName, "output.dat"), "w")
        shutil.copyfile(__file__, os.path.join(folderName, os.path.basename(__file__)))

    # Prepare dataset
    data_tr = np.zeros((50000, 3 * 32 * 32), dtype=np.float32)
    data_ev = np.zeros((10000, 3 * 32 * 32), dtype=np.float32)
    label_tr = np.zeros((50000), dtype=np.int32)
    label_ev = np.zeros((10000), dtype=np.int32)

    I_colors = 3
    I_Xunit = 32
    I_Yunit = 32
    F_unit = 100  # be careful!!

    h5f_tr = h5py.File("data_cifar100/train.h5f", "r")
    data_tr[:] = h5f_tr["Original/data"].value
    #data_tr[:] = h5f_tr["ZCA_byTrainData/data"].value
    label_tr[:] = h5f_tr["Info/fine_labels"].value
    #print np.mean(data_tr,axis=1).reshape(3*32*32)
    data_tr -= np.mean(data_tr, axis=1).reshape((50000, 1))
    data_tr -= np.mean(data_tr, axis=0)
    data_tr /= np.std(data_tr, axis=0)

    h5f_ev = h5py.File("data_cifar100/test.h5f", "r")
    data_ev[:] = h5f_ev["Original/data"].value
    #data_ev[:] = h5f_ev["ZCA_byTrainData/data"].value
    label_ev[:] = h5f_ev["Info/fine_labels"].value
    data_ev -= np.mean(data_ev, axis=1).reshape((10000, 1))
    data_ev -= np.mean(data_ev, axis=0)
    data_ev /= np.std(data_ev, axis=0)

    ## Prep
    x_tr = data_tr.reshape((len(data_tr), 3, 32, 32))
    x_ev = data_ev.reshape((len(data_ev), 3, 32, 32))
    y_tr = label_tr
    y_ev = label_ev
    N_tr = len(data_tr)  # 50000
    N_ev = len(data_ev)  # 10000

    ag = Augument.Augumentation()

    ## Define analysis
    Resume = None
    if "Resume" in kwd:
        Resume = kwd["Resume"]
        del kwd["Resume"]

    #model,ModelKwd = net2.GenModel(F_unit = F_unit)
    model = net8.GenModel(F_unit)
    ModelKwd = ""
    if id_gpu >= 0:
        cuda.get_device(id_gpu).use()
        model.to_gpu()
    xp = np if id_gpu < 0 else cuda.cupy

    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    #optimizer.add_hook(scheduled_alpha_reduction)

    # Init/Resume
    if Resume:
        print 'Load optimizer state from %s' % (Resume)
        with h5py.File(Resume, "r") as f:
            s = HDF5Deserializer(f)
            s_model = s["model"]
            s_model.load(model)

    # Setup stop manager
    sm = StopManager.StopManager()
    sm.SetMaximumEpoch(10000)
    sm.SetMinimumEpoch(10)
    sm.SetStopThreshold(3e-4)
    print sm

    #alphaTiming = [10,20,40,80]
    optimizer.alpha /= 16

    # Learning loop
    if fOutput:
        fOutput.write("epoch,mode,loss,accuracy\n")
    #for epoch in six.moves.range(1, n_epoch + 1):
    epoch = 0
    while True:
        epoch += 1
        print 'epoch %d' % epoch

        # training
        perm = np.random.permutation(N_tr)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        """
        if epoch in alphaTiming:
            optimizer.alpha /= 2
            print "alpha changed... currently Alpha = ", optimizer.alpha
        """
        for i in six.moves.range(0, N_tr, batchsize):
            bx = x_tr[perm[i:i + batchsize]]
            #if epoch>10: bx = ag.Aug(bx)
            #print bx[0]
            bx = ag.Aug(bx)
            #print bx[0]
            #raw_input()
            x = chainer.Variable(xp.asarray(bx))
            t = chainer.Variable(xp.asarray(y_tr[perm[i:i + batchsize]]))

            # Pass the loss function (Classifier defines it) and its arguments
            model.predictor.setTrainMode(True)
            optimizer.update(model, x, t)
            #print optimizer.alpha

            if (epoch == 1 and i == 0) and folderName:
                with open(os.path.join(folderName, 'graph.dot'), 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ))
                    o.write(g.dump())
                print 'graph generated'

            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

        end = time.time()
        elapsed_time = end - start
        throughput = N_tr / elapsed_time
        print 'train mean loss=%.5f, accuracy=%.2f%%, throughput=%.0f images/sec' % (
            sum_loss / N_tr, sum_accuracy / N_tr * 100., throughput)
        if fOutput:
            fOutput.write("%d,Train,%e,%e\n" % (epoch, sum_loss / N_tr, sum_accuracy / N_tr))

        # evaluation
        perm = np.random.permutation(N_ev)
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_ev, batchsize):
            x = chainer.Variable(xp.asarray(x_ev[perm[i:i + batchsize]]), volatile='on')
            t = chainer.Variable(xp.asarray(y_ev[perm[i:i + batchsize]]), volatile='on')
            model.predictor.setTrainMode(False)
            loss = model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        print 'test mean loss=%.5f, accuracy=%.2f%%' % (
            sum_loss / N_ev, sum_accuracy / N_ev * 100, )
        sm.AddAccuracy(sum_accuracy / N_ev)
        print sm.GetInfo()
        if fOutput:
            fOutput.write("%d,Test,%e,%e\n" % (epoch, sum_loss / N_ev, sum_accuracy / N_ev))

        StopFlag = sm.StopCheck()
        StopFlag = False
        if folderName and (epoch % 1 == 0 or StopFlag):
            # Save the model and the optimizer
            if StopFlag:
                myFname = os.path.join(folderName, 'mlp_final')
            else:
                myFname = os.path.join(folderName, 'mlp_%d' % epoch)
            with h5py.File(myFname + ".hdf5", "w") as f:
                s = HDF5Serializer(f)
                s["model"].save(model)
                f.create_dataset("kwd", data=ModelKwd.__str__(),
                                 dtype=h5py.special_dtype(vlen=unicode))
                f.create_dataset("net", data=netFile,
                                 dtype=h5py.special_dtype(vlen=unicode))
                f.flush()
        if StopFlag:
            break

    if fOutput:
        fOutput.close()
import numpy as np
from chainer import Variable
import chainer.computational_graph as c
import net

gen = net.Generator(784, 20, 500)
dis = net.Discriminator(784, 500)

x_real = np.empty((1, 784), dtype=np.float32)
z = Variable(np.asarray(gen.make_hidden(1)))

y_real = dis(x_real)
x_fake = gen(z)
y_fake = dis(x_fake)

g = c.build_computational_graph([y_real, x_fake, y_fake])
with open('graph.dot', 'w') as o:
    o.write(g.dump())
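# --- Added follow-up sketch (not from the original source) ---
# Rendering the dumped dot file to an image requires Graphviz; this mirrors
# the `dot -Tpng` calls used by several other snippets in this collection.
import subprocess

subprocess.call(['dot', '-Tpng', 'graph.dot', '-o', 'graph.png'])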
def update_core(self):
    xp = self.gen.xp
    use_rotate = True if self.iteration > self.config.start_rotation else False
    self.gen.cleargrads()
    self.gen.mapping.cleargrads()
    self.dis.cleargrads()

    opt_g_m = self.get_optimizer('map')
    opt_g_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    # z: latent | x: data | y: dis output
    # *_real/*_fake/*_perturbed: Variable
    # *_data: just data (xp array)

    stage = self.stage  # Need to retrieve the value since the next statement may change state (at the stage boundary)
    batch = self.get_iterator('main').next()
    batch_size = len(batch)
    # lr_scale = get_lr_scale_factor(self.total_gpu, stage)

    x_real_data = self.get_x_real_data(batch, batch_size)
    z_fake_data = xp.tile(self.get_z_fake_data(batch_size // 2), (2, 1, 1, 1, 1))  # repeat same z
    z_fake_data2 = xp.tile(self.get_z_fake_data(batch_size // 2), (2, 1, 1, 1, 1))  # repeat same z
    if isinstance(chainer.global_config.dtype, chainer._Mixed16):
        x_real_data = x_real_data.astype("float16")
        z_fake_data = z_fake_data.astype("float16")
        z_fake_data2 = z_fake_data2.astype("float16")  # fixed: was cast from z_fake_data

    # theta -> 6 DOF
    thetas = self.prior.sample(batch_size)
    # theta -> camera matrix
    random_camera_matrices = xp.array(get_camera_matries(thetas), dtype="float32")
    thetas = xp.array(
        np.concatenate(
            [np.cos(thetas[:, :3]), np.sin(thetas[:, :3]), thetas[:, 3:]], axis=1))

    x_real = Variable(x_real_data)
    x_real = downsize_real(x_real, IMG_SIZE)
    x_real = Variable(x_real.data)
    image_size = x_real.shape[2]

    x_fake = self.gen(z_fake_data, stage, random_camera_matrices,
                      z2=z_fake_data2, theta=thetas)
    y_fake = self.dis(x_fake[:, :3], stage=stage)

    loss_gen = loss_func_dcgan_gen(y_fake, self.config.focal_loss_gamma)  # * lr_scale
    chainer.report({'loss_adv': loss_gen}, self.gen)
    assert not xp.isnan(loss_gen.data)

    if use_rotate:
        if self.config.background_generator:
            loss_rotate_fore, _ = self.loss_func_rotate(
                x_fake[:batch_size // 2], random_camera_matrices[:batch_size // 2],
                x_fake[batch_size // 2:], random_camera_matrices[batch_size // 2:],
                max_depth=3)
            virtual_camera_matrices = random_camera_matrices.copy()
            virtual_camera_matrices[:, :3, 3] = 0
            loss_rotate_back, _ = self.loss_func_rotate(
                x_fake[:batch_size // 2], virtual_camera_matrices[:batch_size // 2],
                x_fake[batch_size // 2:], virtual_camera_matrices[batch_size // 2:],
                min_depth=3)
            loss_rotate = loss_rotate_fore + loss_rotate_back
        else:
            loss_rotate, _ = self.loss_func_rotate(
                x_fake[:batch_size // 2], random_camera_matrices[:batch_size // 2],
                x_fake[batch_size // 2:], random_camera_matrices[batch_size // 2:])
        loss_rotate += F.mean(F.relu(self.config.depth_min - x_fake[:, -1]) ** 2) * \
            self.config.lambda_depth  # make depth larger
        chainer.report({'loss_rotate': loss_rotate}, self.gen)
        assert not xp.isnan(loss_rotate.data)
        lambda_loss_rotate = self.config.lambda_loss_rotate if self.config.lambda_loss_rotatec else 0.3
        loss_gen = loss_gen + loss_rotate * lambda_loss_rotate

    if chainer.global_config.debug:
        g = c.build_computational_graph(loss_gen)
        with open('out_loss_gen', 'w') as o:
            o.write(g.dump())

    # assert not xp.isnan(loss_dsgan.data)
    loss_gen.backward()
    opt_g_m.update()
    opt_g_g.update()
    del loss_gen, y_fake, x_fake

    self.dis.cleargrads()

    # keep smoothed generator if instructed to do so.
    if self.smoothed_gen is not None:
        # layers_in_use = self.gen.get_layers_in_use(stage=stage)
        soft_copy_param(self.smoothed_gen, self.gen, 1.0 - self.smoothing)

    z_fake_data = self.get_z_fake_data(batch_size)
    z_fake_data2 = self.get_z_fake_data(batch_size)
    if isinstance(chainer.global_config.dtype, chainer._Mixed16):
        z_fake_data = z_fake_data.astype("float16")
        z_fake_data2 = z_fake_data2.astype("float16")  # fixed: was cast from z_fake_data
    # with chainer.using_config('enable_backprop', False):
    x_fake = self.gen(z_fake_data, stage, random_camera_matrices,
                      z2=z_fake_data2, theta=thetas)
    x_fake.unchain_backward()

    y_fake = self.dis(x_fake[:, :3], stage=stage)
    y_real = self.dis(x_real, stage=stage)
    loss_adv = loss_func_dcgan_dis(y_fake, y_real)

    if not self.dis.sn and self.lambda_gp > 0:
        x_perturbed = x_real
        y_perturbed = y_real
        # y_perturbed = self.dis(x_perturbed, stage=stage)
        grad_x_perturbed, = chainer.grad([y_perturbed], [x_perturbed],
                                         enable_double_backprop=True)
        grad_l2 = F.sqrt(F.sum(grad_x_perturbed ** 2, axis=(1, 2, 3)))
        loss_gp = self.lambda_gp * loss_l2(grad_l2, 0.0)
        chainer.report({'loss_gp': loss_gp}, self.dis)
    else:
        loss_gp = 0.

    loss_dis = (loss_adv + loss_gp)  # * lr_scale
    assert not xp.isnan(loss_dis.data)
    chainer.report({'loss_adv': loss_adv}, self.dis)
    loss_dis.backward()
    opt_d.update()

    chainer.reporter.report({'batch_size': batch_size})
    chainer.reporter.report({'image_size': image_size})
import chainer.functions as F

sentence = '[["How @ 0", [["can @ 468", ["I @ 33", ["increase @ 222", [[["the @ 2", "speed @ 1002"], ["of @ 76", ["my @ 106", ["internet @ 1174", "connection @ 3430"]]]], ["while @ 1133", ["using @ 575", ["a @ 93", "VPN @ 2615"]]]]]]], "? @ 10"]], ["How @ 0", [["can @ 468", [["Internet @ 1176", "speed @ 1002"], ["be @ 121", ["increased @ 5998", [["by @ 123", "hacking @ 1776"], ["through @ 514", "DNS @ 26222"]]]]]], "? @ 10"]], "0", "2"]'
vocab_size = 153451
embedding_size = 30
learning_rate = 0.00001

predict = PredictNet(vocab_size, embedding_size)

import chainer.links as L
model = L.Classifier(predict)

pred = predict(json.loads(sentence))
print(pred, predict.id)

g = c.build_computational_graph([pred])
with open('./graph', 'w') as f:
    f.write(g.dump())

optimizer = chainer.optimizers.MomentumSGD(learning_rate)
optimizer.setup(predict)
optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

train_data = read_corpus('%s/tree.0' % (train_data_path))
train_iter = chainer.iterators.SerialIterator(train_data, 10, shuffle=False)
batch = train_iter.next()
batch = [json.loads(line) for line in batch]

import numpy as np
from pydot import graph_from_dot_data
from chainer import Variable
from chainer.computational_graph import build_computational_graph
from network import Network
import numpy as np

net = Network()
net.to_gpu()

data = np.zeros((1, 3, 512, 512), dtype="float32")
variable = Variable(data)
variable.to_gpu()

outputs = net(variable, 3)
d = build_computational_graph(outputs).dump()
g = graph_from_dot_data(d)[0]
g.write_pdf("graph.pdf")
def train(iterator, gpu, encoder, decoder, enc_optim, dec_optim, rel_send,
          rel_rec, edge_types, temp, prediction_steps, var, out, benchmark,
          lr_decay, gamma):
    iter_i = 0
    edge_accuracies = []
    node_mses = []
    nll_train = []
    kl_train = []
    logger = logging.getLogger(__name__)

    while True:
        inputs = iterator.next()
        node_features, edge_labels = dataset.concat_examples(inputs, device=gpu)

        # logits: [batch_size, num_edges, edge_types]
        logits = encoder(node_features, rel_send, rel_rec)

        # inverse func. of softmax
        edges = F.gumbel_softmax(logits, tau=temp, axis=2)
        edge_probs = F.softmax(logits, axis=2)
        # edges, edge_probs: [batch_size, num_edges, edge_types]

        if isinstance(decoder, decoders.MLPDecoder):
            output = decoder(node_features, edges, rel_rec, rel_send,
                             prediction_steps)
        elif isinstance(decoder, decoders.RNNDecoder):
            output = decoder(node_features, edges, rel_rec, rel_send, 100,
                             burn_in=True,
                             burn_in_steps=args.timesteps - args.prediction_steps)

        target = node_features[:, :, 1:, :]
        num_nodes = node_features.shape[1]
        loss_nll = get_nll_gaussian(output, target, var)
        loss_kl = get_kl_categorical_uniform(edge_probs, num_nodes, edge_types)
        loss = loss_nll + loss_kl
        nll_train.append(float(loss_nll.array))
        kl_train.append(float(loss_kl.array))

        edge_accuracy = get_edge_accuracy(logits.array, edge_labels)
        edge_accuracies.append(edge_accuracy)
        node_mse = float(F.mean_squared_error(output, target).array)
        node_mses.append(node_mse)

        encoder.cleargrads()
        decoder.cleargrads()
        loss.backward()
        enc_optim.update()
        dec_optim.update()

        # Exit after 10 iterations when benchmark mode is ON
        iter_i += 1
        if benchmark:
            put_log(iterator.epoch, np.mean(nll_train), np.mean(kl_train),
                    np.mean(edge_accuracies), np.mean(node_mses))
            if iter_i == 10:
                exit()

        if iterator.is_new_epoch:
            break

    if not os.path.exists(os.path.join(out, 'graph.dot')):
        with open(os.path.join(out, 'graph.dot'), 'w') as o:
            g = computational_graph.build_computational_graph([loss])
            o.write(g.dump())

    if iterator.is_new_epoch:
        put_log(iterator.epoch, np.mean(nll_train), np.mean(kl_train),
                np.mean(edge_accuracies), np.mean(node_mses))
        serializers.save_npz(
            os.path.join(out, 'encoder_epoch-{}.npz'.format(iterator.epoch)), encoder)
        serializers.save_npz(
            os.path.join(out, 'decoder_epoch-{}.npz'.format(iterator.epoch)), decoder)
        serializers.save_npz(
            os.path.join(out, 'enc_optim_epoch-{}.npz'.format(iterator.epoch)), enc_optim)
        serializers.save_npz(
            os.path.join(out, 'dec_optim_epoch-{}.npz'.format(iterator.epoch)), dec_optim)
        if iterator.epoch % lr_decay == 0:
            enc_optim.alpha *= gamma
            dec_optim.alpha *= gamma
            logger.info('alpha of enc_optim: {}'.format(enc_optim.alpha))
            logger.info('alpha of dec_optim: {}'.format(dec_optim.alpha))
y_pre = nn.predict(input_cloud)
t2 = time.time()
elapsed_time1 = t1 - t0
elapsed_time2 = t2 - t1
print("voxelization time: {}".format(elapsed_time1))
print("inference time: {}".format(elapsed_time2))
print(output_label)

y = y_pre.data
y_pos = y[0, 0:3]
y_ori = y[0, 3:7]
y_pos = 0.1 * y_pos
nrm = y_ori[0] * y_ori[0] + y_ori[1] * y_ori[1] + y_ori[2] * y_ori[2] + y_ori[3] * y_ori[3]
y_ori_x = np.sign(y_ori[0]) * np.sqrt(y_ori[0] * y_ori[0] / float(nrm))
y_ori_y = np.sign(y_ori[1]) * np.sqrt(y_ori[1] * y_ori[1] / float(nrm))
y_ori_z = np.sign(y_ori[2]) * np.sqrt(y_ori[2] * y_ori[2] / float(nrm))
y_ori_w = np.sign(y_ori[3]) * np.sqrt(y_ori[3] * y_ori[3] / float(nrm))
print(y_pos, y_ori_x, y_ori_y, y_ori_z, y_ori_w)

g = c.build_computational_graph([y_pre])
with open('graph.dot', 'w') as o:
    o.write(g.dump())

#plt.plot(x, label)
#plt.plot(x, y_pre)
#plt.show
def draw_graph(variable):
    g = c.build_computational_graph(variable)
    with open('resource/g.out', 'w') as o:
        o.write(g.dump())
def __call__(self, cgp, gpuID, epoch_num=200, batchsize=256, weight_decay=1e-4,
             eval_epoch_num=10, data_aug=True, comp_graph='comp_graph.dot',
             out_model='mymodel.model', init_model=None, retrain_mode=False):
    if self.verbose:
        print('\tGPUID :', gpuID)
        print('\tepoch_num:', epoch_num)
        print('\tbatchsize:', batchsize)

    chainer.cuda.get_device(gpuID).use()  # Make a specified GPU current
    model = CGP2CNN(cgp, self.n_class)
    if init_model is not None:
        if self.verbose:
            print('\tLoad model from', init_model)
        serializers.load_npz(init_model, model)
    model.to_gpu(gpuID)
    optimizer = chainer.optimizers.Adam() if not retrain_mode else chainer.optimizers.MomentumSGD(lr=0.01)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    eval_epoch_num = np.min((eval_epoch_num, epoch_num))
    test_accuracies = np.zeros(eval_epoch_num)
    for epoch in six.moves.range(1, epoch_num + 1):
        if self.verbose:
            print('\tepoch', epoch)
        perm = np.random.permutation(self.train_data_num)
        train_accuracy = train_loss = 0
        start = time.time()
        for i in six.moves.range(0, self.train_data_num, batchsize):
            xx_train = self.data_augmentation(self.x_train[perm[i:i + batchsize]]) if data_aug else self.x_train[perm[i:i + batchsize]]
            x = chainer.Variable(cuda.to_gpu(xx_train))
            t = chainer.Variable(cuda.to_gpu(self.y_train[perm[i:i + batchsize]]))
            try:
                optimizer.update(model, x, t)
            except:
                import traceback
                traceback.print_exc()
                return 0.
            if comp_graph is not None and epoch == 1 and i == 0:
                with open(comp_graph, 'w') as o:
                    g = computational_graph.build_computational_graph((model.loss, ))
                    o.write(g.dump())
                    del g
                if self.verbose:
                    print('\tCNN graph generated.')
            train_loss += float(model.loss.data) * len(t.data)
            train_accuracy += float(model.accuracy.data) * len(t.data)

        elapsed_time = time.time() - start
        throughput = self.train_data_num / elapsed_time
        if self.verbose:
            print('\ttrain mean loss={}, train accuracy={}, time={}, throughput={} images/sec, paramNum={}'.format(
                train_loss / self.train_data_num,
                train_accuracy / self.train_data_num,
                elapsed_time, throughput, model.param_num))

        # apply the model to test data
        # use the maximum validation accuracy in the last 10 epochs as the fitness value
        eval_index = epoch - (epoch_num - eval_epoch_num) - 1
        if self.verbose or eval_index >= 0:
            test_accuracy, test_loss = self.__test(model, batchsize)
            if self.verbose:
                print('\tvalid mean loss={}, valid accuracy={}'.format(
                    test_loss / self.test_data_num,
                    test_accuracy / self.test_data_num))
            if eval_index >= 0:
                test_accuracies[eval_index] = test_accuracy / self.test_data_num

        # decay the learning rate
        if not retrain_mode and epoch % 30 == 0:
            optimizer.alpha *= 0.1
        elif retrain_mode:
            if epoch == 5:
                optimizer.lr = 0.1
            if epoch == 250:
                optimizer.lr *= 0.1
            if epoch == 375:
                optimizer.lr *= 0.1

    # test_accuracy, test_loss = self.__test(model, batchsize)
    if out_model is not None:
        model.to_cpu()
        serializers.save_npz(out_model, model)

    return np.max(test_accuracies)
def test_dont_show_name(self):
    g = c.build_computational_graph(
        (self.x1, self.x2, self.y), show_name=False)
    dotfile_content = g.dump()
    for var in [self.x1, self.x2, self.y]:
        self.assertNotIn('label="%s:' % var.name, dotfile_content)
def setUp(self):
    self.x1 = variable.Variable(np.zeros((1, 2)).astype('f'))
    self.x2 = variable.Variable(np.zeros((1, 2)).astype('f'))
    self.y = self.x1 + self.x2
    self.f = self.y.creator
    self.g = c.build_computational_graph((self.y,), remove_variable=True)
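# --- Added sketch (not from the original tests) ---
# remove_variable=True drops the variable nodes, leaving only function nodes
# in the dot output, which keeps graphs of large models readable. A quick
# standalone check of the effect:
import numpy as np
import chainer.computational_graph as c
from chainer import variable

x1 = variable.Variable(np.zeros((1, 2), dtype=np.float32))
x2 = variable.Variable(np.zeros((1, 2), dtype=np.float32))
y = x1 + x2
dot_with_vars = c.build_computational_graph((y,)).dump()
dot_funcs_only = c.build_computational_graph((y,), remove_variable=True).dump()
print(len(dot_with_vars), len(dot_funcs_only))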
            and ((bi - 1) % (log_interval * 100) == 0)):
        # Additional *100 term because we don't want a checkpoint at every log point
        print('##################### Saving Model Checkpoint #####################')
        batch_number = str(bi).zfill(6)
        modelfile = directory + '/' + batch_number + '.h5'
        print "Writing model checkpoint to '%s' ..." % (modelfile)
        serializers.save_hdf5(modelfile, vae)

    # (Optionally:) visualize computation graph
    if bi == 1 and args['--vis'] is not None:
        print "Writing computation graph to '%s/%s'." % (directory, args['--vis'])
        g = computational_graph.build_computational_graph([obj])
        util.print_compute_graph(directory + '/' + args['--vis'], g)

    # Sample a set of poses
    if (bi % sample_every_epoch == 0) and data_type == 'pose':
        counter += 1
        print " # sampling"
        z = np.random.normal(loc=0.0, scale=1.0, size=(1024, nlatent))
        z = chainer.Variable(xp.asarray(z, dtype=np.float32), volatile='ON')
        vae.decode(z)
        Xsample = F.gaussian(vae.pmu, vae.pln_var)
        Xsample.to_cpu()
        sio.savemat('%s/samples_%d.mat' % (directory, counter), {'X': Xsample.data})
        vae.pmu.to_cpu()
def main():
    w2vec_model = None
    w_vocab = {}
    t_vocab = {}

    if (opts.fname_test or opts.fname_decode) and not opts.fname_in_model:
        print('Please specify a model in testing mode')
        exit(0)

    if opts.fname_w2vec:
        print('Loading w2vec model')
        w2vec_model = Word2VecModel()
        w2vec_model.load(opts.fname_w2vec)
        w_vocab = w2vec_model.vocab

    if opts.fname_in_model:
        model = pickle.load(open(opts.fname_in_model, 'rb'))
        w_vocab = model.w_vocab
        t_vocab = model.t_vocab
        if opts.fname_train:
            trainset, _ = load_data(opts.fname_train, w_vocab, t_vocab,
                                    update_dict=False)
    else:
        update_dict = False if opts.fname_w2vec else True
        trainset, _ = load_data(opts.fname_train, w_vocab, t_vocab,
                                update_dict=update_dict)
        model = setup_model(len(w_vocab), len(t_vocab), nlayer=opts.nlayer,
                            layer_size=opts.layer_size, use_gpu=opts.gpu,
                            dropout=opts.dropout, w2vec_model=w2vec_model,
                            seed=opts.random_seed)
        setattr(model, 'w_vocab', w_vocab)
        setattr(model, 't_vocab', t_vocab)

    if opts.fname_test:
        test_mode(model, opts.fname_test)
        exit(0)
    if opts.fname_decode:
        test_mode(model, opts.fname_decode, decode=True)
        exit(0)

    devset = None
    if opts.fname_dev:
        devset, oov = load_data(opts.fname_dev, w_vocab, t_vocab,
                                update_dict=False)

    optimizer = optimizers.SGD(lr=opts.lr)
    optimizer.setup(model)

    # Calculating training parameters
    whole_len = trainset['input'].shape[0]
    jump = whole_len // opts.batchsize
    cur_log_perp = 0
    epoch = 0
    cur_loss = 0
    start_at = time.time()
    progress_idx = 0
    state = make_initial_state(batchsize=opts.batchsize, nlayer=opts.nlayer,
                               layer_size=model.layer_size)
    accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))

    print('going to train {} iterations'.format(jump * opts.n_epoch))
    prev_dev_loss = 1000
    curr_dev_loss = 0
    for i in six.moves.range(jump * opts.n_epoch):
        x_batch = xp.array([trainset['input'][(jump * j + i) % whole_len]
                            for j in six.moves.range(opts.batchsize)])
        y_batch = xp.array([trainset['target'][(jump * j + i) % whole_len]
                            for j in six.moves.range(opts.batchsize)])
        state, loss_i, _ = forward_one_step(model, x_batch, y_batch, state)
        cur_loss += loss_i.data

        if i == 0:
            with open('graph.dot', 'w') as o:
                o.write(c.build_computational_graph((loss_i, )).dump())

        accum_loss += loss_i
        cur_log_perp += loss_i.data

        if (i + 1) % opts.bprop_len == 0:
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()
            optimizer.clip_grads(opts.grad_clip)
            optimizer.update()
            accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))

        if i >= 1:
            training_loss = cur_loss / i
            progress_idx += 1
            progress = int(progress_idx * 100 / float(jump))
            print('\r#Epoch {0}, lr: {1} [{2}] {3}% Training loss: {4}'.format(
                epoch, "%.6f" % optimizer.lr, '#' * (progress // 5), progress,
                "%.2f" % training_loss), end='')
            sys.stdout.flush()

        if (i + 1) % jump == 0:
            progress_idx = 0
            epoch += 1
            if devset:
                # print('')
                curr_dev_loss, _ = evaluate(model, devset, report_progress=False)
                improvement = prev_dev_loss - curr_dev_loss
                print(", Dev loss:", curr_dev_loss)
                if improvement < opts.lr_start_decay:
                    print("... reducing learning rate")
                    optimizer.lr /= opts.lr_decay
                    if optimizer.lr < opts.lr_stop:
                        break
                prev_dev_loss = curr_dev_loss
            else:
                print('')

    if opts.fname_out_model:
        model = model.to_cpu()
        print('... saving model to', opts.fname_out_model)
        pickle.dump(model, open(opts.fname_out_model, 'wb'), -1)
        batchsize: int
            batchsize indicates len(z)
        """
        return np.random.uniform(-1, 1, (batchsize, self.n_hidden))\
            .astype(np.float32)

    def __call__(self, z):
        if self.isBN:
            h = F.relu(self.bn0(self.l0(z)))
        else:
            h = F.relu(self.l0(z))
        if self.isBN:
            h = F.relu(self.bn1(self.l1(h)))
        else:
            h = F.relu(self.l1(h))
        x = self.l2(h)  # linear projection to 2
        return x


if __name__ == "__main__":
    import chainer.computational_graph as c
    from chainer import Variable

    model = Generator(n_hidden=100, isBN=False)
    img = model(Variable(model.make_hidden(10)))
    # print(img)
    g = c.build_computational_graph(img)
    with open('gen_graph.dot', 'w') as o:
        o.write(g.dump())
    sum_loss = 0
    for i in six.moves.range(0, N, batchsize):
        x_batch = x_train[perm[i:i + batchsize]]
        y_batch = y_train[perm[i:i + batchsize]]
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        optimizer.zero_grads()
        loss, acc = forward(x_batch, y_batch)
        loss.backward()
        optimizer.update()

        if epoch == 1 and i == 0:
            with open("graph.dot", "w") as o:
                o.write(c.build_computational_graph((loss, )).dump())
            with open("graph.wo_split.dot", "w") as o:
                g = c.build_computational_graph((loss, ), remove_split=True)
                o.write(g.dump())
            print('graph generated')

        sum_loss += float(cuda.to_cpu(loss.data)) * len(y_batch)
        sum_accuracy += float(cuda.to_cpu(acc.data)) * len(y_batch)

    print('train mean loss={}, accuracy={}'.format(
        sum_loss / N, sum_accuracy / N))

    # evaluation
    sum_accuracy = 0
    sum_loss = 0
def update_core(self):
    xp = self.gen.xp
    use_rotate = self.iteration > self.config.start_rotation
    self.gen.cleargrads()
    self.dis.cleargrads()
    if self.bigan:
        self.enc.cleargrads()
    if self.config.generator_architecture == "stylegan":
        opt_g_m = self.get_optimizer('map')
    opt_g_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    # z: latent | x: data | y: dis output
    # *_real/*_fake/*_perturbed: Variable
    # *_data: just data (xp array)

    stage = self.stage  # need to retrieve the value here, since the next statement may change it (at a stage boundary)
    batch = self.get_iterator('main').next()
    batch_size = len(batch)
    # lr_scale = get_lr_scale_factor(self.total_gpu, stage)

    x_real_data = self.get_x_real_data(batch, batch_size)
    z_fake_data = xp.concatenate([self.get_z_fake_data(batch_size // 2)] * 2)  # repeat same z
    if isinstance(chainer.global_config.dtype, chainer._Mixed16):
        x_real_data = x_real_data.astype("float16")
        z_fake_data = z_fake_data.astype("float16")

    # theta -> 6 DOF
    thetas = self.prior.sample(batch_size)
    # thetas = Variable(xp.array(thetas))
    # # theta -> camera matrix
    # random_camera_matrices = xp.array(get_camera_matries(thetas, order=(0, 1, 2)), dtype="float32")
    # thetas = F.concat([F.cos(thetas[:, :3]), F.sin(thetas[:, :3]), thetas[:, 3:]], axis=1)

    # theta -> camera matrix
    thetas_ = xp.array(thetas)
    random_camera_matrices = xp.array(get_camera_matries(thetas))
    thetas = F.concat([F.cos(thetas_[:, :3]), F.sin(thetas_[:, :3]), thetas_[:, 3:]], axis=1)

    x_real = Variable(x_real_data)
    # Image.fromarray(convert_batch_images(x_real.data.get(), 4, 4)).save('no_downsized.png')
    x_real = downsize_real(x_real, stage)
    x_real = Variable(x_real.data)
    # Image.fromarray(convert_batch_images(x_real.data.get(), 4, 4)).save('downsized.png')
    image_size = x_real.shape[2]

    x_fake = self.gen(z_fake_data, stage, thetas)

    if self.bigan:
        assert False, "bigan is not supported"
    else:
        y_fake, feat = self.dis(x_fake[:, :3], stage=stage, return_hidden=True)

    loss_gen = loss_func_dcgan_gen(y_fake)  # * lr_scale
    assert not xp.isnan(loss_gen.data)
    chainer.report({'loss_adv': loss_gen}, opt_g_g.target)

    if use_rotate:
        loss_rotate, warped_zp = self.loss_func_rotate(
            x_fake[:batch_size // 2], random_camera_matrices[:batch_size // 2],
            x_fake[batch_size // 2:], random_camera_matrices[batch_size // 2:],
            self.iteration >= self.config.start_occlusion_aware)
        if self.config.rotate_feature:
            downsample_rate = x_real.shape[2] // feat.shape[2]
            depth = F.average_pooling_2d(x_real[:, -1:], downsample_rate, downsample_rate, 0)
            feat = F.concat([feat, depth], axis=1)
            loss_rotate_feature, _ = self.loss_func_rotate_feature(
                feat[:batch_size // 2], random_camera_matrices[:batch_size // 2],
                feat[batch_size // 2:], random_camera_matrices[batch_size // 2:],
                self.iteration >= self.config.start_occlusion_aware)
            loss_rotate += loss_rotate_feature
        # loss_rotate *= 10
        if self.config.lambda_depth > 0:
            loss_rotate += F.mean(F.relu(self.config.depth_min - x_fake[:, -1]) ** 2) * \
                self.config.lambda_depth  # make depth larger
        assert not xp.isnan(loss_rotate.data)
        chainer.report({'loss_rotate': loss_rotate}, opt_g_g.target)
        lambda_rotate = self.config.lambda_rotate if self.config.lambda_rotate else 2
        lambda_rotate = lambda_rotate if image_size <= 128 else lambda_rotate * 2
        loss_gen += loss_rotate * lambda_rotate

        if self.config.use_occupancy_net_loss:
            loss_occupancy = self.loss_func_rotate.occupancy_net_loss(
                self.gen.occupancy, x_fake[:, -1:], random_camera_matrices,
                z_fake_data.squeeze())
            chainer.report({'loss_occupancy': loss_occupancy}, opt_g_g.target)
            loss_gen += loss_occupancy * self.config.lambda_occupancy

    if self.config.optical_flow:
        assert False, "optical flow loss is not supported"

    # loss_rotate += - 0.2 * F.log(
    #     F.mean((F.mean(1 / x_fake[:, -1].reshape(batch_size, -1) ** 2, axis=1) -
    #             F.mean(1 / x_fake[:, -1].reshape(batch_size, -1), axis=1) ** 2) + 1e-3))
    # loss_depth = self.loss_smooth_depth(x_fake[:, -1:]) * 20
    # loss_dsgan = loss_func_dsgan(x_fake, z_fake, theta)  # Diversity sensitive gan in ICLR2019

    if chainer.global_config.debug:
        g = c.build_computational_graph(loss_gen)
        with open('out_loss_gen', 'w') as o:
            o.write(g.dump())

    # assert not xp.isnan(loss_dsgan.data)
    # with chainer.using_config('debug', True):
    loss_gen.backward()
    # loss_depth.backward()
    # loss_dsgan.backward()
    if self.config.generator_architecture == "stylegan":
        opt_g_m.update()
    opt_g_g.update()
    del loss_gen

    self.dis.cleargrads()

    # keep smoothed generator if instructed to do so.
    if self.smoothed_gen is not None:
        # layers_in_use = self.gen.get_layers_in_use(stage=stage)
        soft_copy_param(self.smoothed_gen, self.gen, 1.0 - self.smoothing)

    # with chainer.using_config('enable_backprop', False):
    if self.bigan:
        assert False, "bigan is not supported"
    else:
        v_x_fake = Variable(x_fake.array[:, :3])
        y_fake, feat = self.dis(v_x_fake, stage=stage, return_hidden=True)
        y_real = self.dis(x_real, stage=stage)
        loss_dis = loss_func_dcgan_dis(y_fake, y_real)
        # loss_reg_camera_param = calc_distance(est_camera_param_real, xp.array(thetas)) / 10

        if not self.dis.sn and self.lambda_gp > 0:
            # y_perturbed = self.dis(x_perturbed, stage=stage)
            grad_x_perturbed, = chainer.grad([y_real], [x_real], enable_double_backprop=True)
            grad_l2 = F.sqrt(F.sum(grad_x_perturbed ** 2, axis=(1, 2, 3)))
            loss_gp = self.lambda_gp * loss_l2(grad_l2, 0.0)
            chainer.report({'loss_gp': loss_gp}, self.dis)
            loss_dis = loss_dis + loss_gp  # * lr_scale

    if use_rotate and self.config.rotate_feature:
        downsample_rate = x_real.shape[2] // feat.shape[2]
        depth = F.average_pooling_2d(x_real[:, -1:], downsample_rate, downsample_rate, 0)
        feat = F.concat([feat, depth], axis=1)
        loss_rotate_feature, _ = self.loss_func_rotate_feature(
            feat[:batch_size // 2], random_camera_matrices[:batch_size // 2],
            feat[batch_size // 2:], random_camera_matrices[batch_size // 2:],
            self.iteration >= self.config.start_occlusion_aware)
        loss_dis -= loss_rotate_feature

        if not self.dis.sn and self.lambda_gp > 0:
            grad_x_perturbed, = chainer.grad([feat], [v_x_fake], enable_double_backprop=True)
            grad_l2 = F.sqrt(F.sum(grad_x_perturbed ** 2, axis=(1, 2, 3)))
            loss_gp = self.lambda_gp * loss_l2(grad_l2, 0.0)
            loss_dis += loss_gp

    assert not xp.isnan(loss_dis.data)
    chainer.report({'loss_adv': loss_dis}, self.dis)
    loss_dis.backward()
    opt_d.update()

    chainer.reporter.report({'stage': stage})
    chainer.reporter.report({'batch_size': batch_size})
    chainer.reporter.report({'image_size': image_size})
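The updater above writes the generator-loss graph only when Chainer's debug flag is on. A minimal sketch, assuming an `updater` already wired into a standard training loop (the name is illustrative, not from the snippet):

import chainer

# Enable debug mode just long enough for update_core to hit the
# `if chainer.global_config.debug:` branch and write 'out_loss_gen'.
with chainer.using_config('debug', True):
    updater.update()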
print("Producing plots") online_optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.6) # -- graph n_graph_steps = 3 assert n_graph_steps >= 3 X_graph = next( g.batch_sequence_multi_hdf5(model.training.getlast("test_files"), batch_size=n_graph_steps)) RMSplot, _ = model.loss_series(state, xp.asarray(X_graph, dtype=np.float32), train=True) with open("../Models/{}.dot".format(modelname), "w") as o: o.write(c.build_computational_graph((RMSplot, ), rankdir='LR').dump()) cmdstr = "dot -Tpng ../Models/{}.dot > ../Plots/{}_graph.png".format( modelname, modelname) status = subprocess.call(cmdstr, shell=True) # -- error plot radarplot.learningplot("../Plots/{}_loss.pdf".format(modelname), model) # -- validation nstep_ahead = 48 # X_plot = next(g.batch_sequence_multi_hdf5(model.training.getlast("test_files"), # batch_size=nstep_plot+nstep_ahead)) X_plot = xp.asarray(next( g.batch_sequence_multi_hdf5(test_files, batch_size=nstep_plot + nstep_ahead)),
def save_model_graph(loss, filepath, remove_split=False):
    with open(filepath, 'w') as o:
        o.write(c.build_computational_graph((loss,), remove_split).dump())
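A minimal usage sketch for the helper above; the two-variable model is illustrative, and `remove_split` only has an effect on old Chainer versions that still inserted split nodes:

import numpy as np
import chainer
import chainer.functions as F
from chainer import computational_graph as c

x = chainer.Variable(np.zeros((1, 2), dtype=np.float32))
loss = F.sum(2 * x)  # any scalar Variable works as the graph root
save_model_graph(loss, 'graph.dot')
save_model_graph(loss, 'graph.wo_split.dot', remove_split=True)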
def train(self, train_x, train_y, test_x, test_y):
    # train_x, train_y = train["x"].astype(numpy.float32), train["y"].astype(numpy.int32)
    # test_x, test_y = test["x"].astype(numpy.float32), test["y"].astype(numpy.int32)
    N_train = train_y.size
    N_test = test_y.size
    data_dim = train_x[0].shape[0]
    train_x = train_x.reshape(N_train, 1, 1, data_dim)
    test_x = test_x.reshape(N_test, 1, 1, data_dim)

    # Prepare multi-layer perceptron model, defined in net.py
    model = L.Classifier(cnn_net.MnistMLP(data_dim, 1000, 10))

    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    acc_txt = open(self.save_model_dir + "accuracy.txt", "w")
    loss_txt = open(self.save_model_dir + "loss.txt", "w")
    acc_txt.write("epoch\ttest_accuracy\n")
    loss_txt.write("epoch\ttrain_loss\n")

    # Learning loop
    for epoch in six.moves.range(1, self.epoch_num + 1):
        print('epoch', epoch)

        # training
        perm = numpy.random.permutation(N_train)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N_train, self.batchsize):
            x = chainer.Variable(numpy.asarray(train_x[perm[i:i + self.batchsize]]))
            t = chainer.Variable(numpy.asarray(train_y[perm[i:i + self.batchsize]]))

            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(model, x, t)

            if epoch == 1 and i == 0:
                with open(self.save_model_dir + 'graph.dot', 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ))
                    o.write(g.dump())
                print('graph generated')

            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        end = time.time()
        elapsed_time = end - start
        throughput = N_train / elapsed_time
        print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
            sum_loss / N_train, sum_accuracy / N_train, throughput))
        loss_txt.write("%d\t%f\n" % (epoch, sum_loss / N_train))
        loss_txt.flush()

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, self.batchsize):
            x = chainer.Variable(numpy.asarray(test_x[i:i + self.batchsize]),
                                 volatile='on')
            t = chainer.Variable(numpy.asarray(test_y[i:i + self.batchsize]),
                                 volatile='on')
            loss = model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        print('test mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))
        acc_txt.write("%d\t%f\n" % (epoch, sum_accuracy / N_test))
        acc_txt.flush()

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz(self.save_model_dir + 'mlp.model', model)
    print('save the optimizer')
    serializers.save_npz(self.save_model_dir + 'mlp.state', optimizer)

    # close the log files
    acc_txt.close()
    loss_txt.close()
# training
perm = np.random.permutation(X_train.shape[0])
sum_accuracy = 0
sum_loss = 0
for i in six.moves.range(0, X_train.shape[0], batchsize):
    x = chainer.Variable(xp.asarray(X_train[perm[i:i + batchsize]]))
    t = chainer.Variable(xp.asarray(y_train[perm[i:i + batchsize], 0]))
    # x = xp.asarray(X_train[perm[i:i + batchsize]])
    # t = xp.asarray(y_train[perm[i:i + batchsize], 0])

    # Pass the loss function (Classifier defines it) and its arguments
    optimizer.update(model, x, t)

    if epoch == 1 and i == 0:
        with open('graph.dot', 'w') as o:
            g = computational_graph.build_computational_graph(
                (model.loss, ), remove_split=True)
            o.write(g.dump())
        print('graph generated')

    sum_loss += float(model.loss.data) * len(t.data)
    sum_accuracy += float(model.accuracy.data) * len(t.data)

print('train mean loss={}, accuracy={}'.format(
    sum_loss / X_train.shape[0], sum_accuracy / X_train.shape[0]))

# evaluation
sum_accuracy = 0
sum_loss = 0
for i in six.moves.range(0, X_test.shape[0], batchsize):
    x = chainer.Variable(xp.asarray(X_test[i:i + batchsize]), volatile='on')
sum_loss = 0
for i in six.moves.range(0, N, batchsize):
    x_batch = x_train[perm[i:i + batchsize]]
    y_batch = y_train[perm[i:i + batchsize]]
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    optimizer.zero_grads()
    loss, acc = forward(x_batch, y_batch)
    loss.backward()
    optimizer.update()

    if epoch == 1 and i == 0:
        with open("graph.dot", "w") as o:
            o.write(c.build_computational_graph((loss, )).dump())
        with open("graph.wo_split.dot", "w") as o:
            g = c.build_computational_graph((loss, ), remove_split=True)
            o.write(g.dump())
        print('graph generated')

    sum_loss += float(cuda.to_cpu(loss.data)) * batchsize
    sum_accuracy += float(cuda.to_cpu(acc.data)) * batchsize

print('train mean loss={}, accuracy={}'.format(sum_loss / N, sum_accuracy / N))

# evaluation
sum_accuracy = 0
sum_loss = 0
for i in six.moves.range(0, N_test, batchsize):
serializers.load_npz(args.resume, optimizer)

# Learning loop
for epoch in six.moves.range(1, n_epoch + 1):
    print('epoch', epoch)

    # training
    perm = np.random.permutation(N)
    sum_loss = 0      # total loss
    sum_rec_loss = 0  # reconstruction loss
    for i in six.moves.range(0, N, batchsize):
        x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
        optimizer.update(model.get_loss_func(), x)
        if epoch == 1 and i == 0:
            with open('graph.dot', 'w') as o:
                g = computational_graph.build_computational_graph(
                    (model.loss, ))
                o.write(g.dump())
            print('graph generated')
        # log plain floats, not Variables
        writer.add_scalar('train/loss', float(model.loss.data), epoch * N + i)
        writer.add_scalar('train/rec_loss', float(model.rec_loss.data), epoch * N + i)
        sum_loss += float(model.loss.data) * len(x.data)
        sum_rec_loss += float(model.rec_loss.data) * len(x.data)

    print('train mean loss={}, mean reconstruction loss={}'
          .format(sum_loss / N, sum_rec_loss / N))

    # evaluation
    sum_loss = 0
    sum_rec_loss = 0
    with chainer.no_backprop_mode():
        for i in six.moves.range(0, N_test, batchsize):
def test_rankdir(self):
    for rankdir in ['TB', 'BT', 'LR', 'RL']:
        g = c.build_computational_graph((self.y,), rankdir=rankdir)
        self.assertIn('rankdir=%s' % rankdir, g.dump())
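A standalone sketch of what the rankdir option controls: it is passed through to Graphviz, so 'LR' lays the dumped graph out left to right (the `dot` call assumes Graphviz is installed; file names are illustrative):

import subprocess

import numpy as np
from chainer import variable
from chainer import computational_graph as c

x = variable.Variable(np.zeros((1, 2), dtype=np.float32))
y = 2 * x
g = c.build_computational_graph((y,), rankdir='LR')  # left-to-right layout
with open('graph_lr.dot', 'w') as o:
    o.write(g.dump())
subprocess.call('dot -Tpng graph_lr.dot > graph_lr.png', shell=True)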
def CifarAnalysis(folderName=None, n_epoch=1, batchsize=1000, **kwd):
    id_gpu = 0

    OutStr = ""
    OutStr += 'GPU: {}\n'.format(id_gpu)
    OutStr += 'Minibatch-size: {}\n'.format(batchsize)
    OutStr += 'epoch: {}\n'.format(n_epoch)
    OutStr += 'kwd: {}\n'.format(kwd)
    print(OutStr)

    fOutput = None
    fInfo = None
    if folderName:
        if not os.path.exists(folderName):
            os.makedirs(folderName)
        fOutput = open(os.path.join(folderName, "output.dat"), "w")
        fInfo = open(os.path.join(folderName, "info.dat"), "w")
        shutil.copyfile(__file__, os.path.join(folderName, os.path.basename(__file__)))
    if fInfo:
        fInfo.write(OutStr)

    # Prepare dataset
    InDataBatch = []

    data_tr = np.zeros((50000, 3 * 32 * 32), dtype=np.float32)
    data_ev = np.zeros((10000, 3 * 32 * 32), dtype=np.float32)
    label_tr = np.zeros((50000), dtype=np.int32)
    label_ev = np.zeros((10000), dtype=np.int32)

    for i in range(1, 5 + 1):
        # pickled CIFAR batches must be opened in binary mode
        with open("data_cifar10/data_batch_%d" % i, "rb") as f:
            tmp = pickle.load(f)
            data_tr[(i - 1) * 10000:i * 10000] = tmp["data"]
            label_tr[(i - 1) * 10000:i * 10000] = tmp["labels"]
    with open("data_cifar10/test_batch", "rb") as f:
        tmp = pickle.load(f)
        data_ev[:] = tmp["data"]
        label_ev[:] = tmp["labels"]

    # Prep
    print("Normalizing data ...")

    def Normalize(x):
        avg = np.average(x, axis=1).reshape((len(x), 1))
        std = np.std(x, axis=1).reshape((len(x), 1))  # per-sample standard deviation
        y = (x - avg) / std
        return y

    data_tr = Normalize(data_tr)
    data_ev = Normalize(data_ev)

    x_tr = data_tr.reshape((len(data_tr), 3, 32, 32))
    x_ev = data_ev.reshape((len(data_ev), 3, 32, 32))
    y_tr = label_tr
    y_ev = label_ev
    N_tr = len(data_tr)  # 50000
    N_ev = len(data_ev)  # 10000

    # Define analysis
    Resume = None
    if "Resume" in kwd:
        Resume = kwd["Resume"]
        del kwd["Resume"]

    model = L.Classifier(ImageProcessNetwork(I_colors=3, I_Xunit=32, I_Yunit=32,
                                             F_unit=10, **kwd))
    if id_gpu >= 0:
        cuda.get_device(id_gpu).use()
        model.to_gpu()
    xp = np if id_gpu < 0 else cuda.cupy

    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Init/Resume
    if Resume:
        print('Load optimizer state from', Resume)
        serializers.load_hdf5(Resume + ".state", optimizer)
        serializers.load_hdf5(Resume + ".model", model)

    # Learning loop
    if fOutput:
        fOutput.write("epoch,mode,loss,accuracy\n")
    for epoch in six.moves.range(1, n_epoch + 1):
        print('epoch %d' % epoch)

        # training
        perm = np.random.permutation(N_tr)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N_tr, batchsize):
            x = chainer.Variable(xp.asarray(x_tr[perm[i:i + batchsize]]))
            t = chainer.Variable(xp.asarray(y_tr[perm[i:i + batchsize]]))

            # Pass the loss function (Classifier defines it) and its arguments
            model.predictor.setTrainMode(True)
            optimizer.update(model, x, t)

            if (epoch == 1 and i == 0) and folderName:
                with open(os.path.join(folderName, 'graph.dot'), 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ))
                    o.write(g.dump())
                print('graph generated')

            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        end = time.time()
        elapsed_time = end - start
        throughput = N_tr / elapsed_time
        print('train mean loss=%.3f, accuracy=%.1f%%, throughput=%.0f images/sec'
              % (sum_loss / N_tr, sum_accuracy / N_tr * 100., throughput))
        if fOutput:
            fOutput.write("%d,Train,%e,%e\n" % (epoch, sum_loss / N_tr, sum_accuracy / N_tr))

        # evaluation
        perm = np.random.permutation(N_ev)
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_ev, batchsize):
            x = chainer.Variable(xp.asarray(x_ev[perm[i:i + batchsize]]), volatile='on')
            t = chainer.Variable(xp.asarray(y_ev[perm[i:i + batchsize]]), volatile='on')
            model.predictor.setTrainMode(False)
            loss = model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        print('test mean loss=%.3f, accuracy=%.1f%%'
              % (sum_loss / N_ev, sum_accuracy / N_ev * 100.))
        if fOutput:
            fOutput.write("%d,Test,%e,%e\n" % (epoch, sum_loss / N_ev, sum_accuracy / N_ev))

        if folderName and (epoch % 10 == 0 or epoch == n_epoch):
            # Save the model and the optimizer
            if epoch == n_epoch:
                myFname = os.path.join(folderName, 'mlp_final')
            else:
                myFname = os.path.join(folderName, 'mlp_%d' % epoch)  # per-epoch snapshot
            serializers.save_hdf5(myFname + ".model", model)
            serializers.save_hdf5(myFname + ".state", optimizer)

    if fOutput:
        fOutput.close()
    if fInfo:
        fInfo.close()
# training
perm = np.random.permutation(N)
sum_accuracy = 0
sum_loss = 0
net.train = True
for i in six.moves.range(0, N, batchsize):
    x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
    t = chainer.Variable(xp.asarray(y_train[perm[i:i + batchsize]]))

    # Pass the loss function (Classifier defines it) and its arguments
    optimizer.update(model, x, t)

    if epoch == 1 and i == 0:
        with open('graph.dot', 'w') as o:
            g = computational_graph.build_computational_graph(
                (model.loss, ), remove_split=True)
            o.write(g.dump())
        print('graph generated')

    sum_loss += float(model.loss.data) * len(t.data)
    sum_accuracy += float(model.accuracy.data) * len(t.data)

print('train mean loss={}, accuracy={}'.format(
    sum_loss / N, sum_accuracy / N))

# evaluation
sum_accuracy = 0
sum_loss = 0
# net.train = False
for i in six.moves.range(0, N_test, batchsize):
    # these should be volatile='on', but the current Chainer has a bug in batch normalization
def setUp(self):
    self.x = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.y = 2 * self.x
    self.f = self.y.creator_node
    self.g = c.build_computational_graph((self.y,))
serializers.load_npz(args.resume, optimizer)

# Learning loop
for epoch in six.moves.range(1, n_epoch + 1):
    print('epoch', epoch)

    # training
    perm = np.random.permutation(N)
    sum_loss = 0      # total loss
    sum_rec_loss = 0  # reconstruction loss
    for i in six.moves.range(0, N, batchsize):
        x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
        optimizer.update(model.get_loss_func(), x)
        if epoch == 1 and i == 0:
            with open('graph.dot', 'w') as o:
                g = computational_graph.build_computational_graph(
                    (model.loss, ))
                o.write(g.dump())
            print('graph generated')
        sum_loss += float(model.loss.data) * len(x.data)
        sum_rec_loss += float(model.rec_loss.data) * len(x.data)

    print('train mean loss={}, mean reconstruction loss={}'
          .format(sum_loss / N, sum_rec_loss / N))

    # evaluation
    sum_loss = 0
    sum_rec_loss = 0
    with chainer.no_backprop_mode():
        for i in six.moves.range(0, N_test, batchsize):
            x = chainer.Variable(xp.asarray(x_test[i:i + batchsize]))
def setUp(self):
    self.x1 = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.x2 = variable.Variable(np.zeros((1, 2)).astype(np.float32))
    self.y = self.x1 + self.x2
    self.f = self.y.creator
    self.g = c.build_computational_graph((self.y,))
def setUp(self):
    self.x1 = variable.Variable(np.zeros((1, 2)).astype('f'))
    self.x2 = variable.Variable(np.zeros((1, 2)).astype('f'))
    self.y = self.x1 + self.x2
    self.f = self.y.creator_node
    self.g = c.build_computational_graph((self.y,), remove_variable=True)
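The remove_variable flag used above strips variable nodes from the dumped graph, leaving only function nodes. A small standalone sketch contrasting the two dumps (node counts depend on the Chainer version, so they are printed rather than asserted):

import numpy as np
from chainer import variable
from chainer import computational_graph as c

x1 = variable.Variable(np.zeros((1, 2), dtype=np.float32))
x2 = variable.Variable(np.zeros((1, 2), dtype=np.float32))
y = x1 + x2
full = c.build_computational_graph((y,))
slim = c.build_computational_graph((y,), remove_variable=True)
# the variable-free graph has strictly fewer nodes
print(len(full.nodes), len(slim.nodes))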
def _check(self, outputs, node_num, edge_num):
    g = c.build_computational_graph(outputs)
    self.assertEqual(len(g.nodes), node_num)
    self.assertEqual(len(g.edges), edge_num)
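A hypothetical test using the _check helper above; for y = 2 * x, recent creator_node-style graphs contain three nodes (x, the multiply function node, y) and two edges, though exact counts can vary across Chainer versions:

def test_simple_graph(self):
    # x -> (MulConstant) -> y: 3 nodes, 2 edges under the assumptions above
    x = variable.Variable(np.zeros((1, 2), dtype=np.float32))
    y = 2 * x
    self._check((y,), 3, 2)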