def tiling(x: chainer.Variable, rows, cols):
    """Arrange a batch of generated images into one (rows*h, cols*w, 3) uint8 tile.

    The batch is assumed to hold rows*cols images whose pixel values lie
    roughly in [-1, 1] (clipped after rescaling) — TODO confirm against the
    generator's output range.
    """
    # Pull the data off the device and keep only the first 3 channels (RGB).
    batch = chainer.cuda.to_cpu(x.data)[:, :3, :, :]
    # Rescale from [-1, 1] to [0, 255] and quantize.
    pixels = numpy.clip(batch * 127.5 + 127.5, 0.0, 255.0).astype(numpy.uint8)
    h, w = pixels.shape[2], pixels.shape[3]
    # (rows, cols, c, h, w) -> (rows, h, cols, w, c) -> one big HWC image.
    grid = pixels.reshape(rows, cols, 3, h, w)
    grid = grid.transpose(0, 3, 1, 4, 2)
    return grid.reshape(rows * h, cols * w, 3)
def main():
    """Train the value network on pre-generated Othello state/result arrays.

    Loads ``states*.npy`` / ``results*.npy``, trains with Adam + weight decay
    for ``--epoch`` sweeps, evaluates MSE on the held-out set each epoch,
    appends the loss to ``log_value.txt`` and snapshots model/optimizer.
    """
    # Command-line options.
    parser = argparse.ArgumentParser(description='IaGo:')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID to be used')
    args = parser.parse_args()

    # Model definition
    model = network.Value()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
    cuda.get_device(args.gpu).use()

    # Test set: boards are re-encoded as two binary planes
    # (cells owned by player 1, cells owned by player 2).
    test_x = np.load('./value_data/npy/states_test.npy')
    test_y = np.load('./value_data/npy/results_test.npy')
    test_x = np.stack([test_x == 1, test_x == 2], axis=0).astype(np.float32)
    test_x = Variable(cuda.to_gpu(test_x.transpose(1, 0, 2, 3)))
    test_y = Variable(cuda.to_gpu(test_y.astype(np.float32)))

    # Train set is kept in raw form and re-encoded per minibatch.
    train_x = np.load('./value_data/npy/states.npy')
    train_y = np.load('./value_data/npy/results.npy')
    train_size = train_y.shape[0]
    minibatch_size = 4096  # 2**12

    # Learning loop
    for epoch in tqdm(range(args.epoch)):
        # The model is moved back to CPU for saving at the end of every
        # epoch, so it must be returned to the GPU here.
        model.to_gpu(args.gpu)
        # Shuffle train dataset
        rands = np.random.choice(train_size, train_size, replace=False)
        train_x = train_x[rands, :, :]
        train_y = train_y[rands]
        # Minibatch learning (numpy clamps an overrunning slice, so no
        # explicit min() against train_size is needed).
        for idx in tqdm(range(0, train_size, minibatch_size)):
            x = train_x[idx:idx + minibatch_size, :, :]
            x = np.stack([x == 1, x == 2], axis=0).astype(np.float32)
            x = Variable(cuda.to_gpu(x.transpose(1, 0, 2, 3)))
            y = train_y[idx:idx + minibatch_size]
            y = Variable(cuda.to_gpu(y.astype(np.float32)))
            train_pred = model(x)
            train_loss = mean_squared_error(train_pred, y)
            model.cleargrads()
            train_loss.backward()
            optimizer.update()
        # Evaluate without building a computation graph.
        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                test_pred = model(test_x)
                test_loss = mean_squared_error(test_pred, test_y)
        # FIX: the log used to write str(test_loss)[9:15], slicing the
        # Variable repr at fixed character offsets — that silently logs
        # garbage when the printed width changes. Extract the number.
        loss_value = float(cuda.to_cpu(test_loss.data))
        print('\nepoch :', epoch, ' loss :', loss_value)
        with open("./log_value.txt", "a") as f:
            f.write("{:.6f}, \n".format(loss_value))
        # Save models (on CPU so the snapshot is device-independent).
        model.to_cpu()
        serializers.save_npz('./models/value_model.npz', model)
        serializers.save_npz('./models/value_optimizer.npz', optimizer)
def update_core(self):
    """One updater iteration: lift 2D poses to 3D with `gen`, re-project a
    randomly rotated 3D pose back to 2D, and train gen/dis according to
    ``self.mode`` ('supervised', 'dcgan' or 'wgan').
    """
    gen_optimizer = self.get_optimizer('gen')
    dis_optimizer = self.get_optimizer('dis')
    gen, dis = self.gen, self.dis
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    # xy: 2D keypoints, xyz: 3D ground truth, scale: per-sample scale
    # (scale is unused here) — presumably; verify against the dataset class.
    xy, xyz, scale = chainer.dataset.concat_examples(batch, self.device)
    xy_real = Variable(xy)
    z_pred = gen(xy_real)
    # Random rotation.
    theta = np.random.uniform(0, 2 * np.pi, len(xy)).astype(np.float32)
    # Broadcast against the reversed shape, then transpose back so the
    # per-sample angle lands on the batch axis.
    cos_theta = np.broadcast_to(np.cos(theta), z_pred.shape[::-1])
    cos_theta = Variable(self.gen.xp.array(cos_theta.transpose(3, 2, 1, 0)))
    sin_theta = np.broadcast_to(np.sin(theta), z_pred.shape[::-1])
    sin_theta = Variable(self.gen.xp.array(sin_theta.transpose(3, 2, 1, 0)))
    # 2D Projection: interleaved (x, y) coordinates on the last axis.
    x = xy_real[:, :, :, 0::2]
    y = xy_real[:, :, :, 1::2]
    # Rotate (x, z) about the vertical axis; y is unchanged by the rotation.
    xx = x * cos_theta + z_pred * sin_theta
    xx = xx[:, :, :, :, None]
    yy = y[:, :, :, :, None]
    # Re-interleave to the same (..., 2*joints) layout as xy_real.
    xy_fake = F.concat((xx, yy), axis=4)
    xy_fake = F.reshape(xy_fake, (*y.shape[:3], -1))
    if self.batch_statistics:
        xy_real = concat_stat(xy_real)
        xy_fake = concat_stat(xy_fake)
    y_real = dis(xy_real)
    y_fake = dis(xy_fake)
    # Supervised depth error against the z components of the 3D ground truth.
    mse = F.mean_squared_error(z_pred, xyz[:, :, :, 2::3])
    if self.mode == 'supervised':
        gen.cleargrads()
        mse.backward()
        gen_optimizer.update()
        chainer.report({'mse': mse}, gen)
    elif self.mode == 'dcgan':
        # Discriminator accuracy: fake should be scored 0, real scored 1.
        acc_dis_fake = F.binary_accuracy(y_fake, dis.xp.zeros(y_fake.data.shape, dtype=int))
        acc_dis_real = F.binary_accuracy(y_real, dis.xp.ones(y_real.data.shape, dtype=int))
        acc_dis = (acc_dis_fake + acc_dis_real) / 2
        # Non-saturating generator loss.
        loss_gen = F.sum(F.softplus(-y_fake)) / batchsize
        if self.use_heuristic_loss:
            loss_heuristic = self.calculate_heuristic_loss(xy_real=xy_real, z_pred=z_pred)
            loss_gen += loss_heuristic * self.heuristic_loss_weight
            chainer.report({'loss_heuristic': loss_heuristic}, gen)
        gen.cleargrads()
        # Only train the generator while the discriminator is strong enough
        # (accuracy cap keeps the two players balanced).
        if acc_dis.data >= (1 - self.dcgan_accuracy_cap):
            loss_gen.backward()
            gen_optimizer.update()
        # Stop discriminator gradients from flowing back into the generator.
        xy_fake.unchain_backward()
        loss_dis = F.sum(F.softplus(-y_real)) / batchsize
        loss_dis += F.sum(F.softplus(y_fake)) / batchsize
        dis.cleargrads()
        # Symmetric cap: only train the discriminator while it is weak enough.
        if acc_dis.data <= self.dcgan_accuracy_cap:
            loss_dis.backward()
            dis_optimizer.update()
        chainer.report({'loss': loss_gen, 'mse': mse}, gen)
        chainer.report({'loss': loss_dis, 'acc': acc_dis,
                        'acc/fake': acc_dis_fake, 'acc/real': acc_dis_real}, dis)
    elif self.mode == 'wgan':
        y_real = F.sum(y_real) / batchsize
        y_fake = F.sum(y_fake) / batchsize
        wasserstein_distance = y_real - y_fake
        # Critic maximizes the distance, generator maximizes its own score.
        loss_dis = -wasserstein_distance
        loss_gen = -y_fake
        if self.use_heuristic_loss:
            loss_heuristic = self.calculate_heuristic_loss(xy_real=xy_real, z_pred=z_pred)
            loss_gen += loss_heuristic * self.heuristic_loss_weight
            chainer.report({'loss_heuristic': loss_heuristic}, gen)
        dis.cleargrads()
        loss_dis.backward()
        dis_optimizer.update()
        # WGAN-style n_critic schedule: train gen every 100 iterations during
        # warm-up (< 2500), then every 5 iterations afterwards.
        if self.iteration < 2500 and self.iteration % 100 == 0:
            gen.cleargrads()
            loss_gen.backward()
            gen_optimizer.update()
        if self.iteration > 2500 and self.iteration % 5 == 0:
            gen.cleargrads()
            loss_gen.backward()
            gen_optimizer.update()
        chainer.report({'loss': loss_gen, 'mse': mse}, gen)
        chainer.report({'loss': loss_dis}, dis)
    else:
        raise NotImplementedError
def main():
    """REINFORCE self-play loop for the SL policy network.

    Plays 2*N games per round between the current model and a randomly
    chosen earlier snapshot, reinforces with the game outcome as reward,
    and promotes the model to a new numbered snapshot once it wins
    consistently enough.
    """
    # Set the number of sets
    parser = argparse.ArgumentParser(description='IaGo:')
    parser.add_argument('--models', '-m', type=int, default=1,
                        help='Number of trained models')
    parser.add_argument('--set', '-s', type=int, default=1000,
                        help='Number of game sets played to train')
    args = parser.parse_args()
    N = 32  # games per round = 2*N (half with the alternate opening below)
    # Model definition
    model1 = network.SLPolicy()
    serializers.load_npz("../models/RL/model2.npz", model1)
    optimizer = optimizers.Adam()
    optimizer.setup(model1)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
    serializers.load_npz("../models/RL/optimizers/2.npz", optimizer)
    # REINFORCE algorithm
    models = args.models
    cnt = 0  # consecutive-ish rounds with win rate > 0.5
    #for set in tqdm(range(0, args.set)):
    while (models <= 20):
        # Randomly choose competitor model from reinforced models
        model2 = network.SLPolicy()
        model2_path = np.random.choice(glob.glob("../models/RL/*.npz"))
        print(model2_path)
        serializers.load_npz(model2_path, model2)
        result = 0
        state_seq, action_seq, reward_seq = [], [], []
        for i in tqdm(range(2 * N)):
            game = rl_self_play.Game(model1, model2)
            if i % 2 == 1:
                # Switch head and tail: perturb the opening position on
                # alternate games so both sides see varied starts.
                pos = random.choice([[2, 4], [3, 5], [4, 2], [5, 3]])
                game.state[pos[0], pos[1]] = 2
            states, actions, judge = game()
            # Every move of the game receives the final outcome as reward.
            rewards = [judge] * len(states)
            state_seq += states
            action_seq += actions
            reward_seq += rewards
            if judge == 1:
                result += 1
        # Update model: boards encoded as two binary planes (player 1 / 2).
        x = np.array(state_seq)
        x = np.stack([x == 1, x == 2], axis=0).astype(np.float32)
        x = Variable(x.transpose(1, 0, 2, 3))
        y = Variable(np.array(action_seq).astype(np.int32))
        r = Variable(np.array(reward_seq).astype(np.float32))
        pred = model1(x)
        # Per-sample cross entropy weighted by reward = REINFORCE gradient.
        c = F.softmax_cross_entropy(pred, y, reduce="no")
        model1.cleargrads()
        loss = F.mean(c * r)
        loss.backward()
        optimizer.update()
        rate = result / (2 * N)
        print("Models:" + str(models) + ", Result:" + str(rate)
              + ", Loss:" + str(loss.data))
        with open("../log/rl.txt", "a") as f:
            f.write(str(rate) + ", \n")
        if rate > 0.5:
            cnt += 1
        # Promotion heuristic: require more winning rounds for later models
        # plus a strong final round before saving a new snapshot.
        if cnt > 4 * np.sqrt(models) and rate > 0.6:
            model = copy.deepcopy(model1)
            #model.to_cpu()
            serializers.save_npz("../models/RL/model" + str(models) + ".npz", model)
            serializers.save_npz(
                "../models/RL/optimizers/" + str(models) + ".npz", optimizer)
            models += 1
            cnt = 0
        # Abort the run if the model collapses against its opponent.
        if rate < 0.2:
            break
def step(self, perm, batch_index, mode, epoch):
    """Run one training/evaluation step for the current stage.

    ``mode`` selects train vs. test data (gradients are applied only for
    'train'); ``self.stage`` selects the RPN stage or the mask head stage.
    Returns a dict with 'prediction', 'current_loss' and 'current_accuracy'
    as host (CPU) arrays.  ``epoch`` is currently unused.
    """
    if mode == 'train':
        data_vgg, data, segs, t, size, gt_bboxs = self.read_batch(perm, batch_index, self.train_data)
        train = True
    else:
        data_vgg, data, segs, t, size, gt_bboxs = self.read_batch(perm, batch_index, self.test_data)
        train = False
    data_vgg = Variable(cuda.to_gpu(data_vgg))
    t = Variable(cuda.to_gpu(t))  # NOTE(review): t is not used below — confirm
    # Frozen VGG features: re-wrapping .data cuts the graph so no gradients
    # flow back into the backbone.
    h = self.vgg(data_vgg)
    h = h.data
    h = Variable(h)
    if self.stage == 'rpn':
        pred_cls_score, pred_bbox = self.rpn(h, train=train, test=not train)
        # Match anchors to ground-truth boxes; -1 labels are ignored below.
        tgt_bbox, tgt_cls, inside_ind = generate_tgt_bbox(gt_bboxs, self.anchors, size)
        tgt_bbox = Variable(cuda.to_gpu(tgt_bbox))
        tgt_cls = Variable(cuda.to_gpu(tgt_cls.transpose(0, 3, 1, 2)))
        L_rpn_bbox = My_Mean_absolute_error.mean_absolute_error(pred_bbox, tgt_bbox)
        L_rpn_cls = F.softmax_cross_entropy(pred_cls_score, tgt_cls, ignore_label=-1)
        A_rpn = F.accuracy(pred_cls_score, tgt_cls, ignore_label=-1)
        if mode == 'train':
            self.rpn.cleargrads()
            # Both losses backward into the same grads, then one update.
            L_rpn_bbox.backward()
            L_rpn_cls.backward()
            self.o_rpn.update()
        return {"prediction": pred_cls_score.data.get(),
                "current_loss": L_rpn_bbox.data.get(),
                "current_accuracy": A_rpn.data.get(),
                }
    elif self.stage == 'mask':
        # RPN is run in inference mode while training the mask head.
        pred_cls_score, pred_bbox = self.rpn(h, train=False, test=True)
        topk = 5  # number of highest-scoring proposals per image
        topk_bbox = self.extract_topk_region(pred_cls_score, pred_bbox, topk)
        data_resnet = np.zeros((topk, self.batchsize, 3, self.input_height, self.input_width), dtype=np.float32)
        tgt_seg = np.zeros((topk, self.batchsize, 1, self.out_channel, 14, 14), dtype=np.int32)
        tgt_cls = np.zeros((topk, self.batchsize), dtype=np.int32)
        # -1 marks proposals with no matched ground-truth box.
        tgt_bbox = np.zeros((topk, self.batchsize, 4), dtype=np.float32) - 1
        data_resnet, tgt_seg = generate_tgt_semantics(data_resnet, tgt_seg, topk_bbox, data, segs, self.out_channel)
        data_resnet = Variable(cuda.to_gpu(data_resnet))
        tgt_seg = Variable(cuda.to_gpu(tgt_seg))
        # Run the head on each proposal rank and stack the results along a
        # new leading axis of size topk.
        for k in range(topk):
            if k == 0:
                topk_cls_label, topk_box, topk_mask = self.cnn(data_resnet[k], train=train, test=not train)
                topk_cls_label = F.expand_dims(topk_cls_label, axis=0)
                topk_box = F.expand_dims(topk_box, axis=0)
                topk_mask = F.expand_dims(topk_mask, axis=0)
            else:
                cls_label, box, mask = self.cnn(data_resnet[k], train=train, test=not train)
                cls_label = F.expand_dims(cls_label, axis=0)
                box = F.expand_dims(box, axis=0)
                mask = F.expand_dims(mask, axis=0)
                topk_cls_label = F.vstack((topk_cls_label, cls_label))
                topk_box = F.vstack((topk_box, box))
                topk_mask = F.vstack((topk_mask, mask))
        # topk_cls_label,topk_box,topk_mask = cnn_topk_loop(topk,self.cnn,data_resnet,train)
        tgt_bbox, tgt_cls = generate_tgt_bbox_mask(tgt_bbox, tgt_cls, topk_bbox, gt_bboxs)
        tgt_bbox = Variable(cuda.to_gpu(tgt_bbox))
        tgt_cls = Variable(cuda.to_gpu(tgt_cls))
        L_cnn_bbox = My_Mean_absolute_error.mean_absolute_error(topk_box, tgt_bbox)
        # Put the class axis second so the softmax losses see (N, C, ...).
        topk_cls_label = F.transpose(topk_cls_label, axes=(0, 2, 1))
        L_cnn_cls = My_Softmax_cross_entropy.softmax_cross_entropy(topk_cls_label, tgt_cls, ignore_label=-1)
        A_cnn = F.accuracy(topk_cls_label, tgt_cls, ignore_label=-1)
        L_cnn_mask = F.sigmoid_cross_entropy(topk_mask, tgt_seg)
        if mode == 'train':
            self.cnn.cleargrads()
            # All three losses accumulate into the same grads, one update.
            L_cnn_bbox.backward()
            L_cnn_cls.backward()
            L_cnn_mask.backward()
            self.o_cnn.update()
        return {"prediction": topk_cls_label.data.get(),
                "current_loss": L_cnn_mask.data.get(),
                "current_accuracy": A_cnn.data.get(),
                }