Example #1
def tiling(x: chainer.Variable, rows, cols):
    # Move the batch to host memory and keep the first three channels.
    x = chainer.cuda.to_cpu(x.data)
    x = x[:, :3, :, :]
    # Map [-1, 1] floats to [0, 255] uint8 pixel values.
    x = numpy.asarray(numpy.clip(x * 127.5 + 127.5, 0.0, 255.0),
                      dtype=numpy.uint8)
    _, _, h, w = x.shape
    # Rearrange the batch into a (rows * h, cols * w, 3) image grid.
    x = x.reshape((rows, cols, 3, h, w))
    x = x.transpose(0, 3, 1, 4, 2)
    x = x.reshape((rows * h, cols * w, 3))
    return x
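The helper above tiles a batch of rows * cols generated images into one large image for inspection. A minimal usage sketch, assuming 32x32 RGB inputs in [-1, 1] (the dummy batch and the PIL save call are illustrative, not from the original source):

import numpy
import chainer
from PIL import Image

# Dummy batch of 12 RGB images in [-1, 1] as a chainer.Variable.
batch = chainer.Variable(
    numpy.random.uniform(-1, 1, (12, 3, 32, 32)).astype(numpy.float32))
grid = tiling(batch, rows=3, cols=4)  # uint8 array of shape (96, 128, 3)
Image.fromarray(grid).save('tiles.png')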
Example #2
def main():
	# Parse command-line arguments
	parser = argparse.ArgumentParser(description='IaGo:')
	parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train')
	parser.add_argument('--gpu', '-g', type=int, default=0, help='GPU ID to be used')
	args = parser.parse_args()

	# Model definition
	model = network.Value()
	optimizer = optimizers.Adam()
	optimizer.setup(model)
	optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
	cuda.get_device(args.gpu).use()

	# Load test dataset and encode boards as two binary planes (one per player)
	test_x = np.load('./value_data/npy/states_test.npy')
	test_y = np.load('./value_data/npy/results_test.npy')
	test_x = np.stack([test_x == 1, test_x == 2], axis=0).astype(np.float32)
	test_x = Variable(cuda.to_gpu(test_x.transpose(1, 0, 2, 3)))
	test_y = Variable(cuda.to_gpu(test_y.astype(np.float32)))

	# Load train dataset
	train_x = np.load('./value_data/npy/states.npy')
	train_y = np.load('./value_data/npy/results.npy')
	train_size = train_y.shape[0]
	minibatch_size = 4096 # 2**12

	# Learning loop
	for epoch in tqdm(range(args.epoch)):
		model.to_gpu(args.gpu)
		# Should be unnecessary...
		#chainer.config.train = True
		#chainer.config.enable_backprop = True
		# Shuffle train dataset
		rands = np.random.choice(train_size, train_size, replace=False)
		train_x = train_x[rands,:,:]
		train_y = train_y[rands]

		# Minibatch learning
		for idx in tqdm(range(0, train_size, minibatch_size)):
			x = train_x[idx:min(idx+minibatch_size, train_size), :, :]
			x = np.stack([x == 1, x == 2], axis=0).astype(np.float32)
			x = Variable(cuda.to_gpu(x.transpose(1, 0, 2, 3)))
			y = train_y[idx:min(idx+minibatch_size, train_size)]
			y = Variable(cuda.to_gpu(y.astype(np.float32)))
			train_pred = model(x)
			train_loss = mean_squared_error(train_pred, y)
			model.cleargrads()
			train_loss.backward()
			optimizer.update()
		# Calculate loss
		with chainer.using_config('train', False):
			with chainer.using_config('enable_backprop', False):
				test_pred = model(test_x)
		test_loss = mean_squared_error(test_pred, test_y)
		print('\nepoch :', epoch, '  loss :', test_loss.data)
		# Log
		with open("./log_value.txt", "a") as f:
			f.write("{:.4f}, \n".format(float(test_loss.data)))
		# Save models
		model.to_cpu()
		serializers.save_npz('./models/value_model.npz', model)
		serializers.save_npz('./models/value_optimizer.npz', optimizer)
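Both this script and Example #4 encode the integer board state as two binary feature planes, one per player, before feeding it to the network. A standalone sketch of that transform (the 8x8 board size is an assumption):

import numpy as np

states = np.random.randint(0, 3, (5, 8, 8))  # 0: empty, 1: player 1, 2: player 2
planes = np.stack([states == 1, states == 2], axis=0).astype(np.float32)
x = planes.transpose(1, 0, 2, 3)  # NCHW layout: (5, 2, 8, 8)
print(x.shape)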
Example #3
    def update_core(self):
        gen_optimizer = self.get_optimizer('gen')
        dis_optimizer = self.get_optimizer('dis')

        gen, dis = self.gen, self.dis

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        xy, xyz, scale = chainer.dataset.concat_examples(batch, self.device)

        xy_real = Variable(xy)
        z_pred = gen(xy_real)

        # Random rotation.
        theta = np.random.uniform(0, 2 * np.pi, len(xy)).astype(np.float32)
        cos_theta = np.broadcast_to(np.cos(theta), z_pred.shape[::-1])
        cos_theta = Variable(self.gen.xp.array(cos_theta.transpose(3, 2, 1, 0)))
        sin_theta = np.broadcast_to(np.sin(theta), z_pred.shape[::-1])
        sin_theta = Variable(self.gen.xp.array(sin_theta.transpose(3, 2, 1, 0)))

        # 2D Projection.
        x = xy_real[:, :, :, 0::2]
        y = xy_real[:, :, :, 1::2]
        xx = x * cos_theta + z_pred * sin_theta
        xx = xx[:, :, :, :, None]
        yy = y[:, :, :, :, None]
        xy_fake = F.concat((xx, yy), axis=4)
        xy_fake = F.reshape(xy_fake, (*y.shape[:3], -1))

        if self.batch_statistics:
            xy_real = concat_stat(xy_real)
            xy_fake = concat_stat(xy_fake)

        y_real = dis(xy_real)
        y_fake = dis(xy_fake)
        mse = F.mean_squared_error(z_pred, xyz[:, :, :, 2::3])

        if self.mode == 'supervised':
            gen.cleargrads()
            mse.backward()
            gen_optimizer.update()
            chainer.report({'mse': mse}, gen)

        elif self.mode == 'dcgan':
            acc_dis_fake = F.binary_accuracy(y_fake, dis.xp.zeros(y_fake.data.shape, dtype=int))
            acc_dis_real = F.binary_accuracy(y_real, dis.xp.ones(y_real.data.shape, dtype=int))
            acc_dis = (acc_dis_fake + acc_dis_real) / 2

            loss_gen = F.sum(F.softplus(-y_fake)) / batchsize
            if self.use_heuristic_loss:
                loss_heuristic = self.calculate_heuristic_loss(xy_real=xy_real, z_pred=z_pred)
                loss_gen += loss_heuristic * self.heuristic_loss_weight
                chainer.report({'loss_heuristic': loss_heuristic}, gen)
            gen.cleargrads()
            if acc_dis.data >= (1 - self.dcgan_accuracy_cap):
                loss_gen.backward()
                gen_optimizer.update()
            xy_fake.unchain_backward()

            loss_dis = F.sum(F.softplus(-y_real)) / batchsize
            loss_dis += F.sum(F.softplus(y_fake)) / batchsize
            dis.cleargrads()
            if acc_dis.data <= self.dcgan_accuracy_cap:
                loss_dis.backward()
                dis_optimizer.update()

            chainer.report({'loss': loss_gen, 'mse': mse}, gen)
            chainer.report({'loss': loss_dis, 'acc': acc_dis, 'acc/fake': acc_dis_fake, 'acc/real': acc_dis_real}, dis)

        elif self.mode == 'wgan':
            y_real = F.sum(y_real) / batchsize
            y_fake = F.sum(y_fake) / batchsize

            wasserstein_distance = y_real - y_fake
            loss_dis = -wasserstein_distance
            loss_gen = -y_fake
            if self.use_heuristic_loss:
                loss_heuristic = self.calculate_heuristic_loss(xy_real=xy_real, z_pred=z_pred)
                loss_gen += loss_heuristic * self.heuristic_loss_weight
                chainer.report({'loss_heuristic': loss_heuristic}, gen)

            dis.cleargrads()
            loss_dis.backward()
            dis_optimizer.update()

            # Early in training, update the generator only every 100
            # iterations; after 2500 iterations, update it every 5.
            if self.iteration < 2500 and self.iteration % 100 == 0:
                gen.cleargrads()
                loss_gen.backward()
                gen_optimizer.update()

            if self.iteration > 2500 and self.iteration % 5 == 0:
                gen.cleargrads()
                loss_gen.backward()
                gen_optimizer.update()

            chainer.report({'loss': loss_gen, 'mse': mse}, gen)
            chainer.report({'loss': loss_dis}, dis)

        else:
            raise NotImplementedError
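The adversarial branches above rely on a random-rotation trick: the predicted depths are combined with the 2D pose, the skeleton is rotated about the vertical axis by a random angle, and the result is re-projected to 2D for the discriminator. A plain numpy sketch of that geometry (batch and joint counts are assumptions):

import numpy as np

xy = np.random.randn(16, 17, 2).astype(np.float32)   # 2D joint positions
z = np.random.randn(16, 17, 1).astype(np.float32)    # predicted depths
theta = np.random.uniform(0, 2 * np.pi, (16, 1, 1)).astype(np.float32)

# Rotation about the y-axis: x' = x*cos(theta) + z*sin(theta); y is unchanged.
x_rot = xy[..., 0:1] * np.cos(theta) + z * np.sin(theta)
xy_fake = np.concatenate([x_rot, xy[..., 1:2]], axis=-1)  # fake 2D projection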
Example #4
def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='IaGo:')
    parser.add_argument('--models',
                        '-m',
                        type=int,
                        default=1,
                        help='Number of trained models')
    parser.add_argument('--set',
                        '-s',
                        type=int,
                        default=1000,
                        help='Number of game sets played to train')
    args = parser.parse_args()
    N = 32

    # Model definition
    model1 = network.SLPolicy()
    serializers.load_npz("../models/RL/model2.npz", model1)
    optimizer = optimizers.Adam()
    optimizer.setup(model1)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
    serializers.load_npz("../models/RL/optimizers/2.npz", optimizer)
    # REINFORCE algorithm
    models = args.models
    cnt = 0
    #for set in tqdm(range(0, args.set)):
    # Train until 20 reinforced models have been saved
    while models <= 20:
        # Randomly choose competitor model from reinforced models
        model2 = network.SLPolicy()
        model2_path = np.random.choice(glob.glob("../models/RL/*.npz"))
        print(model2_path)
        serializers.load_npz(model2_path, model2)

        result = 0
        state_seq, action_seq, reward_seq = [], [], []
        for i in tqdm(range(2 * N)):
            game = rl_self_play.Game(model1, model2)
            if i % 2 == 1:
                # Switch head and tail
                pos = random.choice([[2, 4], [3, 5], [4, 2], [5, 3]])
                game.state[pos[0], pos[1]] = 2
            states, actions, judge = game()
            rewards = [judge] * len(states)
            state_seq += states
            action_seq += actions
            reward_seq += rewards
            if judge == 1:
                result += 1

        # Update model
        x = np.array(state_seq)
        x = np.stack([x == 1, x == 2], axis=0).astype(np.float32)
        x = Variable(x.transpose(1, 0, 2, 3))
        y = Variable(np.array(action_seq).astype(np.int32))
        r = Variable(np.array(reward_seq).astype(np.float32))
        pred = model1(x)
        c = F.softmax_cross_entropy(pred, y, reduce="no")
        model1.cleargrads()
        loss = F.mean(c * r)
        loss.backward()
        optimizer.update()
        rate = result / (2 * N)
        print("Models:" + str(models) + ", Result:" + str(rate) + ", Loss:" +
              str(loss.data))
        with open("../log/rl.txt", "a") as f:
            f.write(str(rate) + ", \n")
        if rate > 0.5:
            cnt += 1
        if cnt > 4 * np.sqrt(models) and rate > 0.6:
            model = copy.deepcopy(model1)
            #model.to_cpu()
            serializers.save_npz("../models/RL/model" + str(models) + ".npz",
                                 model)
            serializers.save_npz(
                "../models/RL/optimizers/" + str(models) + ".npz", optimizer)
            models += 1
            cnt = 0
        if rate < 0.2:
            break
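The gradient step above is REINFORCE: the per-move softmax cross-entropy is weighted by the game outcome, so minimizing the mean pushes up the probability of moves from won games and pushes down moves from lost ones. A minimal sketch of that loss on dummy data (the batch size and the 64-cell action space are assumptions):

import numpy as np
import chainer.functions as F
from chainer import Variable

logits = Variable(np.random.randn(8, 64).astype(np.float32))    # policy scores
actions = Variable(np.random.randint(0, 64, 8).astype(np.int32))
rewards = Variable(np.random.choice([-1.0, 1.0], 8).astype(np.float32))

per_move = F.softmax_cross_entropy(logits, actions, reduce='no')
loss = F.mean(per_move * rewards)  # a negative reward flips the gradient sign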
Example #5
    def step(self, perm, batch_index, mode, epoch):
        if mode == 'train':
            data_vgg, data, segs, t, size, gt_bboxs = self.read_batch(perm, batch_index, self.train_data)
            train = True
        else:
            data_vgg, data, segs, t, size, gt_bboxs = self.read_batch(perm, batch_index, self.test_data)
            train = False

        data_vgg = Variable(cuda.to_gpu(data_vgg))
        t = Variable(cuda.to_gpu(t))
        h = self.vgg(data_vgg)
        # Detach the VGG features so backprop stops at the shared backbone.
        h = Variable(h.data)

        if self.stage == 'rpn':
            pred_cls_score, pred_bbox = self.rpn(h, train=train, test=not train)
            tgt_bbox, tgt_cls, inside_ind = generate_tgt_bbox(gt_bboxs, self.anchors, size)
            tgt_bbox = Variable(cuda.to_gpu(tgt_bbox))
            tgt_cls = Variable(cuda.to_gpu(tgt_cls.transpose(0, 3, 1, 2)))

            L_rpn_bbox = My_Mean_absolute_error.mean_absolute_error(pred_bbox, tgt_bbox)
            L_rpn_cls = F.softmax_cross_entropy(pred_cls_score, tgt_cls, ignore_label=-1)
            A_rpn = F.accuracy(pred_cls_score, tgt_cls, ignore_label=-1)
            if mode == 'train':
                self.rpn.cleargrads()
                L_rpn_bbox.backward()
                L_rpn_cls.backward()
                self.o_rpn.update()

            return {"prediction": pred_cls_score.data.get(),
                    "current_loss": L_rpn_bbox.data.get(),
                    "current_accuracy": A_rpn.data.get(),
            }
        elif self.stage == 'mask':
            pred_cls_score, pred_bbox = self.rpn(h, train=False, test=True)

            # Take the top-k highest-scoring region proposals from the RPN.
            topk = 5
            topk_bbox = self.extract_topk_region(pred_cls_score, pred_bbox, topk)
            data_resnet = np.zeros((topk, self.batchsize, 3, self.input_height, self.input_width), dtype=np.float32)
            tgt_seg = np.zeros((topk, self.batchsize, 1, self.out_channel, 14, 14), dtype=np.int32)
            tgt_cls = np.zeros((topk, self.batchsize), dtype=np.int32)
            tgt_bbox = np.zeros((topk, self.batchsize, 4), dtype=np.float32) - 1
            data_resnet, tgt_seg = generate_tgt_semantics(data_resnet, tgt_seg, topk_bbox, data, segs, self.out_channel)

            data_resnet = Variable(cuda.to_gpu(data_resnet))
            tgt_seg = Variable(cuda.to_gpu(tgt_seg))
            for k in range(topk):
                if k == 0:
                    topk_cls_label, topk_box, topk_mask = self.cnn(data_resnet[k], train=train, test=not train)
                    topk_cls_label = F.expand_dims(topk_cls_label, axis=0)
                    topk_box = F.expand_dims(topk_box, axis=0)
                    topk_mask = F.expand_dims(topk_mask, axis=0)
                else:
                    cls_label, box, mask = self.cnn(data_resnet[k], train=train, test=not train)
                    cls_label = F.expand_dims(cls_label, axis=0)
                    box = F.expand_dims(box, axis=0)
                    mask = F.expand_dims(mask, axis=0)
                    topk_cls_label = F.vstack((topk_cls_label, cls_label))
                    topk_box = F.vstack((topk_box, box))
                    topk_mask = F.vstack((topk_mask, mask))
            # topk_cls_label, topk_box, topk_mask = cnn_topk_loop(topk, self.cnn, data_resnet, train)

            tgt_bbox, tgt_cls = generate_tgt_bbox_mask(tgt_bbox, tgt_cls, topk_bbox, gt_bboxs)

            tgt_bbox = Variable(cuda.to_gpu(tgt_bbox))
            tgt_cls = Variable(cuda.to_gpu(tgt_cls))
            L_cnn_bbox = My_Mean_absolute_error.mean_absolute_error(topk_box, tgt_bbox)

            topk_cls_label = F.transpose(topk_cls_label, axes=(0, 2, 1))
            L_cnn_cls = My_Softmax_cross_entropy.softmax_cross_entropy(topk_cls_label, tgt_cls, ignore_label=-1)
            A_cnn = F.accuracy(topk_cls_label, tgt_cls, ignore_label=-1)
            L_cnn_mask = F.sigmoid_cross_entropy(topk_mask, tgt_seg)

            if mode == 'train':
                self.cnn.cleargrads()
                L_cnn_bbox.backward()
                L_cnn_cls.backward()
                L_cnn_mask.backward()
                self.o_cnn.update()

            return {"prediction": topk_cls_label.data.get(),
                    "current_loss": L_cnn_mask.data.get(),
                    "current_accuracy": A_cnn.data.get(),
            }
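The k == 0 special case in the region loop above exists only to seed the accumulators; collecting the per-region outputs in a list and stacking once is equivalent and avoids the repeated F.vstack calls. A sketch with a stand-in for self.cnn (all shapes and the stand-in function are assumptions):

import numpy as np
import chainer.functions as F
from chainer import Variable

def cnn(x):
    # Stand-in for self.cnn: returns (class scores, boxes, masks) per region.
    n = x.shape[0]
    return (Variable(np.zeros((n, 21), np.float32)),
            Variable(np.zeros((n, 4), np.float32)),
            Variable(np.zeros((n, 1, 21, 14, 14), np.float32)))

data_resnet = Variable(np.zeros((5, 2, 3, 224, 224), np.float32))  # (topk, batch, ...)
outs = [cnn(data_resnet[k]) for k in range(5)]
topk_cls_label, topk_box, topk_mask = (F.stack(t, axis=0) for t in zip(*outs))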