Example #1
	def forward_one_step(self, state, action, reward, next_state, test=False):
		xp = cuda.cupy if config.use_gpu else np
		n_batch = state.shape[0]
		state = Variable(state.reshape((n_batch, config.rl_history_length * 34)))
		next_state = Variable(next_state.reshape((n_batch, config.rl_history_length * 34)))
		if config.use_gpu:
			state.to_gpu()
			next_state.to_gpu()
		q = self.compute_q_variable(state, test=test)
		q_ = self.compute_q_variable(next_state, test=test)
		max_action_indices = xp.argmax(q_.data, axis=1)
		if config.use_gpu:
			max_action_indices = cuda.to_cpu(max_action_indices)

		target_q = self.compute_target_q_variable(next_state, test=test)

		target = q.data.copy()

		for i in xrange(n_batch):
			max_action_index = max_action_indices[i]
			target_value = reward[i] + config.rl_discount_factor * target_q.data[i][max_action_indices[i]]
			action_index = self.get_index_for_action(action[i])
			old_value = target[i, action_index]
			diff = target_value - old_value
			if diff > 1.0:
				target_value = 1.0 + old_value	
			elif diff < -1.0:
				target_value = -1.0 + old_value	
			target[i, action_index] = target_value

		target = Variable(target)
		loss = F.mean_squared_error(target, q)
		return loss, q
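
The snippet above builds a Double DQN target: the online network picks the next action (argmax over q_), the target network evaluates it, and the TD error is clipped to [-1, 1] before the squared-error regression. A minimal NumPy-only sketch of the same target construction (the shapes, discount factor, and clipping threshold are assumptions mirroring the code above, not part of the project):

import numpy as np

def build_clipped_targets(q, q_next_online, q_next_target, action_indices, rewards, gamma=0.99):
    # q, q_next_online, q_next_target: (batch, n_actions) arrays
    # action_indices: (batch,) indices of the actions actually taken
    target = q.copy()
    best_next = np.argmax(q_next_online, axis=1)   # action selection by the online net
    for i in range(q.shape[0]):
        value = rewards[i] + gamma * q_next_target[i, best_next[i]]
        old = target[i, action_indices[i]]
        # clip the TD error to [-1, 1], as the loop above does
        target[i, action_indices[i]] = old + np.clip(value - old, -1.0, 1.0)
    return target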
Example #2
def tiling(x: chainer.Variable, rows, cols):
    x = chainer.cuda.to_cpu(x.data)
    x = x[:, :3, :, :]
    x = numpy.asarray(numpy.clip(x * 127.5 + 127.5, 0.0, 255.0),
                      dtype=numpy.uint8)
    _, _, h, w = x.shape
    x = x.reshape((rows, cols, 3, h, w))
    x = x.transpose(0, 3, 1, 4, 2)
    x = x.reshape((rows * h, cols * w, 3))
    return x
    def make_image(trainer):
        x_ref, x_rot, eps = data
        batch = x_ref.shape[0]
        width = x_ref.shape[-1]
        height = x_ref.shape[-2]
        channel = x_ref.shape[-3]
        xp = gen.xp

        image_size = batch

        converter = chainer.dataset.concat_examples
        x_real = Variable(converter(x_rot, device))
        x_ref = Variable(converter(x_ref, device))
        eps = Variable(converter(eps, device))

        x_real = Variable(x_real.data.astype(np.float32)) / 255.0
        x_ref = Variable(x_ref.data.astype(np.float32)) / 255.0
        eps = Variable(eps.data.astype(np.float32))

        with chainer.using_config('train', False):
            x = gen(x_ref, eps)

        x_ref = chainer.cuda.to_cpu(x_ref.data)
        x_real = chainer.cuda.to_cpu(x_real.data)
        x_gen = chainer.cuda.to_cpu(x.data)

        x_ref = x_ref.reshape((1, image_size, channel, height, width))
        x_real = x_real.reshape((1, image_size, channel, height, width))
        x_gen = x_gen.reshape((1, image_size, channel, height, width))

        x = np.concatenate((x_ref, x_real, x_gen), axis=0)
        x = x * 255
        x = x.clip(0.0, 255.0)
        # gen_output_activation_func is sigmoid
        x = np.asarray(x, dtype=np.uint8)
        # gen output_activation_func is tanh
        # x = np.asarray(np.clip((x+1) * 0.5 * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, _, H, W = x.shape
        #x = x.reshape((n_images, 3, 1, H, W))
        # col, row, ch, H, W -> col, H, row, W, ch
        x = x.transpose(0, 3, 1, 4, 2)
        if channel == 3:
            x = x.reshape((3 * H, image_size * W, 3))
        elif channel == 1:
            x = x.reshape((3 * H, image_size * W))

        preview_dir = '{}/preview'.format(dst)
        preview_path = preview_dir + \
                       '/image{:0>6}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
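
Both tiling() and make_image() above lay a batch of CHW images out as one grid image with the same reshape → transpose → reshape pattern. A small NumPy sketch of just that pattern (the sizes are made up for illustration):

import numpy as np

rows, cols, ch, h, w = 2, 3, 3, 4, 5
batch = np.random.randint(0, 256, size=(rows * cols, ch, h, w), dtype=np.uint8)

grid = batch.reshape(rows, cols, ch, h, w)
grid = grid.transpose(0, 3, 1, 4, 2)         # -> rows, H, cols, W, ch
grid = grid.reshape(rows * h, cols * w, ch)  # one HWC image, ready for Image.fromarray
print(grid.shape)                            # (8, 15, 3)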
Example #4
def cumprod(x, axis=-1):
    if not isinstance(x, Variable):
        x = Variable(x)

    if axis is None:
        x = x.reshape(-1)
        axis = 0
    elif axis < 0:
        axis = x.ndim + axis
    assert axis >= 0 and axis < x.ndim

    xp = cuda.get_array_module(x)
    ndim = x.ndim
    dims = x.shape[axis]
    shape_new = x.shape[:axis] + (dims, ) + x.shape[axis:]
    x = functions.expand_dims(x, axis)
    x = functions.broadcast_to(x, shape_new)

    # TODO: use cupy.tril
    mask = numpy.tril(numpy.ones((dims, dims), dtype=bool))
    if xp is cupy:
        mask = cuda.to_gpu(mask)
    expander = [1] * axis + [dims, dims] + [1] * (ndim - axis - 1)
    mask = mask.reshape(expander)
    mask = xp.broadcast_to(mask, shape_new)
    x = functions.where(mask, x, xp.ones_like(x.data))
    return prod(x, axis + 1)
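
A hedged usage sketch for cumprod() (it assumes the function above and its prod() helper are importable together with Chainer; the expected values are shown only for comparison with NumPy's own cumprod):

import numpy as np

a = np.array([[1., 2., 3.],
              [4., 5., 6.]], dtype=np.float32)
y = cumprod(a, axis=1)            # chainer.Variable
print(y.data)                     # [[  1.   2.   6.]
                                  #  [  4.  20. 120.]]
print(np.cumprod(a, axis=1))      # same result from NumPy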
Example #5
def main():

    # Set up a neural network to train
    gen = Generator()
    chainer.serializers.load_npz('result/gen_iter_500000.npz', gen)

    np.random.seed(0)
    xp = gen.xp
    z = np.random.normal(0.0, 1.0, (100, 100))
    for i in range(0, 10):
        for j in range(1, 10):
            # interpolate gradually
            #z[i*10 + j] = z[i*10] * 0.1 * (10-j)
            sub = z[10] - z[30]
            z[i * 10 + j] = z[i * 10] - 0.1 * j * sub
    z = Variable(xp.asarray(z.reshape(100, 100, 1, 1), dtype=np.float32))

    with chainer.using_config('train', False):
        x = gen(z)
    x = chainer.cuda.to_cpu(x.data)

    x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
    _, _, H, W = x.shape
    x = x.reshape((10, 10, 1, H, W))
    x = x.transpose(0, 3, 1, 4, 2)
    x = x.reshape((10 * H, 10 * W))
    preview_dir = 'interpolate/preview'
    preview_path = preview_dir + '/phenomenon.png'
    if not os.path.exists(preview_dir):
        os.makedirs(preview_dir)
    Image.fromarray(x).save(preview_path)
def to_onehot(y, num_classes, label_smoothing_prob=0, use_cuda=False):
    """Convert indices into one-hot encoding.
    Args:
        y (chainer.Variable, int): Indices of labels.
            A tensor of size `[B, 1]`.
        num_classes (int): the number of classes
        label_smoothing_prob (float, optional): label smoothing
            coefficient (0 disables smoothing)
        use_cuda (bool, optional): if True, use GPUs
    Returns:
        y (chainer.Variable, float): A tensor of size
            `[B, 1, num_classes]`
    """
    batch_size = y.shape[0]
    y_onehot = np.eye(num_classes, dtype=np.float32)[
        y.data.reshape(batch_size).tolist()]
    y_onehot = Variable(y_onehot)
    if use_cuda:
        y_onehot.to_gpu()
    y_onehot = y_onehot.reshape(batch_size, 1, num_classes)

    # Label smoothing (applied to the one-hot targets, not the index tensor)
    if label_smoothing_prob > 0:
        y_onehot = y_onehot * (1 - label_smoothing_prob) + \
            label_smoothing_prob / num_classes

    # TODO: fix bugs
    # if y.volatile:
    #     y_onehot.volatile = True

    return y_onehot
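
A hedged usage sketch for to_onehot() (not from the source project; a batch of two labels out of four classes):

import numpy as np
from chainer import Variable

y = Variable(np.array([[2], [0]], dtype=np.int32))   # shape [B, 1]
y_onehot = to_onehot(y, num_classes=4)
print(y_onehot.shape)    # (2, 1, 4)
print(y_onehot.data)     # [[[0. 0. 1. 0.]]
                         #  [[1. 0. 0. 0.]]]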
Example #7
    def learning_based_method(self, net, word, z, top_n):
        '''
            Parameters:
                net (chainer.Chain) ... model
                word (str) ... input word for the model
                z (int) ... input attribute ID for the model
                top_n (int) ... number of top-N similar words to return
        '''
        try:
            x = self.word2vec[word].astype(np.float32)
        except KeyError:
            # the word is not in the vocabulary
            if word[0] == '(' and word[-1] == ')':  # (oov_word) -> oov_word
                word = word[1:-1]
            return [('({})'.format(word), 'n/a') for _ in range(top_n)]

        # To variable
        x = Variable(x.reshape((1, len(x))))
        z = Variable(np.array([[z]]).astype(np.int32))
        #z = Variable(z.reshape(1,len(z)))

        # Transform a word attribute z from x into y with reflection
        y = net.test(x, z)

        # Show top five similar words to y
        y = y.array[0]

        nearest_words = self.word2vec.similar_by_vector(
            y, top_n)  #self.word2vec.most_similar([y], [], 5)

        nearest_words = [(word[0], round(word[1], 4))
                         for word in nearest_words]
        return nearest_words
Example #8
    def predict(self, state, action):
        state_action = np.concatenate((state, action),
                                      axis=0).astype(np.float32)
        state_action = Variable(
            state_action.reshape((1, state_action.shape[0])))
        next_state = self.model(state_action)
        return next_state
Example #9
def fill_replay_buf(num_frames):
    average_reward = 0
    global frame
    #    import pdb; pdb.set_trace()
    episode_num = 0
    action = randomize_action(left, 1)
    old_health = 100
    old_ammo = 26
    for sframe in range(frame, frame + num_frames):
        reward = doom_game.make_action(action)
        terminal = doom_game.is_episode_finished()
        if terminal:
            doom_game.new_episode()
            terminal_pool[index - 1] = 1
            old_health = 100
            old_ammo = 26
            action = randomize_action(left, 1)
        state = doom_game.get_state()
        game_vars = state.game_variables
        new_health = game_vars[1]
        delta_health = new_health - old_health
        old_health = new_health
        new_ammo = game_vars[0]
        delta_ammo = new_ammo - old_ammo
        old_ammo = new_ammo
        reward += 0.05 * delta_health
        reward += 0.02 * delta_ammo

        train_image = cuda.to_gpu(
            (state.screen_buffer.astype(np.float32).transpose((2, 0, 1))),
            device=args.gpu)
        import pdb
        pdb.set_trace()
        #reward, terminal = game.process(screen)

        train_image = Variable(
            train_image.reshape((1, ) + train_image.shape) / 127.5 - 1,
            volatile=True)
        score = action_q(train_image, train=False)

        best_idx = int(F.argmax(score).data)

        # action = game.randomize_action(best, random_probability)
        action = randomize_action(actions[best_idx], random_probability)
        index = sframe % POOL_SIZE
        state_pool[index] = cuda.to_cpu(train_image.data)
        action_pool[index] = actions.index(action)
        reward_pool[index - 1] = reward
        average_reward = average_reward * 0.9 + reward * 0.1
        #if sframe % 100 == 0:
        # print(average_reward)
        terminal_pool[index - 1] = 0
    frame += num_frames
Example #10
File: evals.py  Project: ahclab/reflection
def get_xy_mirror_distance(net, word2vec, word, z_id):
    # To variable
    x = word2vec[word].astype(numpy.float32)
    x = Variable(x.reshape((1, len(x))))
    #z = Variable(z.reshape(1,len(z)))
    z = Variable(numpy.array([[z_id]]).astype(numpy.int32))

    # Transform a word attribute z from x into y with reflection
    y = net.test(x, z).array[0]
    x = x.array[0]
    a = net.embed_a(net.z1(z), x).array[0]
    c = net.embed_c(net.z1(z), x).array[0]

    # Calculate the distance between x/y and the mirror
    xd = get_mirror_distance(x, a, c)
    yd = get_mirror_distance(y, a, c)

    return xd, yd
Example #11
File: mnist.py  Project: masenov/bullet-nn
def mnist_train_0_batch(data, test, batch_size=64, nb_epochs=10):

    for epoch in range(nb_epochs):
        print("Current epoch: %d" % (epoch + 1))

        ## shuffle the dataset
        nb_data = len(data) - (len(data) % batch_size)
        shuffler = np.random.permutation(nb_data)

        for i in range(0, nb_data, batch_size):

            # clear or zero-out gradients
            model_0.cleargrads()

            # import subset of the data into numpy array with proper types
            x = np.array(data[shuffler[i:i + batch_size]][0]).astype(
                np.float32)
            y = np.array(data[shuffler[i:i + batch_size]][1]).astype(np.int32)

            # reshape for channel depth dimension and cast to chainer variable
            x = x.reshape(batch_size, 1, 28, 28)
            x = Variable(x)

            # evaluate data on model and backpropagate
            loss = foward_0(x, y)
            loss.backward()

            # update model parameters
            optimizer.update()

        ### evaluate on entire testing set ###
        print("Validation Set")
        # import data into numpy array with proper types
        x = np.array(test[:, ][0]).astype(np.float32)
        y = np.array(test[:, ][1]).astype(np.int32)

        # reshape and cast to chainer variable
        x = Variable(x.reshape(len(test), 1, 28, 28))

        pred = foward_0(x, None, predict=True)

        acc = (pred == y).mean()
        print("Accuracy : {} \nError Rate: {}".format(acc * 100,
                                                      (1 - acc) * 100))
Example #12
File: evals.py  Project: ahclab/reflection
def _get_nearest_words(net, word2vec, word, z_id, n_top, show=True):
    # To variable
    x = word2vec[word].astype(numpy.float32)
    x = Variable(x.reshape((1, len(x))))
    #z = Variable(z.reshape(1,len(z)))
    z = Variable(numpy.array([[z_id]]).astype(numpy.int32))

    # Transform a word attribute z from x into y with reflection
    y = net.test(x, z)

    # Show top five similar words to y
    y = y.array[0]
    n_nearest = max(n_top)
    nearest_words = word2vec.similar_by_vector(
        y, topn=n_nearest)  #word2vec.most_similar([y], [], n_nearest)

    if show:
        print(word, nearest_words)

    return nearest_words
Example #13
def play_using_saved_q(q_filepath,
                       num_episodes=1,
                       save_replay=False,
                       replay_filepath=None,
                       device=0):
    #d = int(device)
    #cuda.get_device(d).use()
    if save_replay and not replay_filepath:
        print("Error: please provide a filepath for replays")
    #saved_q = Q(width=640, height=480, latent_size=256, action_size=3)
    #saved_q = ControlYOLO(**{'pgrid_dims': [10, 8], 'bb_num': 1, 'num_classes': 10, 'drop_prob': 0.5})
    saved_q = YOLO(**{
        'pgrid_dims': [10, 8],
        'bb_num': 3,
        'num_classes': 3,
        'drop_prob': 0.5
    })
    #saved_q.to_gpu(device=d)
    #import pdb; pdb.set_trace()
    serializers.load_hdf5(q_filepath, saved_q)
    doom_game = gd.setup_game(show_window=False)
    for i in range(int(num_episodes)):
        doom_game.new_episode(replay_filepath + str(i) + "_rec.lmp")
        total_reward = 0
        ct = 0
        while not doom_game.is_episode_finished():
            ct += 1
            if ct % 10 == 0:
                print ct
            state = doom_game.get_state()
            #screen_buf = cuda.to_gpu((state.screen_buffer.astype(np.float32).transpose((2, 0, 1))), device=d)
            screen_buf = state.screen_buffer.astype(np.float32).transpose(
                (2, 0, 1))
            screen_buf = Variable(
                screen_buf.reshape((1, ) + screen_buf.shape) / 127.5 - 1,
                volatile=True)
            scores = saved_q(screen_buf, train=False)
            best_idx = int(F.argmax(scores).data)
            total_reward += doom_game.make_action(actions[best_idx])
        print("Total reward:", total_reward)
    doom_game.close()
Example #14
File: mnist.py  Project: masenov/bullet-nn
def mnist_train_0(data, test, nb_epochs=10):

    for epoch in range(nb_epochs):
        print("Current epoch: %d" % (epoch + 1))

        # clear gradient array
        model_0.cleargrads()

        # import subset of the data into numpy array with proper types
        subset = [i for i in range(500)]
        x = np.array(data[subset][0]).astype(np.float32)
        y = np.array(data[subset][1]).astype(np.int32)

        # reshape it for chainer and cast to chainer variable
        x = x.reshape(len(subset), 1, 28, 28)
        x = Variable(x)

        # evaluate data on model and backpropagate
        loss = foward_0(x, y)
        loss.backward()

        # update model parameters
        optimizer.update()

        ### evaluate on testing set ###
        # import data into numpy array with proper types
        subset = [i for i in range(100)]
        x = np.array(test[subset][0]).astype(np.float32)
        y = np.array(test[subset][1]).astype(np.int32)

        # reshape and cast to chainer variable
        x = Variable(x.reshape(len(subset), 1, 28, 28))

        # evaluate test data using the current network parameters
        pred = foward_0(x, None, predict=True)

        # calculate accuracy
        acc = (pred == y).mean()
        print("Accuracy : {} \nError Rate: {}".format(acc * 100,
                                                      (1 - acc) * 100))
Example #15
File: train.py  Project: dsanno/chainer-dqn
        action = None
        action_q = q.copy()
        action_q.reset_state()
        while True:
            if action is not None:
                game.play(action)

            pixmap = QPixmap.grabWindow(window_id, left, top, w, h)
            image = pixmap.toImage()
            bits = image.bits()
            bits.setsize(image.byteCount())
            screen = Image.fromarray(np.array(bits).reshape((h, w, 4))[:,:,2::-1])
            reward, terminal = game.process(screen)
            if reward is not None:
                train_image = xp.asarray(screen.resize((train_width, train_height))).astype(np.float32).transpose((2, 0, 1))
                train_image = Variable(train_image.reshape((1,) + train_image.shape) / 127.5 - 1, volatile=True)
                score = action_q(train_image, train=False)

                best = int(np.argmax(score.data))
                action = game.randomize_action(best, random_probability)
                print action, float(score.data[0][action]), best, float(score.data[0][best]), reward
                index = frame % POOL_SIZE
                state_pool[index] = cuda.to_cpu(train_image.data)
                action_pool[index] = action
                reward_pool[index - 1] = reward
                average_reward = average_reward * 0.9999 + reward * 0.0001
                print "average reward: ", average_reward
                if terminal:
                    terminal_pool[index - 1] = 1
                    if only_result:
                        i = index - 2
num = len(x)

x = Variable(x)
t = Variable(t)

model = Model()
optimizer = optimizers.Adam()
optimizer.setup(model)

#while(1):
for i in range(2000):
    #for j in range(num):
    model.cleargrads()
    y = model(x)
    #print(y.data)
    loss = F.mean_squared_error(y, t.reshape(num, 1))
    loss.backward()
    optimizer.update()
    print("loss:", loss.data)

test_path = "test.csv"
csv_file = open(test_path, "r", encoding="utf_8", errors="", newline="\n")
test_f = csv.reader(csv_file,
                    delimiter=",",
                    doublequote=True,
                    lineterminator="\r\n",
                    quotechar='"',
                    skipinitialspace=True)

test_x = []
#テストデータ変換
Example #17
class LSM():
    """
    A model that can be used in the style of a chainer model.
    """
    def __init__(self, *, dimension=2, learning_rate=0.1, define_by_run=False):
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.define_by_run = define_by_run

        if self.define_by_run:
            self.w = numpy.random.randn(self.dimension + 1)
            self.w = self.w.astype(numpy.float32)
            self.w = Variable(self.w.reshape(self.dimension + 1))
            self.w.cleargrad()
            if self.w.grad is None:
                self.grads = numpy.zeros([self.dimension + 1])
            else:
                self.grads = self.w.grad.reshape(self.dimension + 1)
        else:
            self.w = numpy.random.randn(self.dimension + 1)
            self.grads = numpy.zeros([self.dimension + 1])

    def __call__(self, *args):
        # error if too many arguments are given
        if (len(args) > 2):
            print("Please check parameter.")
        elif (len(args) > 0):
            # if only computing scores
            self.x = numpy.array(args[0])
            self.x = self.x.astype(numpy.float32)
            self.data = self.__score__()
            if self.define_by_run:
                pred_y = self.data
                self.data = self.data.data.reshape(self.data.data.shape[0])

            # if training as well
            if (len(args) > 1):
                self.y = numpy.array(args[1])
                self.y = self.y.astype(numpy.float32)
                if self.define_by_run:
                    self.J = func_J(Variable(self.y), pred_y)
                else:
                    self.error = (self.y - self.data)

        return self

    def __score__(self):
        """
        Estimated y for the given data points.
        """
        if self.define_by_run:
            scores = func_y(self.w, Variable(self.x), self.dimension)
        else:
            self.X = numpy.array([(x**numpy.ones([self.dimension + 1]))
                                  for x in self.x])
            self.X = self.X**numpy.arange(self.dimension + 1)
            scores = numpy.dot(self.X, self.w)

        return scores

    def zerograds(self):
        attr_self = [i for i in dir(self) if "__" not in i]
        if "x" in attr_self:
            del self.x
        if "y" in attr_self:
            del self.y
        if "X" in attr_self:
            del self.X
        if "data" in attr_self:
            del self.data
        if "error" in attr_self:
            del self.error

        if self.define_by_run:
            self.w.cleargrad()
            if self.w.grad is None:
                self.grads = numpy.zeros([self.dimension + 1])
            else:
                self.grads = self.w.grad.reshape(self.dimension + 1)
        else:
            self.grads = numpy.zeros([self.dimension + 1])

    def backward(self):
        if self.define_by_run:
            self.J.backward(retain_grad=True)
            self.grads = -self.w.grad.reshape(self.dimension + 1)
        else:
            self.grads = numpy.dot(self.error, self.X)
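
A hedged usage sketch for the LSM class in its NumPy mode (the explicit update w += learning_rate * grads is an assumption; the class only exposes the gradients and does not include an optimizer step):

import numpy

model = LSM(dimension=2, learning_rate=0.01, define_by_run=False)
x = numpy.linspace(-1.0, 1.0, 20)
y = 1.0 + 2.0 * x + 3.0 * x ** 2     # quadratic target

for _ in range(2000):
    model.zerograds()
    model(x, y)                      # forward pass, stores self.error
    model.backward()                 # grads = error . X
    model.w += model.learning_rate * model.grads   # assumed update rule

print(model.w)                       # should be close to [1, 2, 3]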
Example #18
def play_draw_and_record_yolo(yolo_filepath,
                              replay_filepath,
                              num_episodes=1,
                              device=0):
    d = int(device)
    cuda.get_device(d).use()

    yolo = YOLO(**{
        'pgrid_dims': [10, 8],
        'bb_num': 3,
        'num_classes': 3,
        'drop_prob': 0.5
    })
    yolo.to_gpu()
    serializers.load_hdf5(yolo_filepath, yolo)

    doom_game = gd.setup_game(show_window=False)
    for i in range(int(num_episodes)):
        doom_game.new_episode(replay_filepath + str(i) + "_rec.lmp")
        total_reward = 0
        while not doom_game.is_episode_finished():
            state = doom_game.get_state()
            screen_buf = cuda.to_gpu(
                (state.screen_buffer.astype(np.float32).transpose((2, 0, 1))),
                device=d)
            screen_buf = Variable(
                screen_buf.reshape((1, ) + screen_buf.shape) / 127.5 - 1,
                volatile=True)
            grid_var, scores = yolo.proposals_and_q(screen_buf, train=False)
            best_idx = int(F.argmax(scores).data)
            total_reward += doom_game.make_action(actions[best_idx])

            grid = cuda.to_cpu(grid_var.data[0])

            boxes = []
            base_img = doom_game.get_state().screen_buffer
            """
            for x, y in np.ndindex((10,8)):
                proposals = grid[x, y]
                class_probs = proposals[20:]
                best_class = class_probs.index(max(class_probs))
                for c in range(3):
                    conf_idx = c * 7
                    if proposals[conf_idx]: # >= 0.6:
                        scaled = yolo.scale_coords(proposals[c+1:c+6])
                        box = (scaled, x, y, best_class)
                        boxes.append(box)
            """
            scaled = yolo.scale_coords(
                np.array(
                    [0.1921875, 0.03125, 0.18540496, 0.3354102, 0.66666667]))
            boxes.append((scaled, 4, 4, 1))

            import pdb
            pdb.set_trace()
            # sort boxes by confidence
            for box in boxes:
                w = box[0][2]
                h = box[0][3]
                xcenter = box[0][0] - 320 + box[1] * 64
                xmin = int(round(xcenter - w / 2))
                xmax = int(round(xcenter + w / 2))
                ycenter = box[0][1] - 240 + box[2] * 60
                ymin = int(round(ycenter - h / 2))
                ymax = int(round(ycenter + h / 2))
                z_ = round(box[0][4])
                best_class = box[3]
                #import pdb; pdb.set_trace()
                # draw the bounding boxes
                for x_ in range(xmin, xmax + 1):
                    base_img[ymin, x_, best_class] = 255
                    base_img[ymax, x_, best_class] = 255
                for y_ in range(ymin, ymax + 1):
                    base_img[y_, xmin, best_class] = 255
                    base_img[y_, xmax, best_class] = 255
            import pdb
            pdb.set_trace()

        print("Total reward:", total_reward)
    doom_game.close()
Example #19
import numpy as np
import chainer
import chainer.links as L
from chainer import Variable
from chainer import serializers
from mnist import MnistModel

train, test = chainer.datasets.get_mnist()

# initialize the model with the trained parameters
model = L.Classifier(MnistModel())
serializers.load_npz('./output/model_final', model)
x, t = test[1]

x = Variable(x.reshape(1, 784), volatile='on')
y = model.predictor(x)
pred = np.argmax(y.data, axis=1)
print(y.data.flatten().tolist())
print("Acc: {}, Pred: {}".format(t, pred))
Example #20
def target(agent):
    print "started target thread."
    global frame, random_probability, average_reward
    try:
        thread.start_new_thread(train, ())
        next_clock = time.clock() + interval
        save_iter = 1000
        save_count = 0
        action = None
        action_q = q.copy()
        action_q.reset_state()
        while True:
            if action is not None:
                agent.send_action(action)

            screen = agent.receive_image()
            reward, terminal = agent.process(screen)
            if reward is not None:
                train_image = xp.asarray(
                    screen.resize(
                        (train_width,
                         train_height))).astype(np.float32).transpose(
                             (2, 0, 1))
                train_image = Variable(
                    train_image.reshape((1, ) + train_image.shape) / 127.5 - 1,
                    volatile=True)
                score = action_q(train_image, train=False)

                best = int(np.argmax(score.data))
                action = agent.randomize_action(best, random_probability)
                print action, float(score.data[0][action]), best, float(
                    score.data[0][best]), reward
                index = frame % POOL_SIZE
                state_pool[index] = cuda.to_cpu(train_image.data)
                action_pool[index] = action
                reward_pool[index - 1] = reward
                average_reward = average_reward * 0.9999 + reward * 0.0001
                print "average reward: ", average_reward
                if terminal:
                    terminal_pool[index - 1] = 1
                    action_q = q.copy()
                    action_q.reset_state()
                else:
                    terminal_pool[index - 1] = 0
                frame += 1
                save_iter -= 1
                random_probability *= random_reduction_rate
                if random_probability < min_random_probability:
                    random_probability = min_random_probability
            else:
                action = None

            if save_iter <= 0:
                print 'save: ', save_count
                serializers.save_hdf5(
                    '{0}_{1:03d}.model'.format(args_output, save_count), q)
                serializers.save_hdf5(
                    '{0}_{1:03d}.state'.format(args_output, save_count),
                    optimizer)
                save_iter = 10000
                save_count += 1
            current_clock = time.clock()
            wait = next_clock - current_clock
            print 'wait: ', wait
            if wait > 0:
                next_clock += interval
                time.sleep(wait)
            elif wait > -interval / 2:
                next_clock += interval
            else:
                next_clock = current_clock + interval
    except KeyboardInterrupt:
        pass
Example #21
    def update_core(self):
        # TIP: in experiments, n_critic = 5 gave the best results.
        gen_optimizer = self.get_optimizer('gen')
        critic_optimizer = self.get_optimizer('critic')
        xp = self.generator.xp

        for i in range(self.n_critic):
            # grab data
            batch = self.get_iterator('main').next()
            batchsize = len(batch)
            batch = self.converter(batch, self.device)
            real_data, real_label = batch
            real_label = Variable(self.onehot(batchsize, real_label))
            real_data = Variable(real_data) / 255.

            gen_label = self.onehot(batchsize,
                                    self.generator.random_label(batchsize))

            z = self.generator.make_input_z_with_given_label(
                batchsize, gen_label)

            # Generator
            gen_data = self.generator(z)

            # -1
            gen_data = gen_data.reshape(batchsize, -1)
            real_data = real_data.reshape(batchsize, -1)
            real_label = real_label.reshape(batchsize, -1)
            gen_label = gen_label.reshape(batchsize, -1)

            # Critic (Discriminator)
            critic_real = self.critic(F.concat((real_label, real_data),
                                               axis=1))
            critic_fake = self.critic(F.concat((gen_label, gen_data), axis=1))

            # Loss
            loss_gan = F.average(critic_fake - critic_real)
            std_x_real = xp.std(real_data.data, axis=0, keepdims=True)
            epsilon = xp.random.uniform(0., 1., real_data.data.shape).astype(
                np.float32)
            x_perturb = real_data + 0.5 * epsilon * std_x_real
            x_perturb = F.concat((gen_label, x_perturb), axis=1)

            grad, = chainer.grad([self.critic(x_perturb)], [x_perturb],
                                 enable_double_backprop=True)
            grad = F.sqrt(F.batch_l2_norm_squared(grad))

            loss_grad = self.l * F.mean_absolute_error(grad,
                                                       xp.ones_like(grad.data))

            critic_loss = loss_gan + loss_grad

            self.critic.cleargrads()
            critic_loss.backward()
            critic_optimizer.update()
            chainer.report({'critic_loss': critic_loss})
            chainer.report({'loss_grad': loss_grad})
            chainer.report({'loss_gan': loss_gan})
            if i == 0:
                gen_loss = F.average(-critic_fake)
                self.generator.cleargrads()
                gen_loss.backward()
                gen_optimizer.update()
                chainer.report({'gen_loss': gen_loss})
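
The gradient-penalty term above pulls the norm of the critic's gradient at perturbed real samples toward 1, using chainer.grad with double backprop. A hedged, self-contained sketch of just that term on a toy linear critic (the critic, batch size, and feature size are assumptions):

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import Variable

critic = L.Linear(8, 1)                                  # toy stand-in for the real critic
x_perturb = Variable(np.random.randn(4, 8).astype(np.float32))

grad, = chainer.grad([critic(x_perturb)], [x_perturb], enable_double_backprop=True)
grad_norm = F.sqrt(F.batch_l2_norm_squared(grad))        # per-sample gradient norm
loss_grad = F.mean_absolute_error(grad_norm, np.ones_like(grad_norm.data))
loss_grad.backward()                                     # differentiable w.r.t. the critic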
Example #22
    def evaluate(self):
        domain = ['in', 'truth', 'out']
        if self.eval_hook:
            self.eval_hook(self)

        for k, dataset in enumerate(['test', 'train']):
            batch = self._iterators[dataset].next()
            x_in, t_out = chainer.dataset.concat_examples(batch, self.device)
            x_in = Variable(x_in)  # original image
            t_out = Variable(
                t_out)  # corresponding translated image (ground truth)

            with chainer.using_config(
                    'train', False), chainer.function.no_backprop_mode():
                x_out = self._targets['dec_y'](
                    self._targets['enc_x'](x_in))  # translated image by NN

            ## unfold stack and apply softmax
            if self.args.class_num > 0 and self.args.stack > 0:
                x_in = x_in.reshape(x_in.shape[0] * self.args.stack,
                                    x_in.shape[1] // self.args.stack,
                                    x_in.shape[2], x_in.shape[3])
                x_out = F.softmax(
                    x_out.reshape(x_out.shape[0] * self.args.stack,
                                  x_out.shape[1] // self.args.stack,
                                  x_out.shape[2], x_out.shape[3]))
                t_out = t_out.reshape(t_out.shape[0] * self.args.stack,
                                      t_out.shape[1] // self.args.stack,
                                      t_out.shape[2], t_out.shape[3])
                #print(x_out.shape, t_out.shape)
                # select middle slices
                x_in = x_in[(self.args.stack // 2)::self.args.stack]
                x_out = x_out[(self.args.stack // 2)::self.args.stack]
                t_out = t_out[(self.args.stack // 2)::self.args.stack]

            if dataset == 'test':  # for test dataset, compute some statistics
                fig = plt.figure(figsize=(12, 6 * len(x_out)))
                gs = gridspec.GridSpec(2 * len(x_out),
                                       4,
                                       wspace=0.1,
                                       hspace=0.1)
                loss_rec_L1 = F.mean_absolute_error(x_out, t_out)
                loss_rec_L2 = F.mean_squared_error(x_out, t_out)
                loss_rec_CE = softmax_focalloss(x_out,
                                                t_out,
                                                gamma=self.args.focal_gamma,
                                                class_weight=self.class_weight)
                result = {
                    "myval/loss_L1": loss_rec_L1,
                    "myval/loss_L2": loss_rec_L2,
                    "myval/loss_CE": loss_rec_CE
                }

            ## iterate over batch
            for i, var in enumerate([x_in, t_out, x_out]):
                if i % 3 != 0 and self.args.class_num > 0:  # t_out, x_out
                    imgs = var2unit_img(var, 0, 1)  # softmax
                    #imgs[:,:,:,0] = 0 # class 0 => black  ######
                    #imgs = np.roll(imgs,1,axis=3)[:,:,:,:3]  ## R0B, show only 3 classes (-1,0,1)
                else:
                    imgs = var2unit_img(var)  # tanh
#                print(imgs.shape,np.min(imgs),np.max(imgs))
                for j in range(len(imgs)):
                    ax = fig.add_subplot(gs[j + k * len(x_out), i])
                    ax.set_title(dataset + "_" + domain[i], fontsize=8)
                    if (imgs[j].shape[2] == 3):  ## RGB
                        ax.imshow(imgs[j],
                                  interpolation='none',
                                  vmin=0,
                                  vmax=1)
                    elif (imgs[j].shape[2] >= 4):  ## categorical
                        cols = ['k', 'b', 'c', 'g', 'y', 'r', 'm', 'w'] * 5
                        cmap = colors.ListedColormap(cols)
                        im = np.argmax(imgs[j], axis=2)
                        norm = colors.BoundaryNorm(list(range(len(cols) + 1)),
                                                   cmap.N)
                        ax.imshow(im,
                                  interpolation='none',
                                  cmap=cmap,
                                  norm=norm)
                    else:
                        ax.imshow(imgs[j][:, :, -1],
                                  interpolation='none',
                                  cmap='gray',
                                  vmin=0,
                                  vmax=1)
                    ax.set_xticks([])
                    ax.set_yticks([])

            ## difference image
            if (x_out.shape[1] >= 4):  ## categorical
                eps = 1e-7
                # we assume softmax has already been applied to the input
                p = F.clip(x_out, x_min=eps, x_max=1 - eps)
                q = -F.clip(t_out, x_min=eps, x_max=1 - eps) * F.log(p)
                diff = F.sum(q * ((1 - p)**2), axis=1, keepdims=True)
                vmin = -1
                vmax = 1
            else:
                diff = (x_out - t_out)
                vmin = -0.1
                vmax = 0.1
            diff = diff.data.get().transpose(0, 2, 3, 1)
            for j in range(len(diff)):
                ax = fig.add_subplot(gs[j + k * len(x_out), 3])
                ax.imshow(diff[j][:, :, 0],
                          interpolation='none',
                          cmap='coolwarm',
                          vmin=vmin,
                          vmax=vmax)
                ax.set_xticks([])
                ax.set_yticks([])

        gs.tight_layout(fig)
        plt.savefig(os.path.join(self.vis_out,
                                 'count{:0>4}.jpg'.format(self.count)),
                    dpi=200)
        self.count += 1
        plt.close()

        return result
Example #23
                text = index_test[i + n - len(index_test)][0]
                label = index_test[i + n - len(index_test)][1]
                feature = index_test[i + n - len(index_test)][2]
            else:
                text = index_test[i + n][0]
                label = index_test[i + n][1]
                feature = index_test[i + n][2]
            Text.append(text)
            Label.append(label)
            Feature.append(feature)
        Text = np.array(Text, dtype="int32")
        Label = np.array(Label, dtype="int32")
        Feature = np.array(Feature, dtype="float32")

        Feature = np.mat(Feature)
        Feature = Feature.reshape(-1, 1)
        Feature = np.array(Feature)
        # print("feature vector = ", Feature)

        Feature = Variable(Feature)
        model.cleargrads()
        loss = model(Text, Label, Feature)
        #loss.backward()
        #optimizer.update()

        losses_test.append(loss.data)
    print(losses_test)
'''
    # Testing
    for i in range(0, len(index_test), BATCH_SIZE):
        Text = []
Example #24
        sum_gen_loss += loss_gen.data.get()

        if epoch % interval == 0 and batch == 0:
            serializers.save_npz('xy.model', gen_g_model)
            serializers.save_npz('yx.model', gen_f_model)

            for i in range(Ntest):
                black = (x_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype(
                    np.uint8)
                pylab.subplot(2, Ntest, 2 * i + 1)
                pylab.imshow(black)
                pylab.axis('off')
                pylab.savefig(image_xy + '/output_xy_%d.png' % epoch)

                x = Variable(cuda.to_gpu(x_test[i]))
                x = x.reshape(1, channels, width, height)
                with chainer.using_config('train', False):
                    x_y = gen_g_model(x)
                x_y = x_y.data.get()
                tmp = (np.clip(x_y[0, :, :, :] * 127.5 + 127.5, 0,
                               255)).transpose(1, 2, 0).astype(np.uint8)
                pylab.subplot(2, Ntest, 2 * i + 2)
                pylab.imshow(tmp)
                pylab.axis('off')
                pylab.savefig(image_yx + '/output_xy_%d.png' % epoch)

            pylab.close()

            for i in range(Ntest):
                white = (y_test[i] * 127.5 + 127.5).transpose(1, 2, 0).astype(
                    np.uint8)
Example #25
    def update_core(self):        
        opt_enc_x = self.get_optimizer('enc_x')
        opt_dec_y = self.get_optimizer('dec_y')
        opt_dis = self.get_optimizer('dis')

        ## image conversion
        batch = self.get_iterator('main').next()
        x_in, t_out = self.converter(batch, self.device)
        x_in = Variable(x_in)
        x_z = self.enc_x(add_noise(x_in, sigma=self.args.noise))
        x_out = self.dec_y(x_z)

        ## unfold stack and apply softmax
        if self.args.class_num>0 and self.args.stack>0:
            #x_out = F.concat([F.softmax(x_out[:,(st*self.args.class_num):((st+1)*self.args.class_num)]) for st in range(self.args.stack)])
            x_in = x_in.reshape(x_in.shape[0]*self.args.stack,x_in.shape[1]//self.args.stack,x_in.shape[2],x_in.shape[3])
            x_out = F.softmax(x_out.reshape(x_out.shape[0]*self.args.stack,x_out.shape[1]//self.args.stack,x_out.shape[2],x_out.shape[3]))
            t_out = t_out.reshape(t_out.shape[0]*self.args.stack,t_out.shape[1]//self.args.stack,t_out.shape[2],t_out.shape[3])

#        print(x_in.shape,x_out.shape, t_out.shape)

        loss_gen=0
        ## regularisation on the latent space
        if self.args.lambda_reg>0:
            loss_reg_enc_x = losses.loss_func_reg(x_z[-1],'l2') 
            loss_gen = loss_gen + self.args.lambda_reg * loss_reg_enc_x
            chainer.report({'loss_reg': loss_reg_enc_x}, self.enc_x)

        if self.args.lambda_dice>0:
            loss_dice = dice(x_out, t_out, class_weight=self.class_weight)
            loss_gen = loss_gen + self.args.lambda_dice * loss_dice
            chainer.report({'loss_dice': loss_dice}, self.dec_y)            

        if self.args.lambda_rec_ce>0:
            loss_rec_ce = softmax_focalloss(x_out, t_out, gamma=self.args.focal_gamma, class_weight=self.class_weight)
            # for st in range(self.args.stack):
            #     loss_rec_ce += softmax_focalloss(x_out[:,(st*self.args.stack):((st+1)*self.args.stack)], t_out[:,(st*self.args.stack):((st+1)*self.args.stack)])
            loss_gen = loss_gen + self.args.lambda_rec_ce*loss_rec_ce
            chainer.report({'loss_CE': loss_rec_ce}, self.dec_y)
        # reconstruction error
        if self.args.lambda_rec_l1>0:
            loss_rec_l1 = weighted_error(x_out, t_out,exponent=1,class_weight=self.class_weight)
            #loss_rec_l1 = F.mean_absolute_error(x_out, t_out)
            loss_gen = loss_gen + self.args.lambda_rec_l1*loss_rec_l1       
            chainer.report({'loss_L1': loss_rec_l1}, self.dec_y)
        if self.args.lambda_rec_l2>0:
            loss_rec_l2 = weighted_error(x_out, t_out,exponent=2,class_weight=self.class_weight)
            #loss_rec_l2 = F.mean_squared_error(x_out, t_out)
            loss_gen = loss_gen + self.args.lambda_rec_l2*loss_rec_l2
            chainer.report({'loss_L2': loss_rec_l2}, self.dec_y)

        # total variation
        if self.args.lambda_tv > 0:
            loss_tv = total_variation2(x_out, self.args.tv_tau)
            loss_gen = loss_gen + self.args.lambda_tv * loss_tv
            chainer.report({'loss_tv': loss_tv}, self.dec_y)

        # Adversarial loss
        if self.args.lambda_dis>0 and self.iteration >= self.args.dis_warmup:
            # stack again
            if self.args.class_num>0 and self.args.stack>0:
                #x_out = F.concat([F.softmax(x_out[:,(st*self.args.class_num):((st+1)*self.args.class_num)]) for st in range(self.args.stack)])
                x_in = x_in.reshape(x_in.shape[0]//self.args.stack,x_in.shape[1]*self.args.stack,x_in.shape[2],x_in.shape[3])
                x_out = x_out.reshape(x_out.shape[0]//self.args.stack,x_out.shape[1]*self.args.stack,x_out.shape[2],x_out.shape[3])
                t_out = t_out.reshape(t_out.shape[0]//self.args.stack,t_out.shape[1]*self.args.stack,t_out.shape[2],t_out.shape[3])

            x_in_out = F.concat([x_in,x_out])
            y_fake = self.dis(x_in_out)
            if self.args.dis_wgan:
                loss_adv = -F.average(y_fake)
            else:
                #batchsize,_,w,h = y_fake.data.shape
                #loss_dis = F.sum(F.softplus(-y_fake)) / batchsize / w / h
                loss_adv = self.loss_func_comp(y_fake,1.0,self.args.dis_jitter)
            chainer.report({'loss_dis': loss_adv}, self.dec_y)
            loss_gen = loss_gen + self.args.lambda_dis * loss_adv

        # update generator model
        self.enc_x.cleargrads()
        self.dec_y.cleargrads()
        loss_gen.backward()
        opt_enc_x.update(loss=loss_gen)
        opt_dec_y.update(loss=loss_gen)

        ## discriminator
        if self.args.lambda_dis>0 and self.iteration >= self.args.dis_warmup:
            x_in_out_copy = self._buffer.query(x_in_out.array)
            if self.args.dis_wgan: ## synthesised -, real +
                eps = self.xp.random.uniform(0, 1, size=len(batch)).astype(self.xp.float32)[:, None, None, None]
                loss_real = -F.average(self.dis(F.concat([x_in, t_out])))
                loss_fake = F.average(self.dis(x_in_out_copy))
                y_mid = eps * x_in_out + (1.0 - eps) * x_in_out_copy
                # gradient penalty
                gd, = chainer.grad([self.dis(y_mid)], [y_mid], enable_double_backprop=True)
                gd = F.sqrt(F.batch_l2_norm_squared(gd) + 1e-6)
                loss_dis_gp = F.mean_squared_error(gd, self.xp.ones_like(gd.data))                
                chainer.report({'loss_gp': self.args.lambda_wgan_gp * loss_dis_gp}, self.dis)
                loss_dis = (loss_fake + loss_real) * 0.5 + self.args.lambda_wgan_gp * loss_dis_gp
            else:
                loss_real = self.loss_func_comp(self.dis(F.concat([x_in, t_out])),1.0,self.args.dis_jitter)
                loss_fake = self.loss_func_comp(self.dis(x_in_out_copy),0.0,self.args.dis_jitter)
                ## mis-matched input-output pair should be discriminated as fake
                if self._buffer.num_imgs > 40 and self.args.lambda_mispair>0: 
                    f_in = self.xp.concatenate(random.sample(self._buffer.images, len(x_in)))
                    f_in = Variable(f_in[:,:x_in.shape[1],:,:])  # extract the first x_in channels of the concatenated [x_in,x_out]
                    loss_mispair = self.loss_func_comp(self.dis(F.concat([f_in,t_out])),0.0,self.args.dis_jitter)
                    chainer.report({'loss_mispair': loss_mispair}, self.dis)
                else:
                    loss_mispair = 0
                loss_dis = 0.5*(loss_fake + loss_real) + self.args.lambda_mispair * loss_mispair

            # common for discriminator
            chainer.report({'loss_fake': loss_fake}, self.dis)
            chainer.report({'loss_real': loss_real}, self.dis)
            self.dis.cleargrads()
            loss_dis.backward()
            opt_dis.update(loss=loss_dis)
Example #26
            pixmap = QPixmap.grabWindow(window_id, left, top, w, h)
            image = pixmap.toImage()
            bits = image.bits()
            bits.setsize(image.byteCount())
            screen = Image.fromarray(
                np.array(bits).reshape((h, w, 4))[:, :, 2::-1])
            reward, terminal = game.process(screen)
            logging.debug("reward={}, terminal={}".format(reward, terminal))
            if reward is not None:
                train_image = xp.asarray(
                    screen.resize(
                        (train_width,
                         train_height))).astype(np.float32).transpose(
                             (2, 0, 1))
                train_image = Variable(
                    train_image.reshape((1, ) + train_image.shape) / 127.5 - 1,
                    volatile=True)
                score = action_q(train_image, train=False)

                best = int(np.argmax(score.data))
                action = game.randomize_action(best, random_probability)
                #print action, float(score.data[0][action]), best, float(score.data[0][best]), reward
                index = frame % POOL_SIZE
                state_pool[index] = cuda.to_cpu(train_image.data)
                action_pool[index] = action
                reward_pool[index - 1] = reward
                average_reward = average_reward * 0.9999 + reward * 0.0001
                logging.debug("average reward: ", average_reward)
                if terminal:
                    terminal_pool[index - 1] = 1
                    if only_result:
Example #27
    def train(self, lossfun, n_epochs=100):
        print('Start training CycleGLO')
        losses = []
        for epoch in range(n_epochs):
            print(epoch)
            self.opt_g.new_epoch()
            self.opt_f.new_epoch()
            self.opt_zx.new_epoch()
            self.opt_zy.new_epoch()
            for i in range(len(self.dataset)):
                x = self.dataset[i][0]
                y = self.dataset[i][1]
                #print(x, y)
                x, y = Variable(x), Variable(y)
                #print(x.shape, y.shape)

                self.g.cleargrads()
                self.f.cleargrads()
                self.g.z.cleargrads()
                self.f.z.cleargrads()

                xy = self.g(self.zx[i])
                yx = self.f(self.zy[i])
                #print(xy, yx)
                yxy = self.g(yx.data)
                xyx = self.f(xy.data)
                #print(yxy, xyx)
                xy_copy = Variable(self.getAndUpdateBuffer(
                    'x', xy.data, epoch))
                yx_copy = Variable(self.getAndUpdateBuffer(
                    'y', yx.data, epoch))
                #print(yx_copy.shape)
                x_loss = lossfun(yx_copy, x.reshape((1, self.n_pixels)))
                y_loss = lossfun(xy_copy, y.reshape((1, self.n_pixels)))

                g_loss = lossfun(xy, y.reshape((1, self.n_pixels)))
                f_loss = lossfun(yx, x.reshape((1, self.n_pixels)))

                cycle_x_loss = lossfun(xyx, x.reshape((1, self.n_pixels)))
                cycle_y_loss = lossfun(yxy, y.reshape((1, self.n_pixels)))
                gen_loss = self.lambda2 * g_loss + self.lambda2 * f_loss + cycle_x_loss + cycle_y_loss

                if self.learning_rate_decay > 0 and epoch % self.learning_rate_interval == 0:
                    if self.opt_g.alpha > self.learning_rate_decay:
                        self.opt_g.alpha -= self.learning_rate_decay
                    if self.opt_f.alpha > self.learning_rate_decay:
                        self.opt_f.alpha -= self.learning_rate_decay

                x_loss.backward()
                y_loss.backward()
                self.opt_zx.update()
                self.opt_zy.update()

                #### Update
                gen_loss.backward()
                self.opt_g.update()
                self.opt_f.update()

                self.zx[i] = project_z_to_ball(self.zx[i] -
                                               0.1 * self.g.z.z.grad)
                self.zy[i] = project_z_to_ball(self.zy[i] -
                                               0.1 * self.f.z.z.grad)

                losses += [(x_loss, y_loss, g_loss, f_loss, cycle_x_loss,
                            cycle_y_loss, gen_loss)]

        print('done!')
        self.xrspace_mean = np.mean(self.zx, axis=0)
        self.xrspace_std = np.std(self.zx, axis=0)

        self.yrspace_mean = np.mean(self.zy, axis=0)
        self.yrspace_std = np.std(self.zy, axis=0)
        print(self.xrspace_mean, self.xrspace_std, self.yrspace_mean,
              self.yrspace_std)

        to_plot = [l[-1].data for l in losses]
        x_axis = list(range(n_epochs))
        plt.plot(to_plot)
        plt.title('Loss per epoch of CycleGAN')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        print('last loss:', to_plot[-1])
        plt.show()