Example #1
    def __call__(self, *args):
        # The last argument is the target label; everything before it
        # is input to the predictor.
        assert len(args) >= 2
        x = args[:-1]
        t = args[-1]
        # The predictor returns a main output and an auxiliary output;
        # both are penalized against the same labels.
        self.y, self.y_aux = self.predictor(*x)
        self.loss = S.softmax_cross_entropy(self.y, t)
        self.loss += S.softmax_cross_entropy(self.y_aux, t)
        return self.loss
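
This wrapper follows the same pattern as Chainer's L.Classifier, extended with an auxiliary head whose loss is added to the main one. A minimal runnable sketch of the idea, with a made-up two-headed predictor (TwoHeadPredictor and its layer names are illustrative, not from the source):

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

class TwoHeadPredictor(chainer.Chain):
    # Hypothetical predictor with a main and an auxiliary classifier head.
    def __init__(self, n_out=10):
        super(TwoHeadPredictor, self).__init__()
        with self.init_scope():
            self.main = L.Linear(None, n_out)
            self.aux = L.Linear(None, n_out)

    def __call__(self, x):
        return self.main(x), self.aux(x)

x = np.random.rand(8, 32).astype(np.float32)
t = np.random.randint(0, 10, size=8).astype(np.int32)
y, y_aux = TwoHeadPredictor()(x)
# Both heads are penalized against the same labels, as above.
loss = F.softmax_cross_entropy(y, t) + F.softmax_cross_entropy(y_aux, t)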
Example #2
    def train(self, data, target, batch_size=100, epoch=5, test_size=0.3, report_interval_epoch=1):
        '''
        Train the model.
        '''
        dp = DataProcessor()
        dp.set_normalization_params(data)
        self.resource.save_normalization_params(dp.means, dp.stds)
        _data = dp.format_x(data)
        _target = dp.format_y(target)
        # Split the data into training and test sets
        train_x, test_x, train_y, test_y = train_test_split(_data, _target, test_size=test_size)

        optimizer = chainer.optimizers.Adam()
        optimizer.use_cleargrads()
        optimizer.setup(self.model)
        loss = softmax_cross_entropy.softmax_cross_entropy  # loss function handed to optimizer.update
        for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size, epoch):
            predicted = self.model(x_batch)
            optimizer.update(loss, predicted, y_batch)
            if epoch_end:
                # At the end of each epoch, report train/test accuracy
                # and checkpoint the model.
                train_acc = accuracy.accuracy(predicted, y_batch)
                predicted_to_test = self.model(test_x)
                test_acc = accuracy.accuracy(predicted_to_test, test_y)
                print("train accuracy={}, test accuracy={}".format(train_acc.data, test_acc.data))
                self.resource.save_model(self.model)
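
A note on the update call above: Chainer's Optimizer.update also accepts a loss function plus its arguments, in which case it clears the gradients, evaluates the loss, runs the backward pass, and applies one optimization step in a single call. A minimal sketch of that pattern with a toy model (all data here is made up):

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

model = L.Linear(4, 3)  # toy model: 4 features -> 3 classes
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

x_batch = np.random.rand(16, 4).astype(np.float32)
y_batch = np.random.randint(0, 3, size=16).astype(np.int32)

# One call: cleargrads -> loss -> backward -> parameter update
optimizer.update(F.softmax_cross_entropy, model(x_batch), y_batch)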
Example #3
def forward(model, params, vocab, inv_vocab, src, target, mode, batch_size):

    state = initial_state(xp, batch_size, params.hidden_size)

    model.reset()

    src_b, inter_b, target_b = fill_batch_to_fixed_size(
        params, vocab, src, target)
    loss = xp.zeros((), dtype=xp.float32)
    dropout_ratio = params.dropout if isinstance(params.dropout,
                                                 float) else 0.0

    for n in range(params.numfold_enc):
        xb = chainer.Variable(src_b[:, n])
        ib = chainer.Variable(xp.zeros((batch_size), dtype=xp.int32))

        if mode == 'training':
            # Apply dropout only while training, and only when a
            # non-zero ratio was configured.
            state = model.encode(xb,
                                 ib,
                                 state,
                                 dropout_flag=dropout_ratio > 0.0,
                                 dropout_ratio=dropout_ratio)
        else:
            state = model.encode(xb, ib, state)

    if mode == 'training' or mode == 'validating':

        for n in range(params.numfold_dec):
            xb = chainer.Variable(
                xp.zeros((batch_size, params.input_size), dtype=xp.float32))
            ib = chainer.Variable(
                xp.array([inter_b[i][n] for i in range(batch_size)],
                         dtype=xp.int32))
            y, state = model.decode(xb, ib, state, batch_size, xp)
            t = chainer.Variable(
                xp.array([target_b[i][n] for i in range(batch_size)],
                         dtype=xp.int32))
            loss = loss + softmax_cross_entropy(y, t)
        return loss

    else:  # mode == 'test-on-train'

        words = [vocab['BOS']] * batch_size
        output = [''] * batch_size
        for n in range(params.numfold_dec):
            xb = chainer.Variable(
                xp.zeros((batch_size, params.input_size), dtype=xp.float32))
            ib = chainer.Variable(xp.array(words, dtype=xp.int32))
            y, state = model.decode(xb, ib, state, batch_size, xp)
            # Greedy decoding: feed the argmax token back in as the
            # next step's input.
            words = y.data.argmax(1)
            for i in range(batch_size):
                # Stop extending a sentence once it has emitted EOS.
                if output[i].endswith('EOS'):
                    continue
                output[i] = output[i] + ' ' + inv_vocab[int(words[i])]

        for i in range(batch_size):
            output[i] = output[i].replace('EOS', '').strip()
        return output
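
The teacher-forced branch above accumulates one softmax_cross_entropy term per decoder timestep into a single scalar, and that scalar is what gets backpropagated. The accumulation pattern in isolation, with toy shapes standing in for the real model:

import numpy as np
import chainer.functions as F

batch_size, num_steps, vocab_size = 4, 5, 100
loss = 0
for n in range(num_steps):
    # y would come from model.decode(); random logits stand in here.
    y = np.random.randn(batch_size, vocab_size).astype(np.float32)
    t = np.random.randint(0, vocab_size, size=batch_size).astype(np.int32)
    loss = loss + F.softmax_cross_entropy(y, t)  # mean loss over the batch
print(loss.data)  # one scalar for the whole sequence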
Example #4
    def calc_loss(self, y, t):
        return sce.softmax_cross_entropy(y, t)
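
Shapes and dtypes matter here: softmax_cross_entropy expects raw pre-softmax logits of shape (N, C) as float32 and class indices of shape (N,) as int32, and by default returns the mean loss over the batch (labels equal to -1 are skipped via the ignore_label argument). A quick check with hand-written values:

import numpy as np
import chainer.functions as F

y = np.array([[2.0, 0.5, -1.0],
              [0.1, 0.2, 3.0]], dtype=np.float32)  # logits, shape (2, 3)
t = np.array([0, 2], dtype=np.int32)               # true classes, shape (2,)
loss = F.softmax_cross_entropy(y, t)
print(loss.data)  # mean negative log-likelihood of the two samples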
Example #5
def main():
    # Set the number of sets
    parser = argparse.ArgumentParser(description='IaGo:')
    parser.add_argument('--set',
                        '-s',
                        type=int,
                        default=10000,
                        help='Number of game sets played to train')
    args = parser.parse_args()
    N = 32  # half the number of games per set (2 * N games are played)

    # Model definition
    model1 = network.SLPolicy()
    serializers.load_npz("../models/RL/model0.npz", model1)
    optimizer = optimizers.Adam(alpha=0.0005)
    optimizer.setup(model1)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
    #serializers.load_npz("./backup/rl_optimizer.npz", optimizer)
    # REINFORCE algorithm

    for set in tqdm(range(0, args.set)):
        # Randomly choose a competitor model from the reinforced models
        model2 = network.SLPolicy()
        model2_path = np.random.choice(glob.glob("../models/RL/model*.npz"))
        print(model2_path)
        serializers.load_npz(model2_path, model2)

        result = 0
        state_seq, action_seq, reward_seq = [], [], []
        for i in tqdm(range(2 * N)):
            game = rl_self_play.Game(model1, model2)
            if i % 2 == 1:
                # Swap first and second player every other game by
                # pre-playing one of the four opening moves
                pos = random.choice([[2, 4], [3, 5], [4, 2], [5, 3]])
                game.state[pos[0], pos[1]] = 2
            states, actions, judge = game()
            rewards = [judge] * len(states)
            state_seq += states
            action_seq += actions
            reward_seq += rewards
            if judge == 1:
                result += 1

        # Update the model with REINFORCE
        x = np.array(state_seq)
        # Two feature planes: own stones (== 1) and opponent stones (== 2)
        x = np.stack([x == 1, x == 2], axis=0).astype(np.float32)
        x = chainer.Variable(x.transpose(1, 0, 2, 3))  # to (N, C, H, W)
        y = chainer.Variable(np.array(action_seq).astype(np.int32))
        r = chainer.Variable(np.array(reward_seq).astype(np.float32))
        pred = model1(x)
        # Per-sample loss so each move can be weighted by its reward
        c = softmax_cross_entropy(pred, y, reduce="no")
        model1.cleargrads()
        loss = F.mean(c * r)
        loss.backward()
        optimizer.update()
        print("Set:" + str(set) + ", Result:" + str(result / (2 * N)) +
              ", Loss:" + str(loss.data))
        with open("./log_test.txt", "a") as f:
            f.write(str(result / (2 * N)) + ", \n")

        model = copy.deepcopy(model1)
        #model.to_cpu()
        #serializers.save_npz("./backup/model"+str(set)+".npz", model)
        #serializers.save_npz("./backup/optimizer"+str(set)+".npz", optimizer)

        if (set + 1) % 500 == 0:
            model = copy.deepcopy(model1)
            #model.to_cpu()
            serializers.save_npz(
                "../models/RL/model" + str((set + 1) // 500) + ".npz", model)
            serializers.save_npz("../models/rl_optimizer.npz", optimizer)
Example #6
def mask_loss(score, mask_gt):
    # Pixel-wise cross entropy between predicted mask scores and the
    # ground-truth mask
    loss = softmax_cross_entropy.softmax_cross_entropy(score, mask_gt)
    return loss
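
This works because softmax_cross_entropy also accepts spatial inputs: logits of shape (N, C, H, W) against integer masks of shape (N, H, W), averaging over all pixels by default. For example:

import numpy as np
import chainer.functions as F

score = np.random.randn(2, 2, 8, 8).astype(np.float32)  # (N, C, H, W) logits
mask_gt = np.random.randint(0, 2, size=(2, 8, 8)).astype(np.int32)  # (N, H, W)
loss = F.softmax_cross_entropy(score, mask_gt)  # scalar mean over all pixels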
def chainer_softmax_cross_entropy(*args):
    # Convert the incoming arguments to Chainer-compatible arrays first
    args = chainer_converter(args)
    return softmax_cross_entropy(*args)
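
chainer_converter is not shown in the source; a hypothetical stand-in that coerces arbitrary array-likes into dtypes Chainer accepts might look like this (purely illustrative):

import numpy as np

def chainer_converter(args):
    # Hypothetical helper: integer inputs become int32 labels,
    # everything else becomes float32 logits.
    converted = []
    for a in args:
        a = np.asarray(a)
        if np.issubdtype(a.dtype, np.integer):
            converted.append(a.astype(np.int32))
        else:
            converted.append(a.astype(np.float32))
    return converted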