def mini_batch(hcpevec):
    """Decode a batch of records and wrap the outputs as GPU Variables.

    Args:
        hcpevec: Batch of packed position records. It is passed unchanged
            as the first argument of ``cppshogi.hcpe_decode_with_value``
            and its length sets the batch dimension of every buffer.

    Returns:
        A 4-tuple ``(features1, features2, result, value)`` of
        ``Variable`` objects created from ``cuda.to_gpu`` arrays.
        ``result`` and ``value`` are reshaped to ``(batch, 1)``. The
        decoded ``move`` buffer is not part of the return value.
    """
    batch_size = len(hcpevec)
    column = (batch_size, 1)

    # Output buffers handed to the decoder
    # (presumably filled in place - confirm against cppshogi).
    features1 = np.empty((batch_size, FEATURES1_NUM, 9, 9), dtype=np.float32)
    features2 = np.empty((batch_size, FEATURES2_NUM, 9, 9), dtype=np.float32)
    move = np.empty(batch_size, dtype=np.int32)
    result = np.empty(batch_size, dtype=np.int32)
    value = np.empty(batch_size, dtype=np.float32)

    cppshogi.hcpe_decode_with_value(
        hcpevec, features1, features2, move, result, value
    )

    gpu_features1 = Variable(cuda.to_gpu(features1))
    gpu_features2 = Variable(cuda.to_gpu(features2))
    gpu_result = Variable(cuda.to_gpu(result.reshape(column)))
    gpu_value = Variable(cuda.to_gpu(value.reshape(column)))
    return (gpu_features1, gpu_features2, gpu_result, gpu_value)
# Example no. 2
# 0
    def __getitem__(self, idx):
        """Decode the record at position ``idx`` into per-sample arrays.

        Args:
            idx: Position of the record inside ``self.data``; it is used
                as ``self.data[idx:idx + 1]``.

        Returns:
            A tuple ``(feature1, feature2, move, result, z, value)``.
            ``move`` is passed through ``np.int64`` and ``z`` is
            ``result - value + 0.5``.
        """
        # Take a one-element slice instead of indexing: extracting a single
        # element the ordinary way would yield an np.void object.
        record = self.data[idx:idx + 1]

        planes1 = np.empty((FEATURES1_NUM, 9, 9), dtype=np.float32)
        planes2 = np.empty((FEATURES2_NUM, 9, 9), dtype=np.float32)
        move_buf = np.empty(1, dtype=np.int32)
        outcome = np.empty(1, dtype=np.float32)
        evaluation = np.empty(1, dtype=np.float32)

        cppshogi.hcpe_decode_with_value(
            record, planes1, planes2, move_buf, outcome, evaluation
        )

        z = outcome - evaluation + 0.5
        move_label = np.int64(move_buf)

        return planes1, planes2, move_label, outcome, z, evaluation
# Example no. 3
# 0
    def mini_batch(self, hcpevec):
        """Decode ``hcpevec`` into the instance buffers and return tensors.

        Args:
            hcpevec: Batch of packed position records, forwarded as the
                first argument of ``cppshogi.hcpe_decode_with_value``.

        Returns:
            A tuple ``(features1, features2, move, result, z, value)`` of
            tensors moved to ``self.device``. ``z`` equals ``self.value``
            with every ``-1`` entry replaced by ``0.5``.

        Note:
            The ``self.torch_*`` tensors are returned without being rebuilt
            here; presumably they are views over the ``self.*`` NumPy
            buffers passed to the decoder - confirm where they are created.
        """
        cppshogi.hcpe_decode_with_value(
            hcpevec,
            self.features1, self.features2,
            self.move, self.result, self.value,
        )

        # For aoba data that has no evaluation value, value is set to -1.
        # A negative advantage does not look desirable, so the value itself
        # is used as the policy-gradient objective term.
        # (Disabled alternative: z = self.result - self.value + 0.5)
        no_evaluation = self.value == -1
        z = np.where(no_evaluation, 0.5, self.value)

        target = self.device
        host_tensors = (
            self.torch_features1,
            self.torch_features2,
            self.torch_move,
            self.torch_result,
            torch.tensor(z),
            self.torch_value,
        )
        return tuple(tensor.to(target) for tensor in host_tensors)
def mini_batch(hcpevec):
    """Decode a batch of records into tensors on the module-level ``device``.

    Args:
        hcpevec: Batch of packed position records. It is passed unchanged
            as the first argument of ``cppshogi.hcpe_decode_with_value``
            and its length sets the batch dimension of every buffer.

    Returns:
        A tuple ``(features1, features2, move, result, z, value)`` of
        tensors. ``move`` is converted to int64, ``result`` and ``value``
        are reshaped to ``(batch, 1)``, and ``z`` is the 1-D array
        ``result - value + 0.5``.
    """
    batch_size = len(hcpevec)
    column = (batch_size, 1)

    # Output buffers handed to the decoder
    # (presumably filled in place - confirm against cppshogi).
    features1 = np.empty((batch_size, FEATURES1_NUM, 9, 9), dtype=np.float32)
    features2 = np.empty((batch_size, FEATURES2_NUM, 9, 9), dtype=np.float32)
    move = np.empty(batch_size, dtype=np.int32)
    result = np.empty(batch_size, dtype=np.float32)
    value = np.empty(batch_size, dtype=np.float32)

    cppshogi.hcpe_decode_with_value(
        hcpevec, features1, features2, move, result, value
    )

    # Computed on the flat arrays, before result/value are reshaped below.
    z = result.astype(np.float32) - value + 0.5

    t_features1 = torch.tensor(features1).to(device)
    t_features2 = torch.tensor(features2).to(device)
    t_move = torch.tensor(move.astype(np.int64)).to(device)
    t_result = torch.tensor(result.reshape(column)).to(device)
    t_z = torch.tensor(z).to(device)
    t_value = torch.tensor(value.reshape(column)).to(device)
    return (t_features1, t_features2, t_move, t_result, t_z, t_value)
# Example no. 5
# 0
# Evaluation script: load a trained model, decode a test file and print the
# two network outputs for every position in it.

# --- Command-line options -------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument(
    '--test_data',
    type=str,
    default=r'H:\src\DeepLearningShogi\x64\Release_NoOpt\test.hcpe',
    help='test data file',
)
parser.add_argument(
    '--initmodel', '-m',
    default=r'H:\src\DeepLearningShogi\dlshogi\model_rl_val_wideresnet10_110_1',
    help='Initialize the model from given file',
)
args = parser.parse_args()

# --- Model ----------------------------------------------------------------
model = PolicyValueNetwork()
model.to_gpu()

print('Load model from', args.initmodel)
serializers.load_npz(args.initmodel, model)

# --- Test data ------------------------------------------------------------
hcpevec = np.fromfile(args.test_data, dtype=HuffmanCodedPosAndEval)
num_positions = len(hcpevec)

# Output buffers for the decoder. move, result and value are required by
# the call signature but are not used again in this script.
features1 = np.empty((num_positions, FEATURES1_NUM, 9, 9), dtype=np.float32)
features2 = np.empty((num_positions, FEATURES2_NUM, 9, 9), dtype=np.float32)
move = np.empty(num_positions, dtype=np.int32)
result = np.empty(num_positions, dtype=np.int32)
value = np.empty(num_positions, dtype=np.float32)

cppshogi.hcpe_decode_with_value(
    hcpevec, features1, features2, move, result, value
)

x1 = Variable(cuda.to_gpu(features1))
x2 = Variable(cuda.to_gpu(features2))

# --- Inference ------------------------------------------------------------
# Run the forward pass with backprop disabled and the 'train' config off.
with chainer.no_backprop_mode(), chainer.using_config('train', False):
    y1, y2 = model(x1, x2)

print(y1.data)
# The second output is printed after a sigmoid is applied to it.
print(F.sigmoid(y2).data)