def mini_batch(hcpevec):
    """Decode a batch of hcpe records into GPU-side ``Variable`` objects.

    Output buffers are allocated with one row per record in ``hcpevec`` and
    handed to ``cppshogi.hcpe_decode_with_value``, which is expected to fill
    them in place.

    Args:
        hcpevec: Sequence of hcpe records; only ``len()`` is used here before
            it is passed to the decoder.

    Returns:
        A 4-tuple ``(features1, features2, result, value)``. ``result`` and
        ``value`` are reshaped to ``(batch, 1)``. The decoded moves are not
        part of the return value.
    """
    batch_size = len(hcpevec)

    features1 = np.empty((batch_size, FEATURES1_NUM, 9, 9), dtype=np.float32)
    features2 = np.empty((batch_size, FEATURES2_NUM, 9, 9), dtype=np.float32)
    # ``move`` is only needed as an output buffer for the decoder call.
    move = np.empty(batch_size, dtype=np.int32)
    result = np.empty(batch_size, dtype=np.int32)
    value = np.empty(batch_size, dtype=np.float32)

    cppshogi.hcpe_decode_with_value(
        hcpevec, features1, features2, move, result, value
    )

    def as_gpu_variable(array):
        # Copy the host array to the GPU and wrap it for the network.
        return Variable(cuda.to_gpu(array))

    column_shape = (batch_size, 1)
    return (
        as_gpu_variable(features1),
        as_gpu_variable(features2),
        as_gpu_variable(result.reshape(column_shape)),
        as_gpu_variable(value.reshape(column_shape)),
    )
def __getitem__(self, idx):
    """Decode the single record at ``idx`` into per-sample arrays.

    Args:
        idx: Position of the record inside ``self.data``.

    Returns:
        A 6-tuple ``(feature1, feature2, move, result, z, value)`` where
        ``z = result - value + 0.5`` and ``move`` has been passed through
        ``np.int64``.
    """
    # Plain indexing would yield an np.void element, so a one-element
    # slice is handed to the decoder instead.
    record = self.data[idx:idx + 1]

    feature1 = np.empty((FEATURES1_NUM, 9, 9), dtype=np.float32)
    feature2 = np.empty((FEATURES2_NUM, 9, 9), dtype=np.float32)
    move = np.empty(1, dtype=np.int32)
    result = np.empty(1, dtype=np.float32)
    value = np.empty_like(result)

    cppshogi.hcpe_decode_with_value(
        record, feature1, feature2, move, result, value
    )

    z = (result - value) + 0.5
    return feature1, feature2, np.int64(move), result, z, value
def mini_batch(self, hcpevec):
    """Decode ``hcpevec`` into the instance buffers and return device tensors.

    The decoder writes into ``self.features1``, ``self.features2``,
    ``self.move``, ``self.result`` and ``self.value``. The returned tensors
    come from the ``self.torch_*`` attributes, which are presumably views of
    those same buffers (TODO confirm where they are created).

    Args:
        hcpevec: Batch of hcpe records passed straight to the decoder.

    Returns:
        A 6-tuple ``(features1, features2, move, result, z, value)`` of
        tensors moved to ``self.device``.
    """
    cppshogi.hcpe_decode_with_value(
        hcpevec,
        self.features1,
        self.features2,
        self.move,
        self.result,
        self.value,
    )

    # For aoba data, records without an evaluation carry value == -1.
    # A negative advantage did not look good, so the raw value is used as
    # the policy-gradient target, with 0.5 substituted for the sentinel.
    # (Earlier formulation: z = result - value + 0.5.)
    has_no_eval = self.value == -1
    z = np.where(has_no_eval, 0.5, self.value)

    target = self.device
    return (
        self.torch_features1.to(target),
        self.torch_features2.to(target),
        self.torch_move.to(target),
        self.torch_result.to(target),
        torch.tensor(z).to(target),
        self.torch_value.to(target),
    )
def mini_batch(hcpevec):
    """Decode a batch of hcpe records into tensors on the global ``device``.

    Fresh output buffers are allocated with one row per record and handed to
    ``cppshogi.hcpe_decode_with_value``, which is expected to fill them in
    place.

    Args:
        hcpevec: Sequence of hcpe records; only ``len()`` is used here before
            it is passed to the decoder.

    Returns:
        A 6-tuple ``(features1, features2, move, result, z, value)`` where
        ``move`` is converted to int64, ``result`` and ``value`` are reshaped
        to ``(batch, 1)``, and ``z = result - value + 0.5`` keeps the flat
        ``(batch,)`` shape.
    """
    batch_size = len(hcpevec)

    features1 = np.empty((batch_size, FEATURES1_NUM, 9, 9), dtype=np.float32)
    features2 = np.empty((batch_size, FEATURES2_NUM, 9, 9), dtype=np.float32)
    move = np.empty(batch_size, dtype=np.int32)
    result = np.empty(batch_size, dtype=np.float32)
    value = np.empty(batch_size, dtype=np.float32)

    cppshogi.hcpe_decode_with_value(
        hcpevec, features1, features2, move, result, value
    )

    z = result.astype(np.float32) - value + 0.5

    def on_device(array):
        # Build a tensor from the host array and move it to the target device.
        return torch.tensor(array).to(device)

    column_shape = (batch_size, 1)
    return (
        on_device(features1),
        on_device(features2),
        on_device(move.astype(np.int64)),
        on_device(result.reshape(column_shape)),
        on_device(z),
        on_device(value.reshape(column_shape)),
    )
# Script: load a saved PolicyValueNetwork, run it over every position in a
# test hcpe file, and print the two network outputs.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--test_data',
    type=str,
    default=r'H:\src\DeepLearningShogi\x64\Release_NoOpt\test.hcpe',
    help='test data file',
)
parser.add_argument(
    '--initmodel', '-m',
    default=r'H:\src\DeepLearningShogi\dlshogi\model_rl_val_wideresnet10_110_1',
    help='Initialize the model from given file',
)
args = parser.parse_args()

# Build the network on the GPU, then restore its weights from the npz file.
model = PolicyValueNetwork()
model.to_gpu()

print('Load model from', args.initmodel)
serializers.load_npz(args.initmodel, model)

# Read the whole test file as an array of hcpe records.
hcpevec = np.fromfile(args.test_data, dtype=HuffmanCodedPosAndEval)
num_positions = len(hcpevec)

# Output buffers for the decoder, one row per record.
features1 = np.empty((num_positions, FEATURES1_NUM, 9, 9), dtype=np.float32)
features2 = np.empty((num_positions, FEATURES2_NUM, 9, 9), dtype=np.float32)
move = np.empty(num_positions, dtype=np.int32)
result = np.empty(num_positions, dtype=np.int32)
value = np.empty(num_positions, dtype=np.float32)

cppshogi.hcpe_decode_with_value(
    hcpevec, features1, features2, move, result, value
)

x1 = Variable(cuda.to_gpu(features1))
x2 = Variable(cuda.to_gpu(features2))

# Forward pass with backprop disabled and the 'train' config set to False.
with chainer.no_backprop_mode(), chainer.using_config('train', False):
    y1, y2 = model(x1, x2)

# y1 is printed as returned; y2 is passed through a sigmoid first.
print(y1.data)
print(F.sigmoid(y2).data)