Example 1: creating the network and checking its two output heads
def test_create(self):
    # A single encoded position goes through the network, which should
    # return its two heads (policy and value).
    self.assertEqual(2, len(
        self.net(tf.convert_to_tensor(
            encoders.prepareForNetwork(
                [self.env.board], [self.env.isPlaying], [self.env.moveNeeded],
                [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0]],
                [self.env.selected]).reshape((-1, 24, 4))))))
    # Exercise a batch of two: the same board under different moveNeeded,
    # gamePhase and selected values.
    print(self.net(tf.convert_to_tensor(
        encoders.prepareForNetwork(
            [self.env.board, self.env.board],
            [self.env.isPlaying, self.env.isPlaying],
            [2, 1],
            [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0], 0],
            [self.env.selected, None]).reshape((-1, 24, 4)))))
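
The test methods in examples 1, 3, 6, 7 and 8 reference fixtures (self.env, self.net, self.random_gen) and bare module names (tf, np, encoders) that only exist on a surrounding unittest.TestCase and its imports; example 4 additionally assumes self.nnet and self.mcts, which are not sketched here. A minimal sketch of that scaffolding, with the environment class name purely hypothetical:

import unittest

import numpy as np
import tensorflow as tf

import Network
import configs
import encoders


class NetworkTest(unittest.TestCase):
    def setUp(self):
        # Hypothetical: 'GameEnv' stands in for the project's real
        # environment class, which must expose board, isPlaying, moveNeeded,
        # gamePhase, selected, makeMove and getValidMoves.
        from environment import GameEnv
        self.env = GameEnv()
        self.net = Network.get_net(configs.FILTERS, configs.HIDDEN_SIZE,
                                   configs.OUT_FILTERS, configs.NUM_ACTIONS,
                                   configs.INPUT_SIZE, None,
                                   configs.NUM_RESIDUAL)
        self.random_gen = np.random.default_rng(0)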
Example 2: a full training pass (load weights, compile, fit, save)
import os

import tensorflow as tf
from tensorflow import keras

import Network
import configs
import encoders


def train_net(in_path, out_path, train_data, tensorboard_path):
    # Pin the job to the first GPU and let TensorFlow grow memory on demand
    # instead of reserving it all up front.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    physical_devices = tf.config.list_physical_devices('GPU')
    for gpu_instance in physical_devices:
        tf.config.experimental.set_memory_growth(gpu_instance, True)
    tensorboard_callback = keras.callbacks.TensorBoard(tensorboard_path,
                                                       update_freq=10)
    current_network = Network.get_net(configs.FILTERS, configs.HIDDEN_SIZE,
                                      configs.OUT_FILTERS, configs.NUM_ACTIONS,
                                      configs.INPUT_SIZE, None,
                                      configs.NUM_RESIDUAL)
    current_network.load_weights(in_path)
    # Two-headed loss: cross-entropy on the policy logits, MSE on the value,
    # weighted equally. Naming the weights keeps them aligned with the heads.
    current_network.compile(optimizer='adam',
                            loss={
                                'policy_output':
                                    Network.cross_entropy_with_logits,
                                'value_output': 'mse'
                            },
                            loss_weights={'policy_output': 0.5,
                                          'value_output': 0.5},
                            metrics=['accuracy'])
    current_network.fit(encoders.prepareForNetwork(train_data[0],
                                                   train_data[1],
                                                   train_data[2],
                                                   train_data[3],
                                                   train_data[4]),
                        {
                            'policy_output': train_data[5],
                            'value_output': train_data[6]
                        },
                        epochs=configs.EPOCHS,
                        batch_size=configs.BATCH_SIZE,
                        callbacks=[tensorboard_callback])
    current_network.save_weights(out_path)
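
A call site for train_net would look roughly like the sketch below. The loader and all paths are placeholders; the seven-slot layout of train_data is read off the fit call above (slots 0 to 4 feed prepareForNetwork, slot 5 holds the policy targets, slot 6 the value targets).

# Hypothetical invocation; 'load_examples' and all paths are placeholders.
train_data = load_examples("data/selfplay.npz")
train_net("models/current_weights", "models/next_weights",
          train_data, "TensorBoard")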
Example 3: fitting the network on two hand-built positions
def test_fit(self):
    import Network
    self.net.compile(optimizer='adam',
                     loss={'policy_output': Network.cross_entropy_with_logits,
                           'value_output': 'mse'},
                     loss_weights={'policy_output': 0.5, 'value_output': 0.5},
                     metrics=['accuracy'])
    # Hand-built targets for a batch of two: one 24-way policy distribution
    # and one scalar value per position.
    policy_targets = np.array(
        [[0.25, 0., 0.25, 0., 0., 0., 0., 0., 0., 0., 0.5, 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., -1., 0.],
         [0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.1, 0., 0.4, 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., -1., 0.]])
    self.net.fit(
        encoders.prepareForNetwork([self.env.board, self.env.board],
                                   [self.env.isPlaying, self.env.isPlaying],
                                   [2, 1],
                                   [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0], 0],
                                   [self.env.selected, None]).reshape((-1, 24, 4)),
        {'policy_output': policy_targets,
         'value_output': np.array([[0.5], [-0.25]])},
        epochs=4,
        batch_size=2)
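
Network.cross_entropy_with_logits is referenced here and in example 2 but never shown. A plausible minimal definition, assuming the policy head emits raw logits, is sketched below; the project's actual loss may well differ, for instance in how it treats the -1. entries in the targets above.

import tensorflow as tf

def cross_entropy_with_logits(y_true, y_pred):
    # Softmax cross-entropy computed directly on the raw policy logits.
    return tf.nn.softmax_cross_entropy_with_logits(labels=y_true,
                                                   logits=y_pred)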
Example 4: fitting the network on MCTS self-play data
def test_fit_generated(self):
    from tensorflow import keras
    # Replay buffer filled by self-play: one row per position. The column
    # order (board, isPlaying, selected, gamePhase, moveNeeded, policy,
    # value) is inferred from the prepareForNetwork call below.
    memory = np.zeros((48000, 7), dtype=object)
    val = mp.Value("L")  # multiprocessing counter of filled rows
    self.mcts.generatePlay(memory, self.nnet, val, 1, 1)
    policy_out = np.zeros((val.value, 24), dtype=np.float32)
    board_in = np.zeros((val.value, 8, 3), dtype=np.float32)
    for idx in range(val.value):
        policy_out[idx], board_in[idx] = memory[idx, 5], memory[idx, 0]
    tensorboard_callback = keras.callbacks.TensorBoard("TensorBoard",
                                                       update_freq=2,
                                                       profile_batch=0)
    self.nnet.fit(
        encoders.prepareForNetwork(board_in, memory[:val.value, 1],
                                   memory[:val.value, 4],
                                   memory[:val.value, 3],
                                   memory[:val.value, 2]),
        {'policy_output': policy_out,
         'value_output': memory[:val.value, 6].astype(np.float32)},
        epochs=5, batch_size=128, callbacks=[tensorboard_callback])
    self.nnet.save("models/test_save")
Example 5: fetching priors and value for an MCTS node
def setValAndPriors(self, nnet):
    # 'convert_to_tensor' and 'softmax' are assumed to be
    # tf.convert_to_tensor and tf.nn.softmax imported at module level.
    terminal = self.is_terminal_node()
    if terminal == 0:
        # Non-terminal node: ask the network for raw policy logits and a
        # value estimate of this state.
        self.priors, val = nnet(
            convert_to_tensor(
                encoders.prepareForNetwork(
                    [self.state[0]], [self.state[1]], [self.state[3]],
                    [self.state[2][1 if self.state[1] == 1 else 0]],
                    [self.state[7]])))
        # Bury the logits of invalid moves at -100. so the softmax assigns
        # them effectively zero probability.
        mask = np.ones(self.priors.shape, dtype=bool)
        mask[0, self.valid_moves] = False
        self.priors = np.array(self.priors)
        self.priors[mask] = -100.
        self.priors = softmax(convert_to_tensor(self.priors)).numpy()
    else:
        # Terminal node: an outcome of 2 maps to value 0, any other outcome
        # is used as the value directly; no priors are needed.
        val = 0 if terminal == 2 else terminal
        self.priors = np.zeros((1, 24))
    return float(val)
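
The masking idiom above is worth isolating. A tiny standalone demonstration with made-up logits and valid moves shows why writing -100. into the masked positions before the softmax leaves invalid moves with effectively zero probability:

import numpy as np
import tensorflow as tf

logits = np.array([[1.0, 2.0, 0.5, -0.5]], dtype=np.float32)
valid_moves = [1, 3]                      # made-up valid actions
mask = np.ones(logits.shape, dtype=bool)
mask[0, valid_moves] = False              # keep only the valid columns
logits[mask] = -100.                      # bury everything else
probs = tf.nn.softmax(tf.convert_to_tensor(logits)).numpy()
print(probs)  # ~[[0., 0.92, 0., 0.08]]: mass only on indices 1 and 3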
Example 6: the encoder records the selected piece
def test_selected_reshape(self):
    # Play random valid moves until a selection is required
    # (moveNeeded == 2), then check the marker the encoder writes at
    # position [0, 4, 0, 0].
    while self.env.moveNeeded != 2:
        self.env.makeMove(self.random_gen.choice(self.env.getValidMoves()))
    self.assertEqual(
        2,
        encoders.prepareForNetwork([self.env.board], [self.env.isPlaying],
                                   [self.env.moveNeeded],
                                   [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0]],
                                   [self.env.selected])[0, 4, 0, 0])
Example 7: the encoder records an occupied point
def test_reshape(self):
    # After one move on point 0, the encoder should flag that point in the
    # second feature plane.
    self.env.makeMove(0)
    self.assertEqual(1, encoders.prepareForNetwork(
        [self.env.board], [self.env.isPlaying], [self.env.moveNeeded],
        [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0]],
        [self.env.selected])[0, 0, 0, 1])
Example 8: the encoder's output shape
def test_shape(self):
    # One position encodes to (1, 8, 3, 4): a batch of one, an 8 x 3 board
    # grid and 4 feature planes.
    self.assertEqual((1, 8, 3, 4),
                     encoders.prepareForNetwork(
                         [self.env.board], [self.env.isPlaying],
                         [self.env.moveNeeded],
                         [self.env.gamePhase[1 if self.env.isPlaying == 1 else 0]],
                         [self.env.selected]).shape)
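
Read together, examples 6 through 8 pin down the observable behaviour of encoders.prepareForNetwork as these tests see it (an inference from the assertions, not the module's documented contract): five parallel lists of length N go in (boards, players to move, required move types, game phases, selections), and a float array of shape (N, 8, 3, 4) comes out, which callers either feed to the network directly or flatten to (N, 24, 4) as in examples 1 and 3.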