class Actor:
    """Chooses moves for a game from the output of a neural network.

    The actor owns a ``Network`` whose layer sizes are
    ``game.state_size(format)``, the requested hidden ``layers`` and
    ``game.num_possible_moves()``, with a softmax output function. Moves are
    picked from ``game.get_moves(state)`` using the network's prediction for
    the formatted state.
    """

    def __init__(self, game, layers=None, checkpoint=None, format='one_hot',
                 optimizer='adam'):
        """Build the underlying network and optionally restore a checkpoint.

        :param game: Game object; must provide ``state_size``,
            ``num_possible_moves``, ``get_moves`` and ``format_for_nn``.
        :param layers: Sizes of the hidden layers. ``None`` (the default)
            means no hidden layers.
        :param checkpoint: If truthy, passed to :meth:`load_checkpoint` once
            the network has been built.
        :param format: State format forwarded to ``game.state_size`` and
            ``game.format_for_nn``.
        :param optimizer: Optimizer identifier forwarded to ``Network``.
        """
        # A literal ``[]`` default would be one list shared by every Actor
        # created without ``layers``; take a per-instance copy instead. The
        # copy also lets callers pass any sequence (e.g. a tuple).
        layers = [] if layers is None else list(layers)

        self.game = game
        self.format = format
        self.layers = layers
        self.optimizer = optimizer

        layer_sizes = (
            [game.state_size(format)] + layers + [game.num_possible_moves()]
        )
        # The network is created with an empty case list and zero validation
        # and test fractions.
        self.network = Network(
            layer_sizes,
            [],
            minibatch_size=50,
            steps=1,
            loss_function='cross_entropy',
            validation_fraction=0,
            test_fraction=0,
            learning_rate=0.001,
            optimizer=optimizer,
            output_functions=[tf.nn.softmax]
        )
        self.network.build()

        if checkpoint:
            self.load_checkpoint(checkpoint)

    def select_move(self, state, stochastic=False):
        """Return one of the moves available in ``state``.

        :param state: Game state, passed to ``game.get_moves`` and
            ``game.format_for_nn``.
        :param stochastic: When false, return the move with the highest
            prediction. When true, sample a move with probability
            proportional to its prediction; if the predictions sum to zero
            the move is drawn uniformly.
        :return: An element of ``game.get_moves(state)``.
        """
        possible_moves = self.game.get_moves(state)
        formatted_state = self.game.format_for_nn(state, format=self.format)

        # ``predict`` is given a one-element batch; take its single result.
        predictions = self.network.predict([formatted_state])[0]
        # Only the first len(possible_moves) outputs are considered.
        # NOTE(review): this assumes output i corresponds to
        # possible_moves[i] - confirm against the game implementation.
        predictions = predictions[:len(possible_moves)]

        if not stochastic:
            return possible_moves[np.argmax(predictions)]

        predictions = np.array(predictions)
        candidates = np.arange(len(predictions))
        total = predictions.sum()
        if total == 0:
            # No usable distribution: fall back to a uniform draw.
            move = np.random.choice(candidates)
        else:
            # Renormalise the truncated outputs so they sum to one.
            move = np.random.choice(candidates, p=predictions / total)
        return possible_moves[move]

    def save_checkpoint(self, checkpoint):
        """Save the network by calling ``network.save(checkpoint)``."""
        self.network.save(checkpoint)

    def load_checkpoint(self, checkpoint):
        """Load the network by calling ``network.load(checkpoint)``."""
        self.network.load(checkpoint)
def run_case(n, hyperparameter_file='hyperparameters.csv'):
    """
    Build, train, test and visualise the network described by one row of a
    hyperparameter file.

    :param n: Number of the scenario; the row is looked up in the parsed CSV
        as ``.loc[n - 1]``.
    :param hyperparameter_file: A CSV file whose columns reference the data
        set and hold the network hyperparameters.
    :return: None.
    """
    params = pd.read_csv(hyperparameter_file).loc[n - 1]

    # NOTE(review): the 'Dataset', 'Output_functions' and 'Network' cells are
    # passed to eval(), so only trusted hyperparameter files should be used.

    # Use the evaluated cell when it is an expression that resolves here;
    # on NameError the raw cell value is kept as-is.
    dataset = params['Dataset']
    try:
        dataset = eval(dataset)
    except NameError:
        pass

    output_functions = params['Output_functions']
    try:
        output_functions = eval(output_functions)
    except NameError:
        pass

    # First positional argument of Network, evaluated before the numeric
    # conversions below.
    architecture = eval(params['Network'])
    options = {
        'minibatch_size': int(params['Minibatch_size']),
        'steps': int(params['Steps']),
        'loss_function': params['Loss_function'],
        'output_functions': output_functions,
        'case_fraction': float(params['case_fraction']),
        'validation_fraction': float(params['validation_fraction']),
        'validation_interval': int(params['validation_interval']),
        'test_fraction': float(params['test_fraction']),
        'learning_rate': float(params['learning_rate']),
        'optimizer': params['optimizer'],
        'one_hot_encode_target': bool(params['one_hot_encode_target']),
    }

    network = Network(architecture, dataset, **options)
    network.build()
    network.train()
    network.test()
    # network.mapping_test(10, [0])
    network.mapping_test(dataset, [0], [0])
    network.visualize_weights(weight_layers=[0, 1], bias_layers=[0, 1])
# NOTE(review): the `return result` below is the tail of a definition that
# starts before this excerpt; its original indentation is not visible here.
return result


# Script entry point: trains a Network on replay data and saves the result.
# `load_data`, `game`, `state_format` and `layers` are not defined in this
# excerpt; they are expected to come from elsewhere in the file.
if __name__ == '__main__':
    data = load_data('model/100x50-500/replays.txt')
    # Layer sizes: the game's state size, the hidden `layers`, then the
    # number of possible moves. Output function is softmax.
    network = Network([game.state_size(state_format)] + layers + [game.num_possible_moves()],
                      data,
                      minibatch_size=500,
                      steps=5000,
                      loss_function='cross_entropy',
                      case_fraction=0.1,
                      validation_fraction=0,
                      validation_interval=1000,
                      test_fraction=0,
                      learning_rate=0.01,
                      optimizer='rmsprop',
                      accuracy_argmax=True,
                      output_functions=[tf.nn.softmax])
    network.build()
    #network.save('model/pre-trained/step-0')
    # for i in range(4):
    #     network.train(plot_results=True)
    #     network.save(f'model/pre-trained/game_{i*50}')
    # Single training run with result plotting, then save under a fixed path.
    network.train(plot_results=True)
    network.save('model/manual/test4')
    #network.test()