-
Notifications
You must be signed in to change notification settings - Fork 0
/
genetic_algorithm.py
96 lines (80 loc) · 3.36 KB
/
genetic_algorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# genetic_algorithm.py
#
#
# Spencer Kraisler 2018
#
#
# This file contains methods for the genetic algorithm which optimizes the AI to play blackjack.
#
#
from blackjack import Player
from blackjack import game
from simplex import Network
import random as rand
import numpy as np
# this method creates a matrix of random values whose shape is a parameter tuple (e.g. (2,4))
def getRandMatrix(shape, limit):
    """Return an array of the given shape filled with uniform random values.

    Every entry is drawn independently with ``rand.uniform(-limit, limit)``.

    Args:
        shape: Tuple of dimension sizes, e.g. ``(2, 4)``. Any number of
            dimensions is accepted (the previous version only handled 2-D).
        limit: Half-width of the symmetric sampling interval.

    Returns:
        A float ``numpy.ndarray`` with shape ``shape``.
    """
    R = np.zeros(shape)
    # np.ndindex walks the indices in row-major order, so for 2-D shapes the
    # sequence of draws is the same as the former nested i/j loops. The stdlib
    # generator is kept so that seeding ``random`` still controls the output.
    for index in np.ndindex(R.shape):
        R[index] = rand.uniform(-limit, limit)
    return R
# this method creates a matrix of random 1's and 0's with the given shape
# the probability of a one appearing is given by the one_rate parameter
def getRandBinaryMatrix(shape, one_rate):
    """Return an array of the given shape whose entries are randomly 0 or 1.

    One ``rand.random()`` draw is made per entry; the entry becomes 1 when
    the draw is strictly less than ``one_rate`` and stays 0 otherwise.

    Args:
        shape: Tuple of dimension sizes. Any number of dimensions is
            accepted (the previous version only handled 2-D).
        one_rate: Probability that an entry is 1. Values <= 0 give all
            zeros; values >= 1 give all ones.

    Returns:
        A float ``numpy.ndarray`` with shape ``shape`` containing 0.0 / 1.0.
    """
    B = np.zeros(shape)
    # Row-major traversal keeps the draw order of the former nested loops
    # for 2-D shapes. Entries start at 0, so only the 1s need writing.
    for index in np.ndindex(B.shape):
        if rand.random() < one_rate:
            B[index] = 1
    return B
# takes a matrix and returns a copy in which randomly chosen elements (chosen with probability mutation_rate) have a random offset in [-1, 1] added
def mutate(X, mutation_rate):
    """Return ``X`` with random offsets added to a random subset of entries.

    A 0/1 mask with the shape of ``X`` is drawn first (each entry is 1 with
    probability ``mutation_rate``), then a noise matrix with entries in
    [-1, 1]. The result is ``X`` plus the masked noise; ``X`` itself is not
    modified.
    """
    # The mask is drawn before the noise so the order of random draws stays
    # the same as before.
    mask = getRandBinaryMatrix(X.shape, mutation_rate)
    noise = getRandMatrix(X.shape, 1)
    masked_noise = noise * mask
    return X + masked_noise
# accepts an array of networks (SNNs) and uses the cost function to return the best performing network
def getMaxNetwork(network_array):
    """Return the network with the highest ``cost_blackjack`` score.

    Each network is scored exactly once. The previous version scored the
    first element twice and re-scored every new best after it had already
    won the comparison, so the stored reference score could differ from the
    one that was compared whenever the score varies between calls, and it
    ran more evaluations than needed.

    Args:
        network_array: Non-empty indexable sequence of networks.

    Returns:
        The element with the largest score; on ties the earliest one wins.

    Raises:
        IndexError: If ``network_array`` is empty.
    """
    best_network = network_array[0]
    best_cost = cost_blackjack(best_network)
    for candidate in network_array[1:]:
        candidate_cost = cost_blackjack(candidate)
        # Strict '>' keeps the earliest network when scores are equal.
        if candidate_cost > best_cost:
            best_network = candidate
            best_cost = candidate_cost
    return best_network
# accepts a network and randomly adds values to some weight elements in the weight matrices based on a mutation rate
def mutateNetwork(network, mutation_rate):
    """Build a new network whose weights are a mutated copy of ``network``'s.

    The new ``Network`` is constructed with the same three layer dimensions
    as ``network``. The weight matrices of layers 1 and 2 are then replaced
    by ``mutate(...)`` applied to the corresponding matrices of ``network``.
    The source network is left untouched.
    """
    dims = [network.layers[k].dim for k in range(3)]
    child = Network(*dims)
    # Layer 0 is not given a weight matrix here; only layers 1 and 2 are.
    for k in (1, 2):
        parent_weights = network.layers[k].weight_matrix
        child.layers[k].weight_matrix = mutate(parent_weights, mutation_rate)
    return child
# accepts a network and returns a metric that measures the network's performance in blackjack
def cost_blackjack(network):
    """Score a network's blackjack performance (higher is better).

    A ``Player`` named 'AI' is created around ``network`` and evaluated with
    ``getPlayerPerformance(10, False)``. The first three entries of the
    result are treated as win rate, average bank and average reward, and are
    combined as ``win_rate + ave_bank / 1000.0 + 1.1 * ave_reward``.
    """
    # NOTE(review): the meaning of the (10, False) arguments is defined in
    # blackjack.Player -- presumably a game count and a print flag; confirm.
    performance = Player('AI', network=network).getPlayerPerformance(10, False)
    win_rate, ave_bank, ave_reward = performance[0], performance[1], performance[2]
    return win_rate + ave_bank / 1000.0 + 1.1 * ave_reward
# creates a generation of networks
# gen_size is the number of networks in the generation, and hidden_layer_size is the hidden layer size (recommended 20)
def createGeneration(gen_size, hidden_layer_size):
    """Return a list of ``gen_size`` freshly constructed networks.

    Every network is built as ``Network(2, hidden_layer_size, 4)``, i.e. the
    first and last layer sizes are fixed at 2 and 4 and only the middle one
    is configurable.
    """
    return [Network(2, hidden_layer_size, 4) for _ in range(gen_size)]
# accepts a generation of networks and trains them based on a mutation rate (recommended 0.05)
# epoch is the number of training iterations for a single generation (recommended 200)
# print_info is boolean: should it print data every 10 epochs or not
def trainGeneration(models, epoch, mutation_rate, print_info):
    """Evolve ``models`` in place for ``epoch`` iterations.

    Each iteration picks the best network with ``getMaxNetwork``, measures
    its win rate and average bank, optionally prints a progress line, and
    then overwrites every slot of ``models`` with a mutated copy of that
    best network.

    Args:
        models: Mutable sequence of networks; its entries are replaced.
        epoch: Number of iterations to run.
        mutation_rate: Base mutation probability. The rate actually used is
            scaled by ``(0.60 - win_rate) / 0.60``, so it shrinks as the
            best network's win rate approaches 60%.
        print_info: When truthy, print a status line every 10th iteration.

    Returns:
        None.
    """
    target_win_rate = 0.60
    for epoch_index in range(epoch):
        max_network = getMaxNetwork(models)
        AI = Player('AI', network=max_network)
        stats = AI.getPlayerPerformance(10, False)
        win_rate = stats[0]
        ave_bank = stats[1]
        if print_info and epoch_index % 10 == 0:
            print("Epoch: {} - Win rate: {}% - Ave. bank (5 plays): ${}".format(
                epoch_index, round(win_rate * 100.0, 1), round(ave_bank, 2)))
        # The rate is identical for every slot, so compute it once. Above the
        # target win rate the raw value goes negative; a non-positive rate
        # already meant "no mutation", and the clamp makes that explicit.
        scaled_rate = max(
            0.0, mutation_rate * (target_win_rate - win_rate) / target_win_rate)
        # Separate index name: the former inner loop reused the epoch counter.
        for slot in range(len(models)):
            models[slot] = mutateNetwork(max_network, scaled_rate)