# --- Example 1 ---
def random_against_model(ngames=100):
	from keras.models import model_from_json
	random.seed()
	model = model_from_json(open('keras_model.json').read())
	model.load_weights('model_weights.h5')
	model.compile(optimizer='adam',
		loss='sparse_categorical_crossentropy',
		metrics=['accuracy'])
	outcomes = []
	for _ in xrange(ngames):
		board = Board()

		prev_move = np.zeros(81, dtype='int32')
		turn = constants.X_PIECE
		while not board.game_over():
			if turn == constants.X_PIECE:
				x1 = np.asarray(board.board.flatten())
				x2 = prev_move
				X = np.asarray([np.hstack([x1, x2])])
				probs =  model.predict_proba(X)[0]
				#we need to eliminate any moves that are not allowed
				probs = [p if np.unravel_index(p_i, (9, 9)) in board.next_moves else 0\
					for p_i, p in enumerate(probs)]

				probs = my_normalize(probs)

				idx = range(len(probs))
				#predicted move to make
				move_idx = np.random.choice(idx, p=probs)
				
				row, col = np.unravel_index(move_idx, (9, 9))
				board.add_piece(row, col, turn)
			elif turn == constants.O_PIECE:
				try:
					row, col = random.sample(board.next_moves, 1)[0]
					board.add_piece(row, col, turn)
				except ValueError:
					print 'Try again'
					continue

			else:
				raise ValueError('Mistakes have been made')

			turn = (turn + 1) % 2

		print '{} Won the game!'.format(board.board_winner())
		outcomes.append(board.board_winner())

	print 'AI Won {:0.02f}% of the games!'.format(sum(1 if i == constants.X_PIECE else 0 for i in outcomes)/float(len(outcomes)))
	print '{:0.02f}% ties'.format(sum(1 if i == -1 else 0 for i in outcomes)/float(len(outcomes)))
# --- Example 2 ---
def random_play():
	"""Generate 20000 games of purely random play and pickle them to disk
	in batches of 10000.

	Each saved game is a tuple (list of board snapshots, winner); each
	batch is written to a file named '<timestamp>-saved_games.pkl'.
	"""
	random.seed()
	num_games = 20000
	batch_size = 10000
	games = [] #game = (list of board configs, winner)

	current_batch = 0
	for i in tqdm(range(num_games)):
		board = Board()
		#snapshot the empty board, then one snapshot after every move
		boards = [copy.copy(board.board)]

		turn = constants.X_PIECE
		while not board.game_over():
			row, col = random.sample(board.next_moves, 1)[0]
			board.add_piece(row, col, turn)

			turn = (turn + 1) % 2
			boards.append(copy.copy(board.board))

		games.append((boards, board.board_winner()))
		current_batch += 1

		if current_batch == batch_size:
			with open('{}-saved_games.pkl'.format(time.time()), 'wb') as f:
				pickle.dump(games, f)
			current_batch = 0
			games = []

	#BUGFIX: flush the final partial batch when num_games is not a multiple
	#of batch_size (previously those trailing games were silently dropped)
	if games:
		with open('{}-saved_games.pkl'.format(time.time()), 'wb') as f:
			pickle.dump(games, f)
# --- Example 3 ---
def play_against_model():
	from keras.models import model_from_json
	model = model_from_json(open('keras_model.json').read())
	model.load_weights('model_weights.h5')
	model.compile(optimizer='adam',
		loss='sparse_categorical_crossentropy',
		metrics=['accuracy'])
	board = Board()

	prev_move = np.zeros(81, dtype='int32')
	turn = constants.X_PIECE
	while not board.game_over():
		if turn == constants.X_PIECE:
			x1 = np.asarray(board.board.flatten())
			x2 = prev_move
			X = np.asarray([np.hstack([x1, x2])])
			probs =  model.predict_proba(X)[0]
			#we need to eliminate any moves that are not allowed
			probs = [p if np.unravel_index(p_i, (9, 9)) in board.next_moves else 0\
				for p_i, p in enumerate(probs)]

			probs = my_normalize(probs)

			idx = range(len(probs))
			#predicted move to make
			move_idx = np.random.choice(idx, p=probs)
			
			row, col = np.unravel_index(move_idx, (9, 9))
			board.add_piece(row, col, turn)
		elif turn == constants.O_PIECE:
			print 'Allowed:'
			print board.next_moves
			try:
				row, col = [int(x) for x in raw_input('User move:').split()]
				board.add_piece(row, col, turn)
			except ValueError:
				print 'Try again'
				continue

		else:
			raise ValueError('Mistakes have been made')

		turn = (turn + 1) % 2
		print board

	print '{} Won the game!'.format(board.board_winner())
# --- Example 4 ---
def generate_random_games(num_games, seed=1337):
	"""Produce `num_games` games of uniformly random play.

	Returns a list of (board_history, winner) tuples, where board_history
	is the sequence of board snapshots from the empty board through the
	final position.
	"""
	random.seed(seed)
	results = []
	for _ in tqdm(range(num_games)):
		game_board = Board()
		#snapshot the empty board, then one snapshot after every move
		history = [copy.copy(game_board.board)]

		piece = constants.X_PIECE
		while not game_board.game_over():
			r, c = random.sample(game_board.next_moves, 1)[0]
			game_board.add_piece(r, c, piece)

			piece = (piece + 1) % 2
			history.append(copy.copy(game_board.board))

		results.append((history, game_board.board_winner()))

	return results
# --- Example 5 ---
def trained_model_play():
	"""
	NOTE: The models expect the board to be presented as player X's turn
	
	
	Algo:
	1. Start with 20000 randomly generated games
	2. Train a model to predict "winning" moves
	3. Generate 20000 new games, playing the model against itself
	4. Go to 2
	"""

	BOARD_DIM = 81 #i.e. 9x9
	POSS_MOVE_DIM = 81 #ie. same as board size
	INPUT_DIM = BOARD_DIM + POSS_MOVE_DIM #board, last_move
	OUTPUT_DIM = POSS_MOVE_DIM #which move should we make?

	NB_EPOCH = 5
	NB_ITER = 5 #number of reinforcement learning iterations

	#NOTE: X_PIECE always went first in the training data
	model = Sequential()
	model.add(Dense(2 * INPUT_DIM, input_dim=INPUT_DIM, activation='relu'))
	model.add(Dropout(0.2))
	model.add(Dense(2 * INPUT_DIM, activation='tanh'))
	model.add(Dropout(0.2))
	model.add(Dense(OUTPUT_DIM))
	model.add(Activation('softmax'))
	model.compile(optimizer='adam',
		loss='sparse_categorical_crossentropy',
		metrics=['accuracy'])


	num_games = 20000
	#game = (list of board configs, winner)
	games = generate_random_games(num_games)

	#we only want games with a definitive winner
	won_games = [(g, w) for g, w in games if w != constants.NO_PIECE]
	print 'Using {} games that have winner'.format(len(won_games))

	#we can easily scale up the number of games by transposing them
	won_games.extend(transpose_batch(won_games))

	train_model_on_games(model, won_games, nb_epoch=NB_EPOCH)

	for j in range(NB_ITER):
		games = []
		for i in range(num_games):
			board = Board()
			boards = [board.board]

			prev_move = np.zeros(BOARD_DIM, dtype='int32')
			turn = constants.X_PIECE
			while not board.game_over():
				if turn == constants.X_PIECE:
					x1 = np.asarray(board.board.flatten())
				elif turn == constants.O_PIECE:
					board_rep = invert_board(board.board)
					x1 = np.asarray(board_rep.flatten())
				else:
					raise ValueError('Mistakes have been made')

				x2 = prev_move
				X = np.asarray([np.hstack([x1, x2])])
				probs =  model.predict_proba(X)[0]
				#we need to eliminate any moves that are not allowed
				probs = [p if np.unravel_index(p_i, (9, 9)) in board.next_moves else 0\
					for p_i, p in enumerate(probs)]

				probs = my_normalize(probs)

				idx = range(len(probs))
				#predicted move to make
				move_idx = np.random.choice(idx, p=probs)
				
				row, col = np.unravel_index(move_idx, (9, 9))
				board.add_piece(row, col, turn)
				turn = (turn + 1) % 2
				prev_move = to_categorical([move_idx], 81)[0]

				boards.append(copy.copy(board.board))

			games.append((boards, board.board_winner()))

		won_games = [(g, w) for g, w in games if w != constants.NO_PIECE]
		print 'Using {} games that have winner after reinforcement iter {}'.format(len(won_games), j)
		#we can easily scale up the number of games by transposing them
		won_games.extend(transpose_batch(won_games))
		train_model_on_games(model, won_games, nb_epoch=NB_EPOCH)
	
	with open('keras_model.json', 'w') as f:
		f.write(model.to_json())
	
	model.save_weights('model_weights.h5')