def generate_data_rl(self, ngames=50, fname=''):
    """Generate a batch of self-play data.

    Builds an opponent RLPlayer driven by self.model (the current
    network), then delegates to generate_data_bot to record the games.

    NOTE(review): the original docstring mentioned self.model_predict,
    but the opponent here is driven by self.model — confirm which model
    is intended for the self-play opponent.
    """
    opponent = RLPlayer(lambda state: self.model.predict(state))
    opponent.epsilon = self.epsilon
    opponent.record_history = 1
    opponent.include_action = 1
    return self.generate_data_bot(opponent, ngames, fname)
def generate_data_bot(self, bot, ngames=50, fname=''):
    """Generate a batch of data: the RL player (self.model) vs. a given bot.

    Only the RL player's history is recorded; the recorded batch is
    appended to self.data via add_data and also returned.
    """
    rl_player = RLPlayer(lambda state: self.model.predict(state))
    rl_player.epsilon = self.epsilon
    rl_player.record_history = 1
    rl_player.include_action = 1
    bot.record_history = 0
    batch = self.record_game(ngames, [rl_player, bot], fname)
    self.add_data(batch)
    return batch
def generate_data_smithy(self, ngames=50, fname=''):
    """Generate a batch of data: RL player (self.model_predict) vs. SmithyBot."""
    rl_player = RLPlayer(lambda state: self.model_predict.predict(state))
    rl_player.epsilon = self.epsilon
    rl_player.record_history = 1
    smithy = SmithyBot()
    smithy.record_history = 0
    # NOTE(review): this calls the module-level record_game, while sibling
    # methods call self.record_game — confirm that is intentional.
    batch = record_game(ngames, [rl_player, smithy], fname)
    self.add_data(batch)
    return batch
def generate_data(self, ngames=50, fname=''):
    """Generate a batch of data: RL player (self.model_predict) vs. RandomPlayer.

    The RL player records (state, action) history; the random opponent
    does not. The recorded batch is stored via add_data and returned.
    """
    rl_player = RLPlayer(lambda state: self.model_predict.predict(state))
    rl_player.epsilon = self.epsilon
    rl_player.record_history = 1
    rl_player.include_action = 1
    rando = RandomPlayer()
    rando.record_history = 0
    batch, _ = self.record_game(ngames, [rl_player, rando], fname)
    self.add_data(batch)
    return batch
def run_agent(fn='sarsa_v0', version='SarsaBootstrapAgent', gamma_comp=0.02,
              epsilon=0.05, dropout=0.2):
    """Train an RL agent with iterative data generation and checkpointing.

    Parameters
    ----------
    fn : str
        Basename for model checkpoints saved under ./model/.
    version : str
        Agent class: 'SarsaBootstrapAgent' or 'DQLSarsaAgent'.
    gamma_comp : float
        1 - gamma; larger values incentivize shorter games.
    epsilon : float
        Base exploration rate, decayed over iterations.
        BUGFIX: this argument was previously ignored (0.05 hard-coded).
    dropout : float
        Dropout rate passed to create_model.

    Raises
    ------
    ValueError
        If `version` names an unknown agent class.
    """
    if version == 'SarsaBootstrapAgent':
        dql = SarsaBootstrapAgent()
    elif version == 'DQLSarsaAgent':
        dql = DQLSarsaAgent()
    else:
        # BUGFIX: previously fell through with `dql` unbound (NameError).
        raise ValueError('Unknown agent version: {!r}'.format(version))
    # Record one SmithyBot-vs-SmithyBot game to obtain sample (s, a)
    # pairs and rewards for initializing the network.
    p1 = SmithyBot()
    p1.record_history = 1
    p2 = SmithyBot()
    p2.record_history = 0
    dbuy, _ = dql.record_game(1, [p1, p2])
    sa = np.array([np.concatenate([s, a]) for s, a, r, _, _, _ in dbuy])
    r = np.array([r for s, a, r, _, _, _ in dbuy])
    # BUGFIX: honor the epsilon argument instead of a hard-coded 0.05.
    dql.epsilon = epsilon
    dql.mtrain = 5000
    # one iteration creates roughly 1e4 samples,
    # therefore this remembers the data of the past 40 iterations.
    dql.replaybuffer = int(4e5)
    dql.target_iterations = 1
    dql.predict_iterations = 10
    dql.epochs = 10
    dql.gamma = 1 - gamma_comp  # incentivize short games
    dql.reward_points_per_turn = 0.0
    # I think having win reward makes it too noisy
    dql.win_reward = 0
    # print the settings
    print('mtrain {:d}, replaybuffer {:d}, predict iter {:d}, epochs {:d}, gamma {:.02f}'.
          format(dql.mtrain, dql.replaybuffer, dql.predict_iterations,
                 dql.epochs, dql.gamma))
    # a very dumb way to initiate the network weights
    # NOTE(review): sa.shape[0] is the sample count, not the feature
    # length — confirm dql.length is meant to be set this way.
    dql.length = sa.shape[0]
    dql.create_model(sa, r, dropout=dropout)
    # start training iterations.
    for i in range(1000):
        print('data generation iteration {:d}'.format(i))
        # Decay exploration over iterations.
        # BUGFIX: scale from the epsilon argument, not a literal 0.05.
        dql.epsilon = epsilon / ((i + 1) / 20)
        print('dql epsilon: {:.04f}'.format(dql.epsilon))
        dql.generate_data_smithy(100)
        dql.generate_data_rl(100)
        print('data sample size = {:d}'.format(dql.data.shape[0]))
        dql.do_target_iteration()
        dql.save_model('./model/{:s}_iteration_{:03d}'.format(fn, i + 1))
        # evaluate against random bot and smithy bot
        p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
        p1.epsilon = 0.0
        p_smith = SmithyBot()
        print(compare_bots([p1, RandomPlayer()], 10))
        print(compare_bots([p1, p_smith], 10))
        # print output of Q(s,a) estimates for the first SmithyBot game
        print('Q(s,a) estimates of a SmithyBot game')
        print(dql.model_predict.predict(sa).T)
def generate_data_smithy(self, ngames=50, fname=''):
    """Generate a batch of data: RL player (self.model_predict) vs. SmithyBot.

    The RL player records (state, action) history; the SmithyBot
    opponent does not. The batch is stored via add_data and returned.
    """
    rl_player = RLPlayer(lambda state: self.model_predict.predict(state))
    rl_player.epsilon = self.epsilon
    rl_player.record_history = 1
    rl_player.include_action = 1
    # try including smithy bot's data in the training.
    smithy = SmithyBot()
    smithy.record_history = 0
    batch, _ = self.record_game(ngames, [rl_player, smithy], fname)
    self.add_data(batch)
    return batch
def compare_rl_bots(fn1, fn2, num_games=50, order=0):
    """Load two saved RL bots from checkpoints and play them against each other.

    Parameters
    ----------
    fn1, fn2 : str
        Checkpoint filenames for bot 1 and bot 2.
    num_games : int
        Number of games to play.
    order : int
        Passed through to compare_bots (controls player ordering).

    Returns
    -------
    The result of compare_bots([bot1, bot2], num_games, order).
    """
    print('setting up bot1...')
    dql = SarsaAgent()
    dql.create_model_5layers()
    data = dql.load_game_data('1game')
    dql.fit(data)  # dummy fit to build the network before loading weights
    dql.load_model(fn1)
    p1 = RLPlayer(lambda x: dql.model.predict(x))
    p1.name = fn1
    p1.epsilon = 0
    print('setting up bot2...')
    dql2 = SarsaAgent()
    dql2.create_model_5layers()
    data = dql2.load_game_data('1game')
    dql2.fit(data)
    # BUGFIX: bot 2 previously loaded fn1's weights, so the match was
    # really fn1 vs. itself; load fn2 as intended.
    dql2.load_model(fn2)
    p2 = RLPlayer(lambda x: dql2.model.predict(x))
    p2.name = fn2
    p2.epsilon = 0
    print('fight!')
    return compare_bots([p1, p2], num_games, order)
def load_rl_bot(fn, dql='', version='SarsaActBuyAgent', pre_witch=0):
    """Build (or reuse) an agent, load checkpoint `fn`, and wrap it in a player.

    Parameters
    ----------
    fn : str
        Model checkpoint filename to load.
    dql : agent or ''
        '' to construct a fresh agent of the given `version`; otherwise an
        already-initialized agent whose network just needs the weights.
    version : str
        One of 'SarsaActBuyAgent', 'SarsaAgent', 'DQLSarsaAgent'.
    pre_witch : int
        SarsaAgent only: use the pre-witch card set when truthy.

    Returns
    -------
    (player, agent)
        The greedy (epsilon=0) player and the agent backing it.

    Raises
    ------
    ValueError
        If `version` names an unknown agent class.
    """
    if dql == '':
        if version == 'SarsaActBuyAgent':
            # Record one throwaway game to size and build the networks
            # before loading the saved weights.
            pr = RandomPlayer()
            pr.record_history = 1
            dql = SarsaActBuyAgent()
            data = dql.record_game(1, [pr, SmithyBot()], verbose=0)
            dql.length = (data[0].shape[1] + data[1].shape[1])
            dql.create_model_5layers()
            dql.create_act_model()
            dql.fit(data)
            dql.fit_act(data)
            dql.load_model(fn)
            p1 = BuyActRLplayer(lambda x: dql.model.predict(x),
                                lambda x: dql.model_act.predict(x))
            p1.name = fn
        elif version == 'SarsaAgent':
            if pre_witch:
                variable_cards_this = [
                    village, cellar, smithy, festival, market, laboratory,
                    chapel, warehouse, council_room, militia, moat
                ]
                pr = RandomPlayer()
                pr.record_history = 1
                dql = SarsaAgent()
                dql.variable_cards = variable_cards_this
                data = dql.record_game(1, [pr, SmithyBot()])
                print(data[0].shape)
                print(data[1].shape)
            else:
                pr = RandomPlayer()
                pr.record_history = 1
                data = record_game(1, [pr, SmithyBot()])
                dql = SarsaAgent(length=(data[0].shape[1] + data[1].shape[1]))
            dql.create_model(num_layers=get_num_layers(fn + '_ar.h5'))
            dql.fit(data)
            dql.load_model(fn)
            p1 = RLPlayer(lambda x: dql.model.predict(x))
            p1.name = fn
        elif version == 'DQLSarsaAgent':
            dql = DQLSarsaAgent()
            p1 = SmithyBot()
            p1.record_history = 1
            dbuy, _ = dql.record_game(1, [p1, SmithyBot()])
            sa = np.array(
                [np.concatenate([s, a]) for s, a, r, _, _, _ in dbuy])
            r = np.array([r for s, a, r, _, _, _ in dbuy])
            dql.create_model(sa, r)
            dql.load_model(fn)
            p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
            p1.name = fn
        else:
            # BUGFIX: a bare `raise` with no active exception raises
            # "RuntimeError: No active exception to re-raise"; raise an
            # informative error instead.
            raise ValueError('No such version of Agent! ({!r})'.format(version))
    else:
        # dql's network is already initialized; just load the weights.
        dql.load_model(fn)
        if version == 'DQLSarsaAgent':
            p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
        else:
            p1 = RLPlayer(lambda x: dql.model.predict(x))
        p1.name = fn
    p1.epsilon = 0  # greedy play for evaluation
    return (p1, dql)