def eval(self, n_games=10):
    """Play several games driven by ``self.graph`` and score the result.

    For each game a fresh ``Game`` is created and played until
    ``is_stale()`` reports true.  On every step the board returned by
    ``get_state()`` is encoded into a 16-element vector, passed to
    ``self.graph.evaluate``, and the first element of the result is used
    as an index into the move keys ``['w', 's', 'a', 'd']``.

    Args:
        n_games: Number of games to play and average over.  Defaults to
            10, which matches the previously hard-coded value.

    Returns:
        The square of the mean final score over ``n_games`` games.

    Raises:
        ValueError: If ``n_games`` is not positive (the mean would be
            undefined).
    """
    if n_games <= 0:
        raise ValueError('n_games must be positive, got %r' % (n_games,))

    # Keys handed to Game.process_move; the network output selects one.
    # NOTE(review): assumes evaluate(...)[0] is an integer in 0..3 - confirm.
    move_keys = ['w', 's', 'a', 'd']
    final_scores = []

    for _ in range(n_games):
        g = Game()
        while not g.is_stale():
            board = g.get_state()

            # Fixed-size input vector: empty cells stay 0, occupied cells
            # become log2(value) / 8.0.
            # NOTE(review): presumably a flattened 4x4 board - a state longer
            # than 16 entries with a non-zero tail would raise IndexError.
            encoded = [0] * 16
            for idx, tile in enumerate(board):
                if tile != 0:
                    encoded[idx] = math.log(tile, 2) / 8.0

            choice = self.graph.evaluate(np.array(encoded))[0]
            g.process_move(move_keys[choice])

        final_scores.append(g.get_score())

    # float() keeps true division under Python 2 as the original `/ 10.0` did.
    return (sum(final_scores) / float(n_games)) ** 2
x_data = [] running_length = 500 averages = [] num_games = int (4 * 1e3) print 'Playing %s games.' % num_games while (num_games >= 0): while (not g.is_stale ()): # get current states reward info s = State (g.get_state ()) # save current score current_score = g.get_score () # get reward info for current state r = sm.lookup (s) #r = spm.lookup (s) # get move decision move = r.get_move () # make move g.process_move (move) # get new score new_score = g.get_score () # calc score gain