Example 1
    def __init__(self,
                 epsilon=0.05,
                 gamma=0.8,
                 alpha=0.2,
                 numTraining=900,
                 extractor=SimpleExtractor(),
                 **args):
        "You can initialize Q-values here..."

        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.featExtractor = extractor
        self.index = 0  # This is always Pacman
        self.weights = CustomCounter()
        self.q_values = CustomCounter()
        self.lastAction = None
        # Forward the collected hyper-parameters to the base class
        # (the original repeated them explicitly and left `args` unused).
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"
Example 2
import functools

import numpy as np


def mat_features(states, extractor=SimpleExtractor(), ftrs=None):
    """Transform a list of states into per-state feature matrices."""
    if isinstance(states, (list, np.ndarray)):
        # Batch case: extract one feature matrix per state. (The original
        # accepted only np.ndarray here, so plain lists of states fell
        # through to the single-state branch below.)
        return list(map(functools.partial(extractor.getMatrixFeatures, features=ftrs), states))
    # Single-state case: wrap the result so callers always get a list.
    return [extractor.getMatrixFeatures(states, features=ftrs)]
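For reference, a hypothetical call site; `trajectory` and `current_state`
are placeholders, not names from the original code:

# Batch case: one feature matrix per state in the trajectory.
state_matrices = mat_features(np.array(trajectory))
# Single-state case: the result is still wrapped in a one-element list.
single_matrix = mat_features(current_state)[0]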
Example 3
    def __init__(self, epsilon=0.1, alpha=0.5, gamma=0.9, discount=0.9):
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.discount = discount  # kept alongside gamma, as in the original
        self.legalActions = Const.ACTIONS
        # self.featExtractor = DangerExtractorInstance()  # dead assignment,
        self.featExtractor = SimpleExtractor()            # overwritten here
        self.weights = [0] * self.featExtractor.featureNum
        self.game_state = None

        self.train_episodes = 10000

        self.env = Env()
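With epsilon and legalActions stored, action selection in this style of
agent is typically epsilon-greedy. A minimal sketch, assuming `import
random` and a getQValue(state, action) helper that the snippet does not
show:

    def getAction(self, state):
        # Explore uniformly with probability epsilon, otherwise exploit.
        if random.random() < self.epsilon:
            return random.choice(self.legalActions)
        return max(self.legalActions, key=lambda a: self.getQValue(state, a))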
Example 4
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
        self.weights = util.Counter()

        # 100 episodes of training
        self.weights['enemy'] = -10
        self.weights['bias'] = -10
        self.weights['bullet'] = -200
        self.weights['edge'] = -10
        self.weights['hitEnemy'] = 10
        self.weights['moveFoward'] = 10  # sic: key must match the extractor's feature name

        self.legalActions = Const.ACTIONS
        self.featExtractor = SimpleExtractor()
        self.lastAction = 1
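Hand-seeding the weights like this only gives the learner a head start;
approximate Q-learning keeps adjusting them online. A sketch of the
standard update rule, assuming getQValue and computeValueFromQValues
helpers as in the Berkeley framework (self.discount and self.alpha are
set by ReinforcementAgent.__init__):

    def update(self, state, action, nextState, reward):
        # TD error: (r + gamma * V(s')) - Q(s, a).
        difference = (reward
                      + self.discount * self.computeValueFromQValues(nextState)
                      - self.getQValue(state, action))
        features = self.featExtractor.getFeatures(state, action)
        for f, value in features.items():
            # Each weight moves in proportion to its feature's activation.
            self.weights[f] += self.alpha * difference * value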
Example 5
def enhancedPacmanFeatures(state, action):
    """
    For each state, this function is called with each legal action.
    It should return a counter with { <feature name> : <feature value>, ... }
    """
    # Earlier experiments, kept for reference:
    # successor = state.generateSuccessor(0, action)
    # foodCount = successor.getFood().count()
    # features['foodCount'] = foodCount
    # features = neuralDistances(state, action)
    featureExtract = SimpleExtractor()
    features = featureExtract.getFeatures(state, action)
    # Known issue: 'capsule 0' is problematic; its value is a list of
    # values rather than a single int.
    return features
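One way to work around the list-valued 'capsule 0' feature noted above is
to flatten list entries into indexed scalar features before returning.
A sketch; flattenListFeatures is not part of the original code:

def flattenListFeatures(features):
    # Expand any list-valued feature (e.g. 'capsule 0') into one scalar
    # feature per element, so every value is a plain number.
    flat = util.Counter()
    for name, value in features.items():
        if isinstance(value, list):
            for i, v in enumerate(value):
                flat['%s[%d]' % (name, i)] = v
        else:
            flat[name] = value
    return flat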
Example 6
    def __init__(self, epsilon=0.1, alpha=0.5, gamma=0.9, discount=0.9):
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.discount = discount
        self.legalActions = Const.ACTIONS
        self.featExtractor = SimpleExtractor()
        self.featExtractor2 = PositionExtractor()
        self.weights = util.Counter()
        self.game_state = None

        self.env = Env()

        self.input_num = self.featExtractor2.getFeatureNum()
        self.hidden_num = 100
        self.output_num = 6
        self.W1 = np.random.rand(self.input_num, self.hidden_num)
        self.W2 = np.random.rand(self.hidden_num, self.output_num)
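W1 and W2 define a two-layer network mapping the extracted feature vector
to one score per action. A sketch of the corresponding forward pass; the
tanh nonlinearity is an assumption, since the snippet only defines the
weight matrices:

    def forward(self, features):
        # (input_num,) -> (hidden_num,) -> (output_num,) action scores.
        hidden = np.tanh(np.dot(features, self.W1))
        return np.dot(hidden, self.W2)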
Example 7
import numpy as np
import gym
import matplotlib.pyplot as plt
from tanks import Env
from const import Const
from featureExtractors import DangerExtractor
from featureExtractors import SimpleExtractor

# env = gym.make('CartPole-v0')
train_episodes = 100
learning_rate = 0.01
level_type = "minimal"
game_speed = 1000
env = Env(level_type, game_speed, train_episodes)
gamma = 0.99
featExtractor = SimpleExtractor()


def discount_rewards(r):
    """ take 1D float array of rewards and compute discounted reward """
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(0, r.size)):
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r
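For intuition, a worked pass over a three-step episode with gamma = 0.99:

# t = 2: running_add = 1.0
# t = 1: running_add = 1.0 + 0.99 * 1.0  = 1.99
# t = 0: running_add = 1.0 + 0.99 * 1.99 = 2.9701
print(discount_rewards(np.array([1.0, 1.0, 1.0])))  # -> [2.9701, 1.99, 1.0]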


class agent():
    def __init__(self, lr, s_size, a_size, h_size):
        # These lines establish the feed-forward part of the network:
        # the agent takes a state and produces an action.