Exemple #1
0
    def env_step(self, action):
        """Advance the world by one step in response to ``action``.

        The requested action may be swapped for a different valid action
        (when stochastic actions are enabled), and the agent may be pushed
        once more by wind after it has moved (when wind is enabled).

        Args:
            action: Action object; ``action.charArray[0]`` must be a key of
                ``self.valid_actions``.

        Returns:
            Reward_observation_terminal carrying the reward of the state the
            agent ends in, an observation whose ``intArray`` is the agent
            state, and the terminal flag. An invalid action yields reward 0,
            the current agent state and a non-terminal flag.
        """
        self.steps += 1

        # Action is one of N,S,W,E
        action = action.charArray[0]

        self.step_out('ACTION:', action)

        if action not in self.valid_actions:
            # Single-argument call form prints the same on Python 2 and 3.
            print('WARNING: Invalid action %s' % (action,))
            obs = Observation()
            obs.intArray = self.world.agent_state
            return Reward_observation_terminal(0, obs, False)

        # The actions might result in movement in a direction other than the
        # one intended with a probability of (1 - action_prob)
        if self.enable_stochastic_actions:
            if random.random() > self.action_prob:
                # Randomness! Choose uniformly between each other action.
                # ``{action}`` (not ``set(action)``) so that a multi-character
                # key would not be split into its characters.
                other_actions = list(set(self.valid_actions) - {action})
                if other_actions:
                    # With a single valid action there is nothing to swap to.
                    action = random.choice(other_actions)

            self.step_out('RESULT ACTION:', action)

        # Move the agent
        self.move_agent(self.valid_actions[action])

        # Apply wind from the new state
        if self.enable_wind:
            row, col = self.world.agent_state[0], self.world.agent_state[1]
            wind = self.world[row][col].wind
            if wind:
                p, wind_dir = wind
                if random.random() <= p:
                    # The agent gets caught by the wind. The one-element
                    # tuple keeps the formatting valid even if the direction
                    # is itself a tuple.
                    self.step_out('WIND IN %s!' % (wind_dir,))
                    self.move_agent(wind_dir)

        agent_state = self.world.reduce_pos(self.world.agent_state)
        pstate = self.world[agent_state[0]][agent_state[1]]

        # The observation carries the world's agent state, not the reduced
        # position used for the reward lookup above.
        obs = Observation()
        obs.intArray = self.world.agent_state

        self.step_out(self.world)
        self.step_out("REWARD:", pstate.reward)

        terminal = pstate.terminal
        if self.steps > self.step_limit:
            self.debug("STEP LIMIT REACHED!")
            terminal = True

        return Reward_observation_terminal(pstate.reward, obs, terminal)
Exemple #2
0
def test_agent_step():
    """Drive an agent through a start, a run of steps and a training call.

    The agent receives a constant-valued observation for every round: value
    1 at start, then values 2..255 (with the reward equal to the value), and
    finally one more step repeating value 255. ``agent_train(False)`` is
    called after the start and after every loop round, ``agent_train(True)``
    once at the end.
    """
    print("Testing.")
    size_of_observation = 128 + 210 * 160
    last_round = 255

    def make_observation(color):
        # Observation whose intArray is uniformly filled with ``color``.
        observation = Observation()
        observation.intArray = np.full(size_of_observation, color,
                                       dtype=np.uint8)
        return observation

    print("Setting up agent.")
    agent = setup()

    agent.agent_start(make_observation(1))
    agent.agent_train(False)

    for i in range(2, last_round + 1):
        print("Round %d" % i)
        agent.agent_step(float(i), make_observation(i))
        agent.agent_train(False)

    # One extra step that repeats the final round's reward and color.
    agent.agent_step(float(last_round), make_observation(last_round))

    agent.agent_train(True)
class test_empty_environment(Environment):
    """Environment that alternates between an empty and a filled observation.

    Episodes with an even count hand out ``emptyObservation``; odd episodes
    hand out ``nonEmptyObservation``, which ``env_init`` fills in.
    """

    whichEpisode = 0
    emptyObservation = Observation()
    nonEmptyObservation = Observation(2, 4, 5)

    def env_init(self):
        """Fill the non-empty observation and return an empty task spec."""
        filled = self.nonEmptyObservation
        filled.intArray = [0, 1]
        filled.doubleArray = [quarter / 4.0 for quarter in range(4)]
        filled.charArray = list('abcde')
        return ""

    def env_start(self):
        """Count a new episode and return the observation for its parity."""
        self.whichEpisode += 1
        if self.whichEpisode % 2 != 0:
            return self.nonEmptyObservation
        return self.emptyObservation

    def env_step(self, action):
        """Return a default result carrying the current episode's observation."""
        result = Reward_observation_terminal()
        even_episode = self.whichEpisode % 2 == 0
        result.o = (self.emptyObservation if even_episode
                    else self.nonEmptyObservation)
        return result

    def env_cleanup(self):
        """Nothing to release."""
        pass

    def env_message(self, inMessage):
        """Ignore the message; always answers None."""
        return None
Exemple #4
0
	def agent_init(self,taskSpecString):
		"""Set the fixed problem size and create a zero-filled Q table.

		The task spec string is not consulted; sizes are hard-coded to
		4 actions and 144 states.
		"""
		self.numActions = 4
		self.numStates = 144
		# One independent row of action values per state.
		self.qfunction = [[0.0] * self.numActions for _ in range(self.numStates)]
		self.lastAction = Action()
		self.lastObs = Observation()
Exemple #5
0
 def makeObservation(self):
     """Build an observation from position, fuel and passenger fields.

     ``doubleArray`` is ``self.pos`` as a list, followed by ``self.fuel``
     when ``self.fuel_loc`` is set; ``intArray`` holds ``self.pass_loc``
     and ``self.pass_dest``.
     """
     obs = Observation()
     continuous = self.pos.tolist()
     if self.fuel_loc is not None:
         continuous += [self.fuel]
     obs.doubleArray = continuous
     obs.intArray = [self.pass_loc, self.pass_dest]
     return obs
Exemple #6
0
class test_1_environment(Environment):
    """Test environment with a scripted sequence of observations.

    The first five steps of an episode report the step counter and reward
    1.0 (the fifth is terminal); later steps report a fixed set of extreme
    values and reward -2.0. A single shared ``Observation`` is reused.
    """

    stepCount = 0
    o = Observation()

    def env_init(self):
        """Return the task specification string."""
        return "sample task spec"

    def env_start(self):
        """Reset the step counter and return the initial observation."""
        self.stepCount = 0

        first = self.o
        first.intArray = [1]
        first.doubleArray = [0.0 / 2.0, 1.0 / 2.0]
        first.charArray = ['a', 'b', 'c']
        return first

    def env_step(self, action):
        """Produce the scripted reward/observation for the current step."""
        result = Reward_observation_terminal()
        obs = self.o

        if self.stepCount < 5:
            obs.doubleArray = []
            obs.charArray = []
            obs.intArray = [self.stepCount]

            self.stepCount += 1
            # The step that brings the counter to 5 ends the episode.
            terminal = self.stepCount == 5
            result.r = 1.0
        else:
            obs.doubleArray = [
                0.0078125, -0.0078125, 0.0, 0.0078125e150, -0.0078125e150
            ]
            obs.charArray = ['g', 'F', '?', ' ', '&']
            obs.intArray = [173, -173, 2147483647, 0, -2147483648]
            terminal = False
            result.r = -2.0

        result.o = obs
        result.terminal = terminal
        return result

    def env_cleanup(self):
        """Nothing to release."""
        pass

    def env_message(self, inMessage):
        """Echo the message around ``stepCount % 3`` copies of the counter.

        Result layout: ``inMessage | <count>. ... | inMessage`` (bytes).
        """
        repeats = self.stepCount % 3
        head = inMessage + b"|"
        counters = b"".join(
            bytes("%d" % (self.stepCount), encoding='ascii') + b"."
            for _ in range(repeats))
        return head + counters + b"|" + inMessage
Exemple #7
0
    def env_start(self):
        """Place the agent in state 10 and return it as the first observation."""
        self.currentState = 10

        start_obs = Observation()
        start_obs.intArray = [self.currentState]
        return start_obs
Exemple #8
0
 def env_start(self):
     """Reset the environment and return the initial observation.

     ``doubleArray`` is cart location, cart velocity, then the pole
     angles followed by the pole velocities.
     """
     self.reset()
     start_obs = Observation()
     cart_part = [self.cart_location, self.cart_velocity]
     angle_part = self.pole_angle.tolist()
     velocity_part = self.pole_velocity.tolist()
     start_obs.doubleArray = cart_part + angle_part + velocity_part
     return start_obs
    def env_start(self):
        """Place the agent and return the first observation.

        With ``fixedStartState`` set, the agent is put at
        (``startRow``, ``startCol``); if that position is rejected a warning
        is printed and a random state is used instead. Otherwise a random
        state is always used.

        Returns:
            Observation whose ``intArray`` holds the flat state and whose
            ``charArray`` holds four "T" flags.
        """
        if self.fixedStartState:
            stateValid = self.setAgentState(self.startRow, self.startCol)
            if not stateValid:
                # Report row AND column; the message used to show the row
                # twice.
                print("The fixed start state was NOT valid: %d,%d" %
                      (int(self.startRow), int(self.startCol)))
                self.setRandomState()
        else:
            self.setRandomState()

        returnObs = Observation()
        returnObs.intArray = [self.calculateFlatState()]
        # Up, Right, Down, Option1, Option2
        returnObs.charArray = ["T", "T", "T", "T"]

        # Now add characters based on options present.
        # NOTE(review): the options are looked up at (startRow, startCol)
        # even when the agent was placed randomly, and with four entries an
        # offset of 3 only leaves room for option 0 - confirm both are
        # intended.
        for option in self.optionsArray[self.startRow][self.startCol]:
            returnObs.charArray[3 + option] = "T"

        return returnObs
Exemple #10
0
    def env_step(self, thisAction):
        """Apply the integer action and report reward, state and termination.

        Gaussian noise with scale ``self.reward_noise`` is added to the
        reward when that scale is positive. The terminal flag is 1 when
        ``isAtGoal()`` holds after the action, else 0.
        """
        logger = logging.getLogger('pyrl.environments.gridworld.env_step')
        action_id = thisAction.intArray[0]
        logger.debug("Action to take: %d", action_id)
        reward = self.takeAction(action_id)

        terminal = 0
        if self.isAtGoal():
            logger.info("Episode completed!!")
            terminal = 1

        if self.reward_noise > 0:
            reward += numpy.random.normal(scale=self.reward_noise)

        observation = Observation()
        observation.doubleArray = self.getState()

        result = Reward_observation_terminal()
        result.r = reward
        result.o = observation
        result.terminal = terminal

        logger.info("(Action - State - Reward): (%d - %s - %f)", action_id,
                    pformat(observation), reward)

        return result
Exemple #11
0
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Parses the task specification and, when the agent supports it, sizes
        and initializes the model and the planner. An unsupported spec only
        prints a message.

        Args:
            taskSpec: The RLGlue task specification string.
        """
        # functools.reduce exists on Python 2.6+ and is the only spelling
        # available on Python 3.
        from functools import reduce

        # (Re)initialize parameters (in case they have been changed during a
        # trial)
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if self.agent_supported(TaskSpec):
            self.numStates = len(TaskSpec.getDoubleObservations())
            self.discStates = numpy.array(TaskSpec.getIntObservations())
            # Product of the sizes of all discrete observation ranges.
            self.numDiscStates = int(
                reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates,
                       1.0))
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            self.model.model_init(self.numDiscStates,
                                  TaskSpec.getDoubleObservations(),
                                  self.numActions,
                                  TaskSpec.getRewardRange()[0])
            self.planner.planner_init(self.numDiscStates,
                                      TaskSpec.getDoubleObservations(),
                                      self.numActions,
                                      TaskSpec.getRewardRange()[0])
        else:
            # The parameter is named ``taskSpec``; the old message referenced
            # an undefined ``taskSpecString`` and raised NameError here.
            print("Task Spec could not be parsed: " + taskSpec)

        self.lastAction = Action()
        self.lastObservation = Observation()
    def env_start(self):
        """Prepare the world for a new episode and return the first observation.

        Registers start and terminal states, assigns the terminal rewards,
        puts the agent on a randomly chosen start state and resets the step
        counter.
        """
        world = self.world

        # Register start and terminal states with the world.
        world.add_starts(*self.start_states)
        world.add_terminals(*self.terminal_states.keys())
        for (row, col), reward in self.terminal_states.items():
            world[row][col].reward = reward

        # Pick one of the start states for the agent.
        start_index = random.randrange(len(self.start_states))
        world.agent_state = list(self.start_states[start_index])

        self.steps = 0

        self.step_out('START WORLD:')
        self.step_out(world)

        # Hand the agent state over to the agent.
        first_obs = Observation()
        first_obs.intArray = world.agent_state
        return first_obs
Exemple #13
0
 def env_start(self):
     """Set the start state and return its flat index as the observation."""
     self.setStartState()
     start_obs = Observation()
     flat_state = self.calculateFlatState(self.agentRow, self.agentCol)
     start_obs.intArray = [flat_state]
     return start_obs
Exemple #14
0
    def agent_init(self, taskSpecString):
        """Size the value function from the task spec.

        A valid spec must describe exactly one discrete observation
        dimension and one discrete action dimension, no continuous ones, and
        numeric (non-special) bounds; each violation trips an assertion. An
        invalid spec only prints a message.

        Args:
            taskSpecString: The RL-Glue task specification string.
        """
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations()
                       ) == 1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations()
                       ) == 0, "expecting no continuous observations"
            obs_min, obs_max = TaskSpec.getIntObservations()[0][:2]
            assert not TaskSpec.isSpecial(
                obs_min
            ), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(
                obs_max
            ), " expecting max observation to be a number not a special value"
            self.numStates = obs_max + 1

            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"
            act_min, act_max = TaskSpec.getIntActions()[0][:2]
            assert not TaskSpec.isSpecial(
                act_min
            ), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(
                act_max
            ), " expecting max action to be a number not a special value"
            self.numActions = act_max + 1

            # One row per state, one column per action.
            self.value_function = numpy.zeros(
                [self.numStates, self.numActions])
        else:
            # Single-argument call form prints the same on Python 2 and 3.
            print("Task Spec could not be parsed: " + taskSpecString)

        self.lastAction = Action()
        self.lastObservation = Observation()
Exemple #15
0
    def env_step(self, thisAction):
        """Move one state left (action 0) or right (action 1).

        The state is clamped to [0, 20]. Reaching 0 gives reward -1 and
        reaching 20 gives reward 1; both end the episode. Any other step
        has reward 0.
        """
        move = thisAction.intArray[0]
        if move == 0:
            self.currentState -= 1
        elif move == 1:
            self.currentState += 1

        reward, terminal = 0, 0
        if self.currentState <= 0:
            self.currentState = 0
            reward, terminal = -1, 1
        if self.currentState >= 20:
            self.currentState = 20
            reward, terminal = 1, 1

        observation = Observation()
        observation.intArray = [self.currentState]

        result = Reward_observation_terminal()
        result.r = reward
        result.o = observation
        result.terminal = terminal
        return result
    def agent_init(self, taskSpecString):
        """Validate the task spec and remember its continuous ranges.

        A valid spec must have no discrete observations or actions, 12
        continuous observation dimensions and 4 continuous action
        dimensions; each violation trips an assertion. An invalid spec only
        prints a message.

        Args:
            taskSpecString: The RL-Glue task specification string.
        """
        print("Agent Up")
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            # Diagnostic dump of action/observation ranges, formatted as one
            # string so the output is the same on Python 2 and 3.
            print("%d :  %s \n%d :  %s" % (
                len(TaskSpec.getDoubleActions()),
                TaskSpec.getDoubleActions(),
                len(TaskSpec.getDoubleObservations()),
                TaskSpec.getDoubleObservations()))
            assert len(TaskSpec.getIntObservations()
                       ) == 0, "expecting no discrete observations"
            assert len(TaskSpec.getDoubleObservations(
            )) == 12, "expecting 12-dimensional continuous observations"

            assert len(
                TaskSpec.getIntActions()) == 0, "expecting no discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 4, "expecting 4-dimensional continuous actions"

            self.obs_specs = TaskSpec.getDoubleObservations()
            self.actions_specs = TaskSpec.getDoubleActions()
        else:
            print("Task Spec could not be parsed: " + taskSpecString)

        self.lastAction = Action()
        self.lastObservation = Observation()
Exemple #17
0
    def agent_init(self, spec):
        """Set hyper-parameters, replay tables and the Q-network.

        Args:
            spec: The RL-Glue task specification string.

        Raises:
            ValueError: If the task specification is not valid.
        """
        taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
        if not taskSpec.valid:
            # Raising a plain string is itself a TypeError; use a real
            # exception so the message actually reaches the caller.
            raise ValueError("Invalid task spec")
        self.num_actions = taskSpec.getIntActions()[0][1] + 1
        self.last_observation = Observation()

        self.batch_size = 32  # batch size for SGD
        self.ep_start = 1  # initial value of epsilon in epsilon-greedy exploration
        self.ep = self.ep_start  # exploration probability
        self.ep_end = 0.1  # final value of epsilon in epsilon-greedy exploration
        self.ep_endt = 1000000  # number of frames over which epsilon is linearly annealed
        self.episode_qvals = []
        self.all_qvals = []
        self.learn_start = 0  # number of steps after which learning starts
        self.is_testing = False
        self.replay_memory = 1000000
        self.phi_length = 4  # number of most recent frames for input to Q-function
        self.reset_after = 10000  # replace Q_hat with Q after this many steps
        self.step_counter = 0
        self.episode_counter = 0
        self.total_reward = 0
        self.qvals = []

        self.train_table = TransitionTable(self.phi_length, self.replay_memory,
                                           RESIZED_WIDTH, RESIZED_HEIGHT)
        self.test_table = TransitionTable(self.phi_length, self.phi_length,
                                          RESIZED_WIDTH, RESIZED_HEIGHT)
        if self.network_file is None:
            self.network = DeepQLearner(RESIZED_WIDTH, RESIZED_HEIGHT,
                                        self.num_actions, self.phi_length,
                                        self.batch_size)
        else:
            # NOTE(review): unpickling runs arbitrary code from the file;
            # only load network files from a trusted source.
            # Binary mode plus a context manager: pickles are byte streams
            # and the handle must not be leaked.
            with open(self.network_file, 'rb') as network_handle:
                self.network = cPickle.load(network_handle)
    def agent_init(self, taskSpecString):
        """Create the Q table, coordinate features and random weights.

        The task spec string is not consulted; sizes are hard-coded to
        4 actions, 144 states and 12 values per coordinate.
        """
        self.numActions = 4
        self.numStates = 144
        self.qfunction = [[0.0] * self.numActions
                          for _ in range(self.numStates)]

        # x and y coordinate features: 0..11
        self.phi1 = np.arange(12)
        self.phi2 = np.arange(12)

        def random_row():
            # Four independent uniform draws.
            return [random.random() for _ in range(4)]

        self.thetax = np.array([random_row() for _ in range(12)])
        self.thetay = np.array([random_row() for _ in range(12)])
        self.thetaxy = np.array(
            [[random_row() for _ in range(12)] for _ in range(12)])

        self.lastAction = Action()
        self.lastObs = Observation()
    def env_step(self, thisAction):
        """Apply one integer action and return reward, observation, terminal.

        The action must be a single integer with 0 <= value < 4, otherwise
        an assertion fails.
        """
        moves = thisAction.intArray
        # Make sure the action is valid
        assert len(moves) == 1, "Expected 1 integer action."
        assert moves[0] >= 0, "Expected action to be in [0,4]"
        assert moves[0] < 4, "Expected action to be in [0,4]"

        self.updatePosition(moves[0])
        lastActionValue = moves[0]

        observation = Observation()
        observation.intArray = [self.calculateFlatState()]
        observation.charArray = ["T", "T", "T", "T"]
        # Flag every option available at the agent's current cell.
        for option in self.optionsArray[self.agentRow][self.agentCol]:
            observation.charArray[2 + option] = "T"

        result = Reward_observation_terminal()
        result.r = self.calculateReward(lastActionValue)
        result.o = observation
        result.terminal = self.checkCurrentTerminal()
        return result
Exemple #20
0
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Parses the task specification, selects the basis named by
        ``self.fa_name`` (falling back to the trivial basis) and allocates
        weights and traces. Exits the process with status 1 when the task
        spec is not supported.

        Args:
            taskSpec: The RLGlue task specification string.
        """
        # functools.reduce exists on Python 2.6+ and is the only spelling
        # available on Python 3.
        from functools import reduce

        # (Re)initialize parameters (in case they have been changed during a
        # trial)
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if not self.agent_supported(TaskSpec):
            # The parameter is named ``taskSpec``; the old message referenced
            # an undefined ``taskSpecString`` and raised NameError here.
            print("Task Spec could not be parsed: " + taskSpec)
            sys.exit(1)

        self.numStates = len(TaskSpec.getDoubleObservations())
        self.discStates = numpy.array(TaskSpec.getIntObservations())
        # Product of the sizes of all discrete observation ranges.
        self.numDiscStates = int(
            reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
        self.numActions = TaskSpec.getIntActions()[0][1] + 1
        if self.numStates == 0:
            # Only discrete states
            self.numStates = 1
            if self.fa_name != "trivial":
                print("Selected basis requires at least one continuous "
                      "feature. Using trivial basis.")
                self.fa_name = "trivial"

        # Set up the function approximation
        if self.fa_name == 'fourier':
            self.basis = fourier.FourierBasis(self.numStates,
                                              TaskSpec.getDoubleObservations(),
                                              order=self.params.setdefault(
                                                  'fourier_order', 3))
        elif self.fa_name == 'rbf':
            # An 'rbf_number' of 0 means one function per state dimension.
            num_functions = self.numStates if self.params.setdefault(
                'rbf_number', 0) == 0 else self.params['rbf_number']
            self.basis = rbf.RBFBasis(self.numStates,
                                      TaskSpec.getDoubleObservations(),
                                      num_functions=num_functions,
                                      beta=self.params.setdefault(
                                          'rbf_beta', 0.9))
        elif self.fa_name == 'tile':
            self.basis = tilecode.TileCodingBasis(
                self.numStates,
                TaskSpec.getDoubleObservations(),
                num_tiles=self.params.setdefault('tile_number', 100),
                num_weights=self.params.setdefault('tile_weights', 2048))
        else:
            self.basis = trivial.TrivialBasis(self.numStates,
                                              TaskSpec.getDoubleObservations())

        self.weights = numpy.zeros(
            (self.numDiscStates, self.basis.getNumBasisFunctions(),
             self.numActions))
        self.traces = numpy.zeros(self.weights.shape)
        self.init_stepsize(self.weights.shape, self.params)

        self.lastAction = Action()
        self.lastObservation = Observation()
Exemple #21
0
 def env_start(self):
     """Reset the environment and return its state as the first observation."""
     logger = logging.getLogger('pyrl.environments.gridworld.env_start')
     self.reset()
     logger.info("Environment started")

     start_obs = Observation()
     start_obs.doubleArray = self.getState()
     logger.debug("Observation to return: %s", pformat(start_obs))
     return start_obs
Exemple #22
0
    def env_start(self):
        """Seed and reset the environment, then report ``self.s``."""
        self.seed()
        self.reset()

        start_obs = Observation()
        start_obs.intArray = [self.s]
        return start_obs
Exemple #23
0
    def agent_init(self, taskSpec):
        """
        Create empty last-action and last-observation holders.

        taskSpec: string
            Not used here; part of the RL-Glue agent interface.
        """
        self.lastAction, self.lastObservation = Action(), Observation()
Exemple #24
0
    def agent_init(self,taskSpecString):
        """Parse the task spec and load the pickled abstraction matrices.

        Reads ``chi_mat.dat``, ``valid_states.dat``, ``tmatrixperfect.dat``
        and ``pmatrixperfect.dat`` from the current working directory.

        Args:
            taskSpecString: The RL-Glue task specification string.
        """
        def load_pickle(path):
            # Binary mode plus a context manager: pickles are byte streams
            # and the handle must not be leaked.
            # NOTE(review): unpickling runs arbitrary code from the file;
            # only load data files from a trusted source.
            with open(path, 'rb') as handle:
                return pickle.load(handle)

        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations())==1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations())==0, "expecting no continuous observations"
            assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), " expecting max observation to be a number not a special value"
            self.numStates = TaskSpec.getIntObservations()[0][1] + 1

            assert len(TaskSpec.getIntActions())==1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions())==0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), " expecting max action to be a number not a special value"
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            self.episode = 0
        else:
            # NOTE(review): execution continues after this message, and the
            # code below reads numStates/numActions, which are only set in
            # the valid branch.
            print("Task Spec could not be parsed: " + taskSpecString)

        # np.asmatrix is what np.mat aliased; np.mat was removed in NumPy 2.0.
        self.chi_mat = np.asmatrix(load_pickle('chi_mat.dat'))
        num_abstract = self.chi_mat.shape[1]

        # 0,1,2,3 - primitive actions, 4... - options
        self.value_function = [(num_abstract + self.numActions) * [0.0]
                               for _ in range(self.numStates)]

        # Each row of chi_mat is assigned to its highest-valued column.
        self.absStateMembership = []
        self.statesInAbsState = [[] for _ in range(num_abstract)]
        for row_i, row in enumerate(self.chi_mat):
            abstract_state = row.argmax()
            self.absStateMembership.append(abstract_state)
            self.statesInAbsState[abstract_state].append(row_i)

        # This is just to get a mapping from the indices of chi_mat to the
        # values returned by the environment
        self.valid_states = load_pickle('valid_states.dat')

        self.lastAction = Action()
        self.lastObservation = Observation()

        self.t_mat = np.asmatrix(load_pickle('tmatrixperfect.dat'))
        self.p_mat = load_pickle('pmatrixperfect.dat')

        self.connect_mat = self.chi_mat.T * self.t_mat * self.chi_mat
Exemple #25
0
    def env_start(self):
        """Start in column 0 on the row of a randomly chosen start state."""
        start_index = random.randint(0, 3)
        start_obs = Observation()

        # Column is zero for all the 4 starting states
        self.presentCol = 0
        self.presentRow = self.Start_states[start_index][0]

        start_obs.intArray = [self.rolloutstate()]
        return start_obs
    def env_start(self):
        """
        Reset the state to nine zeros and return it as the observation.
        """
        self.state = [0] * 9

        start_obs = Observation()
        start_obs.intArray = self.state
        return start_obs
Exemple #27
0
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Parses the task specification, selects the Fourier or the trivial
        basis and allocates weights and traces. Exits the process with
        status 1 when the task spec is not supported.

        Args:
            taskSpec: The RLGlue task specification string.
        """
        # functools.reduce exists on Python 2.6+ and is the only spelling
        # available on Python 3.
        from functools import reduce

        # (Re)initialize parameters (in case they have been changed during a
        # trial)
        log = logging.getLogger('pyrl.agents.sarsa_lambda.agent_init')
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if not self.agent_supported(TaskSpec):
            # Single-argument call form prints the same on Python 2 and 3.
            print("Task Spec could not be parsed: " + taskSpec)
            sys.exit(1)

        self.numStates = len(TaskSpec.getDoubleObservations())
        log.info("Ranges: %s", TaskSpec.getDoubleObservations())
        self.discStates = numpy.array(TaskSpec.getIntObservations())
        # Product of the sizes of all discrete observation ranges.
        self.numDiscStates = int(
            reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        if self.numStates == 0:
            # Only discrete states
            self.numStates = 1
            if self.fa_name != "trivial":
                print("Selected basis requires at least one continuous "
                      "feature. Using trivial basis.")
                self.fa_name = "trivial"

        # Set up the function approximation
        if self.fa_name == 'fourier':
            self.basis = fourier.FourierBasis(self.numStates,
                                              TaskSpec.getDoubleObservations(),
                                              order=self.params.setdefault(
                                                  'fourier_order', 3))
        else:
            self.basis = trivial.TrivialBasis(self.numStates,
                                              TaskSpec.getDoubleObservations())

        log.debug("Num disc states: %d", self.numDiscStates)
        numStates = self.basis.getNumBasisFunctions()
        log.debug("Num states: %d", numStates)
        log.debug("Num actions: %d", self.numActions)
        self.weights = numpy.zeros(
            (self.numDiscStates, numStates, self.numActions))
        self.traces = numpy.zeros(self.weights.shape)
        self.init_stepsize(self.weights.shape, self.params)
        self.lastAction = Action()
        self.lastObservation = Observation()
        log.debug("Sarsa Lambda agent after initialization: %s",
                  pformat(self.__dict__))
Exemple #28
0
    def getObservation(self):
        """Build an observation from the current piece and enabled features.

        ``doubleArray`` starts with a constant 1.0, followed by the
        original and/or Dellacherie features when the matching flag is set;
        ``intArray`` holds the current piece.
        """
        obs = Observation()

        feature_values = [1.]
        if self.original_features:
            feature_values.extend(mdptetris.features_original())
        if self.dellacherie_features:
            feature_values.extend(mdptetris.features_dellacherie())

        obs.intArray = [mdptetris.current_piece()]
        obs.doubleArray = feature_values
        return obs
Exemple #29
0
    def env_start(self):
        """ Create a fresh :class:`PinballModel` and report its state

        :returns: Observation whose ``doubleArray`` is the model state
        :rtype: :class:`Observation`

        """
        self.pinball = PinballModel(self.configuration)

        initial = Observation()
        initial.doubleArray = self.pinball.get_state()
        return initial
Exemple #30
0
    def env_start(self):
        """Pick a random start row and column and return the rollout state.

        The row is drawn from one of the two ranges in ``self.states``
        (chosen at random); the column is drawn from 0..999.
        """
        range_index = random.randint(0, 1)
        low, high = self.states[range_index][0], self.states[range_index][1]
        start_row = random.randint(low, high)
        start_obs = Observation()

        self.presentCol = random.randint(0, 999)
        self.presentRow = start_row

        start_obs.intArray = [self.rolloutstate()]
        return start_obs