def agent_init(self, taskSpecString):
        print "Agent Up"
        # print taskSpecString
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            print len(TaskSpec.getDoubleActions()), ": ", TaskSpec.getDoubleActions(), \
                '\n', len(TaskSpec.getDoubleObservations()), ": ", TaskSpec.getDoubleObservations()
            assert len(TaskSpec.getIntObservations()) == 0, \
                "expecting no discrete observations"
            assert len(TaskSpec.getDoubleObservations()) == 12, \
                "expecting 12-dimensional continuous observations"

            assert len(TaskSpec.getIntActions()) == 0, \
                "expecting no discrete actions"
            assert len(TaskSpec.getDoubleActions()) == 4, \
                "expecting 4-dimensional continuous actions"

            self.obs_specs = TaskSpec.getDoubleObservations()
            self.actions_specs = TaskSpec.getDoubleActions()
            # print "Observations: ",self.obs_specs
            # print "actions_specs:", self.actions_specs

        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
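A minimal usage sketch for the bounds parsed above; the clip_action helper is hypothetical and only illustrates how the (min, max) pairs returned by getDoubleActions() can be applied to keep a candidate action inside the valid range.

import numpy as np

def clip_action(action, action_specs):
    # action_specs: list of (min, max) pairs, e.g. the value stored in self.actions_specs
    specs = np.asarray(action_specs, dtype=float)
    return np.clip(np.asarray(action, dtype=float), specs[:, 0], specs[:, 1])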
Example #2
    def agent_init(self, task_spec_string):
        """
        This function is called once at the beginning of an experiment.

        Arguments: task_spec_string - A string defining the task.  This string
                                      is decoded using
                                      TaskSpecVRLGLUE3.TaskSpecParser
        """
        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert len(TaskSpec.getDoubleActions()) == 0, \
                "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
                " expecting max action to be a number not a special value"
            self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                             height=CROPPED_HEIGHT,
                                             max_steps=self.max_history,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                                  height=CROPPED_HEIGHT,
                                                  max_steps=10,
                                                  phi_length=self.phi_length)
        self.epsilon = 1.
        self.epsilon_rate = .9 / self.max_history

        self.testing = False

        if self.nn_file is None:
            self.network = self._init_network()
        else:
            handle = open(self.nn_file, 'r')
            self.network = cPickle.load(handle)

        self._open_results_file()
        self._open_learning_file()

        self.step_counter = 0
        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None
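self.epsilon and self.epsilon_rate above set up a linear anneal from 1.0 toward 0.1 over max_history steps; a minimal per-step sketch of that decay (the 0.1 floor is an assumption consistent with the 0.9 range, not taken from this listing):

def anneal_epsilon(epsilon, epsilon_rate, floor=0.1):
    # Decrease epsilon by epsilon_rate each training step, never below the floor.
    return max(floor, epsilon - epsilon_rate)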
Example #3
    def agent_init(self, spec):
        taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
        if taskSpec.valid:
            self.num_actions = taskSpec.getIntActions()[0][1] + 1
        else:
            raise "Invalid task spec"
        self.last_observation = Observation()

        self.batch_size = 32  # batch size for SGD
        self.ep_start = 1  # initial value of epsilon in epsilon-greedy exploration
        self.ep = self.ep_start  # exploration probability
        self.ep_end = 0.1  # final value of epsilon in epsilon-greedy exploration
        self.ep_endt = 1000000  # number of frames over which epsilon is linearly annealed
        self.episode_qvals = []
        self.all_qvals = []
        self.learn_start = 0  # number of steps after which learning starts
        self.is_testing = False
        self.replay_memory = 1000000
        self.phi_length = 4  # number of most recent frames for input to Q-function
        self.reset_after = 10000  # replace Q_hat with Q after this many steps
        self.step_counter = 0
        self.episode_counter = 0
        self.total_reward = 0
        self.qvals = []

        self.train_table = TransitionTable(self.phi_length, self.replay_memory,
                                           RESIZED_WIDTH, RESIZED_HEIGHT)
        self.test_table = TransitionTable(self.phi_length, self.phi_length,
                                          RESIZED_WIDTH, RESIZED_HEIGHT)
        if self.network_file is None:
            self.network = DeepQLearner(RESIZED_WIDTH, RESIZED_HEIGHT,
                                        self.num_actions, self.phi_length,
                                        self.batch_size)
        else:
            self.network = cPickle.load(open(self.network_file))
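ep_start, ep_end, ep_endt and learn_start above describe the usual DQN-style linear anneal of the exploration probability; a sketch of epsilon as a function of the global step count (this helper is illustrative, not part of the original agent):

def annealed_epsilon(step, ep_start=1.0, ep_end=0.1, ep_endt=1000000, learn_start=0):
    # Hold epsilon at ep_start until learning begins, then decay it linearly
    # to ep_end over ep_endt steps and keep it there.
    steps_since_learning = max(0, step - learn_start)
    fraction_left = max(0.0, float(ep_endt - steps_since_learning) / ep_endt)
    return ep_end + (ep_start - ep_end) * fraction_left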
    def agent_init(self, task_spec_str):
        task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

        if not task_spec.valid:
            raise ValueError(
                'Task spec could not be parsed: {}'.format(task_spec_str))

        self.gamma = task_spec.getDiscountFactor()  # discount factor
        # Build the DQN
        # Arg1: input layer size
        # Arg2: number of hidden-layer nodes
        # Arg3: output layer size
        self.Q = QNet(self.bdim * self.n_frames, self.bdim * self.n_frames,
                      self.dim)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.Q.to_gpu()
        self.xp = np if self.gpu < 0 else cuda.cupy

        self.targetQ = copy.deepcopy(self.Q)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.0)
        self.optimizer.setup(self.Q)
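self.targetQ above starts as a frozen copy of self.Q; during training such agents periodically refresh it. A minimal sketch of that sync (the interval value is illustrative):

import copy

def maybe_sync_target(agent, step, interval=10000):
    # Replace the target network with a fresh copy of the online network
    # every `interval` steps, as in standard DQN training.
    if step > 0 and step % interval == 0:
        agent.targetQ = copy.deepcopy(agent.Q)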
Example #5
    def agent_init(self, taskSpec):
        
        # taskspec check
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if TaskSpec.valid:
            assert len(TaskSpec.getDoubleObservations())>0, "expecting at least one continuous observation"
            self.state_range = np.asarray(TaskSpec.getDoubleObservations())
            
            # Check action form, and then set number of actions
            assert len(TaskSpec.getIntActions())==0, "expecting no discrete actions"
            assert len(TaskSpec.getDoubleActions())==2, "expecting 2-dimensional continuous actions"

        else:
            print "Task Spec could not be parsed"
            
        self.lbounds=[]
        self.ubounds=[]
        
        for r in self.state_range:
            self.lbounds.append(r[0])
            self.ubounds.append(r[1])
            
        self.lbounds = np.array(self.lbounds)
        self.ubounds = np.array(self.ubounds)
        
        # Some initializations for rlglue
        self.lastAction = Action()

        self.time = 0
        self.epsilon = 1.0  # Initial exploratoin rate

        # Pick a DQN from DQN_class
        self.DQN = DQN_class()  
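lbounds and ubounds above hold the per-dimension observation ranges; a minimal sketch (not from the original DQN_class code) of scaling a raw observation into [0, 1] with them:

import numpy as np

def normalize_observation(obs, lbounds, ubounds):
    # Scale each observation dimension into [0, 1] using the task-spec ranges.
    obs = np.asarray(obs, dtype=float)
    return (obs - lbounds) / (ubounds - lbounds)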
Example #6
    def agent_init(self, taskspec):
        """ This function is called once at the begining of an episode.
        Performs sanity checks with the environment.

        :param taskspec: The task specifications
        :type taskspec: str

        """
        spec = TaskSpecVRLGLUE3.TaskSpecParser(taskspec)
        if len(spec.getIntActions()) != 1:
            raise Exception("Expecting 1-dimensional discrete actions")
        if len(spec.getDoubleActions()) != 0:
            raise Exception("Expecting no continuous actions")
        if spec.isSpecial(spec.getIntActions()[0][0]):
            raise Exception(
                "Expecting min action to be a number not a special value")
        if spec.isSpecial(spec.getIntActions()[0][1]):
            raise Exception(
                "Expecting max action to be a number not a special value")

        observation_ranges = spec.getDoubleObservations()
        self.basis = FourierBasis(len(observation_ranges), self.fa_order,
                                  observation_ranges)
        self.weights = np.zeros((self.basis.numTerms, len(self.options)))

        self.last_action = 0
        self.last_features = []
        self.last_observation = []
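The FourierBasis and weight matrix above support linear value estimation over Fourier features; a self-contained sketch of the standard order-n Fourier construction (this is the textbook formula, not necessarily the project's exact FourierBasis implementation):

import itertools
import numpy as np

def fourier_features(state, order):
    # state: 1-D array already scaled to [0, 1].
    # Returns cos(pi * c . state) for every coefficient vector c in {0, ..., order}^d.
    state = np.asarray(state, dtype=float)
    coeffs = np.array(list(itertools.product(range(order + 1), repeat=len(state))))
    return np.cos(np.pi * coeffs.dot(state))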
Example #7
    def agent_init(self, taskSpecString):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations()
                       ) == 1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations()
                       ) == 0, "expecting no continuous observations"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][0]
            ), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][1]
            ), " expecting max observation to be a number not a special value"
            self.numStates = TaskSpec.getIntObservations()[0][1] + 1

            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][0]
            ), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][1]
            ), " expecting max action to be a number not a special value"
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            self.value_function = numpy.zeros(
                [self.numStates, self.numActions])
        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
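value_function above is a numStates x numActions table; a minimal sketch (assumed, not part of the sample agent) of the epsilon-greedy choice and one-step Sarsa update such a table supports:

import random
import numpy

def egreedy(value_function, state, num_actions, epsilon=0.1):
    # Pick a random action with probability epsilon, otherwise the greedy one.
    if random.random() < epsilon:
        return random.randint(0, num_actions - 1)
    return int(numpy.argmax(value_function[state]))

def sarsa_update(value_function, s, a, reward, s_next, a_next, alpha=0.1, gamma=1.0):
    # One-step Sarsa backup on the tabular value function.
    target = reward + gamma * value_function[s_next, a_next]
    value_function[s, a] += alpha * (target - value_function[s, a])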
Example #8
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Args:
            taskSpec: The RLGlue task specification string.
        """
        # (Re)initialize parameters (in case they have been changed during a trial)
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if self.agent_supported(TaskSpec):
            self.numStates = len(TaskSpec.getDoubleObservations())
            self.discStates = numpy.array(TaskSpec.getIntObservations())
            self.numDiscStates = int(
                reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates,
                       1.0))
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            self.model.model_init(self.numDiscStates, TaskSpec.getDoubleObservations(), \
                              self.numActions, TaskSpec.getRewardRange()[0])
            self.planner.planner_init(self.numDiscStates, TaskSpec.getDoubleObservations(), \
                              self.numActions, TaskSpec.getRewardRange()[0])

        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
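numDiscStates above multiplies the sizes of the discrete-observation ranges; a small worked sketch of the same reduce with illustrative ranges:

from functools import reduce  # reduce is a builtin in Python 2; the import covers Python 3

disc_ranges = [[0, 2], [1, 3]]  # two discrete dimensions with 3 values each
num_disc_states = int(reduce(lambda a, b: a * (b[1] - b[0] + 1), disc_ranges, 1.0))
assert num_disc_states == 9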
Example #9
    def agent_init(self, taskSpecification):
        """
        This function is called once at the beginning of an experiment.

        :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
        :return:
        """

        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
                " expecting max action to be a number not a special value"
            #self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        # TODO: take care of int observations
        observations = TaskSpec.getDoubleObservations()
        self.observation_size = len(observations)

        actions = TaskSpec.getDoubleActions()
        self.action_size = len(actions)

        self.testing = False
        self.batch_size = 32
        self.episode_counter = 0
        self.step_counter = 0

        if self.nn_file is None:
            self.action_network = self._init_action_network(
                len(observations), len(actions))
            self.value_network = self._init_value_network(len(observations), 1)
        else:
            handle = open(self.nn_file, 'r')
            self.network = cPickle.load(handle)

        self.action_stdev = 0.01
        self.gamma = 0.9  # TaskSpec.getDiscountFactor()

        self.data_set = data_set.DataSet(
            len(observations),
            len(actions),
            observation_dtype='float32',
            action_dtype='float32',
        )
        # just needs to be big enough to create phi's
        self.test_data_set = data_set.DataSet(len(observations),
                                              len(actions),
                                              observation_dtype='float32',
                                              action_dtype='float32')
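action_stdev above controls Gaussian exploration around the actor network's output; a minimal sketch (not from the original agent) of adding that noise and clipping to the task-spec action ranges:

import numpy as np

def explore_action(mean_action, action_stdev, action_ranges):
    # Perturb the actor output with Gaussian noise and clip to the valid range.
    ranges = np.asarray(action_ranges, dtype=float)
    noise = np.random.normal(0.0, action_stdev, size=ranges.shape[0])
    return np.clip(np.asarray(mean_action, dtype=float) + noise, ranges[:, 0], ranges[:, 1])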
Example #10
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Args:
            taskSpec: The RLGlue task specification string.
        """

        # (Re)initialize parameters (in case they have been changed during a trial)
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if not self.agent_supported(TaskSpec):
            print "Task Spec could not be parsed: " + taskSpecString
            sys.exit(1)

        self.numStates = len(TaskSpec.getDoubleObservations())
        self.discStates = numpy.array(TaskSpec.getIntObservations())
        self.numDiscStates = int(
            reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
        self.numActions = TaskSpec.getIntActions()[0][1] + 1
        if self.numStates == 0:
            # Only discrete states
            self.numStates = 1
            if self.fa_name != "trivial":
                print "Selected basis requires at least one continuous feature. Using trivial basis."
                self.fa_name = "trivial"

        # Set up the function approximation
        if self.fa_name == 'fourier':
            self.basis = fourier.FourierBasis(self.numStates,
                                              TaskSpec.getDoubleObservations(),
                                              order=self.params.setdefault(
                                                  'fourier_order', 3))
        elif self.fa_name == 'rbf':
            num_functions = self.numStates if self.params.setdefault(
                'rbf_number', 0) == 0 else self.params['rbf_number']
            self.basis = rbf.RBFBasis(self.numStates,
                                      TaskSpec.getDoubleObservations(),
                                      num_functions=num_functions,
                                      beta=self.params.setdefault(
                                          'rbf_beta', 0.9))
        elif self.fa_name == 'tile':
            self.basis = tilecode.TileCodingBasis(
                self.numStates,
                TaskSpec.getDoubleObservations(),
                num_tiles=self.params.setdefault('tile_number', 100),
                num_weights=self.params.setdefault('tile_weights', 2048))
        else:
            self.basis = trivial.TrivialBasis(self.numStates,
                                              TaskSpec.getDoubleObservations())

        self.weights = numpy.zeros(
            (self.numDiscStates, self.basis.getNumBasisFunctions(),
             self.numActions))
        self.traces = numpy.zeros(self.weights.shape)
        self.init_stepsize(self.weights.shape, self.params)

        self.lastAction = Action()
        self.lastObservation = Observation()
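weights and traces above are shaped for Sarsa(lambda) with linear function approximation; a compact sketch of the eligibility-trace update those arrays support (alpha, gamma and lam are illustrative hyperparameters, and delta is the TD error computed elsewhere):

def sarsa_lambda_update(weights, traces, phi, disc_state, action, delta,
                        alpha=0.01, gamma=1.0, lam=0.9):
    # Decay every trace, add the active features for the chosen action,
    # then move each weight along its trace by alpha times the TD error.
    traces *= gamma * lam
    traces[disc_state, :, action] += phi
    weights += alpha * delta * traces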
Example #11
    def agent_init(self,taskSpecString):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations())==1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations())==0, "expecting no continuous observations"
            assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), " expecting max observation to be a number not a special value"
            self.numStates=TaskSpec.getIntObservations()[0][1]+1;

            assert len(TaskSpec.getIntActions())==1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions())==0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), " expecting max action to be a number not a special value"
            self.numActions=TaskSpec.getIntActions()[0][1]+1;

            self.episode = 0

        else:
            print "Task Spec could not be parsed: "+taskSpecString;
            
        chimatfile = open('chi_mat.dat','r')
        unpickler = pickle.Unpickler(chimatfile)
        self.chi_mat = np.mat(unpickler.load())

        # 0,1,2,3 - primitive actions, 4... - options
        self.value_function=[(self.chi_mat.shape[1]+self.numActions)*[0.0] for i in range(self.numStates)]

        self.absStateMembership = []
        self.statesInAbsState = [[] for i in xrange(self.chi_mat.shape[1])]
        for (row_i,row) in enumerate(self.chi_mat):
            self.absStateMembership.append(row.argmax())
            self.statesInAbsState[row.argmax()].append(row_i)

        #print 'Abstract state to which state belongs:'
        #print self.absStateMembership
        #print 'States in each abstract state:'
        #print self.statesInAbsState

        #This is just to get a mapping from the indices of chi_mat to the values returned by the environment
        validstatefile = open('valid_states.dat','r')
        unpickler = pickle.Unpickler(validstatefile)
        self.valid_states = unpickler.load()
        #print 'Mapping from row indices to flat state rep:'
        #print self.valid_states

        self.lastAction=Action()
        self.lastObservation=Observation()

        tmatrixfile = open('tmatrixperfect.dat','r')
        unpickler = pickle.Unpickler(tmatrixfile)
        self.t_mat = np.mat(unpickler.load())
        
        pmatrixfile = open('pmatrixperfect.dat','r')
        self.p_mat = pickle.load(pmatrixfile)

        self.connect_mat = self.chi_mat.T*self.t_mat*self.chi_mat
Example #12
def processTaskSpec(ts):
    # you can cut the taskspec by the main words with new line
    #ts= """VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (3 0 1) DOUBLES (2 -1.2 0.5) (-.07 .07) CHARCOUNT 1024
    #     ACTIONS INTS (2 0 4) CHARCOUNT 1024 REWARDS (-5.0 UNSPEC) EXTRA some other stuff goes here"""
    print()
    print()
    print(
        "======================================================================================================="
    )
    print(ts)
    print()
    print()
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(ts)
    if TaskSpec.valid:
        print(
            "======================================================================================================="
        )
        print("Version: [" + TaskSpec.getVersion() + "]")
        print("ProblemType: [" + TaskSpec.getProblemType() + "]")
        print("DiscountFactor: [" + str(TaskSpec.getDiscountFactor()) + "]")
        print(
            "======================================================================================================="
        )
        print("\t \t \t \t Observations")
        print(
            "======================================================================================================="
        )
        print("Observations: [" + TaskSpec.getObservations() + "]")
        print("Integers:", TaskSpec.getIntObservations())
        print("Doubles: ", TaskSpec.getDoubleObservations())
        print("Chars:   ", TaskSpec.getCharCountObservations())
        print(
            "======================================================================================================="
        )
        print("\t \t \t \t Actions")
        print(
            "======================================================================================================"
        )
        print("Actions: [" + TaskSpec.getActions() + "]")
        print("Integers:", TaskSpec.getIntActions())
        print("Doubles: ", TaskSpec.getDoubleActions())
        print("Chars:   ", TaskSpec.getCharCountActions())
        print(
            "======================================================================================================="
        )
        print("Reward :[" + TaskSpec.getReward() + "]")
        print("Reward Range:", TaskSpec.getRewardRange())
        print("Extra: [" + TaskSpec.getExtra() + "]")
        print("remeber that by using len() you get the cardinality of lists!")
        print("Thus:")
        print("len(Doubles) ==> ", len(TaskSpec.getDoubleObservations()),
              " Double Observations")
    else:
        print("Task spec was invalid, but I can try to get version: " +
              TaskSpec.getVersion())
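A usage sketch for processTaskSpec, feeding it the sample task-spec string quoted in the comment near the top of the function:

if __name__ == "__main__":
    sample_spec = ("VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 "
                   "OBSERVATIONS INTS (3 0 1) DOUBLES (2 -1.2 0.5) (-.07 .07) CHARCOUNT 1024 "
                   "ACTIONS INTS (2 0 4) CHARCOUNT 1024 REWARDS (-5.0 UNSPEC) "
                   "EXTRA some other stuff goes here")
    processTaskSpec(sample_spec)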
    def agent_init(self, taskSpecification):
        """
        This function is called once at the beginning of an experiment.

        :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
        :return:
        """

        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
                " expecting max action to be a number not a special value"
            #self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        # TODO: take care of int observations
        self.observation_ranges = TaskSpec.getDoubleObservations()
        self.observation_size = len(self.observation_ranges)

        self.action_ranges = TaskSpec.getDoubleActions()
        self.action_size = len(self.action_ranges)

        self.testing = False
        self.episode_counter = 0
        self.step_counter = 0
        self.total_reward = 0

        if self.nn_action_file is None:
            self.action_network = self._init_action_network(
                self.observation_size, self.action_size, minibatch_size=1)
        else:
            handle = open(self.nn_action_file, 'r')
            self.action_network = cPickle.load(handle)

        if self.nn_value_file is None:
            self.value_network = self._init_value_network(
                self.observation_size, 1, minibatch_size=1)
        else:
            handle = open(self.nn_value_file, 'r')
            self.value_network = cPickle.load(handle)

        self.discount = TaskSpec.getDiscountFactor()

        self.action_ranges = np.asmatrix(self.action_ranges)
        self.observation_ranges = np.asmatrix(self.observation_ranges)
Example #14
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Args:
            taskSpec: The RLGlue task specification string.
        """

        # (Re)initialize parameters (in case they have been changed during a trial)
        log = logging.getLogger('pyrl.agents.sarsa_lambda.agent_init')
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if not self.agent_supported(TaskSpec):
            print "Task Spec could not be parsed: " + taskSpec
            sys.exit(1)

        self.numStates = len(TaskSpec.getDoubleObservations())
        log.info("Ranges: %s", TaskSpec.getDoubleObservations())
        self.discStates = numpy.array(TaskSpec.getIntObservations())
        self.numDiscStates = int(
            reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        # print "TSactions ", TaskSpec.getIntActions(), "TSObservation ", TaskSpec.getIntObservations()

        if self.numStates == 0:
            # Only discrete states
            self.numStates = 1
            if self.fa_name != "trivial":
                print "Selected basis requires at least one continuous feature. Using trivial basis."
                self.fa_name = "trivial"

        # Set up the function approximation
        if self.fa_name == 'fourier':
            self.basis = fourier.FourierBasis(self.numStates,
                                              TaskSpec.getDoubleObservations(),
                                              order=self.params.setdefault(
                                                  'fourier_order', 3))
        else:
            self.basis = trivial.TrivialBasis(self.numStates,
                                              TaskSpec.getDoubleObservations())

        log.debug("Num disc states: %d", self.numDiscStates)
        numStates = self.basis.getNumBasisFunctions()
        log.debug("Num states: %d", numStates)
        log.debug("Num actions: %d", self.numActions)
        self.weights = numpy.zeros(
            (self.numDiscStates, numStates, self.numActions))
        self.traces = numpy.zeros(self.weights.shape)
        self.init_stepsize(self.weights.shape, self.params)
        # print "Weights:", self.weights
        self.lastAction = Action()
        self.lastObservation = Observation()
        log.debug("Sarsa Lambda agent after initialization: %s",
                  pformat(self.__dict__))
Example #15
    def agent_init(self, taskSpec):
        """Initialize the RL agent.

        Args:
            taskSpec: The RLGlue task specification string.
        """
        self.init_parameters()
        # Consider looking at sarsa_lambda agent for a good example of filling out these methods
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        assert len(TaskSpec.getIntActions()) == 1
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.lastAction = Action()
        self.lastObservation = Observation()
        self.counter = 0
Example #16
    def agent_init(self, taskSpec):
        # See the sample_sarsa_agent in the mines-sarsa-example project for how to parse the task spec
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations()
                       ) == 1, "expecting 1-dimensional discrete observations"

            assert len(TaskSpec.getDoubleObservations()
                       ) == 0, "expecting no continuous observations"

            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][0]
            ), " expecting min observation to be a number not a special value"

            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][1]
            ), " expecting max observation to be a number not a special value"

            self.numStates = TaskSpec.getIntObservations()[0][1] + 2

            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"

            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"

            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][0]
            ), " expecting min action to be a number not a special value"

            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][1]
            ), " expecting max action to be a number not a special value"

            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            #self.value_function = [self.numActions * [0.0] for i in range(self.numStates)]

        else:
            print "Task Spec could not be parsed: " + taskSpec

        self.lastAction = Action()
        self.lastObservation = Observation()

        S0 = TaskSpec.getIntObservations()[0][1] + 1
        for a in range(self.numActions):
            self.R[S0][a] = self.rmax
            self.T[S0][a][S0] = 1.0
Example #17
    def agent_init(self, taskSpecification):
        """
        This function is called once at the beginning of an experiment.

        :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
        :return:
        """

        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert ((len(TaskSpec.getIntActions()) == 0) !=
                    (len(TaskSpec.getDoubleActions()) == 0)), \
                "expecting continous or discrete actions.  Not both."
            # assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
            #     " expecting min action to be a number not a special value"
            # assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
            #     " expecting max action to be a number not a special value"
            #self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        self.observation_ranges = TaskSpec.getDoubleObservations()
        self.observation_size = len(self.observation_ranges)

        self.continuous_actions = len(TaskSpec.getDoubleActions()) > 0
        if self.continuous_actions:
            self.action_ranges = TaskSpec.getDoubleActions()
        else:
            self.action_ranges = TaskSpec.getIntActions()
        self.action_size = len(self.action_ranges)

        self._init_network()

        self.discount = TaskSpec.getDiscountFactor()

        self.action_ranges = np.asmatrix(self.action_ranges, dtype=floatX)
        self.observation_ranges = np.asmatrix(self.observation_ranges, dtype=floatX)
Example #18
    def agent_init(self, taskSpecString):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        self.all_allowed_actions = dict()
        self.Q_value_function = dict()
        if TaskSpec.valid:
            self.nbrReaches = len(TaskSpec.getIntActions())
            self.Bad_Action_Penalty = min(TaskSpec.getRewardRange()[0])
            rewardRange = (min(TaskSpec.getRewardRange()[0]),
                           max(TaskSpec.getRewardRange()[0]))
            self.habitatSize = len(
                TaskSpec.getIntObservations()) / self.nbrReaches
            self.discount = TaskSpec.getDiscountFactor()
            theExtra = TaskSpec.getExtra().split('BUDGET')
            self.edges = eval(theExtra[0])
            self.budget = eval(theExtra[1].split("by")[0])
#            self.nbrReaches = TaskSpec.getIntActions()[0][0][0]
#            self.Bad_Action_Penalty=min(TaskSpec.getRewardRange()[0])
#            rewardRange = (min(TaskSpec.getRewardRange()[0]), max(TaskSpec.getRewardRange()[0]))
#            self.habitatSize = TaskSpec.getIntObservations()[0][0][0] / self.nbrReaches
#            self.discount = TaskSpec.getDiscountFactor()
#            self.edges=eval(TaskSpec.getExtra().split('by')[0])
        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()

        # COSTS
        cost_per_invaded_reach = 10
        cost_per_tree = 0.1
        cost_per_empty_slot = 0.09
        eradication_cost = 0.5
        restoration_cost = 0.9
        variable_eradication_cost = 0.4
        variable_restoration_cost_empty = 0.4
        variable_restoration_cost_invaded = 0.8

        #CREATE ACTION PARAMETER OBJECT
        self.actionParameterObj = ActionParameterClass(
            cost_per_tree, eradication_cost, restoration_cost, 0, 0,
            cost_per_invaded_reach, cost_per_empty_slot,
            variable_eradication_cost, variable_restoration_cost_invaded,
            variable_restoration_cost_empty, self.budget)
    def agent_init(self, taskSpecString):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations()
                       ) == 1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations()
                       ) == 0, "expecting no continuous observations"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][0]
            ), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][1]
            ), " expecting max observation to be a number not a special value"
            self.numStates = TaskSpec.getIntObservations()[0][1] + 1

            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][0]
            ), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][1]
            ), " expecting max action to be a number not a special value"
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            # Initialize the value function to 0 for each action: each state gets an array with one entry per possible action.

            #cheating
            self.numStates = 992

            self.value_function = [
                self.numActions * [0.0] for i in range(self.numStates)
            ]

            # Option actions need to be initialized above 0; setting ALL options in ALL states above 0 should suffice,
            # since only the correct states will actually have access to these options.
        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
Example #20
    def agent_init(self, task_spec_str):
        task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)
        self.eps_step = 0

        if not task_spec.valid:
            raise ValueError(
                'Task spec could not be parsed: {}'.format(task_spec_str))

        self.gamma = task_spec.getDiscountFactor()  # discount factor
        # Build the DQN
        # Arg1: input layer size
        # Arg2: number of hidden-layer nodes
        # Arg3: output layer size
        #self.Q = QNet(self.bdim*self.n_frames, self.bdim*self.n_frames, self.dim)
        self.Q = QNet(self.bdim * self.n_frames, self.dim * 3, self.dim)
        if self.file_idx >= 0:
            serializers.load_hdf5(
                self.model_name + "_{0:05}.hdf5".format(self.file_idx), self.Q)
            self.step_counter = self.file_idx * 1000
            self.learn_start += self.step_counter

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.Q.to_gpu()
        self.xp = np if self.gpu < 0 else cuda.cupy

        self.targetQ = copy.deepcopy(self.Q)

        self.optimizer = optimizers.RMSpropGraves(lr=0.01,
                                                  alpha=0.95,
                                                  momentum=0.0,
                                                  eps=0.01)
        # self.optimizer = optimizers.Adam(alpha=0.01, beta1=0.9, beta2=0.999, final_lr=0.1, gamma=0.001, eps=1e-08, eta=1.0)
        #self.optimizer = optimizers.SGD(lr=0.01)
        self.optimizer.setup(self.Q)

        if self.file_idx >= 0:
            serializers.load_hdf5(
                self.opt_name + "_{0:05}.hdf5".format(self.file_idx),
                self.optimizer)

        self.file_idx = self.file_idx + 1
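The agent above resumes training from HDF5 checkpoints via serializers.load_hdf5; the matching save, sketched here, mirrors the naming scheme used by those load calls (the helper itself is illustrative):

from chainer import serializers

def save_checkpoint(agent):
    # Write the network and optimizer state with the same file naming
    # convention the load calls above expect.
    serializers.save_hdf5(agent.model_name + "_{0:05}.hdf5".format(agent.file_idx), agent.Q)
    serializers.save_hdf5(agent.opt_name + "_{0:05}.hdf5".format(agent.file_idx), agent.optimizer)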
    def agent_init(self, task_spec_string):
        """ 
        This function is called once at the beginning of an experiment.

        Arguments: task_spec_string - A string defining the task.  This string
                                      is decoded using 
                                      TaskSpecVRLGLUE3.TaskSpecParser
        """

        self.image = None
        self.show_ale = True
        self.saving = True

        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continuous or discrete observations, not both."
            assert len(TaskSpec.getDoubleActions())==0, \
                "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
                " expecting max action to be a number not a special value"
            self.num_actions = TaskSpec.getIntActions()[0][1] + 1

        self.int_states = len(TaskSpec.getIntObservations()) > 0

        # Create empty lists for data collection.
        self.states = []
        self.actions = []
        self.rewards = []
        self.absorbs = []

        #Create appropriate RL-Glue objects for storing these.
        self.last_action = Action()
        self.last_observation = Observation()
    def agent_init(self, task_spec_str):
        task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

        if not task_spec.valid:
            raise ValueError(
                'Task spec could not be parsed: {}'.format(task_spec_str))

        self.gamma = task_spec.getDiscountFactor()

        # Why multiply the input size by n_frames? (It may not be necessary to look back over past frames.)
        self.Q = QNet(self.bdim * self.n_frames, 30, self.n_rows * self.n_cols)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.Q.to_gpu()
        self.xp = np if self.gpu < 0 else cuda.cupy

        self.targetQ = copy.deepcopy(self.Q)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.0)
        self.optimizer.setup(self.Q)
    def agent_init(self, taskSpecification):
        #copied from sample sarsa agent
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)
        if TaskSpec.valid:
            assert len(TaskSpec.getIntObservations()
                       ) == 1, "expecting 1-dimensional discrete observations"
            assert len(TaskSpec.getDoubleObservations()
                       ) == 0, "expecting no continuous observations"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][0]
            ), " expecting min observation to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntObservations()[0][1]
            ), " expecting max observation to be a number not a special value"
            self.numStates = TaskSpec.getIntObservations()[0][1] + 1

            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][0]
            ), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][1]
            ), " expecting max action to be a number not a special value"
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            self.qvalues = [
                self.numActions * [0.0] for i in range(self.numStates)
            ]

        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
Example #24
    def parse_taskspec(self, spec):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
        if TaskSpec.valid:
            self._n_double_dims = len(TaskSpec.getDoubleObservations())
            self._n_int_dims = len(TaskSpec.getIntObservations())
            self._n_double_act_dims = len(TaskSpec.getDoubleActions())
            self._n_int_act_dims = len(TaskSpec.getIntActions())

            self.limits = np.array(TaskSpec.getDoubleObservations())
            self.act_limits = None
            if self._n_double_act_dims != 0:
                self.act_limits = np.array(TaskSpec.getDoubleActions())
            if self._n_int_act_dims != 0:
                if self.act_limits is None:
                    self.act_limits = np.array(TaskSpec.getIntActions())
                else:
                    self.act_limits = np.append(self.act_limits,
                                                np.array(
                                                    TaskSpec.getIntActions()),
                                                axis=0)
            print self.act_limits
            self.act_range = self.act_limits[:, 1] - self.act_limits[:, 0]
            self._n_int_actions = np.prod(
                self.act_range[self._n_double_act_dims:])
            print spec
            # print 'Double state variables:'
            # print len(TaskSpec.getDoubleObservations())
            # print 'Integer state variables:'
            # print len(TaskSpec.getIntObservations())
            # print 'Double Actions dimensions:'
            # print self.double_action_dims()
            # print 'Integer Action dimensions:'
            # print self.int_action_dims()
            # print '#number of Integer Actions'
            # print self.num_actions()
        else:
            print "Task Spec could not be parsed: " + spec
    def agent_init(self, taskSpec):

        print("Reading taskSpec: " + taskSpec.decode())

        # Parse taskSpec
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if TaskSpec.valid:
            print("Parsing task spec...")
            self.max_u = TaskSpec.getDoubleActions()[0][1]
            self.n_action = len(TaskSpec.getDoubleActions())
            self.n_obs = len(TaskSpec.getDoubleObservations())
            print(f"Number of actions: {self.n_action}")
            print(f"Number of obs: {self.n_obs}")
            print("Task spec parsed!")
        else:
            print("Task Spec could not be parsed: " + taskSpec)

        print("Initialization of training...")

        # Variables
        # Iteration initial state: fixed during one iteration
        self.initial_state = Observation()
        self.states = []  # States encountered from the start of the training
        self.agentPolicy = np.zeros((self.n_action, self.n_obs))
        self.deltas = [
            2 * np.zeros((self.n_action, self.n_obs)) - 1
            for i in range(self.N)
        ]
        # 2N policies for the 2N rollouts
        self.deltaPolicies = [self.agentPolicy for i in range(2 * self.N)]
        # Rewards obtained at the end of the 2N rollouts
        self.rewards = [0. for i in range(2 * self.N)]
        self.count = 0  # Counter which increments only after one agent step
        self.ev_count = 0  # Counter for evaluation

        print("Training initialized!")
Example #26
    def agent_init(self, taskSpec):
        self.init_parameters()
        # Parse the task specification and set up the weights and such
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
        if TaskSpec.valid:
            # Check observation form, and then set up number of features/states
            assert len(TaskSpec.getDoubleObservations()
                       ) > 0, "expecting at least one continuous observation"
            self.numStates = len(TaskSpec.getDoubleObservations())

            # Check action form, and then set number of actions
            assert len(TaskSpec.getIntActions()
                       ) == 1, "expecting 1-dimensional discrete actions"
            assert len(TaskSpec.getDoubleActions()
                       ) == 0, "expecting no continuous actions"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][0]
            ), " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(
                TaskSpec.getIntActions()[0][1]
            ), " expecting max action to be a number not a special value"
            self.numActions = TaskSpec.getIntActions()[0][1] + 1

            # Set up the function approximation
            self.net = nl.net.newff(
                TaskSpec.getDoubleObservations(),
                [self.num_hidden, self.numActions],
                [nl.net.trans.TanSig(),
                 nl.net.trans.PureLin()])
            self.traces = copy.deepcopy(map(lambda x: x.np, self.net.layers))
            self.clearTraces()
        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
	def agent_init(self,taskSpecString):
		print taskSpecString
		TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
		if TaskSpec.valid:
			print len(TaskSpec.getDoubleActions()),": ",TaskSpec.getDoubleActions(),'\n',len(TaskSpec.getDoubleObservations()),": ",TaskSpec.getDoubleObservations()
			# assert len(TaskSpec.getIntObservations())==12, "expecting 1-dimensional discrete observations"
			assert len(TaskSpec.getDoubleObservations())==0, "expecting no continuous observations"
			# assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), " expecting min observation to be a number not a special value"
			# assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), " expecting max observation to be a number not a special value"
			self.numStates=TaskSpec.getIntObservations()[0][1]+1;

			assert len(TaskSpec.getIntActions())==1, "expecting 1-dimensional discrete actions"
			assert len(TaskSpec.getDoubleActions())==0, "expecting no continuous actions"
			assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), " expecting min action to be a number not a special value"
			assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), " expecting max action to be a number not a special value"
			self.numActions=TaskSpec.getIntActions()[0][1]+1;

			self.value_function=[self.numActions*[0.0] for i in range(self.numStates)]

		else:
			print "Task Spec could not be parsed: "+taskSpecString;

		self.lastAction=Action()
		self.lastObservation=Observation()
Example #28
    def agent_init(self, taskSpecString):
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        self.all_allowed_actions = dict()
        self.Q_value_function = dict()
        if TaskSpec.valid:
            self.nbrReaches = len(TaskSpec.getIntActions())
            self.Bad_Action_Penalty=min(TaskSpec.getRewardRange()[0])
            rewardRange = (min(TaskSpec.getRewardRange()[0]), max(TaskSpec.getRewardRange()[0]))
            self.habitatSize = len(TaskSpec.getIntObservations()) / self.nbrReaches
            self.sarsa_gamma = TaskSpec.getDiscountFactor()
            theExtra=TaskSpec.getExtra().split('BUDGET')
            self.edges=eval(theExtra[0])
            self.budget=eval(theExtra[1].split("by")[0])
#            self.nbrReaches = TaskSpec.getIntActions()[0][0][0]
#            self.Bad_Action_Penalty=min(TaskSpec.getRewardRange()[0])
#            rewardRange = (min(TaskSpec.getRewardRange()[0]), max(TaskSpec.getRewardRange()[0]))
#            self.habitatSize = TaskSpec.getIntObservations()[0][0][0] / self.nbrReaches
#            self.sarsa_gamma = TaskSpec.getDiscountFactor()
#            self.edges=eval(TaskSpec.getExtra().split('by')[0])
        else:
            print "Task Spec could not be parsed: " + taskSpecString

        self.lastAction = Action()
        self.lastObservation = Observation()
Example #29
    def agent_init(self, taskSpecString):
        taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
        if taskSpec.valid:
            logger.info('TaskSpec parsed: ' + taskSpecString)
        else:
            logger.info('TaskSpec could not be parsed: ' + taskSpecString)
    def agent_init(self,task_spec_string):
        """ 
        This function is called once at the beginning of an experiment.

        Arguments: task_spec_string - A string defining the task.  This string
                                      is decoded using 
                                      TaskSpecVRLGLUE3.TaskSpecParser
        """
        self.start_time = time.time()
        self.image = None
        self.show_ale = False
        self.total_reward = 0
        self.mini_batch_size = 32
        self.num_mini_batches = 1
        self.frame_count = 0
        self.frames_trained = 0
        self.qvalue_sum = 0
        self.qvalue_count = 0
        self.predicted_reward = 0
        learning_rate = .00001
        self.testing_policy = False
        self.epoch_counter = 0
        self.epochs_until_test = 5
        self.policy_test_file_name = "results.csv"
        load_file = False
        load_file_name = "cnnparams.pkl"
        self.save_file_name = "cnnparams.pkl"
        self.counter = 0
        self.cur_action = 0
        
        #starting value for epsilon-greedy
        self.epsilon = 1

        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
        if TaskSpec.valid:
            
            assert ((len(TaskSpec.getIntObservations())== 0) != \
                (len(TaskSpec.getDoubleObservations()) == 0 )), \
                "expecting continous or discrete observations.  Not both."
            assert len(TaskSpec.getDoubleActions())==0, \
                "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
                " expecting max action to be a number not a special value"
            self.num_actions=TaskSpec.getIntActions()[0][1]+1

        self.num_actions = 3
        
        self.int_states = len(TaskSpec.getIntObservations()) > 0

        # Create neural network and initialize trainer and dataset
        
        if load_file:
            thefile = open(load_file_name, "r")
            
            self.cnn = cPickle.load(thefile)
        else:
        
            self.first_conv_layer = maxout.MaxoutConvC01B(16, 1, (8, 8), (1, 1), 
                            (1, 1), "first conv layer", irange=.1, 
                                            kernel_stride=(4, 4), min_zero=True)
                                            
            self.second_conv_layer = maxout.MaxoutConvC01B(32, 1, (4, 4), 
                            (1, 1), (1, 1), "second conv layer", irange=.1, 
                                            kernel_stride=(2, 2), min_zero=True)
                                            
            self.rect_layer = mlp.RectifiedLinear(dim=256, 
                            layer_name="rectified layer", irange=.1)
                            
            self.output_layer = mlp.Linear(self.num_actions, "output layer", 
                            irange=.1)

            layers = [self.first_conv_layer, self.second_conv_layer, 
                            self.rect_layer, self.output_layer]

            self.cnn = mlp.MLP(layers, input_space = Conv2DSpace((80, 80), 
                                    num_channels=4, axes=('c', 0, 1, 'b')), 
                                    batch_size=self.mini_batch_size)

        self.data = nqd.NeuralRewardPredictorDataset(self.cnn, mini_batch_size = self.mini_batch_size, 
                                            num_mini_batches = self.num_mini_batches, 
                                            learning_rate=learning_rate)

        #Create appropriate RL-Glue objects for storing these.
        self.last_action=Action()
        self.last_observation=Observation()

        thefile = open(self.policy_test_file_name, "w")
        thefile.write("Reward, Predicted reward, Frames trained\n")
        thefile.close()