Example #1
    def agent_init(self, task_spec_string):
        """
        This function is called once at the beginning of an experiment.

        Arguments: task_spec_string - A string defining the task.  This string
                                      is decoded using
                                      TaskSpecVRLGLUE3.TaskSpecParser
        """
        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert len(TaskSpec.getDoubleActions()) == 0, \
                "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
                " expecting max action to be a number not a special value"
            self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                             height=CROPPED_HEIGHT,
                                             max_steps=self.max_history,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                                  height=CROPPED_HEIGHT,
                                                  max_steps=10,
                                                  phi_length=self.phi_length)
        self.epsilon = 1.
        self.epsilon_rate = .9 / self.max_history

        self.testing = False

        if self.nn_file is None:
            self.network = self._init_network()
        else:
            handle = open(self.nn_file, 'r')
            self.network = cPickle.load(handle)

        self._open_results_file()
        self._open_learning_file()

        self.step_counter = 0
        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None
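Example #1 sets self.epsilon = 1. and epsilon_rate = .9 / self.max_history, i.e. a linear anneal from 1.0 down to roughly 0.1 over max_history steps. A minimal standalone sketch of how such an annealed epsilon-greedy step could use these fields follows; the helper names and the 0.1 floor are illustrative, not taken from the code above.

import numpy as np

def anneal_epsilon(agent):
    # Linear schedule: shrink epsilon by epsilon_rate each step,
    # never dropping below an assumed floor of 0.1.
    agent.epsilon = max(0.1, agent.epsilon - agent.epsilon_rate)

def epsilon_greedy(agent, q_values):
    # With probability epsilon take a random action, otherwise the greedy one.
    if np.random.rand() < agent.epsilon:
        return np.random.randint(agent.num_actions)
    return int(np.argmax(q_values))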
Example #2
    def __init__(self, q_network, epsilon_start, epsilon_min,
                 epsilon_decay, replay_memory_size, exp_pref,
                 replay_start_size, update_frequency):

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_pref = exp_pref
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height


        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.network.lr).replace(".", "p") + "_" \
                       + "{}".format(self.network.discount).replace(".", "p")

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self.num_actions = self.network.num_actions
        self.testing = False
        self.total_reward = 0
        self.episode_counter = 0


        self.data_set = ale_data_set.DataSet(self.network.state_count,
                                             max_steps=self.replay_memory_size,
                                             phi_length=self.phi_length)
                                             
        self.test_data_set = ale_data_set.DataSet(self.network.state_count,
                                                  max_steps=self.replay_memory_size,
                                                  phi_length=self.phi_length)
    
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0
            
        self.holdout_data = None
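Example #2 (like several of the examples below) creates the results directory with a try / os.stat / os.makedirs idiom: os.stat raises OSError when the path does not exist, and only then is the directory created. A more direct equivalent, assuming a simple existence check is enough (Example #3 below already uses it), would be:

import os

# Create the experiment directory only if it is missing;
# exp_dir here stands in for self.exp_dir.
if not os.path.isdir(exp_dir):
    os.makedirs(exp_dir)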
Example #3
    def __init__(self, sarsa_network, args, epsilon_min,
                 epsilon_decay, exp_pref, logger, rng):

        self.network = sarsa_network
        self.epsilon_start = args.epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.exp_pref = exp_pref
        self.rng = rng
        self.phi_length = self.network.num_frames
        self.image_height = self.network.input_height
        self.image_width = self.network.input_width
        self.data_set = ale_data_set.DataSet(self.image_height, self.image_width, self.phi_length, rng)
        self._game_name = args.game
        self.logger = logger

        # Create folder to save the network
        self.model_dir = "./%s/%s_sarsa" % (args.saved_model_dir, args.game)
        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)

        # CREATE A FOLDER TO HOLD RESULTS
#        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
#        self.exp_dir = self.exp_pref + time_str + \
#                       "{}".format(self.network.lr).replace(".", "p") + "_" \
#                       + "{}".format(self.network.discount).replace(".", "p")
#
#        try:
#            os.stat(self.exp_dir)
#        except OSError:
#            os.makedirs(self.exp_dir)
#
        self.num_actions = self.network.num_actions

        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        #self._open_results_file()
        #self._open_learning_file()

        self.episode_counter = 0
        self.last_img = None
        self.last_action = None
Example #4
    def __init__(self, sarsa_network, epsilon_start, epsilon_min,
                 epsilon_decay, exp_pref, rng):

        self.network = sarsa_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.exp_pref = exp_pref
        self.rng = rng
        self.phi_length = self.network.num_frames
        self.image_height = self.network.input_height
        self.image_width = self.network.input_width
        self.data_set = ale_data_set.DataSet(self.image_height,
                                             self.image_width, self.phi_length,
                                             rng)

        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.network.lr).replace(".", "p") + "_" \
                       + "{}".format(self.network.discount).replace(".", "p")

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self.num_actions = self.network.num_actions

        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        self._open_results_file()
        self._open_learning_file()

        self.episode_counter = 0

        self.last_img = None
        self.last_action = None
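Example #4 (like #2, #5, and #7) names the results directory from the experiment prefix, a gmtime timestamp, and the network's learning rate and discount with the decimal point replaced by "p". A quick standalone illustration of what that produces, with made-up prefix and hyperparameter values:

import time

exp_pref = "breakout"            # hypothetical experiment prefix
lr, discount = 0.0002, 0.95      # hypothetical hyperparameters

time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
exp_dir = exp_pref + time_str + \
          "{}".format(lr).replace(".", "p") + "_" + \
          "{}".format(discount).replace(".", "p")
# e.g. "breakout_07-15-12-30_0p0002_0p95"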
Example #5
    def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
                 replay_memory_size, exp_pref, replay_start_size,
                 update_frequency, rng):

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_pref = exp_pref
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.rng = rng

        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height

        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.network.lr).replace(".", "p") + "_" \
                       + "{}".format(self.network.discount).replace(".", "p")

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self.save_info_file()

        self.num_actions = self.network.num_actions

        self.data_set = ale_data_set.DataSet(width=self.image_width,
                                             height=self.image_height,
                                             rng=rng,
                                             max_steps=self.replay_memory_size,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                                  height=self.image_height,
                                                  rng=rng,
                                                  max_steps=self.phi_length *
                                                  2,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        self.testing = False

        self._open_results_file()
        self._open_learning_file()

        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None
        self.holdout_ram = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None
        self.last_ram = None
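From Example #5 onward the evaluation data set is sized at phi_length * 2: it only needs to hold enough recent frames to assemble one phi, i.e. a stack of the last phi_length screens, for the current decision. A toy illustration of assembling such a stack with NumPy; the 4-frame, 84x84 shapes are illustrative, not read from the network above:

import numpy as np

phi_length, height, width = 4, 84, 84
recent_frames = [np.zeros((height, width), dtype=np.uint8)
                 for _ in range(phi_length)]

# A phi is simply the last phi_length frames stacked along a new leading axis.
phi = np.stack(recent_frames)    # shape: (4, 84, 84)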
Example #6
    def __init__(self,
                 q_network,
                 epsilon_start,
                 epsilon_min,
                 epsilon_decay,
                 replay_memory_size,
                 experiment_directory,
                 replay_start_size,
                 update_frequency,
                 rng,
                 recording=True):

        self.results_file = self.learning_file = None
        self.best_epoch_reward = None

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.rng = rng

        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height

        self.recording = recording

        self.exp_dir = experiment_directory
        if self.recording:
            try:
                os.stat(self.exp_dir)
            except OSError:
                os.makedirs(self.exp_dir)
            self.record_parameters()

        self.num_actions = self.network.num_actions

        self.data_set = ale_data_set.DataSet(width=self.image_width,
                                             height=self.image_height,
                                             rng=rng,
                                             max_steps=self.replay_memory_size,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                                  height=self.image_height,
                                                  rng=rng,
                                                  max_steps=self.phi_length *
                                                  2,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        self.testing = False

        self._open_results_file()
        self._open_learning_file()

        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None
Example #7
    def __init__(self,
                 q_network,
                 epsilon_start,
                 epsilon_min,
                 epsilon_decay,
                 replay_memory_size,
                 exp_pref,
                 replay_start_size,
                 update_frequency,
                 rng,
                 max_epochs,
                 use_human_net=False,
                 use_human_exp_replay=False,
                 human_net=None,
                 human_exp_replay=None):

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_pref = exp_pref
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.rng = rng
        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height
        self.num_actions = self.network.num_actions

        # Daniel: some stuff I added.
        self.max_epochs = max_epochs
        self.use_human_net = use_human_net
        if self.use_human_net:
            self.human_net = human_net
        self.use_human_exp_replay = use_human_exp_replay
        if self.use_human_exp_replay:
            self.human_exp_replay = human_exp_replay
        assert (self.use_human_net == False) or (self.use_human_exp_replay
                                                 == False)
        self.actions_train_ep = [0 for i in range(self.num_actions)]
        self.actions_test_ep = [0 for i in range(self.num_actions)]

        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.localtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.network.lr).replace(".", "p") + "_" \
                       + "{}".format(self.network.discount).replace(".", "p")
        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self.data_set = ale_data_set.DataSet(width=self.image_width,
                                             height=self.image_height,
                                             rng=rng,
                                             max_steps=self.replay_memory_size,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                                  height=self.image_height,
                                                  rng=rng,
                                                  max_steps=self.phi_length *
                                                  2,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0
        self.testing = False

        self._open_results_file()
        self._open_learning_file()
        self._open_actions_file()

        self.episode_counter = 0
        self.batch_counter = 0
        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None

        # Exponential moving average of runtime performance.
        self.steps_sec_ema = 0.
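Example #7 also tracks steps_sec_ema, an exponential moving average of training throughput. The constructor only initializes it to 0.; the usual update rule for such an average, with an assumed decay factor, looks like this:

def update_ema(previous_ema, new_value, decay=0.98):
    # Blend the previous average with the latest measurement;
    # a decay close to 1.0 gives a slowly moving, smooth estimate.
    return decay * previous_ema + (1.0 - decay) * new_value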
Example #8
    def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
                 replay_memory_size, exp_pref, replay_start_size,
                 update_frequency, rng):

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_pref = exp_pref
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.random_state = rng

        ## Remember the dimensionality of the input space
        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height
        ## The output layer size of the q-value network approximator
        self.num_actions = self.network.num_actions

        ## Allocate experience replay datasets: a large one for training ...
        self.dataset_training = ale_data_set.DataSet(
            width=self.image_width,
            height=self.image_height,
            rng=self.random_state,
            max_steps=self.replay_memory_size,
            phi_length=self.phi_length)
        ##   ... and a small one for testing.
        ## Since during the testing phase no learning takes place, we just need
        ##  this dataset to be big enough to hold the current phi (state x phi_length).
        ##  Thus "max_steps" is set to double the size of phi.
        self.dataset_testing = ale_data_set.DataSet(width=self.image_width,
                                                    height=self.image_height,
                                                    rng=self.random_state,
                                                    max_steps=self.phi_length *
                                                    2,
                                                    phi_length=self.phi_length)

        ## The epsilon-probability changes across the epochs!
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        # CREATE A FOLDER TO HOLD THE RESULTS
        time_str = time.strftime("_%m-%d-%H-%M", time.gmtime())
        self.exp_dir = self.exp_pref + time_str +  "_" \
                       + "{}".format( self.network.lr ).replace( ".", "p" ) + "_" \
                       + "{}".format( self.network.discount ).replace( ".", "p" )

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self._open_results_file()
        self._open_learning_file()

        self.holdout_observations = None
        ## Logging:
        logging.info("NeuralAgent: actions %d, phi %d" % (
            self.num_actions,
            self.phi_length,
        ))
Example #9
    def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
                 replay_memory_size, exp_dir, replay_start_size,
                 update_frequency, rng):

        self.network = q_network
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_dir = exp_dir
        self.replay_start_size = replay_start_size
        self.update_frequency = update_frequency
        self.rng = rng

        self.logger = logging.getLogger("DeepLogger")
        self.episode_no = 0

        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height

        self.num_actions = self.network.num_actions

        self.data_set = ale_data_set.DataSet(width=self.image_width,
                                             height=self.image_height,
                                             rng=rng,
                                             max_steps=self.replay_memory_size,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                                  height=self.image_height,
                                                  rng=rng,
                                                  max_steps=self.phi_length *
                                                  2,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        self.testing = False

        self._open_results_file()
        self._open_learning_file()

        # weird name, does not count episodes, only iterated when testing?
        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None

        # Exponential moving average of runtime performance
        self.steps_sec_ema = 0
    def agent_init(self, task_spec_string):
        """
        This function is called once at the beginning of an experiment.

        Arguments: task_spec_string - A string defining the task.  This string
                                      is decoded using
                                      TaskSpecVRLGLUE3.TaskSpecParser
        """
        # DO SOME SANITY CHECKING ON THE TASKSPEC
        TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
        if TaskSpec.valid:

            assert ((len(TaskSpec.getIntObservations()) == 0) !=
                    (len(TaskSpec.getDoubleObservations()) == 0)), \
                "expecting continous or discrete observations.  Not both."
            assert len(TaskSpec.getDoubleActions()) == 0, \
                "expecting no continuous actions"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
                " expecting min action to be a number not a special value"
            assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
                " expecting max action to be a number not a special value"
            self.num_actions = TaskSpec.getIntActions()[0][1]+1
        else:
            print "INVALID TASK SPEC"

        self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                             height=CROPPED_HEIGHT,
                                             max_steps=self.max_history,
                                             phi_length=self.phi_length)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                                  height=CROPPED_HEIGHT,
                                                  max_steps=10,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = .9 / self.epsilon_decay
        else:
            self.epsilon_rate = 0
            

        self.testing = False

        if self.nn_file is None:
            self.network = self._init_network()
        else:
            handle = open(self.nn_file, 'r')
            self.network = cPickle.load(handle)

        # If a trained network has been specified,
        # use it to initialize weights
        if self.nn_trained_share is None:
            print "No sharing between networks"
        else:
            print "Sharing between networks ", self.nn_trained_share
            print "Layers", self.share_layers
            handle = open(self.nn_trained_share, 'r')
            trained_network = cPickle.load(handle)

            if self.share_layers.find('1') != -1:
                # Sharing weights of the first convolutional layer
                print "Sharing weights for Convolution Layer 1"
                self.network.q_layers[2].W.set_value(trained_network.q_layers[2].W.get_value())
                self.network.q_layers[2].b.set_value(trained_network.q_layers[2].b.get_value())
                self.network.q_layers[2].bias_params[0].set_value(trained_network.q_layers[2].bias_params[0].get_value())

                if self.flip == 1:
                    print "Flipping weights in the first convolutional layer"
                    W_old = trained_network.q_layers[2].W.get_value()
                    for i in xrange(4):
                        for j in xrange(16):
                            temp = W_old[i, :, :, j]
                            W_old[i, :, :, j] = temp[::-1].T
                    self.network.q_layers[2].W.set_value(W_old)
                            


            # Sharing weights of the second convolutional layer
            if self.share_layers.find('2') != -1:
                print "Sharing weights for Convolution Layer 2"
                self.network.q_layers[3].W.set_value(trained_network.q_layers[3].W.get_value())
                self.network.q_layers[3].b.set_value(trained_network.q_layers[3].b.get_value())
                self.network.q_layers[3].bias_params[0].set_value(trained_network.q_layers[3].bias_params[0].get_value())

            # Sharing weights of the fully connected layer
            if self.share_layers.find('3') != -1:
                print "Sharing weights for FC layer"
                self.network.q_layers[5].W.set_value(trained_network.q_layers[5].W.get_value())
                self.network.q_layers[5].b.set_value(trained_network.q_layers[5].b.get_value())
                self.network.q_layers[5].bias_params[0].set_value(trained_network.q_layers[5].bias_params[0].get_value())

        self._open_results_file()
        self._open_learning_file()

        self.step_counter = 0
        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None
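In the weight-sharing variant of agent_init above, the optional flip replaces each first-layer kernel slice with temp[::-1].T, i.e. reversing the rows and then transposing, which amounts to a 90-degree clockwise rotation. A small standalone check of that equivalence with NumPy (unrelated to the Theano shared variables above):

import numpy as np

k = np.arange(9).reshape(3, 3)                    # toy 3x3 kernel slice
flipped = k[::-1].T                               # what the loop above computes
assert np.array_equal(flipped, np.rot90(k, -1))   # 90-degree clockwise rotation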
    def __init__(self,
                 q_network,
                 epsilon_start,
                 epsilon_min,
                 epsilon_decay,
                 replay_memory_size,
                 exp_pref,
                 update_frequency,
                 replay_start_size,
                 rng,
                 transitions_sequence_length,
                 transition_range,
                 penalty_method,
                 weight_min,
                 weight_max,
                 weight_decay_length,
                 beta,
                 two_train=False,
                 late2=True,
                 close2=True,
                 verbose=False,
                 double=False,
                 save_pkl=True):
        self.double_dqn = double
        self.network = q_network
        self.num_actions = q_network.num_actions
        self.epsilon_start = epsilon_start
        self.update_frequency = update_frequency
        self.beta = beta

        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replay_memory_size = replay_memory_size
        self.exp_dir = exp_pref + '_' + str(weight_max) + '_' + str(weight_min)
        if late2:
            self.exp_dir += '_l2'
        if close2:
            self.exp_dir += '_close2'
        else:
            self.exp_dir += '_len' + str(
                transitions_sequence_length) + '_r' + str(transition_range)
        if two_train:
            self.exp_dir += '_TTR'

        self.replay_start_size = replay_start_size
        self.rng = rng
        self.transition_len = transitions_sequence_length
        self.two_train = two_train
        self.verbose = verbose
        if verbose > 0:
            print "Using verbose", verbose
            self.exp_dir += '_vb' + str(verbose)

        self.phi_length = self.network.num_frames
        self.image_width = self.network.input_width
        self.image_height = self.network.input_height
        self.penalty_method = penalty_method
        self.batch_size = self.network.batch_size
        self.discount = self.network.discount
        self.transition_range = transition_range
        self.late2 = late2
        self.close2 = close2
        self.same_update = False
        self.save_pkl = save_pkl

        self.start_index = 0
        self.terminal_index = None

        self.weight_max = weight_max
        self.weight_min = weight_min
        self.weight = self.weight_max
        self.weight_decay_length = weight_decay_length
        self.weight_decay = (self.weight_max -
                             self.weight_min) / self.weight_decay_length

        self.batchnum = 0
        self.epi_len = 0
        self.batch_count = 0
        self.epi_state = None
        self.epi_actions = None
        self.epi_rewards = None
        self.epi_terminals = None
        self.Q_tilde = None
        self.y_ = None

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self.data_set = ale_data_set.DataSet(
            width=self.image_width,
            height=self.image_height,
            rng=rng,
            max_steps=self.replay_memory_size,
            phi_length=self.phi_length,
            discount=self.discount,
            batch_size=self.batch_size,
            transitions_len=self.transition_len)

        # just needs to be big enough to create phi's
        self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                                  height=self.image_height,
                                                  rng=rng,
                                                  max_steps=self.phi_length *
                                                  2,
                                                  phi_length=self.phi_length)
        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        self.testing = False

        self._open_results_file()
        self._open_learning_file()
        self._open_recording_file()

        self.step_counter = 0
        self.episode_reward = 0
        self.start_time = None
        self.loss_averages = None
        self.total_reward = 0

        self.episode_counter = 0
        self.batch_counter = 0

        self.holdout_data = None

        # In order to add an element to the data set we need the
        # previous state and action and the current reward.  These
        # will be used to store states and actions.
        self.last_img = None
        self.last_action = None

        # Exponential moving average of runtime performance.
        self.steps_sec_ema = 0.
        self.program_start_time = None
        self.last_count_time = None
        self.epoch_time = None
        self.total_time = None
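The last example adds a second linearly decayed quantity, self.weight, intended to move from weight_max down to weight_min over weight_decay_length steps, mirroring the epsilon schedule. A standalone sketch of that decay follows; the concrete values and the step loop are illustrative only, since the constructor above computes nothing beyond the per-step decrement:

weight_max, weight_min, weight_decay_length = 1.0, 0.1, 1000   # assumed values
weight_decay = (weight_max - weight_min) / weight_decay_length

weight = weight_max
for _ in range(weight_decay_length):
    weight = max(weight_min, weight - weight_decay)
# after weight_decay_length steps, weight sits at weight_min (up to float rounding)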