def agent_init(self, taskSpecString):
    print "Agent Up"
    # print taskSpecString
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        print len(TaskSpec.getDoubleActions()), ": ", TaskSpec.getDoubleActions(), \
            '\n', len(TaskSpec.getDoubleObservations()), ": ", TaskSpec.getDoubleObservations()
        assert len(TaskSpec.getIntObservations()) == 0, "expecting no discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 12, "expecting 12-dimensional continuous observations"
        assert len(TaskSpec.getIntActions()) == 0, "expecting no discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 4, "expecting 4-dimensional continuous actions"

        self.obs_specs = TaskSpec.getDoubleObservations()
        self.actions_specs = TaskSpec.getDoubleActions()
        # print "Observations: ", self.obs_specs
        # print "actions_specs:", self.actions_specs
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments: task_spec_string - A string defining the task.  This string
    is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                         height=CROPPED_HEIGHT,
                                         max_steps=self.max_history,
                                         phi_length=self.phi_length)

    # just needs to be big enough to create phi's
    self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                              height=CROPPED_HEIGHT,
                                              max_steps=10,
                                              phi_length=self.phi_length)
    self.epsilon = 1.
    self.epsilon_rate = .9 / self.max_history

    self.testing = False

    if self.nn_file is None:
        self.network = self._init_network()
    else:
        handle = open(self.nn_file, 'r')
        self.network = cPickle.load(handle)

    self._open_results_file()
    self._open_learning_file()

    self.step_counter = 0
    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None

def agent_init(self, spec):
    taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
    if taskSpec.valid:
        self.num_actions = taskSpec.getIntActions()[0][1] + 1
    else:
        raise ValueError("Invalid task spec")

    self.last_observation = Observation()

    self.batch_size = 32       # batch size for SGD
    self.ep_start = 1          # initial value of epsilon in epsilon-greedy exploration
    self.ep = self.ep_start    # exploration probability
    self.ep_end = 0.1          # final value of epsilon in epsilon-greedy exploration
    self.ep_endt = 1000000     # number of frames over which epsilon is linearly annealed
    self.episode_qvals = []
    self.all_qvals = []
    self.learn_start = 0       # number of steps after which learning starts
    self.is_testing = False
    self.replay_memory = 1000000
    self.phi_length = 4        # number of most recent frames for input to Q-function
    self.reset_after = 10000   # replace Q_hat with Q after this many steps

    self.step_counter = 0
    self.episode_counter = 0
    self.total_reward = 0
    self.qvals = []

    self.train_table = TransitionTable(self.phi_length, self.replay_memory,
                                       RESIZED_WIDTH, RESIZED_HEIGHT)
    self.test_table = TransitionTable(self.phi_length, self.phi_length,
                                      RESIZED_WIDTH, RESIZED_HEIGHT)

    if self.network_file is None:
        self.network = DeepQLearner(RESIZED_WIDTH, RESIZED_HEIGHT,
                                    self.num_actions, self.phi_length,
                                    self.batch_size)
    else:
        self.network = cPickle.load(open(self.network_file))

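# A minimal sketch (not part of the agent above) of the linear epsilon
# annealing that the ep_start / ep_end / ep_endt / learn_start settings
# describe: epsilon decays linearly from ep_start to ep_end over ep_endt
# steps once learning has started. The function name is illustrative.
def annealed_epsilon(step, ep_start=1.0, ep_end=0.1, ep_endt=1000000, learn_start=0):
    # Steps taken since learning began; no decay before learn_start.
    progress = max(0, step - learn_start)
    fraction = min(1.0, float(progress) / ep_endt)
    return ep_start - (ep_start - ep_end) * fraction
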
def agent_init(self, task_spec_str):
    task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

    if not task_spec.valid:
        raise ValueError(
            'Task spec could not be parsed: {}'.format(task_spec_str))

    self.gamma = task_spec.getDiscountFactor()  # discount factor

    # Build the DQN
    # Arg1: input layer size
    # Arg2: number of hidden-layer nodes
    # Arg3: output layer size
    self.Q = QNet(self.bdim * self.n_frames, self.bdim * self.n_frames, self.dim)

    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.Q.to_gpu()
    self.xp = np if self.gpu < 0 else cuda.cupy

    self.targetQ = copy.deepcopy(self.Q)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.0)
    self.optimizer.setup(self.Q)

def agent_init(self, taskSpec):
    # taskspec check
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if TaskSpec.valid:
        assert len(TaskSpec.getDoubleObservations()) > 0, "expecting at least one continuous observation"
        self.state_range = np.asarray(TaskSpec.getDoubleObservations())

        # Check action form, and then set number of actions
        assert len(TaskSpec.getIntActions()) == 0, "expecting no discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 2, "expecting 2-dimensional continuous actions"
    else:
        print "Task Spec could not be parsed"

    self.lbounds = []
    self.ubounds = []

    for r in self.state_range:
        self.lbounds.append(r[0])
        self.ubounds.append(r[1])

    self.lbounds = np.array(self.lbounds)
    self.ubounds = np.array(self.ubounds)

    # Some initializations for rlglue
    self.lastAction = Action()

    self.time = 0
    self.epsilon = 1.0  # Initial exploration rate

    # Pick a DQN from DQN_class
    self.DQN = DQN_class()

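# A minimal sketch (assumption, not part of the agent above) of one common use
# of the lbounds/ubounds arrays stored above: scaling a raw observation into
# the unit interval per dimension before feeding it to the network. The
# function name is illustrative.
import numpy as np

def scale_observation(obs, lbounds, ubounds):
    # obs, lbounds, ubounds: 1-D arrays of equal length; bounds assumed distinct.
    obs = np.asarray(obs, dtype=np.float64)
    return (obs - lbounds) / (ubounds - lbounds)
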
def agent_init(self, taskspec):
    """
    This function is called once at the beginning of an episode.
    Performs sanity checks with the environment.

    :param taskspec: The task specifications
    :type taskspec: str
    """
    spec = TaskSpecVRLGLUE3.TaskSpecParser(taskspec)
    if len(spec.getIntActions()) != 1:
        raise Exception("Expecting 1-dimensional discrete actions")
    if len(spec.getDoubleActions()) != 0:
        raise Exception("Expecting no continuous actions")
    if spec.isSpecial(spec.getIntActions()[0][0]):
        raise Exception(
            "Expecting min action to be a number not a special value")
    if spec.isSpecial(spec.getIntActions()[0][1]):
        raise Exception(
            "Expecting max action to be a number not a special value")

    observation_ranges = spec.getDoubleObservations()
    self.basis = FourierBasis(len(observation_ranges), self.fa_order, observation_ranges)
    self.weights = np.zeros((self.basis.numTerms, len(self.options)))

    self.last_action = 0
    self.last_features = []
    self.last_observation = []

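# A minimal sketch (assumption, names illustrative) of how a Fourier-basis
# feature vector and a (numTerms x numOptions) weight matrix like the ones
# initialized above combine into linear value estimates: Q(s, o) = phi(s) . w[:, o].
import numpy as np

def option_values(features, weights):
    # features: 1-D array of basis activations phi(s), length num_terms
    # weights:  (num_terms, num_options) array
    # returns:  1-D array with one value estimate per option
    return np.dot(features, weights)
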
def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
            "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
            "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.value_function = numpy.zeros([self.numStates, self.numActions])
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

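# A minimal sketch (not part of the original agent, names illustrative) of how
# a tabular value_function like the [numStates x numActions] array above is
# typically queried for epsilon-greedy action selection during agent_start/agent_step.
import random

def egreedy_action(value_function, state, num_actions, epsilon=0.1):
    """Pick a random action with probability epsilon, else the greedy one."""
    if random.random() < epsilon:
        return random.randint(0, num_actions - 1)
    # value_function[state] holds one value estimate per action.
    return max(range(num_actions), key=lambda a: value_function[state][a])
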
def agent_init(self, taskSpec):
    """Initialize the RL agent.

    Args:
        taskSpec: The RLGlue task specification string.
    """
    # (Re)initialize parameters, in case they have been changed during a trial
    self.init_parameters()

    # Parse the task specification and set up the weights and such
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if self.agent_supported(TaskSpec):
        self.numStates = len(TaskSpec.getDoubleObservations())
        self.discStates = numpy.array(TaskSpec.getIntObservations())
        self.numDiscStates = int(
            reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.model.model_init(self.numDiscStates, TaskSpec.getDoubleObservations(),
                              self.numActions, TaskSpec.getRewardRange()[0])
        self.planner.planner_init(self.numDiscStates, TaskSpec.getDoubleObservations(),
                                  self.numActions, TaskSpec.getRewardRange()[0])
    else:
        print "Task Spec could not be parsed: " + taskSpec

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, taskSpecification):
    """
    This function is called once at the beginning of an experiment.

    :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    :return:
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
            "expecting max action to be a number not a special value"
        # self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    observations = TaskSpec.getDoubleObservations()  # TODO: take care of int observations
    self.observation_size = len(observations)

    actions = TaskSpec.getDoubleActions()
    self.action_size = len(actions)

    self.testing = False
    self.batch_size = 32
    self.episode_counter = 0
    self.step_counter = 0

    if self.nn_file is None:
        self.action_network = self._init_action_network(len(observations), len(actions))
        self.value_network = self._init_value_network(len(observations), 1)
    else:
        handle = open(self.nn_file, 'r')
        self.network = cPickle.load(handle)

    self.action_stdev = 0.01
    self.gamma = 0.9  # TaskSpec.getDiscountFactor()

    self.data_set = data_set.DataSet(len(observations), len(actions),
                                     observation_dtype='float32',
                                     action_dtype='float32')
    # just needs to be big enough to create phi's
    self.test_data_set = data_set.DataSet(len(observations), len(actions),
                                          observation_dtype='float32',
                                          action_dtype='float32')

def agent_init(self, taskSpec):
    """Initialize the RL agent.

    Args:
        taskSpec: The RLGlue task specification string.
    """
    # (Re)initialize parameters, in case they have been changed during a trial
    self.init_parameters()

    # Parse the task specification and set up the weights and such
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if not self.agent_supported(TaskSpec):
        print "Task Spec could not be parsed: " + taskSpec
        sys.exit(1)

    self.numStates = len(TaskSpec.getDoubleObservations())
    self.discStates = numpy.array(TaskSpec.getIntObservations())
    self.numDiscStates = int(
        reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
    self.numActions = TaskSpec.getIntActions()[0][1] + 1

    if self.numStates == 0:
        # Only discrete states
        self.numStates = 1
        if self.fa_name != "trivial":
            print "Selected basis requires at least one continuous feature. Using trivial basis."
            self.fa_name = "trivial"

    # Set up the function approximation
    if self.fa_name == 'fourier':
        self.basis = fourier.FourierBasis(self.numStates,
                                          TaskSpec.getDoubleObservations(),
                                          order=self.params.setdefault('fourier_order', 3))
    elif self.fa_name == 'rbf':
        num_functions = self.numStates if self.params.setdefault('rbf_number', 0) == 0 \
            else self.params['rbf_number']
        self.basis = rbf.RBFBasis(self.numStates,
                                  TaskSpec.getDoubleObservations(),
                                  num_functions=num_functions,
                                  beta=self.params.setdefault('rbf_beta', 0.9))
    elif self.fa_name == 'tile':
        self.basis = tilecode.TileCodingBasis(self.numStates,
                                              TaskSpec.getDoubleObservations(),
                                              num_tiles=self.params.setdefault('tile_number', 100),
                                              num_weights=self.params.setdefault('tile_weights', 2048))
    else:
        self.basis = trivial.TrivialBasis(self.numStates, TaskSpec.getDoubleObservations())

    self.weights = numpy.zeros(
        (self.numDiscStates, self.basis.getNumBasisFunctions(), self.numActions))
    self.traces = numpy.zeros(self.weights.shape)
    self.init_stepsize(self.weights.shape, self.params)

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
            "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
            "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.episode = 0
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    chimatfile = open('chi_mat.dat', 'r')
    unpickler = pickle.Unpickler(chimatfile)
    self.chi_mat = np.mat(unpickler.load())

    # 0,1,2,3 - primitive actions, 4... - options
    self.value_function = [(self.chi_mat.shape[1] + self.numActions) * [0.0]
                           for i in range(self.numStates)]

    self.absStateMembership = []
    self.statesInAbsState = [[] for i in xrange(self.chi_mat.shape[1])]
    for (row_i, row) in enumerate(self.chi_mat):
        self.absStateMembership.append(row.argmax())
        self.statesInAbsState[row.argmax()].append(row_i)

    # print 'Abstract state to which state belongs:'
    # print self.absStateMembership
    # print 'States in each abstract state:'
    # print self.statesInAbsState

    # This is just to get a mapping from the indices of chi_mat to the values
    # returned by the environment
    validstatefile = open('valid_states.dat', 'r')
    unpickler = pickle.Unpickler(validstatefile)
    self.valid_states = unpickler.load()

    # print 'Mapping from row indices to flat state rep:'
    # print self.valid_states

    self.lastAction = Action()
    self.lastObservation = Observation()

    tmatrixfile = open('tmatrixperfect.dat', 'r')
    unpickler = pickle.Unpickler(tmatrixfile)
    self.t_mat = np.mat(unpickler.load())

    pmatrixfile = open('pmatrixperfect.dat', 'r')
    self.p_mat = pickle.load(pmatrixfile)

    self.connect_mat = self.chi_mat.T * self.t_mat * self.chi_mat

def processTaskSpec(ts):
    # you can cut the taskspec by the main words with new line
    # ts = """VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1
    #         OBSERVATIONS INTS (3 0 1) DOUBLES (2 -1.2 0.5) (-.07 .07) CHARCOUNT 1024
    #         ACTIONS INTS (2 0 4) CHARCOUNT 1024 REWARDS (-5.0 UNSPEC)
    #         EXTRA some other stuff goes here"""
    print()
    print()
    print("=======================================================================================================")
    print(ts)
    print()
    print()
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(ts)
    if TaskSpec.valid:
        print("=======================================================================================================")
        print("Version: [" + TaskSpec.getVersion() + "]")
        print("ProblemType: [" + TaskSpec.getProblemType() + "]")
        print("DiscountFactor: [" + str(TaskSpec.getDiscountFactor()) + "]")
        print("=======================================================================================================")
        print("\t \t \t \t Observations")
        print("=======================================================================================================")
        print("Observations: [" + TaskSpec.getObservations() + "]")
        print("Integers:", TaskSpec.getIntObservations())
        print("Doubles: ", TaskSpec.getDoubleObservations())
        print("Chars:   ", TaskSpec.getCharCountObservations())
        print("=======================================================================================================")
        print("\t \t \t \t Actions")
        print("=======================================================================================================")
        print("Actions: [" + TaskSpec.getActions() + "]")
        print("Integers:", TaskSpec.getIntActions())
        print("Doubles: ", TaskSpec.getDoubleActions())
        print("Chars:   ", TaskSpec.getCharCountActions())
        print("=======================================================================================================")
        print("Reward: [" + TaskSpec.getReward() + "]")
        print("Reward Range:", TaskSpec.getRewardRange())
        print("Extra: [" + TaskSpec.getExtra() + "]")
        print("remember that by using len() you get the cardinality of lists!")
        print("Thus:")
        print("len(Doubles) ==> ", len(TaskSpec.getDoubleObservations()), " Double Observations")
    else:
        print("Task spec was invalid, but I can try to get version: " + TaskSpec.getVersion())

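# A minimal usage sketch for processTaskSpec above, using the sample task-spec
# string quoted in that function's comment. Whether this exact string parses
# cleanly depends on the installed TaskSpecVRLGLUE3 codec version.
if __name__ == "__main__":
    sample_spec = ("VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 "
                   "OBSERVATIONS INTS (3 0 1) DOUBLES (2 -1.2 0.5) (-.07 .07) CHARCOUNT 1024 "
                   "ACTIONS INTS (2 0 4) CHARCOUNT 1024 REWARDS (-5.0 UNSPEC) "
                   "EXTRA some other stuff goes here")
    processTaskSpec(sample_spec)
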
def agent_init(self, taskSpecification):
    """
    This function is called once at the beginning of an experiment.

    :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    :return:
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
            "expecting max action to be a number not a special value"
        # self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    self.observation_ranges = TaskSpec.getDoubleObservations()  # TODO: take care of int observations
    self.observation_size = len(self.observation_ranges)

    self.action_ranges = TaskSpec.getDoubleActions()
    self.action_size = len(self.action_ranges)

    self.testing = False
    self.episode_counter = 0
    self.step_counter = 0
    self.total_reward = 0

    if self.nn_action_file is None:
        self.action_network = self._init_action_network(self.observation_size,
                                                        self.action_size,
                                                        minibatch_size=1)
    else:
        handle = open(self.nn_action_file, 'r')
        self.action_network = cPickle.load(handle)

    if self.nn_value_file is None:
        self.value_network = self._init_value_network(self.observation_size, 1,
                                                      minibatch_size=1)
    else:
        handle = open(self.nn_value_file, 'r')
        self.value_network = cPickle.load(handle)

    self.discount = TaskSpec.getDiscountFactor()

    self.action_ranges = np.asmatrix(self.action_ranges)
    self.observation_ranges = np.asmatrix(self.observation_ranges)

def agent_init(self, taskSpec):
    """Initialize the RL agent.

    Args:
        taskSpec: The RLGlue task specification string.
    """
    # (Re)initialize parameters, in case they have been changed during a trial
    log = logging.getLogger('pyrl.agents.sarsa_lambda.agent_init')
    self.init_parameters()

    # Parse the task specification and set up the weights and such
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if not self.agent_supported(TaskSpec):
        print "Task Spec could not be parsed: " + taskSpec
        sys.exit(1)

    self.numStates = len(TaskSpec.getDoubleObservations())
    log.info("Ranges: %s", TaskSpec.getDoubleObservations())
    self.discStates = numpy.array(TaskSpec.getIntObservations())
    self.numDiscStates = int(
        reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0))
    self.numActions = TaskSpec.getIntActions()[0][1] + 1
    # print "TSactions ", TaskSpec.getIntActions(), "TSObservation ", TaskSpec.getIntObservations()

    if self.numStates == 0:
        # Only discrete states
        self.numStates = 1
        if self.fa_name != "trivial":
            print "Selected basis requires at least one continuous feature. Using trivial basis."
            self.fa_name = "trivial"

    # Set up the function approximation
    if self.fa_name == 'fourier':
        self.basis = fourier.FourierBasis(self.numStates,
                                          TaskSpec.getDoubleObservations(),
                                          order=self.params.setdefault('fourier_order', 3))
    else:
        self.basis = trivial.TrivialBasis(self.numStates, TaskSpec.getDoubleObservations())

    log.debug("Num disc states: %d", self.numDiscStates)
    numStates = self.basis.getNumBasisFunctions()
    log.debug("Num states: %d", numStates)
    log.debug("Num actions: %d", self.numActions)

    self.weights = numpy.zeros((self.numDiscStates, numStates, self.numActions))
    self.traces = numpy.zeros(self.weights.shape)
    self.init_stepsize(self.weights.shape, self.params)
    # print "Weights:", self.weights

    self.lastAction = Action()
    self.lastObservation = Observation()

    log.debug("Sarsa Lambda agent after initialization: %s", pformat(self.__dict__))

def agent_init(self, taskSpec):
    """Initialize the RL agent.

    Args:
        taskSpec: The RLGlue task specification string.
    """
    self.init_parameters()

    # Consider looking at sarsa_lambda agent for a good example of filling out these methods
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    assert len(TaskSpec.getIntActions()) == 1
    self.numActions = TaskSpec.getIntActions()[0][1] + 1

    self.lastAction = Action()
    self.lastObservation = Observation()
    self.counter = 0

def agent_init(self, taskSpec):
    # See the sample_sarsa_agent in the mines-sarsa-example project for how to parse the task spec
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
            "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
            "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 2

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1
        # self.value_function = [self.numActions * [0.0] for i in range(self.numStates)]
    else:
        print "Task Spec could not be parsed: " + taskSpec

    self.lastAction = Action()
    self.lastObservation = Observation()

    S0 = TaskSpec.getIntObservations()[0][1] + 1
    for a in range(self.numActions):
        self.R[S0][a] = self.rmax
        self.T[S0][a][S0] = 1.0

def agent_init(self, taskSpecification):
    """
    This function is called once at the beginning of an experiment.

    :param taskSpecification: A string defining the task.  This string
        is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    :return:
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)

    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert ((len(TaskSpec.getIntActions()) == 0) !=
                (len(TaskSpec.getDoubleActions()) == 0)), \
            "expecting continuous or discrete actions, not both"
        # assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][0]), \
        #     "expecting min action to be a number not a special value"
        # assert not TaskSpec.isSpecial(TaskSpec.getDoubleActions()[0][1]), \
        #     "expecting max action to be a number not a special value"
        # self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    self.observation_ranges = TaskSpec.getDoubleObservations()
    self.observation_size = len(self.observation_ranges)

    self.continuous_actions = len(TaskSpec.getDoubleActions()) > 0
    if self.continuous_actions:
        self.action_ranges = TaskSpec.getDoubleActions()
    else:
        self.action_ranges = TaskSpec.getIntActions()
    self.action_size = len(self.action_ranges)

    self._init_network()

    self.discount = TaskSpec.getDiscountFactor()

    self.action_ranges = np.asmatrix(self.action_ranges, dtype=floatX)
    self.observation_ranges = np.asmatrix(self.observation_ranges, dtype=floatX)

def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    self.all_allowed_actions = dict()
    self.Q_value_function = dict()

    if TaskSpec.valid:
        self.nbrReaches = len(TaskSpec.getIntActions())
        self.Bad_Action_Penalty = min(TaskSpec.getRewardRange()[0])
        rewardRange = (min(TaskSpec.getRewardRange()[0]),
                       max(TaskSpec.getRewardRange()[0]))
        self.habitatSize = len(TaskSpec.getIntObservations()) / self.nbrReaches
        self.discount = TaskSpec.getDiscountFactor()
        theExtra = TaskSpec.getExtra().split('BUDGET')
        self.edges = eval(theExtra[0])
        self.budget = eval(theExtra[1].split("by")[0])
        # self.nbrReaches = TaskSpec.getIntActions()[0][0][0]
        # self.Bad_Action_Penalty = min(TaskSpec.getRewardRange()[0])
        # rewardRange = (min(TaskSpec.getRewardRange()[0]), max(TaskSpec.getRewardRange()[0]))
        # self.habitatSize = TaskSpec.getIntObservations()[0][0][0] / self.nbrReaches
        # self.discount = TaskSpec.getDiscountFactor()
        # self.edges = eval(TaskSpec.getExtra().split('by')[0])
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

    # COSTS
    cost_per_invaded_reach = 10
    cost_per_tree = 0.1
    cost_per_empty_slot = 0.09
    eradication_cost = 0.5
    restoration_cost = 0.9
    variable_eradication_cost = 0.4
    variable_restoration_cost_empty = 0.4
    variable_restoration_cost_invaded = 0.8

    # CREATE ACTION PARAMETER OBJECT
    self.actionParameterObj = ActionParameterClass(
        cost_per_tree, eradication_cost, restoration_cost, 0, 0,
        cost_per_invaded_reach, cost_per_empty_slot,
        variable_eradication_cost, variable_restoration_cost_invaded,
        variable_restoration_cost_empty, self.budget)

def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
            "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
            "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        # Initializes the value function to 0 for each action in each state:
        # each state has an array with one entry per possible action.
        # cheating: hard-code the number of states
        self.numStates = 992
        self.value_function = [self.numActions * [0.0] for i in range(self.numStates)]
        # Need to initialize all option actions to be higher than 0. I should be able
        # to just set ALL options in ALL states to be higher than 0. If I coded
        # everything else right, then only the correct states will actually have
        # access to these options, so fun times.
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, task_spec_str):
    task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)
    self.eps_step = 0

    if not task_spec.valid:
        raise ValueError(
            'Task spec could not be parsed: {}'.format(task_spec_str))

    self.gamma = task_spec.getDiscountFactor()  # discount factor

    # Build the DQN
    # Arg1: input layer size
    # Arg2: number of hidden-layer nodes
    # Arg3: output layer size
    # self.Q = QNet(self.bdim * self.n_frames, self.bdim * self.n_frames, self.dim)
    self.Q = QNet(self.bdim * self.n_frames, self.dim * 3, self.dim)

    if self.file_idx >= 0:
        serializers.load_hdf5(
            self.model_name + "_{0:05}.hdf5".format(self.file_idx), self.Q)
        self.step_counter = self.file_idx * 1000
        self.learn_start += self.step_counter

    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.Q.to_gpu()
    self.xp = np if self.gpu < 0 else cuda.cupy

    self.targetQ = copy.deepcopy(self.Q)

    self.optimizer = optimizers.RMSpropGraves(lr=0.01, alpha=0.95,
                                              momentum=0.0, eps=0.01)
    # self.optimizer = optimizers.Adam(alpha=0.01, beta1=0.9, beta2=0.999,
    #                                  final_lr=0.1, gamma=0.001, eps=1e-08, eta=1.0)
    # self.optimizer = optimizers.SGD(lr=0.01)
    self.optimizer.setup(self.Q)

    if self.file_idx >= 0:
        serializers.load_hdf5(
            self.opt_name + "_{0:05}.hdf5".format(self.file_idx), self.optimizer)

    self.file_idx = self.file_idx + 1

def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments: task_spec_string - A string defining the task.  This string
    is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    """
    self.image = None
    self.show_ale = True
    self.saving = True

    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1

    self.int_states = len(TaskSpec.getIntObservations()) > 0

    # Create empty lists for data collection.
    self.states = []
    self.actions = []
    self.rewards = []
    self.absorbs = []

    # Create appropriate RL-Glue objects for storing these.
    self.last_action = Action()
    self.last_observation = Observation()

def agent_init(self, task_spec_str):
    task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

    if not task_spec.valid:
        raise ValueError(
            'Task spec could not be parsed: {}'.format(task_spec_str))

    self.gamma = task_spec.getDiscountFactor()

    # Why multiply by n_frames? (There should be no need to look back at past frames.)
    self.Q = QNet(self.bdim * self.n_frames, 30, self.n_rows * self.n_cols)

    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.Q.to_gpu()
    self.xp = np if self.gpu < 0 else cuda.cupy

    self.targetQ = copy.deepcopy(self.Q)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.0)
    self.optimizer.setup(self.Q)

def agent_init(self, taskSpecification):
    # copied from sample sarsa agent
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecification)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
            "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
            "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.qvalues = [self.numActions * [0.0] for i in range(self.numStates)]
    else:
        print "Task Spec could not be parsed: " + taskSpecification

    self.lastAction = Action()
    self.lastObservation = Observation()

def parse_taskspec(self, spec):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
    if TaskSpec.valid:
        self._n_double_dims = len(TaskSpec.getDoubleObservations())
        self._n_int_dims = len(TaskSpec.getIntObservations())
        self._n_double_act_dims = len(TaskSpec.getDoubleActions())
        self._n_int_act_dims = len(TaskSpec.getIntActions())
        self.limits = np.array(TaskSpec.getDoubleObservations())

        self.act_limits = None
        if self._n_double_act_dims != 0:
            self.act_limits = np.array(TaskSpec.getDoubleActions())
        if self._n_int_act_dims != 0:
            if self.act_limits is None:
                self.act_limits = np.array(TaskSpec.getIntActions())
            else:
                self.act_limits = np.append(self.act_limits,
                                            np.array(TaskSpec.getIntActions()),
                                            axis=0)
        print self.act_limits

        self.act_range = self.act_limits[:, 1] - self.act_limits[:, 0]
        self._n_int_actions = np.prod(self.act_range[self._n_double_act_dims:])

        print spec
        # print 'Double state variables:'
        # print len(TaskSpec.getDoubleObservations())
        # print 'Integer state variables:'
        # print len(TaskSpec.getIntObservations())
        # print 'Double Actions dimensions:'
        # print self.double_action_dims()
        # print 'Integer Action dimensions:'
        # print self.int_action_dims()
        # print '# number of Integer Actions'
        # print self.num_actions()
    else:
        print "Task Spec could not be parsed: " + spec

def agent_init(self, taskSpec):
    print("Reading taskSpec: " + taskSpec.decode())

    # Parse taskSpec
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if TaskSpec.valid:
        print("Parsing task spec...")
        self.max_u = TaskSpec.getDoubleActions()[0][1]
        self.n_action = len(TaskSpec.getDoubleActions())
        self.n_obs = len(TaskSpec.getDoubleObservations())
        print(f"Number of actions: {self.n_action}")
        print(f"Number of obs: {self.n_obs}")
        print("Task spec parsed!")
    else:
        print("Task Spec could not be parsed: " + taskSpec)

    print("Initialization of training...")

    # Variables
    # Iteration initial state: fixed during one iteration
    self.initial_state = Observation()
    self.states = []  # States encountered from the start of the training
    self.agentPolicy = np.zeros((self.n_action, self.n_obs))
    self.deltas = [2 * np.zeros((self.n_action, self.n_obs)) - 1
                   for i in range(self.N)]
    # 2N policies for the 2N rollouts
    self.deltaPolicies = [self.agentPolicy for i in range(2 * self.N)]
    # Rewards obtained at the end of the 2N rollouts
    self.rewards = [0. for i in range(2 * self.N)]
    self.count = 0     # Counter which increments only after one agent step
    self.ev_count = 0  # Counter for evaluation

    print("Training initialized!")

def agent_init(self, taskSpec):
    self.init_parameters()

    # Parse the task specification and set up the weights and such
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec)
    if TaskSpec.valid:
        # Check observation form, and then set up number of features/states
        assert len(TaskSpec.getDoubleObservations()) > 0, "expecting at least one continuous observation"
        self.numStates = len(TaskSpec.getDoubleObservations())

        # Check action form, and then set number of actions
        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        # Set up the function approximation
        self.net = nl.net.newff(TaskSpec.getDoubleObservations(),
                                [self.num_hidden, self.numActions],
                                [nl.net.trans.TanSig(), nl.net.trans.PureLin()])
        self.traces = copy.deepcopy(map(lambda x: x.np, self.net.layers))
        self.clearTraces()
    else:
        print "Task Spec could not be parsed: " + taskSpec

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, taskSpecString):
    print taskSpecString
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        print len(TaskSpec.getDoubleActions()), ": ", TaskSpec.getDoubleActions(), \
            '\n', len(TaskSpec.getDoubleObservations()), ": ", TaskSpec.getDoubleObservations()
        # assert len(TaskSpec.getIntObservations()) == 12, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        # assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), \
        #     "expecting min observation to be a number not a special value"
        # assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), \
        #     "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.value_function = [self.numActions * [0.0] for i in range(self.numStates)]
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    self.all_allowed_actions = dict()
    self.Q_value_function = dict()

    if TaskSpec.valid:
        self.nbrReaches = len(TaskSpec.getIntActions())
        self.Bad_Action_Penalty = min(TaskSpec.getRewardRange()[0])
        rewardRange = (min(TaskSpec.getRewardRange()[0]),
                       max(TaskSpec.getRewardRange()[0]))
        self.habitatSize = len(TaskSpec.getIntObservations()) / self.nbrReaches
        self.sarsa_gamma = TaskSpec.getDiscountFactor()
        theExtra = TaskSpec.getExtra().split('BUDGET')
        self.edges = eval(theExtra[0])
        self.budget = eval(theExtra[1].split("by")[0])
        # self.nbrReaches = TaskSpec.getIntActions()[0][0][0]
        # self.Bad_Action_Penalty = min(TaskSpec.getRewardRange()[0])
        # rewardRange = (min(TaskSpec.getRewardRange()[0]), max(TaskSpec.getRewardRange()[0]))
        # self.habitatSize = TaskSpec.getIntObservations()[0][0][0] / self.nbrReaches
        # self.sarsa_gamma = TaskSpec.getDiscountFactor()
        # self.edges = eval(TaskSpec.getExtra().split('by')[0])
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()

def agent_init(self, taskSpecString):
    taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if taskSpec.valid:
        logger.info('TaskSpec parsed: ' + taskSpecString)
    else:
        logger.info('TaskSpec could not be parsed: ' + taskSpecString)

def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments: task_spec_string - A string defining the task.  This string
    is decoded using TaskSpecVRLGLUE3.TaskSpecParser
    """
    self.start_time = time.time()
    self.image = None
    self.show_ale = False
    self.total_reward = 0
    self.mini_batch_size = 32
    self.num_mini_batches = 1
    self.frame_count = 0
    self.frames_trained = 0
    self.qvalue_sum = 0
    self.qvalue_count = 0
    self.predicted_reward = 0  # assumed initial value; the original assignment was garbled
    learning_rate = .00001
    self.testing_policy = False
    self.epoch_counter = 0
    self.epochs_until_test = 5
    self.policy_test_file_name = "results.csv"
    load_file = False
    load_file_name = "cnnparams.pkl"
    self.save_file_name = "cnnparams.pkl"
    self.counter = 0
    self.cur_action = 0

    # starting value for epsilon-greedy
    self.epsilon = 1

    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1

    self.num_actions = 3  # hard-coded override of the parsed action count
    self.int_states = len(TaskSpec.getIntObservations()) > 0

    # Create neural network and initialize trainer and dataset
    if load_file:
        thefile = open(load_file_name, "r")
        self.cnn = cPickle.load(thefile)
    else:
        self.first_conv_layer = maxout.MaxoutConvC01B(16, 1, (8, 8), (1, 1), (1, 1),
                                                      "first conv layer", irange=.1,
                                                      kernel_stride=(4, 4), min_zero=True)
        self.second_conv_layer = maxout.MaxoutConvC01B(32, 1, (4, 4), (1, 1), (1, 1),
                                                       "second conv layer", irange=.1,
                                                       kernel_stride=(2, 2), min_zero=True)
        self.rect_layer = mlp.RectifiedLinear(dim=256, layer_name="rectified layer", irange=.1)
        self.output_layer = mlp.Linear(self.num_actions, "output layer", irange=.1)
        layers = [self.first_conv_layer, self.second_conv_layer,
                  self.rect_layer, self.output_layer]

        self.cnn = mlp.MLP(layers,
                           input_space=Conv2DSpace((80, 80), num_channels=4,
                                                   axes=('c', 0, 1, 'b')),
                           batch_size=self.mini_batch_size)

    self.data = nqd.NeuralRewardPredictorDataset(self.cnn,
                                                 mini_batch_size=self.mini_batch_size,
                                                 num_mini_batches=self.num_mini_batches,
                                                 learning_rate=learning_rate)

    # Create appropriate RL-Glue objects for storing these.
    self.last_action = Action()
    self.last_observation = Observation()

    thefile = open(self.policy_test_file_name, "w")
    thefile.write("Reward, Predicted reward, Frames trained\n")
    thefile.close()