def agent_init(self, taskSpecification):
    ts = TaskSpecParser(taskSpecification)
    print ts.getIntObservations()
    time.sleep(10)
    # Put the terminal under curses control so the agent can draw to it.
    self.window = curses.initscr()
    curses.noecho()
    curses.cbreak()
    self.window.keypad(1)
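# A minimal companion sketch, not part of the original agent: since
# curses.initscr() above disables echo and line buffering, the RL-Glue
# agent_cleanup hook is a natural place to undo that. The body below is an
# illustrative assumption (it only uses standard curses calls) and assumes
# import curses at module level.
def agent_cleanup(self):
    # Restore the terminal state changed in agent_init.
    self.window.keypad(0)
    curses.nocbreak()
    curses.echo()
    curses.endwin()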
def agent_init(self, task_spec):
    """Re-initialize the agent for a new training round."""
    ts = TaskSpecParser(task_spec)
    # Grid size
    intobs = ts.getIntObservations()
    self.size = (intobs[0][1], intobs[1][1])
    # Exploration setup
    self.explore_steps = self.size[0] / 10
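# Hedged illustration of the parsing above; the [min, max] pairs are
# made-up stand-ins for what getIntObservations() might return on a
# hypothetical 20 x 30 grid, not values taken from the original task spec.
example_intobs = [[0, 20], [0, 30]]
example_size = (example_intobs[0][1], example_intobs[1][1])  # (20, 30)
example_explore_steps = example_size[0] / 10                 # 2 under Python 2 integer division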
def agent_init(self, taskSpec):
    self.episode = 0
    self.steps = 1
    print "TaskSpec:"
    print taskSpec
    print
    self.action = Action()
    self.action_types = []
    self.action.intArray = []
    self.action.doubleArray = []
    self.action.numInts = 1
    rnd.seed(0)
    TaskSpec = TaskSpecParser(taskSpec)
    self.input_ranges = TaskSpec.getDoubleObservations()
    print 'observations ranges', self.input_ranges
    # parse action
    self.action_ranges = TaskSpec.getIntActions()[0]
    print "action ranges", self.action_ranges
    self.actionlist = range(self.action_ranges[0], self.action_ranges[1] + 1)
    self.nactions = len(self.actionlist)
    print "action list", self.actionlist
    # build a nearest-neighbor function approximator
    self.Q = NNQ(nactions=self.nactions, input_ranges=self.input_ranges,
                 nelemns=[7, 7, 3, 3], alpha=0.5, lm=0.8)
    # set epsilon for e-greedy exploration
    self.SelectAction = e_greedy_selection(epsilon=0.1)
    self.SelectAction.parent = self
    # discount factor
    self.gamma = 1.0  # TaskSpec.getDiscountFactor()
    self.proving += 1
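# Hedged sketch of the epsilon-greedy rule this agent relies on. The real
# e_greedy_selection class comes from the author's library and its exact
# interface is not shown here; this standalone helper is only an
# illustration of the idea: with probability epsilon pick a random action
# index, otherwise pick the action with the highest Q-value.
import random

def epsilon_greedy(q_values, epsilon=0.1):
    """Return an action index for one state's list of Q-values."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])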
def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        print "Task spec was valid"
    else:
        print "Task Spec could not be parsed: " + taskSpecString
        exit()
    # parse action
    self.action = Action()
    self.int_action_ranges = TaskSpec.getIntActions()
    self.double_action_ranges = TaskSpec.getDoubleActions()
    self.action.numInts = len(self.int_action_ranges)
    self.action.numDoubles = len(self.double_action_ranges)
    self.action.numChars = TaskSpec.getCharCountActions()
    #print "int", self.int_action_ranges, self.action.numInts
    #print "doubles", self.double_action_ranges, self.action.numDoubles
    #print "chars", self.action.numChars
    random.seed(0)
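# Hedged sketch, not the original agent's code: one way the ranges parsed
# above could be used to fill a random Action (for example in agent_start).
# It assumes int_action_ranges / double_action_ranges are lists of
# [min, max] pairs, plus "from rlglue.types import Action" and
# "import random" at module level.
def random_action(self):
    act = Action()
    act.intArray = [random.randint(lo, hi)
                    for (lo, hi) in self.int_action_ranges]
    act.doubleArray = [random.uniform(lo, hi)
                       for (lo, hi) in self.double_action_ranges]
    return act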