def launch_environment(parameters):
    """Start the sumo-rlglue environment.

    (This function is executed in a separate process using multiprocessing.)
    """
    import rl_glue_sumo_environment
    environment = rl_glue_sumo_environment.SumoEnvironment()
    EnvironmentLoader.loadEnvironment(environment)
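# A minimal sketch of the parent-process side implied by the docstring above:
# launching launch_environment with the standard-library multiprocessing
# module. The empty parameters dict is a placeholder assumption.
import multiprocessing

if __name__ == "__main__":
    env_process = multiprocessing.Process(target=launch_environment, args=({},))
    env_process.start()  # the child blocks inside loadEnvironment until RL-Glue shuts down
    # ... launch the agent and experiment in their own processes here ...
    env_process.join()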
def main():
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(simulationParameterObj=None,
                            actionParameterObj=None,
                            Bad_Action_Penalty=-10000,
                            fixedStartState=False,
                            nbrReaches=REACHES,
                            habitatSize=HABITATS,
                            seed=1))
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"


def addGridworldArgs(parser):
    parser.add_argument("--size_x", type=float, default=10,
                        help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--size_y", type=float, default=10,
                        help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--goal_x", type=float, default=10, help="Goal x coordinate")
    parser.add_argument("--goal_y", type=float, default=10, help="Goal y coordinate")
    parser.add_argument("--noise", type=float, default=0,
                        help="Standard deviation of additive noise to generate")
    parser.add_argument("--fudge", type=float, default=1.4143,
                        help="Distance from goal allowed before episode is counted as finished")
    parser.add_argument("--random_restarts", type=bool, default=False,
                        help="Randomly assign x,y initial locations.")

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run 2D Noisy Continuous Gridworld environment in network mode.')
    addGridworldArgs(parser)
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(Gridworld(size_x=args.size_x, size_y=args.size_y,
                                                goal_x=args.goal_x, goal_y=args.goal_y,
                                                noise=args.noise, random_start=args.random_restarts,
                                                fudge=args.fudge))
            self.o.doubleArray = list(range(0, 50000))
            terminal = 0
            if self.stepCount == 200:
                terminal = 1
            ro = Reward_observation_terminal()
            ro.r = 1.0
            ro.o = self.o
            ro.terminal = terminal
            return ro

        self.o.intArray = list(range(0, 5))
        # cheating, might break something
        self.o.doubleArray = list(range(0, 5))
        terminal = 0
        if self.stepCount == 5000:
            terminal = 1
        ro = Reward_observation_terminal()
        ro.r = 1.0
        ro.o = self.o
        ro.terminal = terminal
        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return None

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_speed_environment())
    def printState(self):
        numRows = len(self.map)
        numCols = len(self.map[0])
        print "Agent is at: " + str(self.agentRow) + "," + str(self.agentCol)
        print "Columns:0-10 10-17"
        print "Col ",
        for col in range(0, numCols):
            print col % 10,
        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.agentRow == row and self.agentCol == col:
                    print "A",
                else:
                    if self.map[row][col] == self.WORLD_GOAL:
                        print "G",
                    if self.map[row][col] == self.WORLD_MINE:
                        print "M",
                    if self.map[row][col] == self.WORLD_OBSTACLE:
                        print "*",
                    if self.map[row][col] == self.WORLD_FREE:
                        print " ",
        print

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(mines_environment())
        if msg.startswith('set'):
            param, value = msg.split(None, 2)[1:]
            self.debug('set', param, value)
            self.env_message_set_param(param, value)
        elif msg.startswith('get'):
            param = msg.split(None, 1)[1]
            return self.env_message_get_param(param)
        else:
            return self.env_message_handler(msg)

    def debug(self, *args):
        """ Print a debug msg """
        # NOTE: this method shadows any self.debug flag on the instance, so
        # the guard below is always truthy; a differently named boolean
        # attribute would be needed to actually toggle output.
        if self.debug:
            args = [str(a) for a in args]
            print "%s: %s" % (self.name, ' '.join(args))

    def step_out(self, *args):
        if self.output_steps:
            args = [str(a) for a in args]
            print ' '.join(args)

if __name__ == '__main__':
    #p = PuddleEnvironment()
    #p.env_start()
    EnvironmentLoader.loadEnvironment(PuddleEnvironment())
            return (-1 - i)

        if self.presentCol > 6999 and self.presentCol < 8000:
            if self.presentRow > 5999 and self.presentRow < 9000:
                return -1
        if self.presentCol > 5999 and self.presentCol < 7000:
            if self.presentRow > 4999 and self.presentRow < 8000:
                return -2
        # Reward
        if self.presentCol > 10999:
            if self.presentRow < 1000:
                return 10
        return 0

    # Checking if the current position is the goal state
    def goalcheck(self):
        # Only A goal state
        if self.presentCol > 10999:
            if self.presentRow < 1000:
                return True
        return False

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(puddle_world())
            theReward = -1
            episodeOver = 1

        if self.currentState >= 20:
            self.currentState = 20
            theReward = 1
            episodeOver = 1

        theObs = Observation()
        theObs.intArray = [self.currentState]

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is dqn_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(dqn_environment())
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"


@register_environment
class MountainCar(MountainCarND):
    name = "Mountain Car"

    def __init__(self, **kwargs):
        kwargs['dimension'] = 2
        super(MountainCar, self).__init__(**kwargs)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Noisy Mountain Car environment in network mode.')
    parser.add_argument("--noise", type=float, default=0,
                        help="Standard deviation of additive noise to generate, affects the action effects.")
    # NOTE: argparse's type=bool treats any non-empty string as True, so
    # "--random_restarts False" still enables random restarts.
    parser.add_argument("--random_restarts", type=bool, default=False,
                        help="Restart the cart with a random location and velocity.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(MountainCar(noise=args.noise, random_start=args.random_restarts))
""" MENACE agent environment wrapper """ import sys # Ugly! sys.path.append('../') from agents.symmetry_agent import SymmetryAgent from wrapper_environment import WrapperEnvironment from rlglue.environment import EnvironmentLoader class SymmetryEnvironment(WrapperEnvironment, SymmetryAgent): name = 'symmetry_agent' player = 2 def env_play(self): action = self.do_step(self.state) self.state = action.intArray if __name__ == "__main__": EnvironmentLoader.loadEnvironment(SymmetryEnvironment())
                                     self.map[i:i + self.n_cols])) + '\n'
        self.history.append(current_map)

        # Record the course of the game.
        if rot.r == self.game.r_lose:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # If the game is decided, the agent's agent_end is called next;
        # otherwise play continues with the agent's agent_step.
        return rot

    def env_cleanup(self):
        pass

    def env_message(self, message):
        pass

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Deep Q-Learning')
    parser.add_argument('--size', '-s', default=6, type=int, help='Reversi board size')
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(KmoriReversiEnvironment(args.size))
        episodeOver = 0
        theReward = 0
        theObs = Observation()
        # NOTE: the RL-Glue codec expects a list of ints here; np.zeros returns
        # a float ndarray, so it is converted explicitly.
        theObs.intArray = np.zeros(50816, dtype=int).tolist()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is dqn_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == '__main__':
    App = TetrisApp()  # NOTE: unused; loadEnvironment below gets a fresh instance
    # App.run()
    EnvironmentLoader.loadEnvironment(TetrisApp())
    print('main start')
    def checkCurrentTerminal(self):
        return self.checkTerminal(self.agentRow, self.agentCol)

    def updatePosition(self, theAction):
        # When the move would result in hitting an obstacle, the agent simply doesn't move
        newRow = self.agentRow
        newCol = self.agentCol
        newRow += self.FIXED_DISTANCE * math.cos(theAction)
        newCol += self.FIXED_DISTANCE * math.sin(theAction)
        # Check if new position is out of bounds or inside an obstacle
        if self.checkValid(newRow, newCol):
            self.agentRow = newRow
            self.agentCol = newCol
            return False
        else:
            return True

    def calculateReward(self, hitBoundary):
        if hitBoundary:
            return -0.5
        if distance.euclidean([self.agentRow, self.agentCol], self.END_STATE) < 0.5 * self.FIXED_DISTANCE:
            return 10.0
        return 0.0

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(threeroom_environment())
        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Noisy Acrobot environment in network mode.')
    parser.add_argument("--noise", type=float, default=0,
                        help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=bool, default=False,
                        help="Restart the state with random values.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(Acrobot(noise=args.noise, random_start=args.random_restarts))
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"


def addTaxiArgs(parser):
    parser.add_argument("--size_x", type=float, default=5,
                        help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--size_y", type=float, default=5,
                        help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--landmark", action="append", nargs=2, type=float,
                        help="Add a landmark, give x y coordinates")
    parser.add_argument("--wall", type=float, action="append", nargs=2,
                        help="Add a wall, give x coordinate and size in y with sign indicating starting at the bottom (+) or top (-)")
    parser.add_argument("--fuel_loc", type=float, default=[2.0, 1.0], nargs=2,
                        help="x y coordinate of the fuel station")
    parser.add_argument("--fickleness", type=float, default=0,
                        help="Probability of the passenger changing their destination mid-route.")
    parser.add_argument("--noise", type=float, default=0,
                        help="Standard deviation of additive noise to generate")
    parser.add_argument("--fudge", type=float, default=1.4143,
                        help="Distance from goal allowed before episode is counted as finished")

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Continuous Taxi environment in network mode.')
    addTaxiArgs(parser)
    args = parser.parse_args()
    fuelloc = None if args.fuel_loc[0] < 0 else args.fuel_loc
    walls = numpy.array(args.wall) if args.wall is not None else None
    landmarks = numpy.array(args.landmark) if args.landmark is not None else None
    EnvironmentLoader.loadEnvironment(Taxi(args.size_x, args.size_y, walls=walls,
                                           landmarks=landmarks, fuel_loc=fuelloc,
                                           fickleness=args.fickleness, noise=args.noise,
                                           fudge=args.fudge))
            theReward = -1
            episodeOver = 1

        if self.currentState >= 20:
            self.currentState = 20
            theReward = 1
            episodeOver = 1

        theObs = Observation()
        theObs.intArray = [self.currentState]

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is skeleton_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(skeleton_environment())
        returnRO.terminal = int(self.counter >= self.T)
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Batch Replenishment environment in network mode.')
    parser.add_argument("--demand_mean", type=float, default=10., help="Mean demand for the product.")
    parser.add_argument("--demand_std", type=float, default=1., help="Standard deviation of demand for the product.")
    parser.add_argument("--payoff", type=float, default=5., help="Payment received per unit product sold.")
    parser.add_argument("--cost", type=float, default=2., help="Cost per unit product purchased.")
    parser.add_argument("--discount_factor", type=float, default=0.999, help="Discount factor to learn over.")
    parser.add_argument("--time_period", type=int, default=20, help="Time period for problem. (Number of steps to run)")
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(BatchReplenishment(demand_mean=args.demand_mean,
                                                         demand_std=args.demand_std,
                                                         payoff=args.payoff,
                                                         cost=args.cost,
                                                         gamma=args.discount_factor,
                                                         time_period=args.time_period,
                                                         noise=args.noise))
        rot.o = observation

        current_map = 'map\n'
        for i in range(0, len(self.map), self.n_cols):
            current_map += ' '.join(map(str, self.map[i:i + self.n_cols])) + '\n'
            if (i % 16 == 0):
                current_map += "\n"
        self.history.append(current_map)

        if rot.r == -1:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # If the game is decided, the agent's agent_end is called next;
        # otherwise play continues with the agent's agent_step.
        return rot

    def env_cleanup(self):
        pass

    def env_message(self, message):
        pass

if __name__ == '__main__':
    EnvironmentLoader.loadEnvironment(MarubatsuEnvironment())
""" Random player environment """ import random from rlglue.environment import EnvironmentLoader from wrapper_environment import WrapperEnvironment class RandomEnvironment(WrapperEnvironment): name = 'random' def env_play(self): """ Pick the first free spot, and play there. """ open_spots = [] for i in range(len(self.state)): if self.state[i] == 0: open_spots.append(i) self.state[random.choice(open_spots)] = self.color if __name__ == "__main__": EnvironmentLoader.loadEnvironment(RandomEnvironment())
            1, 1, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2,
            0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2,
            0, 0, 0, 0, 0, 0]

        numRows = len(self.worldmap)
        numCols = len(self.worldmap[0])
        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.checkValid(row, col):
                    flat = self.calculateFlatState(row, col)
                    flat_i = self.validstates.index(flat)
                    print unicode(argmaxes[flat_i]),
                else:
                    print "X",
            """
            if self.map[row][col] == self.GOAL:
                print "G",
            if self.map[row][col] == self.WALL:
                print "X",
            if self.map[row][col] == self.START:
                print "S",
            if self.map[row][col] == self.FREE:
                print " ",
            """
        print

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(threeroom_environment())
if inMessage.startswith("set-start-state"): splitString = inMessage.split(" ") self.state = array(eval(splitString[1])) self.fixedStartState = True return "Message understood. Using fixed start state." return "InvasiveEnvironment(Python) does not respond to messages." def setAgentState(self, S): assert len(S)==self.simulationParameterObj.habitatSize*self.simulationParameterObj.nbrReaches self.state = S valid = True return valid def setRandomState(self): S = array([random.randint(1, 3) for i in xrange(self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)]) self.setAgentState(S) def checkValid(self, S): valid = True return valid def printState(self): print "Agent is at: " + str(self.state) if __name__ == "__main__": EnvironmentLoader.loadEnvironment( InvasiveEnvironment(simulationParameterObj=None, actionParameterObj=None, Bad_Action_Penalty=-10000,fixedStartState=False, nbrReaches=7, habitatSize=4, seed=1))
    # (string) -> string
    def env_message(self, inMessage):
        if inMessage.startswith("print"):
            self.toprint = 1
            return "message understood, print"
        if inMessage.startswith("stop print"):
            self.toprint = 0
            return "message understood, stop print"
        return "RmaxAgent(Python) does not understand your message."

    def clearscreen(self, numlines=100):
        """Clear the console.

        numlines is an optional argument used only as a fall-back.
        """
        # Thanks to Steven D'Aprano, http://www.velocityreviews.com/forums
        if os.name == "posix":
            # Unix/Linux/MacOS/BSD/etc
            os.system('clear')
        elif os.name in ("nt", "dos", "ce"):
            # DOS/Windows
            os.system('CLS')
        else:
            # Fallback for other operating systems.
            print('\n' * numlines)

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(My_Environment())
        self.nonEmptyObservation.charArray = ['a', 'b', 'c', 'd', 'e']
        return ""

    def env_start(self):
        self.whichEpisode = self.whichEpisode + 1
        if self.whichEpisode % 2 == 0:
            return self.emptyObservation
        else:
            return self.nonEmptyObservation

    def env_step(self, action):
        ro = Reward_observation_terminal()
        if self.whichEpisode % 2 == 0:
            ro.o = self.emptyObservation
        else:
            ro.o = self.nonEmptyObservation
        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return None

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_empty_environment())
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"

if __name__ == "__main__":
    import argparse
    path_to_problems = os.path.join(os.path.dirname(__file__), 'configs', 'pomdps', '*')
    config_files = glob.glob(path_to_problems)

    parser = argparse.ArgumentParser(description='Run a POMDP problem file as a domain in RL-Glue in network mode.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--file", type=str, default=config_files[0],
                       help="Run POMDP domain given the path to a POMDP problem file.")
    group.add_argument("--list", action='store_true', default=False,
                       help="List path to included POMDP problem files.")
    args = parser.parse_args()

    if args.list:
        print "Included POMDP problem files:"
        for file in config_files:
            print file
    else:
        EnvironmentLoader.loadEnvironment(POMDPEnvironment(spec_filename=args.file))

# Alternative entry point (from a second loader script): run a POMDP given
# an explicit spec file path.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run a specified POMDP in RL-Glue in network mode.')
    parser.add_argument("--pomdp_file", type=str, required=True,
                        help="Filename for POMDP spec file to load and use.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(POMDPEnvironment(args.pomdp_file))
#print("newState: "+str(self.state)) r = 0.0 if np.any(self.state_ranges[:,0] > self.state[:]) or \ np.any(self.state_ranges[:,1] < self.state[:]): # r = -1 r = -np.sum(3.0 * self.state_ranges[:,1]**2) r *= 6000-self.num_sim_steps terminate = True else: # perr = np.linalg.norm(self.prevState[:2] - self.state_goal[:2]) # nerr = np.linalg.norm(self.state[:2] - self.state_goal[:2]) # r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \ # math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))* \ # math.exp(-np.sum(abs(self.state[6:8]-self.state_goal[6:8])/(.1*(self.state_ranges[6:8,1]-self.state_ranges[6:8,0])))) # r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \ # math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0])))) # r -= (np.sum(((self.state[:2]-self.state_goal[:2])/(self.state_ranges[:2,1]-self.state_ranges[:2,0]))**2)+ \ # np.sum(((self.state[3:5]-self.state_goal[3:5])/(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))**2)) r -= (self.state[0]-self.state_goal[0])**2 r -= (self.state[1]-self.state_goal[1])**2 r -= self.state[3]**2 r -= self.state[4]**2 terminate = False print("reward "+str(r)) return r,terminate if __name__=="__main__": EnvironmentLoader.loadEnvironment(vrep_environment())
            # return max(outcomes)
            max_element = -1
            for o in outcomes:
                if o == +1:
                    return o
                max_element = max(o, max_element)
            return max_element
        finally:
            board.undoMove(move)

    moves = [(move, evaluateMove(move)) for move in board.getValidMoves()]
    random.shuffle(moves)
    # Python 2 tuple-parameter lambda: sort moves by predicted winner
    moves.sort(key=lambda (move, winner): winner)
    board.makeMove(moves[-1][0], player)


class MiniMaxEnvironment(WrapperEnvironment):
    name = 'minimax'

    def env_play(self):
        b = Board(self.state)
        computerPlayer(b, Player_X)
        b.output()
        self.state = b.pieces

if __name__ == "__main__":
    # game()
    EnvironmentLoader.loadEnvironment(MiniMaxEnvironment())
""" Environment which always plays the first free spot available on the board. """ from rlglue.environment import EnvironmentLoader from wrapper_environment import WrapperEnvironment class FirstFreeEnvironment(WrapperEnvironment): name = 'first_free' def env_play(self): """ Pick the first free spot, and play there. """ for i in range(len(self.state)): if self.state[i] == 0: self.state[i] = self.color return if __name__ == "__main__": EnvironmentLoader.loadEnvironment(FirstFreeEnvironment())
            if event.type == pygame.QUIT:
                done = True
            if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN:
                user_action = actions.get(event.key, PinballModel.ACC_NONE)
                if environment.take_action(user_action) == environment.END_EPISODE:
                    done = True

        environment_view.blit()
        pygame.display.flip()

    pygame.quit()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Pinball domain')
    parser.add_argument('configuration', help='the configuration file')
    parser.add_argument('--width', action='store', type=int, default=500,
                        help='screen width (default: 500)')
    parser.add_argument('--height', action='store', type=int, default=500,
                        help='screen height (default: 500)')
    parser.add_argument('-r', '--rlglue', action='store_true',
                        help='expose the environment through RL-Glue')
    args = parser.parse_args()

    if args.rlglue:
        print 'Starting rl-glue'
        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
    else:
        run_pinballview(args.width, args.height, args.configuration)
def main():
    EnvironmentLoader.loadEnvironment(SumoEnvironment(sys.argv[1]))
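# A minimal experiment-side sketch: each snippet in this collection only
# loads the environment. Once the RL-Glue core, an agent, and one of these
# environments are running, an experiment program drives them through the
# Python codec's RLGlue module. The episode cap and episode count below are
# arbitrary choices for illustration.
import rlglue.RLGlue as RLGlue

if __name__ == "__main__":
    RLGlue.RL_init()
    for episode in range(10):
        RLGlue.RL_episode(1000)  # run one episode, capped at 1000 steps
        print "steps:", RLGlue.RL_num_steps(), "return:", RLGlue.RL_return()
    RLGlue.RL_cleanup()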
        # NOTE: np.append returns a float ndarray; the RL-Glue codec expects a
        # list of ints, so it is converted explicitly.
        returnObs.intArray = np.append(np.zeros(128),
                                       [item for innerlist in arr
                                        for item in innerlist]).astype(int).tolist()
        scipy.misc.imsave('screen.png', arr)

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = returnObs
        returnRO.terminal = episodeOver

        # Event handling
        for event in pygame.event.get():
            if event.type == QUIT:
                # Quit event
                sys.exit()
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is dqn_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"

if __name__ == "__main__":
    App = EdgeTracer()  # NOTE: unused; loadEnvironment below gets a fresh instance
    # App.run()
    EnvironmentLoader.loadEnvironment(EdgeTracer())
    print('main start')
        if self.inFuelCell(self.pos):
            self.fuel += 20.0
            if self.fuel > 60.0:
                self.fuel = 60.0

        if gridworld.Gridworld.isAtGoal(self):
            return 0.0
        elif self.fuel < 0:
            return -400.0
        elif self.inFuelCell(self.pos):
            # Fuel costs
            base = self.var[0] if self.pos[1] <= 1.0 else self.var[1]
            a = self.var[2]
            return base - (int(self.pos[0]) % 5) * a
        elif intAction < 4:
            return -1.0
        elif intAction >= 4:
            return -1.4
        else:
            print "ERROR in FuelWorld.takeAction"

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
    gridworld.addGridworldArgs(parser)
    parser.add_argument("--fuel_noise", type=float, default=0.0,
                        help="If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(FuelWorld(noise=args.noise, fudge=args.fudge,
                                                fuel_noise=args.fuel_noise))
"the puddle's depth.") parser.add_argument("--puddle_penalty", type=float, default=-100, help="The reward penalty scale for walking through puddles.") args = parser.parse_args() kwargs = {} if args.puddle is not None: means = [] covs = [] for puddle in args.puddle: means.append(tuple(puddle[:2])) covs.append(tuple(puddle[2:])) kwargs['puddle_means'] = means kwargs['puddle_var'] = covs if args.size_x: kwargs['size_x'] = args.size_x if args.size_y: kwargs['size_y'] = args.size_y if args.goal_x: kwargs['goal_x'] = args.goal_x if args.goal_y: kwargs['goal_y'] = args.goal_y if args.noise: kwargs['noise'] = args.noise if args.fudge: kwargs['fudge'] = args.fudge if args.random_restarts: kwargs['random_start'] = args.random_restarts EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs))
    parser.add_option(
        '-t', '--test',
        dest='test', action='store_true', default=False,
        help="test mode")
    parser.add_option(
        '-p', '--param_file',
        dest='param_file', action='store', type='string',
        default='./rlglue_param.json',
        help="json file of simulation parameters")
    parser.add_option(
        '-d', '--dst',
        dest='savepath', action='store', type='string',
        default='./result/data',
        help="Save data path.")
    (options, args) = parser.parse_args()
    # print 'options', options

    if not options.test:
        EnvironmentLoader.loadEnvironment(ElecpyEnvironment(options))
    else:
        objEnv = ElecpyEnvironment(options)
        objEnv.env_init()
        for epi in range(3):
            print 'Episode {0}'.format(epi)
            objEnv.env_start()
            cnt_step = 0
            while True:
                cnt_step += 1
                action = Action(numInts=1)
                action.intArray = [0]
                rot = objEnv.env_step(action)
                if rot.terminal:
                    break
    def env_init(self):
        return ""

    def env_start(self):
        return Observation()

    def env_step(self, action):
        return Reward_observation_terminal()

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage is None:
            return "null"
        if inMessage == "":
            return "empty"
        if inMessage == "null":
            return None
        if inMessage == "empty":
            return ""
        return inMessage

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_message_environment())
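# A sketch of exercising test_message_environment's message round-trip from
# an experiment program, assuming the RL-Glue core and the environment above
# are already running:
import rlglue.RLGlue as RLGlue

if __name__ == "__main__":
    RLGlue.RL_init()
    print RLGlue.RL_env_message("empty")  # the environment answers ""
    print RLGlue.RL_env_message("null")   # the environment answers None
    RLGlue.RL_cleanup()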
    parser = argparse.ArgumentParser(description='Run Noisy Cart Pole Balancing or Swing Up environment in network mode.')
    parser.add_argument("--noise", type=float, default=0,
                        help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=bool, default=False,
                        help="Restart the cart with a random location and velocity.")
    parser.add_argument("--mode", choices=["easy", "hard", "swingup"], default="easy", type=str,
                        help="Choose the type of cart pole domain. Easy/hard balancing, or swing up.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(CartPole(mode=args.mode, noise=args.noise,
                                               random_start=args.random_restarts))
            ro.r = 1.0
        else:
            self.o.doubleArray = [0.0078125, -0.0078125, 0.0, 0.0078125e150, -0.0078125e150]
            self.o.charArray = ['g', 'F', '?', ' ', '&']
            self.o.intArray = [173, -173, 2147483647, 0, -2147483648]
            ro.r = -2.0
        ro.o = self.o
        ro.terminal = terminal
        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        timesToPrint = self.stepCount % 3
        outMessage = inMessage + "|"
        for i in range(0, timesToPrint):
            outMessage = outMessage + "%d" % (self.stepCount)
            outMessage = outMessage + "."
        outMessage = outMessage + "|" + inMessage
        return outMessage

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_1_environment())
if __name__ == "__main__": pygame.init() pygame.freetype.init() gameArea = pygame.Rect([0, 0], size) if len(sys.argv) > 1: useGlue = (sys.argv[1] == 'True') else: useGlue = False black = 0, 0, 0 screen = pygame.display.set_mode([size[0], int(size[1] * (4 / 3.0)) ]) #,pygame.FULLSCREEN) count = 0 if len(sys.argv) > 2: p1Type = int(sys.argv[2]) else: p1Type = 0 if len(sys.argv) > 3: p2Type = int(sys.argv[3]) else: p2Type = 1 if useGlue: EnvironmentLoader.loadEnvironment(World(p1Type, 2)) else: world = World(p1Type, p2Type) world.start() while True or count < 300: world.step() count += 1
    def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10,
                 wind_center=7., wind_stdev=1.0, wind_power=2.0,
                 noise=0.0, random_start=False, fudge=1.4143):
        gridworld.Gridworld.__init__(self, size_x=size_x, size_y=size_y,
                                     goal_x=goal_x, goal_y=goal_y, noise=noise,
                                     random_start=random_start, fudge=fudge)
        self.wind_center = wind_center
        self.wind_stdev = wind_stdev
        self.wind_power = wind_power
        self.domain_name = "Continuous Windy Gridworld by Will Dabney"

    def reset(self):
        if self.random_start:
            self.pos = numpy.random.random((2,)) * self.size
        else:
            self.pos = numpy.array([0.0, self.size[1] * 0.5])

    def takeAction(self, action):
        # The wind pushes the agent in y with strength given by a Gaussian
        # bump (centered at wind_center) over the agent's x position.
        self.pos[1] += norm.pdf(self.pos[0], self.wind_center, self.wind_stdev) * self.wind_power
        return gridworld.Gridworld.takeAction(self, action)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run 2D Noisy Continuous Windy Gridworld environment in network mode.')
    gridworld.addGridworldArgs(parser)
    parser.add_argument("--wind_center", type=float, default=7,
                        help="Center, or strongest point, in the x-direction of the wind")
    parser.add_argument("--wind_scale", type=float, default=1.0,
                        help="Scale, or width, of the wind effects around the center.")
    parser.add_argument("--wind_power", type=float, default=2.0,
                        help="The power, or strength, of the wind.")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        WindyGridworld(args.size_x, args.size_y, args.goal_x, args.goal_y,
                       wind_center=args.wind_center, wind_stdev=args.wind_scale,
                       wind_power=args.wind_power, noise=args.noise,
                       random_start=args.random_restarts, fudge=args.fudge))
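# For completeness, a minimal sketch of the agent side that these environment
# loaders pair with, using the Python codec's Agent interface and AgentLoader.
# The random 4-action body is a placeholder assumption; a real agent would
# parse the task spec in agent_init and learn in agent_step.
import random

from rlglue.agent.Agent import Agent
from rlglue.agent import AgentLoader
from rlglue.types import Action


class RandomAgent(Agent):
    def agent_init(self, taskSpec):
        pass

    def agent_start(self, observation):
        return self.random_action()

    def agent_step(self, reward, observation):
        return self.random_action()

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, inMessage):
        return "I don't know how to respond to your message"

    def random_action(self):
        action = Action(numInts=1)
        action.intArray = [random.randint(0, 3)]  # assumes 4 discrete actions
        return action

if __name__ == "__main__":
    AgentLoader.loadAgent(RandomAgent())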