Code example #1
File: launcher.py Project: TobiasMR/DeepLight
def launch_environment(parameters):
    """Start the sumo-rlglue environment

    (This function is executed in a separate process using
    multiprocessing.)
    """
    import rl_glue_sumo_environment
    environment = rl_glue_sumo_environment.SumoEnvironment()
    EnvironmentLoader.loadEnvironment(environment)
Code example #2
def main():
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(
            simulationParameterObj = None,
            actionParameterObj = None,
            Bad_Action_Penalty = -10000,
            fixedStartState = False,
            nbrReaches = REACHES,
            habitatSize = HABITATS,
            seed = 1))
Code example #3
File: pomdp.py Project: yang0110/python-rl
        description=
        'Run a POMDP problem file as a domain in RL-Glue in network mode.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--file",
        type=str,
        default=config_files[0],
        help="Run POMDP domain given the path to a POMDP problem file.")
    group.add_argument("--list",
                       action='store_true',
                       default=False,
                       help="List path to included POMDP problem files.")
    args = parser.parse_args()

    if args.list:
        print "Included POMDP problem files:"
        for file in config_files:
            print file
    else:
        EnvironmentLoader.loadEnvironment(
            POMDPEnvironment(spec_filename=args.file))

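    # Note: everything from here down re-parses the command line with a
    # different flag set and appears to be an older entry point left in
    # place; as written it runs after the block above and will fail unless
    # --pomdp_file is also supplied.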
    parser = argparse.ArgumentParser(
        description='Run a specified POMDP in RL-Glue in network mode.')
    parser.add_argument("--pomdp_file",
                        type=str,
                        help="Filename for POMDP spec file to load and use.",
                        required=True)
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(POMDPEnvironment(args.pomdp_file))
Code example #4
File: gridworld.py Project: yang0110/python-rl
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self,inMessage):
        return "I don't know how to respond to your message"


def addGridworldArgs(parser):
    parser.add_argument("--size_x", type=float, default=10, help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--size_y", type=float, default=10, help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--goal_x", type=float, default=10, help="Goal x coordinate")
    parser.add_argument("--goal_y", type=float, default=10, help="Goal y coordinate")
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
    parser.add_argument("--fudge", type=float, default=1.4143, help="Distance from goal allowed before episode is counted as finished")
    parser.add_argument("--random_restarts", type=bool, default=False, help="Randomly assign x,y initial locations.")

if __name__=="__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run 2D Noisy Continuous Gridworld environment in network mode.')
    addGridworldArgs(parser)
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(Gridworld(size_x=args.size_x, size_y=args.size_y, goal_x=args.goal_x, goal_y=args.goal_y, noise=args.noise, random_start=args.random_restarts, fudge=args.fudge))
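
A caution that applies to --random_restarts here and to the other type=bool flags on this page (mountain car, acrobot, cart pole): argparse applies bool() to the raw string, and any non-empty string, including "False", is truthy, so --random_restarts False still enables random restarts. A small sketch of one common fix, using a store_true flag instead:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--random_restarts", action="store_true", default=False,
                    help="Randomly assign x,y initial locations.")

print(parser.parse_args(["--random_restarts"]).random_restarts)  # True
print(parser.parse_args([]).random_restarts)                     # False
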
Code example #5
                        self.o.doubleArray=list(range(0,50000))
                        terminal=0
                        if self.stepCount==200:
                                terminal=1
                        ro=Reward_observation_terminal()
                        ro.r=1.0
                        ro.o=self.o
                        ro.terminal=terminal
                        return ro

                self.o.intArray=list(range(0,5))
                #cheating, might break something
                self.o.doubleArray=list(range(0,5))
                terminal=0
                if self.stepCount==5000:
                        terminal=1
                ro=Reward_observation_terminal()
                ro.r=1.0
                ro.o=self.o
                ro.terminal=terminal
                return ro
                
        def env_cleanup(self):
                pass

        def env_message(self,inMessage):
                return None

if __name__=="__main__":
        EnvironmentLoader.loadEnvironment(test_speed_environment())
Code example #6
		
	def printState(self):
		numRows=len(self.map)
		numCols=len(self.map[0])
		print "Agent is at: "+str(self.agentRow)+","+str(self.agentCol)
		print "Columns:0-10                10-17"
		print "Col    ",
		for col in range(0,numCols):
			print col%10,
			
		for row in range(0,numRows):
			print
			print "Row: "+str(row)+" ",
			for col in range(0,numCols):
				if self.agentRow==row and self.agentCol==col:
					print "A",
				else:
					if self.map[row][col] == self.WORLD_GOAL:
						print "G",
					if self.map[row][col] == self.WORLD_MINE:
						print "M",
					if self.map[row][col] == self.WORLD_OBSTACLE:
						print "*",
					if self.map[row][col] == self.WORLD_FREE:
						print " ",
		print
		

if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(mines_environment())
Code example #7
File: environment.py Project: okkhoy/gabe-and-joh
        if msg.startswith('set'):
            param, value = msg.split(None, 2)[1:]
            self.debug('set', param, value)
            
            self.env_message_set_param(param, value)
        
        elif msg.startswith('get'):
            param = msg.split(None, 1)[1]
            
            return self.env_message_get_param(param)
        
        else:
            return self.env_message_handler(msg)
    
    def debug(self, *args):
        """ Print a debug msg """
        if self.debug:
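            # (self.debug is this bound method and is therefore always
            # truthy, so this guard never disables output; a separate
            # debug flag attribute was presumably intended)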
            args = [str(a) for a in args]
            print "%s: %s" % (self.name, ' '.join(args))
    
    def step_out(self, *args):
        if self.output_steps:
            args = [str(a) for a in args]
            print ' '.join(args)
    
if __name__ == '__main__':
    #p = PuddleEnvironment()
    #p.env_start()
    EnvironmentLoader.loadEnvironment(PuddleEnvironment())
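
The set/get string protocol handled by env_message above is driven from the experiment side of RL-Glue. A hypothetical sketch, assuming the standard Python codec experiment interface (rlglue.RLGlue) plus a running rl_glue server, agent, and this environment; the parameter name noise is only an illustration:

import rlglue.RLGlue as RLGlue

RLGlue.RL_init()
RLGlue.RL_env_message("set noise 0.1")      # routed to the 'set' branch above
reply = RLGlue.RL_env_message("get noise")  # routed to the 'get' branch
print(reply)
RLGlue.RL_cleanup()
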
Code example #8
File: environment.py Project: nivedn3/RL_Assignments
                    return (-1 - i)

        if self.presentCol > 6999 and self.presentCol < 8000:
            if self.presentRow > 5999 and self.presentRow < 9000:
                return -1

        if self.presentCol > 5999 and self.presentCol < 7000:
            if self.presentRow > 4999 and self.presentRow < 8000:
                return -2

        #Reward
        if self.presentCol > 10999:
            if self.presentRow < 1000:
                return 10

        return 0

    #Checking if the current position is the goal state
    def goalcheck(self):

        # Only A goal state
        if self.presentCol > 10999:
            if self.presentRow < 1000:
                return True

        return False


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(puddle_world())
Code example #9
File: env_vrep.py Project: hughhugh/dqn-vrep
        r = 0.0
        if np.any(self.state_ranges[:,0] > self.state[:]) or \
           np.any(self.state_ranges[:,1] < self.state[:]):
            #            r = -1
            r = -np.sum(3.0 * self.state_ranges[:, 1]**2)
            r *= 6000 - self.num_sim_steps
            terminate = True
        else:
            #            perr = np.linalg.norm(self.prevState[:2] - self.state_goal[:2])
            #            nerr = np.linalg.norm(self.state[:2] - self.state_goal[:2])
            #            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
            #                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))* \
            #                math.exp(-np.sum(abs(self.state[6:8]-self.state_goal[6:8])/(.1*(self.state_ranges[6:8,1]-self.state_ranges[6:8,0]))))
            #            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
            #                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))
            #            r -= (np.sum(((self.state[:2]-self.state_goal[:2])/(self.state_ranges[:2,1]-self.state_ranges[:2,0]))**2)+ \
            #                  np.sum(((self.state[3:5]-self.state_goal[3:5])/(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))**2))
            r -= (self.state[0] - self.state_goal[0])**2
            r -= (self.state[1] - self.state_goal[1])**2
            r -= self.state[3]**2
            r -= self.state[4]**2

            terminate = False

        print("reward " + str(r))
        return r, terminate


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(vrep_environment())
Code example #10
			theReward=-1
			episodeOver=1

		if self.currentState >= 20:
			self.currentState=20
			theReward=1
			episodeOver=1
		
		theObs=Observation()
		theObs.intArray=[self.currentState]
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		if inMessage=="what is your name?":
			return "my name is dqn_environment, Python edition!"
		else:
			return "I don't know how to respond to your message"


if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(dqn_environment())
Code example #11
        default=0,
        help="Standard deviation of additive noise to generate")
    parser.add_argument(
        "--fudge",
        type=float,
        default=1.4143,
        help="Distance from goal allowed before episode is counted as finished"
    )


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description=
        'Run 2D Noisy Continuous Gridworld environment in network mode.')
    addTaxiArgs(parser)
    args = parser.parse_args()
    fuelloc = None if args.fuel_loc[0] < 0 else args.fuel_loc
    walls = numpy.array(args.wall) if args.wall is not None else None
    landmarks = numpy.array(
        args.landmark) if args.landmark is not None else None
    EnvironmentLoader.loadEnvironment(
        Taxi(args.size_x,
             args.size_y,
             walls=walls,
             landmarks=landmarks,
             fuel_loc=fuelloc,
             fickleness=args.fickleness,
             noise=args.noise,
             fudge=args.fudge))
Code example #12
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self,inMessage):
        return "I don't know how to respond to your message"

@register_environment
class MountainCar(MountainCarND):
    name = "Mountain Car"

    def __init__(self, **kwargs):
        kwargs['dimension'] = 2
        super(MountainCar, self).__init__(**kwargs)


if __name__=="__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Noisy Mountain Car environment in network mode.')
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=bool, default=False, help="Restart the cart with a random location and velocity.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(MountainCar(noise=args.noise, random_start=args.random_restarts))

Code example #13
"""
MENACE agent environment wrapper
"""

import sys

# Ugly!
sys.path.append('../')

from agents.symmetry_agent import SymmetryAgent
from wrapper_environment import WrapperEnvironment
from rlglue.environment import EnvironmentLoader

class SymmetryEnvironment(WrapperEnvironment, SymmetryAgent):
    
    name = 'symmetry_agent'
    
    player = 2
    def env_play(self): 
        action = self.do_step(self.state)
        self.state = action.intArray
        

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(SymmetryEnvironment())
Code example #14
                                        self.map[i:i + self.n_cols])) + '\n'
        self.history.append(current_map)

        # Record how the game unfolded
        if rot.r == self.game.r_lose:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # If the game has been decided, the agent's agent_end is called next;
        # otherwise play continues with the agent's agent_step
        return rot

    def env_cleanup(self):
        pass

    def env_message(self, message):
        pass


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Deep Q-Learning')
    parser.add_argument('--size',
                        '-s',
                        default=6,
                        type=int,
                        help='Reversi board size')
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(KmoriReversiEnvironment(args.size))
Code example #15
File: tetris.py Project: ProjectRune/DQN_Tetris
		episodeOver=0
		theReward=0

		theObs=Observation()
		theObs.intArray=np.zeros(50816, dtype=int)
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		if inMessage=="what is your name?":
			return "my name is dqn_environment, Python edition!"
		else:
			return "I don't know how to respond to your message"


if __name__ == '__main__':
	App = TetrisApp()
	#App.run()
	EnvironmentLoader.loadEnvironment(App)
	print('main start')

Code example #16
File: batch_replenish.py Project: yang0110/python-rl
                        type=float,
                        default=5.,
                        help="Payment received per unit product sold.")
    parser.add_argument("--cost",
                        type=float,
                        default=2.,
                        help="Cost per unit product purchased.")
    parser.add_argument("--discount_factor",
                        type=float,
                        default=0.999,
                        help="Discount factor to learn over.")
    parser.add_argument(
        "--time_period",
        type=int,
        default=20,
        help="Time period for problem. (Number of steps to run)")
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help="Standard deviation of additive noise to generate")
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        BatchReplenishment(demand_mean=args.demand_mean,
                           demand_std=args.demand_std,
                           payoff=args.payoff,
                           cost=args.cost,
                           gamma=args.discount_factor,
                           time_period=args.time_period,
                           noise=args.noise))
Code example #17
File: environment.py Project: tknandu/ContinuousMaze
    def checkCurrentTerminal(self):
        return self.checkTerminal(self.agentRow, self.agentCol)

    def updatePosition(self, theAction):
        # When the move would result in hitting an obstacle, the agent simply doesn't move

        newRow = self.agentRow
        newCol = self.agentCol

        newRow += self.FIXED_DISTANCE * math.cos(theAction)
        newCol += self.FIXED_DISTANCE * math.sin(theAction)

        # Check if new position is out of bounds or inside an obstacle
        if self.checkValid(newRow, newCol):
            self.agentRow = newRow
            self.agentCol = newCol
            return False
        else:
            return True

    def calculateReward(self, hitBoundary):
        if hitBoundary:
            return -0.5
        if distance.euclidean([self.agentRow, self.agentCol], self.END_STATE) < 0.5 * self.FIXED_DISTANCE:
            return 10.0
        return 0.0


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(threeroom_environment())
Code example #18
File: acrobot.py Project: AAHays/python-rl
        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self,inMessage):
        return "I don't know how to respond to your message"


if __name__=="__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Run Noisy Acrobot environment in network mode.')
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate, affects the action effects.")
    parser.add_argument("--random_restarts", type=bool, default=False, help="Restart the state with random values.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(Acrobot(noise=args.noise, random_start=args.random_restarts))

Code example #19
File: taxi.py Project: AAHays/python-rl
		return returnRO

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		return "I don't know how to respond to your message"

def addTaxiArgs(parser):
	parser.add_argument("--size_x", type=float, default=5, help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
	parser.add_argument("--size_y", type=float, default=5, help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
	parser.add_argument("--landmark", action="append", nargs=2, help="Add a landmark, give x y coordinates", type=float)
	parser.add_argument("--wall", type=float, action="append", nargs=2, help="Add a wall, give x coordinate and size in y with sign indicating starting at the bottom (+) or top (-)")
	parser.add_argument("--fuel_loc", type=float, default=[2.0, 1.0], nargs=2, help="x y coordinate of the fuel station")
	parser.add_argument("--fickleness", type=float, default=0, help="Probability of the passenger changing their destination mid-route.")
	parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
	parser.add_argument("--fudge", type=float, default=1.4143, help="Distance from goal allowed before episode is counted as finished")


if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D Noisy Continuous Gridworld environment in network mode.')
	addTaxiArgs(parser)
	args = parser.parse_args()
	fuelloc = None if args.fuel_loc[0] < 0 else args.fuel_loc
	walls = numpy.array(args.wall) if args.wall is not None else None
	landmarks = numpy.array(args.landmark) if args.landmark is not None else None
	EnvironmentLoader.loadEnvironment(Taxi(args.size_x, args.size_y, walls=walls, landmarks=landmarks, fuel_loc=fuelloc, fickleness=args.fickleness, noise=args.noise, fudge=args.fudge))

Code example #20
File: rlglue_env.py Project: chenaoki/elecpy
                      action='store',
                      type='string',
                      default='./rlglue_param.json',
                      help="json file of simulation parameters")
    parser.add_option('-d',
                      '--dst',
                      dest='savepath',
                      action='store',
                      type='string',
                      default='./result/data',
                      help="Save data path.")
    (options, args) = parser.parse_args()
    #print 'options', options

    if not options.test:
        EnvironmentLoader.loadEnvironment(ElecpyEnvironment(options))

    else:
        objEnv = ElecpyEnvironment(options)
        objEnv.env_init()
        for epi in range(3):
            print 'Episode {0}'.format(epi)
            objEnv.env_start()
            cnt_step = 0
            while True:
                cnt_step += 1
                action = Action(numInts=1)
                action.intArray = [0]
                rot = objEnv.env_step(action)
                if rot.terminal:
                    break
Code example #21
File: fuelworld.py Project: yang0110/python-rl
            base = self.var[0] if self.pos[1] <= 1.0 else self.var[1]
            a = self.var[2]
            return base - (int(self.pos[0]) % 5) * a
        elif intAction < 4:
            return -1.0
        elif intAction >= 4:
            return -1.4
        else:
            print "ERROR in FuelWorld.takeAction"


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description=
        'Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.'
    )
    gridworld.addGridworldArgs(parser)
    parser.add_argument(
        "--fuel_noise",
        type=float,
        default=0.0,
        help=
        "If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure."
    )
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        FuelWorld(noise=args.noise,
                  fudge=args.fudge,
                  fuel_noise=args.fuel_noise))
Code example #22
            theReward=-1
            episodeOver=1

        if self.currentState >= 20:
            self.currentState=20
            theReward=1
            episodeOver=1

        theObs=Observation()
        theObs.intArray=[self.currentState]

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=theObs
        returnRO.terminal=episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self,inMessage):
        if inMessage=="what is your name?":
            return "my name is skeleton_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"


if __name__=="__main__":
    EnvironmentLoader.loadEnvironment(skeleton_environment())
Code example #23
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description='Run Noisy Acrobot environment in network mode.')
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help=
        "Standard deviation of additive noise to generate, affects the action effects."
    )
    parser.add_argument("--random_restarts",
                        type=bool,
                        default=False,
                        help="Restart the state with random values.")

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        Acrobot(noise=args.noise, random_start=args.random_restarts))
Code example #24
File: pinball.py Project: yang0110/python-rl
        pygame.display.flip()

    pygame.quit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Pinball domain')
    parser.add_argument('configuration', help='the configuration file')
    parser.add_argument('--width',
                        action='store',
                        type=int,
                        default=500,
                        help='screen width (default: 500)')
    parser.add_argument('--height',
                        action='store',
                        type=int,
                        default=500,
                        help='screen height (default: 500)')
    parser.add_argument('-r',
                        '--rlglue',
                        action='store_true',
                        help='expose the environment through RL-Glue')
    args = parser.parse_args()

    if args.rlglue:
        print 'Starting rl-glue'
        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
    else:
        run_pinballview(args.width, args.height, args.configuration)
Code example #25
            terminal = 0
            if self.stepCount == 200:
                terminal = 1
            ro = Reward_observation_terminal()
            ro.r = 1.0
            ro.o = self.o
            ro.terminal = terminal
            return ro

        self.o.intArray = range(0, 5)
        #cheating, might break something
        self.o.doubleArray = range(0, 5)
        terminal = 0
        if self.stepCount == 5000:
            terminal = 1
        ro = Reward_observation_terminal()
        ro.r = 1.0
        ro.o = self.o
        ro.terminal = terminal
        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return None


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_speed_environment())
Code example #26
File: batch_replenish.py Project: AAHays/python-rl
		returnRO.terminal = int(self.counter >= self.T)

		return returnRO

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		return "I don't know how to respond to your message"


if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D Noisy Continuous Gridworld environment in network mode.')
	parser.add_argument("--demand_mean", type=float, default=10., help="Mean demand for the product.")
	parser.add_argument("--demand_std", type=float, default=1., 
			    help="Standard deviation of demand for the product.")
	parser.add_argument("--payoff", type=float, default=5., help="Payment received per unit product sold.")
	parser.add_argument("--cost", type=float, default=2., help="Cost per unit product purchased.")
	parser.add_argument("--discount_factor", type=float, default=0.999, help="Discount factor to learn over.")
	parser.add_argument("--time_period", type=int, default=20, help="Time period for problem. (Number of steps to run)")
	parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
	args = parser.parse_args()
	EnvironmentLoader.loadEnvironment(BatchReplenishment(demand_mean=args.demand_mean, 
							     demand_std=args.demand_std, 
							     payoff=args.payoff, 
							     cost=args.cost, 
							     gamma=args.discount_factor,
							     time_period = args.time_period,
							     noise=args.noise))
Code example #27
        rot.o = observation

        current_map = 'map\n'
        for i in range(0, len(self.map), self.n_cols):
            current_map += ' '.join(map(str,
                                        self.map[i:i + self.n_cols])) + '\n'
            if (i % 16 == 0):
                current_map += "\n"

        self.history.append(current_map)

        if rot.r == -1:
            f = open('history.txt', 'a')
            history = '\n'.join(self.history)
            f.writelines('# START\n' + history + '# END\n\n')
            f.close()

        # If the game has been decided, the agent's agent_end is called next;
        # otherwise play continues with the agent's agent_step
        return rot

    def env_cleanup(self):
        pass

    def env_message(self, message):
        pass


if __name__ == '__main__':
    EnvironmentLoader.loadEnvironment(MarubatsuEnvironment())
Code example #28
"""
Random player environment
"""

import random
from rlglue.environment import EnvironmentLoader
from wrapper_environment import WrapperEnvironment

class RandomEnvironment(WrapperEnvironment):
    
    name = 'random'
    
    def env_play(self):
        """
        Pick the first free spot, and play there.
        """
        open_spots = []
        for i in range(len(self.state)):
            if self.state[i] == 0:
                open_spots.append(i)
        self.state[random.choice(open_spots)] = self.color

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(RandomEnvironment())
Code example #29
File: environment.py Project: tknandu/HierarchicalRL
            1, 1, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0,
            0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0
        ]
        numRows = len(self.worldmap)
        numCols = len(self.worldmap[0])
        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.checkValid(row, col):
                    flat = self.calculateFlatState(row, col)
                    flat_i = self.validstates.index(flat)
                    print unicode(argmaxes[flat_i]),
                else:
                    print "X",
                """
                if self.map[row][col] == self.GOAL:
                    print "G",
                if self.map[row][col] == self.WALL:
                    print "X",
                if self.map[row][col] == self.START:
                    print "S",
                if self.map[row][col] == self.FREE:
                    print " ",
                """
        print


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(threeroom_environment())
Code example #30
        if inMessage.startswith("set-start-state"):
            splitString = inMessage.split(" ")
            self.state = array(eval(splitString[1]))
            self.fixedStartState = True
            return "Message understood.  Using fixed start state."

        return "InvasiveEnvironment(Python) does not respond to messages."

    def setAgentState(self, S):
        assert len(S)==self.simulationParameterObj.habitatSize*self.simulationParameterObj.nbrReaches
        self.state = S
        valid = True
        return valid

    def setRandomState(self):
        S = array([random.randint(1, 3) for i in
                   xrange(self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)])
        self.setAgentState(S)

    def checkValid(self, S):
        valid = True
        return valid

    def printState(self):
        print "Agent is at: " + str(self.state)

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(simulationParameterObj=None, actionParameterObj=None, Bad_Action_Penalty=-10000,fixedStartState=False, nbrReaches=7,
            habitatSize=4, seed=1))
Code example #31
    def printState(self):
        numRows = len(self.map)
        numCols = len(self.map[0])
        print "Agent is at: " + str(self.agentRow) + "," + str(self.agentCol)
        print "Columns:0-10                10-17"
        print "Col    ",
        for col in range(0, numCols):
            print col % 10,

        for row in range(0, numRows):
            print
            print "Row: " + str(row) + " ",
            for col in range(0, numCols):
                if self.agentRow == row and self.agentCol == col:
                    print "A",
                else:
                    if self.map[row][col] == self.WORLD_GOAL:
                        print "G",
                    if self.map[row][col] == self.WORLD_MINE:
                        print "M",
                    if self.map[row][col] == self.WORLD_OBSTACLE:
                        print "*",
                    if self.map[row][col] == self.WORLD_FREE:
                        print " ",
        print


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(mines_environment())
Code example #32
    # (string) -> string

    def env_message(self, inMessage):
        if inMessage.startswith("print"):
            self.toprint = 1
            return "message understood, print"
        if inMessage.startswith("stop print"):
            self.toprint = 0
            return "message understood, stop print"
        return "RmaxAgent(Python) does not understand your message."

    def clearscreen(self, numlines=100):
        """Clear the console.
        numlines is an optional argument used only as a fall-back.
        """
        # Thanks to Steven D'Aprano, http://www.velocityreviews.com/forums

        if os.name == "posix":
            # Unix/Linux/MacOS/BSD/etc
            os.system('clear')
        elif os.name in ("nt", "dos", "ce"):
            # DOS/Windows
            os.system('CLS')
        else:
            # Fallback for other operating systems.
            print('\n' * numlines)


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(My_Environment())
Code example #33
        if msg.startswith('set'):
            param, value = msg.split(None, 2)[1:]
            self.debug('set', param, value)

            self.env_message_set_param(param, value)

        elif msg.startswith('get'):
            param = msg.split(None, 1)[1]

            return self.env_message_get_param(param)

        else:
            return self.env_message_handler(msg)

    def debug(self, *args):
        """ Print a debug msg """
        if self.debug:
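            # (self.debug is this bound method and is therefore always
            # truthy, so this guard never disables output; a separate
            # debug flag attribute was presumably intended)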
            args = [str(a) for a in args]
            print "%s: %s" % (self.name, ' '.join(args))

    def step_out(self, *args):
        if self.output_steps:
            args = [str(a) for a in args]
            print ' '.join(args)


if __name__ == '__main__':
    #p = PuddleEnvironment()
    #p.env_start()
    EnvironmentLoader.loadEnvironment(PuddleEnvironment())
Code example #34
		self.nonEmptyObservation.charArray=['a','b','c','d','e']
		return ""

	def env_start(self):
		self.whichEpisode=self.whichEpisode+1
		
		
		if self.whichEpisode % 2 == 0:
			return self.emptyObservation
		else:
			return self.nonEmptyObservation
	
	def env_step(self,action):
		ro=Reward_observation_terminal()
		
		if self.whichEpisode % 2 == 0:
			ro.o=self.emptyObservation
		else:
			ro.o=self.nonEmptyObservation

		return ro	

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		return None
	
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_empty_environment())
Code example #35
File: pomdp.py Project: AAHays/python-rl
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"

if __name__=="__main__":
    import argparse
    path_to_problems = os.path.join(os.path.dirname(__file__), 'configs', 'pomdps', '*')
    config_files = glob.glob(path_to_problems)
    parser = argparse.ArgumentParser(description='Run a POMDP problem file as a domain in RL-Glue in network mode.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--file", type=str, default=config_files[0],
                       help="Run POMDP domain given the path to a POMDP problem file.")
    group.add_argument("--list", action='store_true', default=False, help="List path to included POMDP problem files.")
    args = parser.parse_args()

    if args.list:
        print "Included POMDP problem files:"
        for file in config_files:
            print file
    else:
        EnvironmentLoader.loadEnvironment(POMDPEnvironment(spec_filename=args.file))

    parser = argparse.ArgumentParser(description='Run a specified POMDP in RL-Glue in network mode.')
    parser.add_argument("--pomdp_file", type=str, help="Filename for POMDP spec file to load and use.", required=True)
    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(POMDPEnvironment(args.pomdp_file))
Code example #36
File: env_vrep.py Project: hughhugh/dqn-vrep
        #print("newState: "+str(self.state))
        r = 0.0
        if np.any(self.state_ranges[:,0] > self.state[:]) or \
           np.any(self.state_ranges[:,1] < self.state[:]):
#            r = -1
            r = -np.sum(3.0 * self.state_ranges[:,1]**2)
            r *= 6000-self.num_sim_steps
            terminate = True
        else:
#            perr = np.linalg.norm(self.prevState[:2] - self.state_goal[:2])
#            nerr = np.linalg.norm(self.state[:2] - self.state_goal[:2])
#            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
#                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))* \
#                math.exp(-np.sum(abs(self.state[6:8]-self.state_goal[6:8])/(.1*(self.state_ranges[6:8,1]-self.state_ranges[6:8,0]))))
#            r = math.exp(-np.sum(abs(self.state[:2]-self.state_goal[:2])/(.1*(self.state_ranges[:2,1]-self.state_ranges[:2,0]))))* \
#                math.exp(-np.sum(abs(self.state[3:5]-self.state_goal[3:5])/(.1*(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))))
#            r -= (np.sum(((self.state[:2]-self.state_goal[:2])/(self.state_ranges[:2,1]-self.state_ranges[:2,0]))**2)+ \
#                  np.sum(((self.state[3:5]-self.state_goal[3:5])/(self.state_ranges[3:5,1]-self.state_ranges[3:5,0]))**2))
            r -= (self.state[0]-self.state_goal[0])**2
            r -= (self.state[1]-self.state_goal[1])**2
            r -= self.state[3]**2
            r -= self.state[4]**2
            
            terminate = False

        print("reward "+str(r))
        return r,terminate
		
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(vrep_environment())
Code example #37
            self.fixedStartState = True
            return "Message understood.  Using fixed start state."

        return "InvasiveEnvironment(Python) does not respond to messages."

    def setAgentState(self, S):
        assert len(S)==self.simulationParameterObj.habitatSize*self.simulationParameterObj.nbrReaches
        self.state = S
        valid = True
        return valid

    def setRandomState(self):
        S = array([random.randint(1, 3) for i in
                   xrange(self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)])
        self.setAgentState(S)

    def checkValid(self, S):
        valid = True
        return valid

    def printState(self):
        print "Agent is at: " + str(self.state)

# ============================ PARAMETERS =====================================

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(
        InvasiveEnvironment(simulationParameterObj=None, actionParameterObj=None, Bad_Action_Penalty=-10000,fixedStartState=False, nbrReaches=7,
            habitatSize=4, seed=1))
            
# ============================ PARAMETERS =====================================
Code example #38
                #return max(outcomes)
                max_element = -1
                for o in outcomes:
                    if o == +1:
                        return o
                    max_element = max(o,max_element)
                return max_element

        finally:
            board.undoMove(move)

    moves = [(move, evaluateMove(move)) for move in board.getValidMoves()]
    random.shuffle(moves)
    moves.sort(key = lambda (move, winner): winner)
    board.makeMove(moves[-1][0], player)

class MiniMaxEnvironment(WrapperEnvironment):
    
    name = 'minimax'
    
    def env_play(self):
        b = Board(self.state)
        computerPlayer(b, Player_X)
        b.output()
        self.state = b.pieces


if __name__ == "__main__":
    #game()
    EnvironmentLoader.loadEnvironment(MiniMaxEnvironment())
Code example #39
        self.nonEmptyObservation.charArray = ['a', 'b', 'c', 'd', 'e']
        return ""

    def env_start(self):
        self.whichEpisode = self.whichEpisode + 1

        if self.whichEpisode % 2 == 0:
            return self.emptyObservation
        else:
            return self.nonEmptyObservation

    def env_step(self, action):
        ro = Reward_observation_terminal()

        if self.whichEpisode % 2 == 0:
            ro.o = self.emptyObservation
        else:
            ro.o = self.nonEmptyObservation

        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return None


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_empty_environment())
Code example #40
"""
Environment which always plays the first free spot available on the board.
"""

from rlglue.environment import EnvironmentLoader
from wrapper_environment import WrapperEnvironment


class FirstFreeEnvironment(WrapperEnvironment):

    name = 'first_free'

    def env_play(self):
        """
        Pick the first free spot, and play there.
        """
        for i in range(len(self.state)):
            if self.state[i] == 0:
                self.state[i] = self.color
                return


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(FirstFreeEnvironment())
Code example #41
File: pinball.py Project: AAHays/python-rl
            if event.type == pygame.QUIT:
                done = True
            if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN:
                user_action = actions.get(event.key, PinballModel.ACC_NONE)

        if environment.take_action(user_action) == environment.END_EPISODE:
            done = True

        environment_view.blit()

        pygame.display.flip()

    pygame.quit()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Pinball domain')
    parser.add_argument('configuration', help='the configuration file')
    parser.add_argument('--width', action='store', type=int,
                        default=500, help='screen width (default: 500)')
    parser.add_argument('--height', action='store', type=int,
                        default=500, help='screen height (default: 500)')
    parser.add_argument('-r', '--rlglue', action='store_true', help='expose the environment through RL-Glue')
    args = parser.parse_args()

    if args.rlglue:
        print 'Starting rl-glue'
        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
    else:
        run_pinballview(args.width, args.height, args.configuration)

Code example #42
def main():
	EnvironmentLoader.loadEnvironment(SumoEnvironment(sys.argv[1]))
Code example #43
File: edge_tracer.py Project: hashima/DQN_Framework
        returnObs.intArray = np.append(np.zeros(128), [item for innerlist in arr for item in innerlist])
        scipy.misc.imsave('screen.png', arr)

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = returnObs
        returnRO.terminal = episodeOver

        # Event handling
        for event in pygame.event.get():
            if event.type == QUIT:  # quit event
                sys.exit()

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is dqn_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"


if __name__ == "__main__":
    App = EdgeTracer()
    #App.run()
    EnvironmentLoader.loadEnvironment(App)
    print('main start')
Code example #44
File: fuelworld.py Project: AAHays/python-rl
		
		if self.inFuelCell(self.pos):
			self.fuel += 20.0
		if self.fuel > 60.0:
			self.fuel = 60.0

		if gridworld.Gridworld.isAtGoal(self):
			return 0.0
		elif self.fuel < 0:
			return -400.0
		elif self.inFuelCell(self.pos): # Fuel costs
			base = self.var[0] if self.pos[1] <= 1.0 else self.var[1]
			a = self.var[2]
			return base - (int(self.pos[0]) % 5)*a
		elif intAction < 4:
			return -1.0
		elif intAction >= 4:
			return -1.4
		else:
			print "ERROR in FuelWorld.takeAction"


if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
	gridworld.addGridworldArgs(parser)
	parser.add_argument("--fuel_noise", type=float, default=0.0, 
			    help="If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure.")
	args = parser.parse_args()
	EnvironmentLoader.loadEnvironment(FuelWorld(noise=args.noise, fudge=args.fudge, fuel_noise=args.fuel_noise))
Code example #45
File: puddleworld.py Project: AAHays/python-rl
                    "the puddle's depth.")
    parser.add_argument("--puddle_penalty", type=float, default=-100,
                help="The reward penalty scale for walking through puddles.")
    args = parser.parse_args()
    kwargs = {}
    if args.puddle is not None:
        means = []
        covs = []
        for puddle in args.puddle:
            means.append(tuple(puddle[:2]))
            covs.append(tuple(puddle[2:]))
        kwargs['puddle_means'] = means
        kwargs['puddle_var'] = covs

    if args.size_x:
        kwargs['size_x'] = args.size_x
    if args.size_y:
        kwargs['size_y'] = args.size_y
    if args.goal_x:
        kwargs['goal_x'] = args.goal_x
    if args.goal_y:
        kwargs['goal_y'] = args.goal_y
    if args.noise:
        kwargs['noise'] = args.noise
    if args.fudge:
        kwargs['fudge'] = args.fudge
    if args.random_restarts:
        kwargs['random_start'] = args.random_restarts

    EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs))
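
A second argparse caution, visible in the kwargs block above: truthiness tests like "if args.goal_x:" silently drop explicit zero values, so --goal_x 0 is treated as if the flag were absent. A sketch of a safer variant, assuming a None default so "not given" and "given as 0" can be distinguished:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--goal_x", type=float, default=None)
args = parser.parse_args(["--goal_x", "0"])

kwargs = {}
if args.goal_x is not None:  # unlike "if args.goal_x:", keeps an explicit 0
    kwargs['goal_x'] = args.goal_x
print(kwargs)  # {'goal_x': 0.0}
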
Code example #46
File: rlglue_env.py Project: chenaoki/elecpy
      '-t','--test', 
      dest='test', action='store_true',default=False,
      help="test mode")
  parser.add_option(
      '-p','--param_file', 
      dest='param_file', action='store', type='string', default='./rlglue_param.json',
      help="json file of simulation parameters")
  parser.add_option(
      '-d','--dst', 
      dest='savepath', action='store', type='string', default='./result/data',
      help="Save data path.")
  (options, args) = parser.parse_args()
  #print 'options', options

  if not options.test:
    EnvironmentLoader.loadEnvironment(ElecpyEnvironment(options))

  else:
    objEnv = ElecpyEnvironment(options)
    objEnv.env_init()
    for epi in range(3):
      print 'Episode {0}'.format(epi)
      objEnv.env_start()
      cnt_step = 0
      while True:
        cnt_step += 1
        action = Action(numInts=1)
        action.intArray = [0]
        rot = objEnv.env_step(action)
        if rot.terminal:
          break
Code example #47
	def env_init(self):  
		return ""

	def env_start(self):
		return Observation()
	
	def env_step(self,action):
		return Reward_observation_terminal()

	def env_cleanup(self):
		pass
	
	def env_message(self,inMessage):
		if inMessage==None:
			return "null"

		if inMessage=="":
			return "empty"

		if inMessage=="null":
			return None

		if inMessage=="empty":
			return ""
		
		return inMessage

if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_message_environment())
Code example #48
            theReward = -1
            episodeOver = 1

        if self.currentState >= 20:
            self.currentState = 20
            theReward = 1
            episodeOver = 1

        theObs = Observation()
        theObs.intArray = [self.currentState]

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == "what is your name?":
            return "my name is skeleton_environment, Python edition!"
        else:
            return "I don't know how to respond to your message"


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(skeleton_environment())
Code example #49
File: cartpole.py Project: yang0110/python-rl
    parser = argparse.ArgumentParser(
        description=
        'Run Noisy Cart Pole Balancing or Swing Up environment in network mode.'
    )
    parser.add_argument(
        "--noise",
        type=float,
        default=0,
        help=
        "Standard deviation of additive noise to generate, affects the action effects."
    )
    parser.add_argument(
        "--random_restarts",
        type=bool,
        default=False,
        help="Restart the cart with a random location and velocity.")
    parser.add_argument(
        "--mode",
        choices=["easy", "hard", "swingup"],
        default="easy",
        type=str,
        help=
        "Choose the type of cart pole domain. Easy/hard balancing, or swing up."
    )

    args = parser.parse_args()
    EnvironmentLoader.loadEnvironment(
        CartPole(mode=args.mode,
                 noise=args.noise,
                 random_start=args.random_restarts))
Code example #50
File: puddleworld.py Project: okkhoy/cd-rl
        type=float,
        default=-100,
        help="The reward penalty scale for walking through puddles.")
    args = parser.parse_args()
    kwargs = {}
    if args.puddle is not None:
        means = []
        covs = []
        for puddle in args.puddle:
            means.append(tuple(puddle[:2]))
            covs.append(tuple(puddle[2:]))
        kwargs['puddle_means'] = means
        kwargs['puddle_var'] = covs

    if args.size_x:
        kwargs['size_x'] = args.size_x
    if args.size_y:
        kwargs['size_y'] = args.size_y
    if args.goal_x:
        kwargs['goal_x'] = args.goal_x
    if args.goal_y:
        kwargs['goal_y'] = args.goal_y
    if args.noise:
        kwargs['noise'] = args.noise
    if args.fudge:
        kwargs['fudge'] = args.fudge
    if args.random_restarts:
        kwargs['random_start'] = args.random_restarts

    EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs))
Code example #51
			ro.r=1.0

		else:
			self.o.doubleArray=[0.0078125,-0.0078125,0.0,0.0078125e150,-0.0078125e150]
			self.o.charArray=['g','F','?',' ','&']
			self.o.intArray=[173,-173,2147483647,0,-2147483648]

			ro.r=-2.0

		ro.o=self.o
		ro.terminal=terminal
		return ro	

	def env_cleanup(self):
		pass

	def env_message(self,inMessage):
		timesToPrint=self.stepCount%3
		
		outMessage=inMessage+"|"
		for i in range(0, timesToPrint):
			outMessage=outMessage+"%d" % (self.stepCount)
			outMessage=outMessage+"."

		outMessage=outMessage+"|"+inMessage
		
		return outMessage
	
if __name__=="__main__":
	EnvironmentLoader.loadEnvironment(test_1_environment())
Code example #52
File: World.py Project: zergylord/Neuromon
if __name__ == "__main__":
    pygame.init()
    pygame.freetype.init()
    gameArea = pygame.Rect([0, 0], size)
    if len(sys.argv) > 1:
        useGlue = (sys.argv[1] == 'True')
    else:
        useGlue = False
    black = 0, 0, 0
    screen = pygame.display.set_mode([size[0],
                                      int(size[1] * (4 / 3.0))
                                      ])  #,pygame.FULLSCREEN)
    count = 0
    if len(sys.argv) > 2:
        p1Type = int(sys.argv[2])
    else:
        p1Type = 0
    if len(sys.argv) > 3:
        p2Type = int(sys.argv[3])
    else:
        p2Type = 1
    if useGlue:
        EnvironmentLoader.loadEnvironment(World(p1Type, 2))
    else:
        world = World(p1Type, p2Type)
        world.start()
        while count < 300:
            world.step()
            count += 1
Code example #53
    def env_init(self):
        return ""

    def env_start(self):
        return Observation()

    def env_step(self, action):
        return Reward_observation_terminal()

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        if inMessage == None:
            return "null"

        if inMessage == "":
            return "empty"

        if inMessage == "null":
            return None

        if inMessage == "empty":
            return ""

        return inMessage


if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(test_message_environment())
Code example #54
	def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, wind_center=7., wind_stdev=1.0, wind_power=2.0, noise=0.0, random_start=False, fudge=1.4143):
		gridworld.Gridworld.__init__(self, size_x=size_x, size_y=size_y, goal_x=goal_x, goal_y=goal_y, noise=noise, random_start=random_start, fudge=fudge)
		self.wind_center = wind_center
		self.wind_stdev = wind_stdev
		self.wind_power = wind_power
		self.domain_name = "Continuous Windy Gridworld by Will Dabney"
		
	def reset(self):
		if self.random_start:
			self.pos = numpy.random.random((2,)) * self.size
		else:
			self.pos = numpy.array([0.0, self.size[1]*0.5])
	
	def takeAction(self, action):
		self.pos[1] += norm.pdf(self.pos[0], self.wind_center, self.wind_stdev) * self.wind_power
		return gridworld.Gridworld.takeAction(self, action)


if __name__=="__main__":
	import argparse
	parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.')
	gridworld.addGridworldArgs(parser)
	parser.add_argument("--wind_center", type=float, default=7, help="Center, or strongest point, in the x-direction of the wind")
	parser.add_argument("--wind_scale", type=float, default=1.0, help="Scale, or width, of the wind effects around the center.")
	parser.add_argument("--wind_power", type=float, default=2.0, help="The power, or strength, of the wind.")
	args = parser.parse_args()
	EnvironmentLoader.loadEnvironment(
		WindyGridworld(args.size_x, args.size_y, args.goal_x, args.goal_y, wind_center=args.wind_center, 
			       wind_stdev=args.wind_scale, wind_power=args.wind_power, noise=args.noise, 
			       random_start=args.random_restarts, fudge=args.fudge))