Example #1
    def __init__(self,
                 cash_flow_flag=0,
                 dg_random_seed=1,
                 num_sim=500002,
                 sabr_flag=False,
                 continuous_action_flag=False,
                 ticksize=0.1,
                 multi=1,
                 init_ttm=5,
                 trade_freq=1,
                 num_contract=1,
                 sim_flag=True,
                 mu=0.05,
                 vol=0.2,
                 S=100,
                 K=100,
                 r=0,
                 q=0,
                 beta=1,
                 rho=-0.4,
                 volvol=0.6,
                 ds=0.001,
                 k=0):
        """ cash_flow_flag: 1 if reward is defined using cash flow, 0 if profit and loss; dg_random_seed: random seed for simulation;
            num_sim: number of paths to simulate; sabr_flag: whether use sabr model; continuous_action_flag: continuous or discrete
            action space; init_ttm: initial time to maturity in unit of day; trade_freq: trading frequency in unit of day;
            num_contract: number of call option to short; sim_flag = simulation or market data;
            Assume the trading cost include spread cost and price impact cost, cost(n)= multi*ticksize*(|n|+0.01*n^2)
            Reward is about ((change in value) - k/2 * (change in value)^2 ), where k is risk attitude, k=0 if risk neutral
        """
        # observation
        if sim_flag:
            # generate data now
            if sabr_flag:
                self.path, self.option_price_path, self.delta_path, self.bartlett_delta_path = get_sim_path_sabr(
                    M=init_ttm,
                    freq=trade_freq,
                    np_seed=dg_random_seed,
                    num_sim=num_sim,
                    mu=mu,
                    vol=vol,
                    S=S,
                    K=K,
                    r=r,
                    q=q,
                    beta=beta,
                    rho=rho,
                    volvol=volvol,
                    ds=ds)

            else:
                self.path, self.option_price_path, self.delta_path = get_sim_path(
                    M=init_ttm,
                    freq=trade_freq,
                    np_seed=dg_random_seed,
                    num_sim=num_sim,
                    mu=mu,
                    vol=vol,
                    S=S,
                    K=K,
                    r=r,
                    q=q)
        else:
            # use actual data ---> to be continued
            return

        # other attributes
        self.num_path = self.path.shape[0]

        # set num_period: initial time to maturity * daily trading freq + 1 (see get_sim_path() in simulation.py): (s0,s1...sT) -->T+1
        self.num_period = self.path.shape[1]
        # print("***", self.num_period)

        # time to maturity array
        self.ttm_array = np.arange(init_ttm, -trade_freq, -trade_freq)
        # print(self.ttm_array)

        # cost part
        self.ticksize = ticksize
        self.multi = multi  # cost multiplier; could be made to vary with the time of day (6 trading hours)

        # risk attitude
        self.k = k

        # step function initialization depending on cash_flow_flag
        if cash_flow_flag == 1:
            self.step = self.step_cash_flow  # see step_cash_flow() below; bound through self for internal use
        else:
            self.step = self.step_profit_loss

        self.num_contract = num_contract
        self.strike_price = K

        # track the index of simulated path in use
        self.sim_episode = -1

        # track time step within an episode (it's step)
        self.t = None

        # action space for holding
        # With L contracts, each covering 100 shares, one would not want to trade more than 100·L shares
        if continuous_action_flag:
            self.action_space = spaces.Box(low=np.array([0]),
                                           high=np.array([num_contract * 100]),
                                           dtype=np.float32)
        else:
            self.num_action = num_contract * 100 + 1
            self.action_space = spaces.Discrete(
                self.num_action)  #number from 0 to self.num_action-1

        # state element, assumed to be 3: current price, current holding, ttm
        self.num_state = 3

        self.state = []  # initialize current state

        # seed and start
        self.seed()  # seed the RNG as part of initialization
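
A minimal standalone sketch of the cost and reward formulas quoted in the docstring above; the function names are illustrative and not part of the original class.

import numpy as np

def trading_cost(n_shares, multi=1.0, ticksize=0.1):
    # cost(n) = multi * ticksize * (|n| + 0.01 * n^2), as stated in the docstring
    return multi * ticksize * (np.abs(n_shares) + 0.01 * n_shares ** 2)

def mean_variance_reward(delta_value, k=0.0):
    # reward = (change in value) - k/2 * (change in value)^2; k = 0 means risk neutral
    return delta_value - 0.5 * k * delta_value ** 2

# e.g. rebalancing by 40 shares, then gaining 1.5 in portfolio value with k = 0.1
print(trading_cost(40))                  # 0.1 * (40 + 0.01 * 1600) = 5.6
print(mean_variance_reward(1.5, k=0.1))  # 1.5 - 0.05 * 2.25 = 1.3875
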
Example #2
 def action_space(self):
     return spaces.Discrete(5)
 def __init__(self, env, lower_level_agent,
              timesteps_to_give_up_control_for, num_skills):
     Wrapper.__init__(self, env)
     self.action_space = spaces.Discrete(num_skills)
     self.lower_level_agent = lower_level_agent
     self.timesteps_to_give_up_control_for = timesteps_to_give_up_control_for
    def __init__(self):

        # import image of city and convert values to height
        # Original image
        #self.city_array = np.array((Image.open(r"../data/images/RasterAstanaCropped.png")), dtype=np.uint16)
        # crop the image with center at camera location
        # self.camera_location = (3073, 11684, 350)   # x,y,z coordinate  #  (11685, 7074, 350) - RasterAstana.png
        # self.coverage_radius = 2000                 # .. km square from the center
        # self.city_array = self.city_array[self.camera_location[1]-self.coverage_radius:self.camera_location[1]+self.coverage_radius,
        #                                 self.camera_location[0]-self.coverage_radius:self.camera_location[0]+self.coverage_radius]

        # Resized image: 1000 -> 250
        self.city_array = np.array((Image.open(r"../data/images/RasterAstanaCropped250x250.png")), dtype=np.uint16)
        self.city_array = self.city_array/100 - 285             # convert to meters

        # crop the image with center at camera location
        self.camera_location = (126, 126, 350)   # x,y,z coordinate  #  (11685, 7074, 350) - RasterAstana.png
        self.coverage_radius = 125                 # coverage radius from the center, in pixels

        # Get image params
        self.im_height, self.im_width = self.city_array.shape  # numpy array shape is (height, width)

        # CAMERA params
        self.camera_number = 1
        self.camera_location_cropped = (int(self.coverage_radius), int(self.coverage_radius), (self.camera_location[2]-313)/4) # 313 or 285s

        self.observer_height = self.camera_location_cropped[2] + 2
        self.max_distance_min_zoom = 100/4       # at min zoom - 20mm - the max distance 50
        self.max_distance_max_zoom = 4000/4     # at max zoom - 800mm - the max distance 2000

        self.scale = 2
        self.horizon_fov_min = 0.5*self.scale       # 0.5 deg, minimum horizontal Field of View (at max zoom)
        self.horizon_fov_max = 21*self.scale     # 21 deg, maximum horizontal Field of View (at min zoom)

        self.vertical_fov_min =  0.3*self.scale    # 0.3 deg, minimum vertical Field of View (at max zoom)
        self.vertical_fov_max =  11.8*self.scale   # 11.8 deg, maximum vertical Field of View (at min zoom)

        # PTZ initialization
        self.pan_pos = 0 #360*np.random.rand()    # 0     np.random.rand()
        self.tilt_pos = -45 #-25*np.random.rand() # -45 
        self.zoom_pos = 20 # 20           # 0 - 20mm (min), 1 - 800 mm (max)

        print('ptz init: ', self.pan_pos, self.tilt_pos, self.zoom_pos)

        self.delta_pan  = 5                # deg
        self.delta_tilt = 2                 # deg
        self.delta_zoom = 1.25              # 1.25x times

        self.horizon_fov = self.horizon_fov_max               # 21           # Field of View deg
        self.vertical_fov =  self.vertical_fov_max            # 11.8        # Field of View deg
        self.zoom_distance = self.max_distance_min_zoom

        # GYM env params
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.im_width,self.im_height, 1), dtype = np.uint8)
        self.action_space = spaces.Discrete(6)  # 6 different actions
        self.action = 0

        # render
        self.max_render = 100
        self.is_render = True
        self.iteration = 0
        self.info = 0

        # reward
        self.ratio_threshhold = 0.02
        self.reward_good_step = 1
        self.reward_bad_step = -0.05
        self.max_iter = 300
        self.reward_temp = 0
        self.reward_prev = 0.0

        # coverage
        # self.city_coverage = np.asarray(Image.open(r"../data/images/RasterTotalCoverage.png"))
        self.city_coverage = np.asarray(Image.open(r"../data/images/RasterAstanaCropped250x250CoverageBinary.png"))
        self.rad_matrix, self.angle_matrix = self.create_cartesian()

        # inputs
        self.state_visible_points = np.zeros((self.im_height, self.im_width))
        self.state_total_coverage = self.city_coverage #np.zeros((self.im_height, self.im_width))
        self.state_gray_coverage = np.zeros((self.im_height, self.im_width))
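
A hypothetical sketch of how the six discrete actions might map onto the pan/tilt/zoom increments defined above; the actual mapping lives in the env's step() method and may differ.

import numpy as np

def apply_ptz_action(pan, tilt, zoom, action, delta_pan=5, delta_tilt=2, delta_zoom=1.25):
    # assumed order: 0/1 pan right/left, 2/3 tilt up/down, 4/5 zoom in/out
    if action == 0:
        pan += delta_pan
    elif action == 1:
        pan -= delta_pan
    elif action == 2:
        tilt += delta_tilt
    elif action == 3:
        tilt -= delta_tilt
    elif action == 4:
        zoom *= delta_zoom
    elif action == 5:
        zoom /= delta_zoom
    return pan % 360, float(np.clip(tilt, -90, 0)), float(np.clip(zoom, 20, 800))

print(apply_ptz_action(0, -45, 20, action=4))   # (0, -45.0, 25.0)
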
Example #5
    def __init__(self, config, home_team, away_team, opp_actor=None):
        self.__version__ = "0.0.1"
        self.config = config
        self.config.competition_mode = False
        self.config.fast_mode = True
        self.config.time_limits = None
        self.game = None
        self.team_id = None
        self.ruleset = get_rule_set(config.ruleset, all_rules=False)
        self.home_team = home_team
        self.away_team = away_team
        self.actor = Agent("Gym Learner", human=True)
        self.opp_actor = opp_actor if opp_actor is not None else RandomBot(
            "Random")
        self.seed()
        self.root = None
        self.cv = None
        self.last_obs = None

        self.layers = [
            OccupiedLayer(),
            OwnPlayerLayer(),
            OppPlayerLayer(),
            OwnTackleZoneLayer(),
            OppTackleZoneLayer(),
            UpLayer(),
            UsedLayer(),
            AvailablePlayerLayer(),
            AvailablePositionLayer(),
            RollProbabilityLayer(),
            BlockDiceLayer(),
            ActivePlayerLayer(),
            TargetPlayerLayer(),
            MALayer(),
            STLayer(),
            AGLayer(),
            AVLayer(),
            MovemenLeftLayer(),
            BallLayer(),
            OwnHalfLayer(),
            OwnTouchdownLayer(),
            OppTouchdownLayer(),
            SkillLayer(Skill.BLOCK),
            SkillLayer(Skill.DODGE),
            SkillLayer(Skill.SURE_HANDS),
            SkillLayer(Skill.PASS)
        ]

        arena = get_arena(self.config.arena)

        self.observation_space = spaces.Dict({
            'board':
            spaces.Box(low=0,
                       high=1,
                       shape=(arena.height, arena.width, len(self.layers))),
            'state':
            spaces.Box(low=0, high=1, shape=(50, )),
            'procedure':
            spaces.Box(low=0, high=1, shape=(len(FFAIEnv.procedures), )),
        })

        self.actions = FFAIEnv.actions

        self.action_space = spaces.Dict({
            'action-type':
            spaces.Discrete(len(self.actions)),
            'x':
            spaces.Discrete(arena.width),
            'y':
            spaces.Discrete(arena.height)
        })
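
A standalone sketch of sampling the composite gym spaces used above; the sizes are placeholders rather than the real FFAI arena dimensions.

from gym import spaces

action_space = spaces.Dict({
    'action-type': spaces.Discrete(40),   # placeholder for len(self.actions)
    'x': spaces.Discrete(26),             # placeholder for arena.width
    'y': spaces.Discrete(15),             # placeholder for arena.height
})
sample = action_space.sample()            # returns an OrderedDict keyed like the space
print(sample['action-type'], sample['x'], sample['y'])
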
Example #6
    def __init__(self,
                 world,
                 scenario,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True,
                 discrete_action_space=False,
                 discrete_action_input=False):

        self.world = world
        self.scenario = scenario
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        # self.discrete_action_space = True
        self.discrete_action_space = discrete_action_space
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        # self.discrete_action_input = False
        self.discrete_action_input = discrete_action_input
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
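
An illustration of the two discrete action encodings mentioned in the comments above: with discrete_action_input=True the agent sends an index 0..N-1, otherwise it sends a one-hot vector of length N.

import numpy as np

N = 5                                   # e.g. world.dim_p * 2 + 1 for a 2-D world
index_action = 3                        # discrete_action_input=True style
one_hot_action = np.zeros(N, dtype=np.float32)
one_hot_action[index_action] = 1.0      # discrete_action_input=False style
print(index_action, one_hot_action)     # 3 [0. 0. 0. 1. 0.]
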
 def __init__(self, env):
     super(NoisyTVEnvWrapperMario, self).__init__(env)
     self.action_space = spaces.Discrete(15)
     self.noisy_action = 14
Example #8
 def __init__(self):
     low = np.array([0, 0])
     high = np.array([11, 11])
     self.observation_space = spaces.Box(low, high, dtype=np.int64)
     self.action_space = spaces.Discrete(4)
Example #9
    def __init__(self, keep_mode=True, mode=NORMAL, verbose=False):
        """ mode: is the game mode: NORMAL, TRAIN_SHOOTING, TRAIN_DEFENSE,
        keep_mode: whether the puck gets catched by
        it can be changed later using the reset function
        """
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None
        self.mode = mode
        self.keep_mode = keep_mode
        self.player1_has_puck = 0
        self.player2_has_puck = 0

        self.world = Box2D.b2World([0, 0])
        self.player1 = None
        self.player2 = None
        self.puck = None
        self.goal_player_1 = None
        self.goal_player_2 = None
        self.world_objects = []
        self.drawlist = []
        self.done = False
        self.winner = 0
        self.one_starts = True  # player one starts the game (alternating)

        self.timeStep = 1.0 / FPS
        self.time = 0
        self.max_timesteps = None  # see reset

        self.closest_to_goal_dist = 1000

        # 0  x pos player one
        # 1  y pos player one
        # 2  angle player one
        # 3  x vel player one
        # 4  y vel player one
        # 5  angular vel player one
        # 6  x pos player two
        # 7  y pos player two
        # 8  angle player two
        # 9  x vel player two
        # 10 y vel player two
        # 11 angular vel player two
        # 12 x pos puck
        # 13 y pos puck
        # 14 x vel puck
        # 15 y vel puck
        # Keep Puck Mode
        # 16 time left player has puck
        # 17 time left other player has puck
        self.observation_space = spaces.Box(-np.inf,
                                            np.inf,
                                            shape=(18, ),
                                            dtype=np.float32)

        # linear force in (x,y)-direction and torque
        self.num_actions = 3 if not self.keep_mode else 4
        self.action_space = spaces.Box(-1,
                                       +1, (self.num_actions * 2, ),
                                       dtype=np.float32)

        # see discrete_to_continous_action()
        self.discrete_action_space = spaces.Discrete(7)

        self.verbose = verbose

        self.reset(self.one_starts)
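
A small helper (not part of the env) that unpacks an 18-dimensional observation following the index layout documented in the comment block above.

import numpy as np

def unpack_observation(obs):
    obs = np.asarray(obs)
    return {
        "p1_pos": obs[0:2], "p1_angle": obs[2], "p1_vel": obs[3:5], "p1_ang_vel": obs[5],
        "p2_pos": obs[6:8], "p2_angle": obs[8], "p2_vel": obs[9:11], "p2_ang_vel": obs[11],
        "puck_pos": obs[12:14], "puck_vel": obs[14:16],
        "p1_puck_time": obs[16], "p2_puck_time": obs[17],
    }

print(unpack_observation(np.arange(18.0))["puck_pos"])   # [12. 13.]
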
Example #10
 def _set_action_space(self):
     self.action_space = spaces.Tuple(
         tuple([spaces.Discrete(6)] +
               [spaces.Discrete(self._radio_vocab_size)] *
               self._radio_num_words))
Example #11
    def _set_config(
            self,
            game_color=1,  # State (frame) color option: 0 = RGB, 1 = Grayscale, 2 = Green only
            indicators=True,  # show or not bottom Info Panel
            frames_per_state=4,  # stacked (rolling history) Frames on each state [1-inf], latest observation always on first Frame
            skip_frames=3,  # number of consecutive Frames to skip between state saves [0-4]
            discre=ACT,  # Action discretization function, format [[steer0, throttle0, brake0], [steer1, ...], ...]. None for continuous
            use_track=3,  # number of times to reuse the same Track, [1-100]. More than 20 carries a high risk of overfitting!!
            episodes_per_track=5,  # number of evenly distributed starting points on each track [1-20]. Every time you call reset(), the env automatically starts at the next point
            tr_complexity=12,  # generated Track geometric Complexity, [6-20]
            tr_width=45,  # relative Track Width, [30-50]
            patience=2.0,  # max time in secs without Progress, [0.5-20]
            off_track=1.0,  # max time in secs Driving on Grass, [0.0-5]
            f_reward=CONT_REWARD,  # Reward Function coefficients, refer to the documentation for details
            num_obstacles=5,  # Obstacle objects placed on track [0-10]
            end_on_contact=False,  # Stop Episode on contact with obstacle, not recommended for starting-phase of training
            obst_location=0,  # array pre-setting obstacle Location, in %track. A negative value means the track's left-hand side. 0 for random location
            oily_patch=False,  # use all obstacles as Low-friction road (oily patch)
            verbose=2):

        #Verbosity
        self.verbose = verbose

        #obstacle parameters
        self.num_obstacles = np.clip(num_obstacles, 0, 10)
        self.end_on_contact = end_on_contact
        self.oily_patch = oily_patch
        if obst_location != 0 and len(obst_location) < num_obstacles:
            print("#####################################")
            print("Warning: incomplete obstacle location")
            print("Defaulting to random placement")
            self.obst_location = 0  #None
        else:
            self.obst_location = np.array(obst_location)

        #reward coefs verification
        if len(f_reward) < len(CONT_REWARD):
            print("####################################")
            print("Warning: incomplete reward function")
            print("Defaulting to predefined function!!!")
            self.f_reward = CONT_REWARD
        else:
            self.f_reward = f_reward

        # Times to reuse the same track, up to 100. More than 20 carries a high risk of overfitting!!
        self.repeat_track = np.clip(use_track, 1, 100)
        self.track_use = +np.inf

        # Number of episodes on same track, with evenly distributed starting points,
        # not more than 20 episodes
        self.episodes_per_track = np.clip(episodes_per_track, 1, 20)

        # track generation complexity
        self.complexity = np.clip(tr_complexity, 6, 20)

        # track width
        self.tr_width = np.clip(tr_width, 30, 50) / SCALE

        # Max time without progress
        self.patience = np.clip(patience, 0.5, 20)
        # Max time off-track
        self.off_track = np.clip(off_track, 0, 5)

        # Show or not bottom info panel
        self.indicators = indicators

        # Grayscale and acceptable frames
        self.grayscale = game_color
        if not self.grayscale:
            if frames_per_state > 1:
                print("####################################")
                print("Warning: making frames_per_state = 1")
                print("No support for several frames in RGB")
                frames_per_state = 1
                skip_frames = 0

        # Frames to be skipped from state (max 4)
        self.skip_frames = np.clip(skip_frames + 1, 1, 5)

        # Frames per state
        self.frames_per_state = frames_per_state if frames_per_state > 0 else 1
        if self.frames_per_state > 1:
            lst = list(
                range(0, self.frames_per_state * self.skip_frames,
                      self.skip_frames))
            self._update_index = [lst[-1]] + lst[:-1]

        # Gym spaces, observation and action
        self.discre = discre
        if discre is None:
            self.action_space = spaces.Box(
                np.array([-0.4, 0, 0]),
                np.array([+0.4, +1, +1]),
                dtype=np.float32)  # steer, gas, brake
        else:
            self.action_space = spaces.Discrete(len(discre))

        if game_color:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(STATE_H, STATE_W,
                                                       self.frames_per_state),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(STATE_H, STATE_W, 3),
                                                dtype=np.uint8)
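
A worked example of the index bookkeeping above: with frames_per_state=4 and skip_frames=3, self.skip_frames becomes 4 and the _update_index list is the range with its last element rotated to the front.

frames_per_state = 4
skip_frames = min(max(3 + 1, 1), 5)                 # same result as np.clip(3 + 1, 1, 5)
lst = list(range(0, frames_per_state * skip_frames, skip_frames))
update_index = [lst[-1]] + lst[:-1]
print(lst, update_index)                            # [0, 4, 8, 12] [12, 0, 4, 8]
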
Example #12
    def __init__(self, num_vehicles=50, task_num=30):
        self.num_vehicles = num_vehicles
        self.task_num_per_episode = task_num
        self.vehicle_count = 0
        self.maxR = 500  # m, max relative distance between the requesting vehicle and other vehicles
        self.maxV = 30  # km/h, max relative velocity between the requesting vehicle and other vehicles
        self.max_v = 50  # maximum number of vehicles in the communication range of the requesting vehicle
        self.max_local_task = 10
        self.bandwidth = 6  # MHz
        self.snr_ref = 1  # reference SNR, which is used to compute rate by B*log2(1+snr_ref*d^-a)
        self.snr_alpha = 2
        self.vehicle_F = range(5, 11)  #GHz
        self.data_size = [0.05, 0.1]  #MBytes
        self.comp_size = [0.2, 0.4]  #GHz
        self.tau = [0.5, 1, 2, 4]  #s
        self.max_datasize = max(self.data_size)
        self.max_compsize = max(self.comp_size)
        self.max_tau = max(self.tau)
        self.priority = [0.5, 1]
        self.ref_price = 0.1
        self.price = 0.1
        self.price_level = 10
        self.service_threshold = 0.1
        self.local_priority = 0.01
        self.distance_factor = 1
        self.high_priority_factor = -np.log(1 + self.max_tau)
        self.low_priority_factor = np.log(1 + np.min(self.tau))

        self.action_space = spaces.Discrete(self.num_vehicles *
                                            self.price_level)
        self.observation_space = spaces.Dict({
            "snr":
            spaces.Box(0, self.snr_ref, shape=(self.max_v, ), dtype='float32'),
            # "time_remain":spaces.Box(0,100,shape=(self.max_v,),dtype='float32'),
            "freq_remain":
            spaces.Box(0, 6, shape=(self.max_v, ), dtype='float32'),
            "serv_prob":
            spaces.Box(0, 1, shape=(self.max_v, ), dtype='float32'),
            "u_max":
            spaces.Box(0,
                       self.max_local_task * self.max_tau,
                       shape=(self.max_v, ),
                       dtype='float32'),
            "task":
            spaces.Box(0,
                       max(self.max_datasize, self.max_compsize, self.max_tau,
                           max(self.priority)),
                       shape=(4, ),
                       dtype='float32')
        })
        self.seed()
        self.reward_threshold = 0.0
        self.trials = 100
        self.max_episode_steps = 100
        self._max_episode_steps = 100
        self.id = "VEC"
        self.high_count = [0, 0, 0, 0]
        self.high_delay = [0, 0, 0, 0]
        self.low_count = [0, 0, 0, 0]
        self.low_delay = [0, 0, 0, 0]
        self.count_file = "sac.txt"
        self.utility = 0
        self.vehicles = []  #vehicles in the range
        self.tasks = []  #tasks for offloading
        self.init_vehicles()
        # self.generate_offload_tasks()
        self.generate_local_tasks()
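
A sketch of the transmission-rate formula referenced in the snr_ref comment above, rate = B * log2(1 + snr_ref * d^-alpha); the helper name and the example distance are illustrative.

import numpy as np

def v2v_rate(distance_m, bandwidth_mhz=6, snr_ref=1, snr_alpha=2):
    return bandwidth_mhz * np.log2(1 + snr_ref * distance_m ** (-snr_alpha))

print(round(v2v_rate(10.0), 4))   # 6 * log2(1.01) ≈ 0.0861, in the same units as the bandwidth
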
Example #13
    def __init__(
        self,
        environment_name: str,
        num_spawn_envs: int = 1,
        worker_id: int = 0,
        marathon_envs_path: str = None,
        no_graphics: bool = False,
        use_editor: bool = False,
        inference: bool = False,
    ):
        """
        Environment initialization
        :param environment_name: The Marathon Environment 
        :param num_spawn_envs: The number of environments to spawn per instance
        :param worker_id: Worker number for environment.
        :param marathon_envs_path: alternative path for environment
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param use_editor: If True, assume the Unity Editor is the environment (use for debugging)
        :param inference: If True, run in inference mode (normal framerate)
        """
        use_visual: bool = False
        uint8_visual: bool = False
        multiagent: bool = True  # force multiagent
        flatten_branched: bool = False
        allow_multiple_visual_obs: bool = False

        base_port = 5005
        # use if we want to work with the Unity Editor
        if use_editor:
            base_port = DEFAULT_EDITOR_PORT
            marathon_envs_path = None
        elif marathon_envs_path is None:
            marathon_envs_path = 'Tennis'
            if platform == "win32":
                marathon_envs_path = os.path.join('Tennis_Windows_x86_64',
                                                  'Tennis.exe')

        self._env = UnityEnvironment(marathon_envs_path,
                                     worker_id=worker_id,
                                     base_port=base_port,
                                     no_graphics=no_graphics)

        # Take a single step so that the brain information will be sent over
        if not self._env.get_agent_groups():
            self._env.step()

        self.visual_obs = None
        self._n_agents = -1

        self.agent_mapper = AgentIdIndexMapper()

        # Save the step result from the last time all Agents requested decisions.
        self._previous_step_result: BrainInfo = None
        self._multiagent = multiagent
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.get_agent_groups()) != 1:
            raise MarathonEnvsException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")

        self.brain_name = self._env.get_agent_groups()[0]
        self.name = self.brain_name
        self.group_spec = self._env.get_agent_group_spec(self.brain_name)

        if use_visual and self._get_n_vis_obs() == 0:
            raise MarathonEnvsException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = self._get_n_vis_obs() >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")
        else:
            self.uint8_visual = uint8_visual

        if self._get_n_vis_obs() > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        # Check for number of agents in scene.
        self._env.reset()
        step_result = self._env.get_step_result(self.brain_name)
        self._check_agents(step_result.n_agents())
        self._previous_step_result = step_result
        self.agent_mapper.set_initial_agents(
            list(self._previous_step_result.agent_id))

        # Set observation and action spaces
        if self.group_spec.is_action_discrete():
            branches = self.group_spec.discrete_action_branches
            if self.group_spec.action_shape == 1:
                self._action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(branches)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.array([1] * self.group_spec.action_shape)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * self._get_vec_obs_size())
        if self.use_visual:
            shape = self._get_vis_obs_shape()
            if uint8_visual:
                self._observation_space = spaces.Box(0,
                                                     255,
                                                     dtype=np.uint8,
                                                     shape=shape)
            else:
                self._observation_space = spaces.Box(0,
                                                     1,
                                                     dtype=np.float32,
                                                     shape=shape)

        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
Example #14
    def __init__(self):
        """
        This Task Env is designed for having the TurtleBot2 in some kind of maze.
        It will learn how to move around the maze without crashing.
        """

        # Only variable needed to be set here
        number_actions = rospy.get_param('/turtlebot2/n_actions')
        self.action_space = spaces.Discrete(number_actions)

        # We set the reward range, which is not compulsory but here we do it.
        self.reward_range = (-numpy.inf, numpy.inf)

        #number_observations = rospy.get_param('/turtlebot2/n_observations')
        """
        We set the Observation space for the 6 observations
        cube_observations = [
            round(current_disk_roll_vel, 0),
            round(y_distance, 1),
            round(roll, 1),
            round(pitch, 1),
            round(y_linear_speed,1),
            round(yaw, 1),
        ]
        """

        # Actions and Observations
        self.dec_obs = rospy.get_param(
            "/turtlebot2/number_decimals_precision_obs", 1)
        self.linear_forward_speed = rospy.get_param(
            '/turtlebot2/linear_forward_speed')
        self.linear_turn_speed = rospy.get_param(
            '/turtlebot2/linear_turn_speed')
        self.angular_speed = rospy.get_param('/turtlebot2/angular_speed')
        self.init_linear_forward_speed = rospy.get_param(
            '/turtlebot2/init_linear_forward_speed')
        self.init_linear_turn_speed = rospy.get_param(
            '/turtlebot2/init_linear_turn_speed')

        self.n_observations = rospy.get_param('/turtlebot2/n_observations')
        self.min_range = rospy.get_param('/turtlebot2/min_range')
        self.max_laser_value = rospy.get_param('/turtlebot2/max_laser_value')
        self.min_laser_value = rospy.get_param('/turtlebot2/min_laser_value')

        # Here we will add any init functions prior to starting the MyRobotEnv
        super(TurtleBot2MazeEnv, self).__init__()

        # We create two arrays based on the binary values that will be assigned
        # In the discretization method.
        #laser_scan = self._check_laser_scan_ready()
        laser_scan = self.get_laser_scan()
        rospy.logdebug("laser_scan len===>" + str(len(laser_scan.ranges)))

        # Laser data
        self.laser_scan_frame = laser_scan.header.frame_id

        # Number of laser reading jumped
        self.new_ranges = int(
            math.ceil(
                float(len(laser_scan.ranges)) / float(self.n_observations)))

        rospy.logdebug("n_observations===>" + str(self.n_observations))
        rospy.logdebug("new_ranges, jumping laser readings===>" +
                       str(self.new_ranges))

        high = numpy.full((self.n_observations), self.max_laser_value)
        low = numpy.full((self.n_observations), self.min_laser_value)

        # Observation space spans the discretized laser readings
        self.observation_space = spaces.Box(low, high)

        rospy.logdebug("ACTION SPACES TYPE===>" + str(self.action_space))
        rospy.logdebug("OBSERVATION SPACES TYPE===>" +
                       str(self.observation_space))

        # Rewards
        self.forwards_reward = rospy.get_param("/turtlebot2/forwards_reward")
        self.turn_reward = rospy.get_param("/turtlebot2/turn_reward")
        self.end_episode_points = rospy.get_param(
            "/turtlebot2/end_episode_points")

        self.cumulated_steps = 0.0

        self.laser_filtered_pub = rospy.Publisher(
            '/turtlebot2/laser/scan_filtered', LaserScan, queue_size=1)
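
A worked example of the laser discretization arithmetic above; the raw reading count is illustrative (the real value comes from the ROS laser scan), and the actual slicing happens in the discretization method, which is not shown here.

import math

raw_readings = 720
n_observations = 6
new_ranges = int(math.ceil(float(raw_readings) / float(n_observations)))
kept_indices = list(range(0, raw_readings, new_ranges))
print(new_ranges, kept_indices)   # 120 [0, 120, 240, 360, 480, 600]
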
Example #15
 def __init__(self):
     super().__init__()
     self.action_space = spaces.Discrete(self.N_JOINTS * 2)
     self.observation_space = spaces.Box(low=-self.JOINT_LIMITS,
                                         high=self.JOINT_LIMITS)
Example #16
    def __init__(
        self,
        environment_filename=None,
        worker_id=0,
        retro=True,
        timeout_wait=30,
        realtime_mode=False,
        config=None,
        greyscale=False,
    ):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          worker_id: The index of the worker in the case where multiple environments are running.  Each 
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resizes visual observations to 84x84 (uint8) and flattens the action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        if environment_filename is None:
            registry = UnityEnvRegistry()
            registry.register_from_yaml(self._REGISTRY_YAML)
            self._env = registry["ObstacleTower"].make(
                worker_id=worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config])
        else:
            self._env = UnityEnvironment(
                environment_filename,
                worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config],
            )

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)
        self._env.reset()
        behavior_name = list(self._env.behavior_specs)[0]
        split_name = behavior_name.split("-v")
        if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" +
                self.version +
                " but only the following versions are compatible with this gym: "
                + str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        self.config = config

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check behavior configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one agent in this environment "
                "if it is wrapped in a gym.")
        self.behavior_name = behavior_name
        behavior_spec = self._env.behavior_specs[behavior_name]

        if len(behavior_spec) < 2:
            raise UnityGymException(
                "Environment provides too few observations.")

        self.uint8_visual = uint8_visual

        # Check for number of agents in scene.
        initial_info, terminal_info = self._env.get_steps(behavior_name)
        self._check_agents(len(initial_info))

        # Set observation and action spaces
        if len(behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(behavior_spec.action_shape)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    behavior_spec.action_shape)

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = behavior_spec.observation_shapes[0][0]
        camera_width = behavior_spec.observation_shapes[0][1]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0,
            image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth),
        )
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))
Example #17
File: env.py Project: cg31/cule
    def __init__(self, env_name, num_envs, color_mode='rgb', device='cpu', rescale=False,
                 frameskip=1, repeat_prob=0.25, episodic_life=False, max_noop_steps=30,
                 max_episode_length=10000):
        """Initialize the ALE class with a given environment

        Args:
            env_name (str): The name of the Atari rom
            num_envs (int): The number of environments to run
            color_mode (str): RGB ('rgb') or grayscale ('gray') observations
            device (str) : Device on which to run the ALEs ('cpu' or 'cuda')
            rescale (bool) : Rescale grayscale observations to 84x84
            frameskip (int) : Number of frames to skip during training
            repeat_prob (float) : Probability of repeating previous action
            episodic_life (bool) : Set 'done' on end of life
        """

        assert (color_mode == 'rgb') or (color_mode == 'gray')
        if color_mode == 'rgb' and rescale:
            raise ValueError('Rescaling is only valid in grayscale color mode')

        self.cart = Rom(env_name)
        super(Env, self).__init__(self.cart, num_envs, max_noop_steps)

        self.device = torch.device(device)
        self.num_envs = num_envs
        self.rescale = rescale
        self.frameskip = frameskip
        self.repeat_prob = repeat_prob
        self.is_cuda = self.device.type == 'cuda'
        self.is_training = False
        self.episodic_life = episodic_life
        self.height = 84 if self.rescale else self.cart.screen_height()
        self.width = 84 if self.rescale else self.cart.screen_width()
        self.num_channels = 3 if color_mode == 'rgb' else 1

        self.action_set = torch.Tensor([int(s) for s in self.cart.minimal_actions()]).to(self.device).byte()

        # check if FIRE is in the action set
        self.fire_reset = int(torchcule_atari.FIRE) in self.action_set

        self.action_space = spaces.Discrete(self.action_set.size(0))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.num_channels, self.height, self.width), dtype=np.uint8)

        self.observations1 = torch.zeros((num_envs, self.height, self.width, self.num_channels), device=self.device, dtype=torch.uint8)
        self.observations2 = torch.zeros((num_envs, self.height, self.width, self.num_channels), device=self.device, dtype=torch.uint8)
        self.done = torch.zeros(num_envs, device=self.device, dtype=torch.bool)
        self.actions = torch.zeros(num_envs, device=self.device, dtype=torch.uint8)
        self.last_actions = torch.zeros(num_envs, device=self.device, dtype=torch.uint8)
        self.lives = torch.zeros(num_envs, device=self.device, dtype=torch.int32)
        self.rewards = torch.zeros(num_envs, device=self.device, dtype=torch.float32)

        self.states = torch.zeros((num_envs, self.state_size()), device=self.device, dtype=torch.uint8)
        self.frame_states = torch.zeros((num_envs, self.frame_state_size()), device=self.device, dtype=torch.uint8)
        self.ram = torch.randint(0, 255, (num_envs, self.cart.ram_size()), device=self.device, dtype=torch.uint8)
        self.tia = torch.zeros((num_envs, self.tia_update_size()), device=self.device, dtype=torch.int32)
        self.frame_buffer = torch.zeros((num_envs, 300 * self.cart.screen_width()), device=self.device, dtype=torch.uint8)
        self.cart_offsets = torch.zeros(num_envs, device=self.device, dtype=torch.int32)
        self.rand_states = torch.randint(0, np.iinfo(np.int32).max, (num_envs,), device=self.device, dtype=torch.int32)
        self.cached_states = torch.zeros((max_noop_steps, self.state_size()), device=self.device, dtype=torch.uint8)
        self.cached_ram = torch.randint(0, 255, (max_noop_steps, self.cart.ram_size()), device=self.device, dtype=torch.uint8)
        self.cached_frame_states = torch.zeros((max_noop_steps, self.frame_state_size()), device=self.device, dtype=torch.uint8)
        self.cached_tia = torch.zeros((max_noop_steps, self.tia_update_size()), device=self.device, dtype=torch.int32)
        self.cache_index = torch.zeros((num_envs,), device=self.device, dtype=torch.int32)

        self.set_cuda(self.is_cuda, self.device.index if self.is_cuda else -1)
        self.initialize(self.states.data_ptr(),
                        self.frame_states.data_ptr(),
                        self.ram.data_ptr(),
                        self.tia.data_ptr(),
                        self.frame_buffer.data_ptr(),
                        self.cart_offsets.data_ptr(),
                        self.action_set.data_ptr(),
                        self.rand_states.data_ptr(),
                        self.cached_states.data_ptr(),
                        self.cached_ram.data_ptr(),
                        self.cached_frame_states.data_ptr(),
                        self.cached_tia.data_ptr(),
                        self.cache_index.data_ptr())
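
A hedged usage sketch for the Env class above; the import path, rom name, and keyword values are assumptions based on the cule project and require a working torchcule/CUDA installation.

import torch
from torchcule.atari import Env   # assumed import path for the class defined above

env = Env('PongNoFrameskip-v4', num_envs=8, color_mode='gray',
          device='cuda' if torch.cuda.is_available() else 'cpu',
          rescale=True, episodic_life=True)
print(env.action_space, env.observation_space.shape)
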
Example #18
    def __init__(self, case_files, dyn_sim_config_file, rl_config_file, server_port_num=25333, cnts=[2, 2, 2]):

        global gateway
        gateway = JavaGateway(gateway_parameters=GatewayParameters(port=server_port_num, auto_convert=True))
        global ipss_app
        ipss_app = gateway.entry_point

        from gym import spaces

        _case_files = case_files
        _dyn_sim_config_file = dyn_sim_config_file
        _rl_config_file = rl_config_file

        #initialize the power system simulation service

        #  {observation_history_length,observation_space_dim, action_location_num, action_level_num};
        dim_ary = ipss_app.initStudyCase(case_files, dyn_sim_config_file, rl_config_file)
        print('observation_history_length, observation_space_dim, action_location_num, action_level_num = ')
        print(dim_ary[0], dim_ary[1], dim_ary[2], dim_ary[3])

        observation_history_length = dim_ary[0]
        observation_space_dim =  dim_ary[1]

        action_location_num =  dim_ary[2]
        action_level_num = dim_ary[3]
        num = action_level_num ** action_location_num
        self.action_space = spaces.Discrete(num)
        self.cnts = cnts




        #define action and observation spaces
        """
        if(action_location_num == 1):
            self.action_space      = spaces.Discrete(action_level_num) # Discrete, 1-D dimension
        else:
            #print('N-D dimension Discrete Action space is not supported it yet...TODO')
            # the following is based on the latest  gym dev version
            # action_def_vector   = np.ones(action_location_num, dtype=np.int32)*action_level_num

            # for gym version 0.10.4, it is parametrized by passing an array of arrays containing [min, max] for each discrete action space
            # for example,  MultiDiscrete([ [0,4], [0,1], [0,1] ])

            action_def_vector = np.ones((action_location_num,2),dtype=np.int32)
            action_def_vector[:,1] = action_level_num -1
            aa = np.asarray(action_def_vector, dtype=np.int32)

            self.action_space   = spaces.MultiDiscrete(action_def_vector) # Discrete, N-D dimension
        """


        self.observation_space = spaces.Box(-999,999,shape=(observation_history_length * observation_space_dim,)) # Continuous

        self._seed()

        # TODO: get the initial states
        self.state = None

        self.steps_beyond_done = None
        self.restart_simulation = True
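
A sketch of how a single Discrete(action_level_num ** action_location_num) index can be decoded back into one action level per location (base-N digits); the helper name is illustrative.

def decode_flat_action(action, action_location_num, action_level_num):
    levels = []
    for _ in range(action_location_num):
        levels.append(action % action_level_num)   # level for this location
        action //= action_level_num
    return levels

print(decode_flat_action(11, action_location_num=3, action_level_num=4))   # [3, 2, 0]
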
Example #19
    def __init__(self,
                 image_paths,
                 true_bboxes,
                 playout_episode=False,
                 premasking=True,
                 mode='train',
                 max_steps_per_image=200,
                 seed=None,
                 bbox_scaling=0.125,
                 bbox_transformer='base',
                 has_termination_action=True,
                 ior_marker_type='cross',
                 history_length=10):
        """
        :param image_paths: The paths to the individual images
        :param true_bboxes: The true bounding boxes for each image
        :type image_paths: String or list
        :type true_bboxes: numpy.ndarray
        """
        # Determines whether the agent is training or testing
        # Optimizations can be applied during training that are not allowed for testing
        self.mode = mode
        # Factor for scaling all bounding boxes relative to their size
        self.bbox_scaling = bbox_scaling
        # Whether IoR markers will be placed upfront after loading the image
        self.premasking = premasking
        # Whether an episode terminates after a single trigger or is played out until the end
        self.playout_episode = playout_episode
        # Episodes will be terminated automatically after reaching max steps
        self.max_steps_per_image = max_steps_per_image
        # Whether a termination action should be provided in the action set
        self.has_termination_action = has_termination_action
        # The type of IoR marker to be used when masking trigger regions
        self.ior_marker_type = ior_marker_type
        # Length of history in state & agent model
        self.history_length = history_length

        # Initialize action space
        self.bbox_transformer = create_bbox_transformer(bbox_transformer)
        self.action_space = spaces.Discrete(len(self.action_set))
        # 224*224*3 (RGB image) + 9 * 10 (one-hot-encoded history) = 150618
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(224, 224, 3)),
            spaces.Box(low=0,
                       high=1,
                       shape=(self.history_length, len(self.action_set)))
        ])

        # Initialize dataset
        if type(image_paths) is not list:
            image_paths = [image_paths]
        self.image_paths = image_paths
        self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                            for bboxes in true_bboxes]

        # For registering a handler that will be executed once after a step
        self.post_step_handler = None

        # Episode-specific

        # Image for the current episode
        self.episode_image = None
        # Ground truth bounding boxes for the current episode image
        self.episode_true_bboxes = None
        # Predicted bounding boxes for the current episode image
        self.episode_pred_bboxes = None
        # IoU values for each trigger in the current episode
        self.episode_trigger_ious = None
        # List of indices of masked bounding boxes for the current episode image
        self.episode_masked_indices = []
        # Number of trigger actions used so far
        self.num_triggers_used = 0

        self.seed(seed=seed)
        self.reset()
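
A quick check of the size quoted in the comment above: a 224x224 RGB image plus a one-hot action history of length 10 over 9 actions (the counts implied by the comment) flattens to 150618 values.

image_size = 224 * 224 * 3          # 150528
history_size = 9 * 10               # len(action_set) * history_length, per the comment
print(image_size + history_size)    # 150618
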
Example #20
        "cifar100": (32, 32, 3),
        "cifarfellowship": (32, 32, 3),
        "imagenet100": (224, 224, 3),
        "imagenet1000": (224, 224, 3),
        # "permutedmnist": (28, 28, 1),
        # "rotatedmnist": (28, 28, 1),
        "core50": (224, 224, 3),
        "core50-v2-79": (224, 224, 3),
        "core50-v2-196": (224, 224, 3),
        "core50-v2-391": (224, 224, 3),
        "synbols": (224, 224, 3),
    }.items()
}

reward_spaces: Dict[str, Space] = {
    "mnist": spaces.Discrete(10),
    "fashionmnist": spaces.Discrete(10),
    "kmnist": spaces.Discrete(10),
    "emnist": spaces.Discrete(10),
    "qmnist": spaces.Discrete(10),
    "mnistfellowship": spaces.Discrete(30),
    "cifar10": spaces.Discrete(10),
    "cifar100": spaces.Discrete(100),
    "cifarfellowship": spaces.Discrete(110),
    "imagenet100": spaces.Discrete(100),
    "imagenet1000": spaces.Discrete(1000),
    "permutedmnist": spaces.Discrete(10),
    "rotatedmnist": spaces.Discrete(10),
    "core50": spaces.Discrete(50),
    "core50-v2-79": spaces.Discrete(50),
    "core50-v2-196": spaces.Discrete(50),
Example #21
 def _init_action_space(self):
     return spaces.Discrete(4)
Example #22
    def __post_init__(self):
        """Initializes the fields of the Setting (and LightningDataModule),
        including the transforms, shapes, etc.
        """
        if isinstance(self.increment, list) and len(self.increment) == 1:
            # This can happen when parsing a list from the command-line.
            self.increment = self.increment[0]

        base_observations_space = base_observation_spaces[self.dataset]
        base_reward_space = reward_spaces[self.dataset]
        # action space = reward space by default
        base_action_space = base_reward_space

        if isinstance(base_action_space, spaces.Discrete):
            # Classification dataset

            self.num_classes = base_action_space.n
            # Set the number of tasks depending on the increment, and vice-versa.
            # (as only one of the two should be used).
            if self.nb_tasks == 0:
                self.nb_tasks = self.num_classes // self.increment
            else:
                self.increment = self.num_classes // self.nb_tasks
        else:
            raise NotImplementedError(f"TODO: (issue #43)")

        if not self.class_order:
            self.class_order = list(range(self.num_classes))

        # Test values default to the same as train.
        self.test_increment = self.test_increment or self.increment
        self.test_initial_increment = self.test_initial_increment or self.test_increment
        self.test_class_order = self.test_class_order or self.class_order

        # TODO: For now we assume a fixed, equal number of classes per task, for
        # sake of simplicity. We could take out this assumption, but it might
        # make things a bit more complicated.
        assert isinstance(self.increment, int)
        assert isinstance(self.test_increment, int)

        self.n_classes_per_task: int = self.increment
        action_space = spaces.Discrete(self.n_classes_per_task)
        reward_space = spaces.Discrete(self.n_classes_per_task)

        super().__post_init__(
            # observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,  # the labels have shape (1,) always.
        )
        self.train_datasets: List[_ContinuumDataset] = []
        self.val_datasets: List[_ContinuumDataset] = []
        self.test_datasets: List[_ContinuumDataset] = []

        # This will be set by the Experiment, or passed to the `apply` method.
        # TODO: This could be a bit cleaner.
        self.config: Config
        # Default path to which the datasets will be downloaded.
        self.data_dir: Optional[Path] = None

        self.train_env: PassiveEnvironment = None  # type: ignore
        self.val_env: PassiveEnvironment = None  # type: ignore
        self.test_env: PassiveEnvironment = None  # type: ignore
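
A worked example of the increment/nb_tasks bookkeeping above, assuming a Discrete(100) reward/action space such as the cifar100 entry in reward_spaces: fixing the increment derives the number of tasks, and vice versa.

num_classes = 100
increment = 10
nb_tasks = num_classes // increment
print(nb_tasks)                     # 10 tasks of 10 classes each
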
Example #23
    def __init__(self, env, keys):
        super(DiscreteToFixedKeysVNCActions, self).__init__(env)

        self._keys = keys
        self._generate_actions()
        self.action_space = spaces.Discrete(len(self._actions))
Example #24
0
 def space(self):
     return spaces.Discrete(self.max_value + self.scale_factor)
Example #25
0
    def __init__(self):
        self.__version__ = "0.0.3"
        print("AbadiaEnv - Version {}".format(self.__version__))

        self.url = "http://localhost:4477"
        self.server = "http://localhost"
        self.port = "4477"
        self.num_episodes = 100
        self.num_steps = 1500

        self.gameName = ""
        self.actionsName = ""
        self.checkpointName = None
        self.dump_path = "partidas/now/"

        # Define what the agent can do
        # 0 -> STEP ORI 0
        # 1 -> STEP ORI 1
        # 2 -> STEP ORI 2
        # 3 -> STEP ORI 3
        # 4 -> RIGHT
        # 5 -> LEFT
        # 6 -> DOWN
        # 7 -> NOP

        self.action_space = spaces.Discrete(8)  # one discrete action per entry listed above (0-7)

        # json from the dump state of the episode

        self.json_dump = {}

        self.actions_list = ("cmd/A", "cmd/A", "cmd/A", "cmd/A", "cmd/D",
                             "cmd/I", "cmd/B", "cmd/N")
        self.obsequium = -1
        self.porcentaje = -1
        self.haFracasado = False
        self.prevPantalla = -1
        self.rejilla = []
        self.prev_ob = dict()
        self.estaGuillermo = False

        self.curr_step = -1
        self.is_game_done = False

        self.Personajes = {
            "Guillermo": {},
            "Adso": {},
            "Abad": {},
            "Malaquias": {},
            "Berengario": {},
            "Severino": {},
            "Jorge": {},
            "Bernardo": {}
        }

        self.listaPersonajes = ("Guillermo", "Adso", "Abad", "Malaquias",
                                "Berengario", "Severino", "Jorge", "Bernardo")

        self.Visited = np.zeros([512, 512])

        # Observation is the remaining time
        low = np.array([
            0.0,  # remaining_tries
        ])
        high = np.array([
            512,
        ])
        self.observation_space = spaces.Box(low, high)

        # Store what the agent tried
        self.curr_episode = -1
        self.action_episode_memory = []
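A minimal sketch of turning a sampled action index into the server command, assuming the positional mapping between the Discrete action space and actions_list above:

from gym import spaces  # assuming the standard gym.spaces module used by these examples

actions_list = ("cmd/A", "cmd/A", "cmd/A", "cmd/A", "cmd/D", "cmd/I", "cmd/B", "cmd/N")
action_space = spaces.Discrete(len(actions_list))
action = action_space.sample()   # random index in [0, 7]
command = actions_list[action]   # e.g. "cmd/D" for RIGHT (action 4)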
Example #26
0
 def space(self) -> spaces.Discrete:
     return spaces.Discrete(self.actions_per_axis**self.size)
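The single Discrete above enumerates every joint action over size axes with actions_per_axis choices per axis, so one flat sample can be decoded back into per-axis actions, for example with np.unravel_index. A sketch with hypothetical values:

import numpy as np

actions_per_axis, size = 3, 2
flat_action = 7   # one draw from Discrete(3 ** 2)
per_axis = np.unravel_index(flat_action, (actions_per_axis,) * size)
assert tuple(int(a) for a in per_axis) == (2, 1)   # 7 == 2 * 3 + 1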
Example #27
0
    def __init__(self,
                 step_cost=-0.01,
                 n_max=4,
                 collision_reward=-10,
                 arrive_prob=0.5,
                 full_observable: bool = False,
                 max_steps: int = 100,
                 n_junctions=1):
        assert 1 <= n_max <= 4, "n_max should be range in [1,4]"
        assert n_junctions in AVAILABLE_JUNCTIONS, "Available junctions: {}, got {}".format(
            AVAILABLE_JUNCTIONS, n_junctions)
        assert 0 <= arrive_prob <= 1, "arrive probability should be in range [0,1]"
        assert 1 <= max_steps, "max_steps should be more than 1"

        self._grid_shape = (14, 14)
        self.n_agents = n_max
        self._max_steps = max_steps
        self._step_count = 0  # environment step counter
        self._collision_reward = collision_reward
        self.wrong_destination_reward = -5
        self._total_episode_reward = None
        self._arrive_prob = arrive_prob
        self._n_max = n_max
        self._step_cost = step_cost
        self.curr_cars_count = 0
        self._n_routes = 3
        self._junction = to_array(AVAILABLE_JUNCTIONS[n_junctions])

        self._agent_view_mask = (3, 3)

        # entry gates where the cars spawn and exit gates used when an agent
        # reaches a wrong destination and gets penalised;
        # start_and_end_positions() generates all the options and _start_idx picks among them
        self._entry_gates, self._exit_gates = start_and_end_positions(
            self._junction)
        self._start_idx = np.arange(len(self._entry_gates))
        np.random.shuffle(self._start_idx)

        # destination places for the cars to reach
        self._destination = []
        for i in range(self.n_agents):
            idx = self._start_idx[i]
            end_candidates = self._entry_gates[:idx] + self._entry_gates[idx + 1:]
            destination_idx = np.random.choice(np.arange(len(end_candidates)))
            self._destination.append(end_candidates[destination_idx])

        self._agent_step_count = [0 for _ in range(self.n_agents)]  # holds a step counter for each car

        self.action_space = MultiAgentActionSpace(
            [spaces.Discrete(5) for _ in range(self.n_agents)])
        self.agent_pos = {_: None for _ in range(self.n_agents)}
        self._on_the_road = [False for _ in range(self.n_agents)]  # flag if car is on the road

        self._full_obs = self.__create_grid()
        self._base_img = self.__draw_base_img()
        self._agent_dones = [None for _ in range(self.n_agents)]

        self.viewer = None
        self.full_observable = full_observable

        # agent id (n_agents, onehot), obs_mask (9), pos (2), route (3)
        mask_size = np.prod(self._agent_view_mask)
        self._obs_high = np.ones(mask_size * (self.n_agents + 2 + 2))  # n_agents + destination + location
        self._obs_low = np.zeros(mask_size * (self.n_agents + 2 + 2))  # n_agents + destination + location
        if self.full_observable:
            self._obs_high = np.tile(self._obs_high, self.n_agents)
            self._obs_low = np.tile(self._obs_low, self.n_agents)
        self.observation_space = MultiAgentObservationSpace([
            spaces.Box(self._obs_low, self._obs_high)
            for _ in range(self.n_agents)
        ])
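MultiAgentActionSpace and MultiAgentObservationSpace (the ma_gym-style helpers assumed here) wrap one gym space per agent. A minimal sketch of drawing a joint action without those wrappers, with each car independently sampling one of its five moves:

from gym import spaces  # assuming the standard gym.spaces module used by these examples

n_agents = 4
per_agent_spaces = [spaces.Discrete(5) for _ in range(n_agents)]
joint_action = [space.sample() for space in per_agent_spaces]   # one move per car
assert all(0 <= a < 5 for a in joint_action)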
Example #28
0
 def space(self) -> spaces.Space:
     return spaces.Discrete(len(self.actions))
Example #29
0
 def _set_action_space(self):
     self.action_space = spaces.Discrete(6)
Example #30
0
    def __init__(self,
                 grid_shape,
                 field_types,
                 initial_state,
                 initial_position,
                 transition,
                 hidden_reward,
                 corrupt_reward,
                 episode_length,
                 print_field=lambda x: str(x)):
        self.action_space = spaces.Discrete(4)
        assert (field_types >= 1)
        self.observation_space = spaces.MultiDiscrete(
            np.zeros(grid_shape) + field_types + 1)  # All field types plus the agent's position

        def within_world(position):
            return 0 <= position[0] < grid_shape[0] and 0 <= position[1] < grid_shape[1]

        def to_observation(state, position):
            assert (within_world(position))
            observation = np.array(state, dtype=np.int32)
            observation[position] = AGENT
            return observation

        assert (self.observation_space.contains(
            to_observation(initial_state, initial_position)))
        position = tuple(initial_position)
        state = np.array(initial_state)
        step = 0
        last_action = None

        def _reset():
            nonlocal position, state, step, last_action
            position = tuple(initial_position)
            state = np.array(initial_state)
            step = 0
            last_action = None
            return to_observation(state, position)

        def _transition(state, position, action):
            pos = np.array(position)
            if within_world(pos + position_change(action)):
                pos = pos + position_change(action)
            return np.array(state), tuple(pos)

        if transition is None:
            transition = _transition  # Only move within world, don't change anything

        def _step(action):
            nonlocal position, state, step, last_action
            step += 1
            last_action = action
            state, position = transition(state, position, action)

            info = {'hidden_reward': hidden_reward(state, position)}
            reward = corrupt_reward(state, position)
            done = (step > episode_length)
            return (to_observation(state, position), reward, done, info)

        def _render(mode='human', close=False):
            observation = to_observation(state, position)
            observation_chars = [[
                print_field(observation[c, r]) for c in range(grid_shape[0])
            ] for r in reversed(range(grid_shape[1]))]
            additional_info = "A: " + str(last_action) + " S: " + str(step)
            if mode == 'text':
                sys.stdout.write("\n".join(''.join(line)
                                           for line in observation_chars) +
                                 "\n")
                sys.stdout.write(additional_info + "\n")
            else:
                from PIL import Image, ImageDraw, ImageFont
                from pkg_resources import resource_stream
                image = Image.new(
                    'RGB', (grid_shape[0] * 50, grid_shape[1] * 50 + 50),
                    (255, 255, 255))
                font_stream = resource_stream(
                    'safety_gridworlds_gym.envs.common', 'unifont-11.0.02.ttf')
                font = ImageFont.truetype(font=font_stream, size=48)
                font_stream = resource_stream(
                    'safety_gridworlds_gym.envs.common', 'unifont-11.0.02.ttf')
                smaller_font = ImageFont.truetype(font=font_stream, size=36)
                drawing = ImageDraw.Draw(image)
                for r in range(grid_shape[1]):
                    for c in range(grid_shape[0]):
                        drawing.text((c * 50, r * 50),
                                     observation_chars[r][c],
                                     font=font,
                                     fill=(0, 0, 0))
                drawing.text((0, grid_shape[1] * 50),
                             additional_info,
                             font=smaller_font,
                             fill=(0, 0, 0))
                if mode == 'human':
                    import matplotlib.pyplot as plt
                    plt.axis("off")
                    plt.imshow(image)
                    plt.pause(.1)
                    plt.clf()
                return np.array(image)

        self.step = _step
        self.reset = _reset
        self.render = _render