def __init__(
    self,
    nb_bandits: int,
    nb_prices_per_bandit: int,
    reward_params: List[Tuple[float, int]],
):
    """
    Set up the bandit pricing environment.

    :param nb_bandits: number of bandits
    :param nb_prices_per_bandit: number of prices per bandit
    :param reward_params: parameters for the reward distribution; stored
        unchanged on the instance
    """
    self.nb_bandits = nb_bandits
    self.nb_prices_per_bandit = nb_prices_per_bandit
    self.reward_params = reward_params

    # An action selects one of the bandits together with one price index
    # for that bandit.
    bandit_choice = spaces.Discrete(self.nb_bandits)
    price_choice = spaces.Discrete(self.nb_prices_per_bandit)
    self.action_space = spaces.Tuple((bandit_choice, price_choice))

    # Bare placeholder space: agents only get a reward back.
    self.observation_space = spaces.Space()

    # Seed the environment's randomness.
    self.seed()
def __init__(self, save_replay=False, render=False, config_file="2lane.json"):
    """
    Build the environment from a configuration file.

    :param save_replay: forwarded to the parent constructor
    :param render: forwarded to the parent constructor
    :param config_file: path handed to ``load_from_file`` to obtain the
        environment configuration
    """
    super().__init__(
        env_configs=load_from_file(config_file),
        save_replay=save_replay,
        render=render,
    )

    self.observation_space = spaces.Space(shape=(2, DIM_H, DIM_W))

    # Action table: for every even phase index a (+5) and a (-5) entry,
    # followed by one trailing (-1, None) entry.
    phase_indices = [i * 2 for i in range(TRAFFICLIGHTS_PHASES)]
    self.actions = (
        [(phase, 5) for phase in phase_indices]
        + [(phase, -5) for phase in phase_indices]
        + [(-1, None)]
    )
    self.action_space = spaces.Discrete(len(self.actions))

    self.phases_durations = [DEFAULT_DURATION] * 4

    # Statistics start from zero / empty.
    self.throughput = 0
    self.travel_time = 0
    self.traveling_cars = {}

    # Only the first traffic light reported by TraCI is used.
    self.tls_id = traci.trafficlight.getIDList()[0]
def space(self):
    """
    Return the space describing this observation.

    The result is a float32 ``Box`` bounded by 0 and 1 with shape
    ``self.shape``. If building it raises ``AttributeError`` (for example
    because ``self.shape`` is not defined), a bare ``spaces.Space()`` is
    returned instead.
    """
    try:
        box = spaces.Box(low=0, high=1, shape=self.shape, dtype=np.float32)
    except AttributeError:
        return spaces.Space()
    return box
def space(self) -> spaces.Space:
    """
    Return a ``Dict`` space matching the current goal-style observation.

    One unbounded float32 ``Box`` is created for each of the
    ``desired_goal``, ``achieved_goal`` and ``observation`` entries returned
    by ``self.observe()``, using that entry's ``shape``. If an
    ``AttributeError`` is raised along the way, a bare ``spaces.Space()``
    is returned.
    """
    entry_names = ("desired_goal", "achieved_goal", "observation")
    try:
        sample = self.observe()
        return spaces.Dict({
            name: spaces.Box(
                -np.inf,
                np.inf,
                shape=sample[name].shape,
                dtype=np.float32,
            )
            for name in entry_names
        })
    except AttributeError:
        return spaces.Space()
def space(self):
    """
    Return a ``Dict`` space with one entry per name in ``self.attributes``.

    Every entry is an unbounded float32 ``Box`` whose shape is taken from
    the matching item of ``self.observe()``. If an ``AttributeError`` is
    raised along the way, a bare ``spaces.Space()`` is returned.
    """
    try:
        sample = self.observe()
        boxes = {}
        for name in self.attributes:
            boxes[name] = spaces.Box(
                -np.inf, np.inf, shape=sample[name].shape, dtype=np.float32
            )
        return spaces.Dict(boxes)
    except AttributeError:
        return spaces.Space()
def setUp(self) -> None:
    """Create an autospecced simulator, its interface and a stub CustomSimEnv."""
    self.simulator: Simulator = create_autospec(Simulator)
    self.interface: GymTrainingInterface = GymTrainingInterface(self.simulator)

    # Set mock's deepcopy to return input (a dict of already copied
    # attributes).
    self.simulator.__deepcopy__ = lambda x: x

    # Stub action-space function: always returns a bare Space.
    self.sim_action_space_function: Callable[
        [GymTrainedInterface], spaces.Space
    ] = lambda x: spaces.Space()

    # Stub action function: always returns the same fixed dictionary.
    self.sim_action_function: Callable[
        [GymTrainedInterface, np.ndarray], Dict[str, List[float]]
    ] = lambda x, y: {"a": [0]}

    stub_action = SimAction(
        self.sim_action_space_function,
        self.sim_action_function,
        "stub_action",
    )
    # The second and fourth positional arguments are empty lists.
    self.env: CustomSimEnv = CustomSimEnv(self.interface, [], stub_action, [])

    # The network is attached only after the environment has been built.
    self.simulator.network = ChargingNetwork()
def __init__(self, layout: str, enable_render=True, state_active=False,
             player_lives: int = 3):
    """
    PacmanEnv constructor

    :param layout: the layout of the game
    :param enable_render: enabling the display of the game screen
    :param state_active: enabling the display of the state matrix
    :param player_lives: stored on the instance as ``player_lives``
    """
    self.layout = layout
    self.state_active = state_active
    self.enable_render = enable_render

    # pygame is only initialised when something will be displayed.
    if enable_render:
        pg.init()

    self.action_space = spaces.Discrete(Action.__len__())

    self.maze = Map(layout)
    self.width, self.height = self.maze.get_map_sizes()

    # Without rendering the game is created with no screen at all.
    if enable_render:
        screen = Controller.get_screen(state_active, self.width, self.height)
    else:
        screen = None
    self.game = Game(
        maze=self.maze,
        screen=screen,
        sounds_active=False,
        state_active=state_active,
        agent=None,
    )

    self.timer = 0
    self.reinit_game = False
    self.player_lives = player_lives

    # The observation shape is taken from one RGB screen capture.
    frame_shape = self.get_screen_rgb_array().shape
    self.observation_space = spaces.Space(shape=frame_shape, dtype=int)

    self.seed()
def __init__(self, sumo_cmd, vehicle_generator_config, junctions,
             traffic_movements, traffic_lights_phases, light_duration,
             clusters, max_steps=1500, env_name=None):
    """
    Set up a multi-junction traffic-light environment.

    :param sumo_cmd: forwarded to the parent constructor
    :param vehicle_generator_config: forwarded to the parent constructor
    :param junctions: sequence of junction ids; the first one is used to
        read the phase durations
    :param traffic_movements: observation width is ``traffic_movements + 1``
    :param traffic_lights_phases: number of choices per junction in the
        ``MultiDiscrete`` action space
    :param light_duration: stored as ``light_duration`` and ``green_dur``
    :param clusters: mapping from junction to cluster description; any
        falsy value is replaced by an empty dict
    :param max_steps: forwarded to the parent constructor
    :param env_name: forwarded to the parent constructor
    """
    super().__init__(sumo_cmd, vehicle_generator_config, max_steps,
                     env_name=env_name)

    nb_junctions = len(junctions)

    self.junctions = junctions
    self.cluster_map = clusters if clusters else {}
    self.traffic_lights_phases = traffic_lights_phases

    self.observation_space = spaces.Space(
        shape=(nb_junctions, traffic_movements + 1))
    self.action_space = spaces.MultiDiscrete(
        [traffic_lights_phases] * nb_junctions)

    self.light_duration = light_duration

    # Every junction (or every traffic light of its cluster) starts with
    # the same previous action; clustered lights are also mapped back to
    # the junction that owns the cluster.
    self.previous_actions = {}
    self.clustered_juncions = {}
    for junction in self.junctions:
        cluster = self.cluster_map.get(junction)
        if not cluster:
            self.previous_actions[junction] = (0, 1, 2, 3)
            continue
        for member, _phases in cluster["tls_to_phases"].items():
            self.clustered_juncions[member] = junction
            self.previous_actions[member] = (0, 1, 2, 3)

    self.traveling_cars = {}
    self.travel_time = 0
    self.throughput = 0

    # Read the phase durations from the first junction by switching it to
    # phase 1 and then phase 2, and finally put it back to phase 0.
    self.green_dur = self.light_duration
    lights = self.connection.trafficlight
    first_junction = self.junctions[0]
    lights.setPhase(first_junction, 1)
    self.yellow_dur = lights.getPhaseDuration(first_junction)
    lights.setPhase(first_junction, 2)
    self.red_dur = lights.getPhaseDuration(first_junction)
    lights.setPhase(first_junction, 0)

    self.curr_phases = [-1] * nb_junctions
    self.prev_phases = [-1] * nb_junctions
    self.events = []
    self.ret_state = [True] * nb_junctions
    self.restarted = True
def __init__(self, test_info: CoreutilsInfo, out_dir, max_itr=100, test_times=5, _original_cost=None):
    """
    Prepare the environment for one test binary.

    The output directory is recreated from scratch, the binary's export file
    is generated if it does not exist yet, and the baseline cycle cost is
    either measured or taken from ``_original_cost``.

    :param test_info: description of the binary under test; its ``id`` and
        ``current_bin_path`` attributes are read here
    :param out_dir: output directory; removed first if it already exists,
        then created again
    :param max_itr: stored as ``max_iteration``
    :param test_times: number of perf-stat file paths prepared; must be
        greater than 2
    :param _original_cost: baseline cycle cost; when ``None`` it is measured
        with ``self._get_performance(mode=0)``
    :raises AssertionError: if ``test_times <= 2`` or the measured baseline
        cost is not positive
    :raises IDAException: if ``ida_bin_export_info`` returns a non-zero code
    """
    log('initializing environment: ' + test_info.id, LogType.INFO)
    assert test_times > 2
    super(CoreutilsEnv, self).__init__()
    # only 0-8 modes, inline_func is not predictable, and basic block merge has some problems
    # remove 3, it is too powerful
    # NOTE(review): the set below holds ten values (0-9) and still contains
    # 3, so the two remarks above look out of date - confirm.
    self._action_set = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
    self.action_space = spaces.Discrete(len(self._action_set))
    self.observation_space = spaces.Space(shape=(max_ops_len, ), dtype=_dtype)
    self._episode_ended = False
    self.max_iteration = max_itr
    self.test_times = test_times
    self._episode_count = 0
    self.test_info = test_info
    self.out_dir = out_dir
    # Start from an empty output directory: an existing one is removed
    # through a shell command run with sudo before it is created again.
    if os.path.isdir(self.out_dir):
        os.system('sudo rm -r {}'.format(self.out_dir))
    os.mkdir(self.out_dir)
    # set files to store running info (one path per test run)
    self._stat_files = [
        os.path.join(self.out_dir,
                     self.test_info.id + '.perf-stat-' + str(idx))
        for idx in range(self.test_times)
    ]
    self._src_bin_export = self._get_bin_export_path(
        self.test_info.current_bin_path)
    # speed up the test process: the export step is skipped when the
    # exported file already exists
    if not os.path.isfile(self._src_bin_export):
        ret = ida_bin_export_info(self.test_info.current_bin_path,
                                  self._src_bin_export)
        if ret != 0:
            raise IDAException(ret)
        # assert ret == 0, "bindiff return errors, return value: %d" % ret
    # Baseline cost: measure it unless the caller supplied one.
    if _original_cost is None:
        self._original_cycles_cost = self._get_performance(mode=0)
        assert self._original_cycles_cost > 0, "error of perf!"
    else:
        self._original_cycles_cost = _original_cost
    self._cycles_list = [self._original_cycles_cost
                         ]  # idx=0 is the original binary's running clock
    self._reward_list = []
    self._action_list = []
    # The current state starts out as the state of the original binary.
    self._original_state = bin2state(self.test_info.current_bin_path)
    self._state = self._original_state
    self._cycles_cost = self._original_cycles_cost
    self._similarity = 1.0
    # Paths of the per-test record and error-log files inside out_dir.
    self._reward_records_file = os.path.join(
        self.out_dir, 'reward_records_{}.txt'.format(self.test_info.id))
    self._error_log = os.path.join(
        self.out_dir, 'error_log_{}.txt'.format(self.test_info.id))
    self._error_count = 0
    self._uroboros_meet_error = False
    self._uroboros_error_count = 0
    self._diversify_output_file = os.path.join(self.out_dir,
                                               'div_all_output.txt')
    log('environment \'%s\' initialized' % self.test_info.id, LogType.INFO)
def __init__(self,
             bodies: SystemScope = SystemScope.ALL,
             start_body: SolarSystemPlanet = None,
             target_bodies: List[SolarSystemPlanet] = None,
             start_time: Time = None,
             action_step: TimeDelta = TimeDelta(1 * u.minute),
             simulation_step: TimeDelta = TimeDelta(1 * u.second),
             spaceship_name: SpaceShipName = SpaceShipName.DEFAULT,
             spaceship_initial_altitude: u.km = 400 * u.km,
             spaceship_mass: u.kg = None,
             spaceship_propellant_mass: u.kg = None,
             spaceship_isp: u.s = None,
             spaceship_engine_thrust: u.N = None):
    """
    Set up the solar-system environment and its spaceship.

    :param bodies: which set of bodies to model; must be a key of the body
        table below (``SystemScope.EARTH`` or ``SystemScope.ALL``)
    :param start_body: body to start from; ``Earth`` when ``None``
    :param target_bodies: bodies to reach; ``[Mars]`` when ``None``
    :param start_time: start time; the current time converted to TDB when
        ``None``
    :param action_step: stored as ``time_step``
    :param simulation_step: stored as ``simulation_step``
    :param spaceship_name: stored as ``spaceship_name``
    :param spaceship_initial_altitude: stored as
        ``spaceship_initial_altitude``
    :param spaceship_mass: stored as ``spaceship_mass``
    :param spaceship_propellant_mass: stored as
        ``spaceship_propellant_mass``
    :param spaceship_isp: stored as ``spaceship_isp``
    :param spaceship_engine_thrust: stored as ``spaceship_engine_thrust``
    :raises KeyError: if ``bodies`` is not a key of the body table
    """
    super(SolarSystemGrav, self).__init__()

    # Fill in the defaults that cannot be expressed in the signature.
    if start_body is None:
        start_body = Earth
    if target_bodies is None:
        target_bodies = [Mars]
    if start_time is None:
        start_time = Time(datetime.now()).tdb

    # todo: enforce action_step/simulation_step is an integer?
    self.start_body = start_body
    self.target_bodies = target_bodies
    self.spaceship_initial_altitude = spaceship_initial_altitude
    self.start_time = start_time
    self.current_time = None
    self.time_step = action_step
    self.simulation_step = simulation_step
    self.done = False
    self.reward = 0

    # Spaceship parameters are kept on the instance; presumably consumed by
    # self._init_spaceship() below - verify against that method.
    self.spaceship_name = spaceship_name
    self.spaceship_mass = spaceship_mass
    self.spaceship_propellant_mass = spaceship_propellant_mass
    self.spaceship_isp = spaceship_isp
    self.spaceship_engine_thrust = spaceship_engine_thrust

    # set up solar system
    solar_system_ephemeris.set("jpl")  # Download & use JPL Ephem

    # Bodies to model for each scope.
    # Could also add versions for: only the inner solar system, only the
    # 'major' bodies, the Jovian moons, Saturn's moons?
    body_dict = {
        SystemScope.EARTH: [Earth, Moon],
        SystemScope.ALL: [
            Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn, Uranus,
            Neptune, Pluto
        ],
    }
    try:
        self.body_list = body_dict[bodies]
    except KeyError as err:
        # Chain explicitly so the traceback shows the failed lookup as the
        # direct cause of this error.
        raise KeyError(
            f"bodies must be one of {body_dict.keys()}") from err

    # set up spacecraft
    self.spaceship = self._init_spaceship()
    self.current_ephem = None

    # Action and observation spaces must be gym.spaces objects.
    #
    # Planned observation layout (not implemented yet, hence the bare Space):
    #   [time_step, [craft position, velocity, fuel, engine power],
    #    [body_1_is_target, body_1_position, body_1_velocity, body_1_mass],
    #    ...
    #    [body_n_is_target, body_n_position, body_n_velocity, body_n_mass]]
    self.observation_space = spaces.Space()

    # action: tuple [[x, y, z], burn duration]
    self.action_space = spaces.Tuple((
        spaces.Box(low=-1.0, high=1.0, shape=(3, )),  # x,y,z direction vector
        spaces.Box(low=0.0, high=1.0, shape=(1, ))  # burn duration as percent of time_step
    ))