コード例 #1
0
    def __init__(
        self,
        nb_bandits: int,
        nb_prices_per_bandit: int,
        reward_params: List[Tuple[float, int]],
    ):
        """
        Store the configuration and declare the action/observation spaces.

        :param nb_bandits: number of bandits
        :param nb_prices_per_bandit: number of prices per bandit
        :param reward_params: single param or tuple of params for the reward
            distribution (stored as given, not interpreted here)
        """
        self.nb_bandits = nb_bandits
        self.nb_prices_per_bandit = nb_prices_per_bandit
        self.reward_params = reward_params

        # An action selects one of nb_bandits and one price index for it.
        bandit_choice = spaces.Discrete(self.nb_bandits)
        price_choice = spaces.Discrete(self.nb_prices_per_bandit)
        self.action_space = spaces.Tuple((bandit_choice, price_choice))

        # Bare placeholder space: agents only get a reward back.
        self.observation_space = spaces.Space()

        # Seed environment randomness.
        self.seed()
コード例 #2
0
    def __init__(self,
                 save_replay=False,
                 render=False,
                 config_file="2lane.json"):
        """
        Load the configuration file, initialise the parent environment and
        set up spaces, the action table and bookkeeping counters.

        :param save_replay: forwarded unchanged to the parent constructor
        :param render: forwarded unchanged to the parent constructor
        :param config_file: path handed to ``load_from_file``
        """
        super().__init__(env_configs=load_from_file(config_file),
                         save_replay=save_replay,
                         render=render)

        self.observation_space = spaces.Space(shape=(2, DIM_H, DIM_W))

        # Action table: every even index below 2 * TRAFFICLIGHTS_PHASES paired
        # with +5, then the same indices paired with -5, then (-1, None).
        # NOTE(review): presumably (phase index, duration delta) plus a no-op
        # entry -- confirm against the step() implementation.
        even_indices = [i * 2 for i in range(TRAFFICLIGHTS_PHASES)]
        self.actions = (
            [(idx, 5) for idx in even_indices]
            + [(idx, -5) for idx in even_indices]
            + [(-1, None)]
        )
        self.action_space = spaces.Discrete(len(self.actions))

        self.phases_durations = [DEFAULT_DURATION] * 4

        self.throughput = 0
        self.travel_time = 0
        self.traveling_cars = {}

        # First traffic-light id reported by traci.
        self.tls_id = traci.trafficlight.getIDList()[0]
コード例 #3
0
ファイル: observation.py プロジェクト: galleon/highway-env
 def space(self):
     """
     Return a float32 ``Box`` bounded by [0, 1] with shape ``self.shape``.

     If building the box raises ``AttributeError`` (for instance because
     ``self.shape`` is not available), a bare ``spaces.Space`` is returned.
     """
     try:
         unit_box = spaces.Box(low=0, high=1,
                               shape=self.shape,
                               dtype=np.float32)
     except AttributeError:
         return spaces.Space()
     return unit_box
コード例 #4
0
 def space(self) -> spaces.Space:
     """
     Return a ``Dict`` space with one unbounded float32 ``Box`` per goal key.

     The shape of each box is read from the matching entry of
     ``self.observe()``. If an ``AttributeError`` is raised along the way,
     a bare ``spaces.Space`` is returned instead.
     """
     goal_keys = ("desired_goal", "achieved_goal", "observation")
     try:
         obs = self.observe()
         return spaces.Dict({
             key: spaces.Box(-np.inf, np.inf,
                             shape=obs[key].shape,
                             dtype=np.float32)
             for key in goal_keys
         })
     except AttributeError:
         return spaces.Space()
コード例 #5
0
ファイル: observation.py プロジェクト: galleon/highway-env
 def space(self):
     """
     Return a ``Dict`` space with one unbounded float32 ``Box`` per entry
     of ``self.attributes``, shaped like the matching item of
     ``self.observe()``.

     If an ``AttributeError`` is raised along the way, a bare
     ``spaces.Space`` is returned instead.
     """
     try:
         sample = self.observe()
         boxes = {}
         for name in self.attributes:
             boxes[name] = spaces.Box(-np.inf, np.inf,
                                      shape=sample[name].shape,
                                      dtype=np.float32)
         return spaces.Dict(boxes)
     except AttributeError:
         return spaces.Space()
コード例 #6
0
 def setUp(self) -> None:
     """Build a ``CustomSimEnv`` around an autospecced ``Simulator`` with stub action functions."""
     simulator_mock = create_autospec(Simulator)
     self.simulator: Simulator = simulator_mock
     self.interface: GymTrainingInterface = GymTrainingInterface(simulator_mock)

     # Set mock's deepcopy to return input (a dict of already copied
     # attributes).
     simulator_mock.__deepcopy__ = lambda x: x

     # Stub callables handed to SimAction: a bare space and a fixed action.
     self.sim_action_space_function: Callable[
         [GymTrainedInterface], spaces.Space
     ] = lambda x: spaces.Space()
     self.sim_action_function: Callable[
         [GymTrainedInterface, np.ndarray], Dict[str, List[float]]
     ] = lambda x, y: {"a": [0]}

     stub_action = SimAction(
         self.sim_action_space_function, self.sim_action_function, "stub_action"
     )
     self.env: CustomSimEnv = CustomSimEnv(self.interface, [], stub_action, [])

     # The network is attached to the mock only after the env exists.
     self.simulator.network = ChargingNetwork()
コード例 #7
0
ファイル: pacman_env.py プロジェクト: paolodelia99/py-pacman
    def __init__(self,
                 layout: str,
                 enable_render=True,
                 state_active=False,
                 player_lives: int = 3):
        """
        PacmanEnv constructor

        :param layout: the layout of the game
        :param enable_render: enabling the display of the game screen
        :param state_active: enabling the display of the state matrix
        :param player_lives: stored on ``self.player_lives``
            (presumably the number of lives the player starts with)
        """
        self.layout = layout
        self.state_active = state_active
        self.enable_render = enable_render

        # pygame is only initialised when a screen will actually be shown.
        if enable_render:
            pg.init()

        self.action_space = spaces.Discrete(Action.__len__())
        self.maze = Map(layout)
        self.width, self.height = self.maze.get_map_sizes()

        # Without rendering the game is created with no screen at all.
        screen = None
        if enable_render:
            screen = Controller.get_screen(state_active, self.width, self.height)
        self.game = Game(maze=self.maze,
                         screen=screen,
                         sounds_active=False,
                         state_active=state_active,
                         agent=None)

        self.timer = 0
        self.reinit_game = False
        self.player_lives = player_lives

        # The observation shape is taken from an actual RGB frame.
        frame_shape = self.get_screen_rgb_array().shape
        self.observation_space = spaces.Space(shape=frame_shape, dtype=int)

        self.seed()
コード例 #8
0
    def __init__(self,
                 sumo_cmd,
                 vehicle_generator_config,
                 junctions,
                 traffic_movements,
                 traffic_lights_phases,
                 light_duration,
                 clusters,
                 max_steps=1500,
                 env_name=None):
        """
        Initialise the parent environment, then set up spaces, per-junction
        bookkeeping and the phase durations read from the first junction.

        :param sumo_cmd: forwarded to the parent constructor
        :param vehicle_generator_config: forwarded to the parent constructor
        :param junctions: sequence of junction ids; the first one is used to
            probe phase durations
        :param traffic_movements: the observation has ``traffic_movements + 1``
            columns per junction
        :param traffic_lights_phases: size of each junction's discrete action
        :param light_duration: stored as ``light_duration`` and ``green_dur``
        :param clusters: mapping from junction id to a cluster dict with a
            ``"tls_to_phases"`` entry; any falsy value means no clusters
        :param max_steps: forwarded to the parent constructor
        :param env_name: forwarded to the parent constructor
        """
        super().__init__(sumo_cmd,
                         vehicle_generator_config,
                         max_steps,
                         env_name=env_name)

        self.junctions = junctions
        self.cluster_map = clusters or {}
        self.traffic_lights_phases = traffic_lights_phases

        n_junctions = len(junctions)
        self.observation_space = spaces.Space(
            shape=(n_junctions, traffic_movements + 1))
        self.action_space = spaces.MultiDiscrete(
            [traffic_lights_phases] * n_junctions)

        self.light_duration = light_duration

        # Every junction (or every member of its cluster) starts with the
        # same previous action.
        initial_action = (0, 1, 2, 3)
        self.previous_actions = {}
        self.clustered_juncions = {}
        for junction in self.junctions:
            cluster = self.cluster_map.get(junction)
            if not cluster:
                self.previous_actions[junction] = initial_action
                continue
            for member in cluster["tls_to_phases"]:
                self.clustered_juncions[member] = junction
                self.previous_actions[member] = initial_action

        self.traveling_cars = {}

        self.travel_time = 0
        self.throughput = 0

        # Switch the first junction to phase 1, then phase 2, reading the
        # phase duration each time, and finally put it back on phase 0.
        # NOTE(review): the attribute names suggest phase 1 is yellow and
        # phase 2 is red -- confirm against the network definition.
        lights = self.connection.trafficlight
        probe = self.junctions[0]
        self.green_dur = self.light_duration
        lights.setPhase(probe, 1)
        self.yellow_dur = lights.getPhaseDuration(probe)
        lights.setPhase(probe, 2)
        self.red_dur = lights.getPhaseDuration(probe)
        lights.setPhase(probe, 0)

        self.curr_phases = [-1] * n_junctions
        self.prev_phases = [-1] * n_junctions

        self.events = []
        self.ret_state = [True] * n_junctions

        self.restarted = True
コード例 #9
0
    def __init__(self,
                 test_info: CoreutilsInfo,
                 out_dir,
                 max_itr=100,
                 test_times=5,
                 _original_cost=None):
        """
        Set up the environment for one test binary.

        Recreates ``out_dir``, makes sure the exported file for the current
        binary exists, records the baseline cost and initialises the episode
        bookkeeping.

        :param test_info: describes the binary under test; ``id`` and
            ``current_bin_path`` are read here
        :param out_dir: output directory; if it already exists it is removed
            (through ``sudo rm -r``) and created again
        :param max_itr: stored as ``self.max_iteration``
        :param test_times: number of per-run stat files; must be greater than 2
        :param _original_cost: baseline cost; when ``None`` it is measured
            with ``self._get_performance(mode=0)``
        :raises AssertionError: if ``test_times <= 2`` or the measured baseline
            cost is not positive
        :raises IDAException: if exporting the binary returns a non-zero code
        """
        # Imported locally so the block does not depend on a file-level import.
        import subprocess

        log('initializing environment: ' + test_info.id, LogType.INFO)
        assert test_times > 2
        super(CoreutilsEnv, self).__init__()
        # only 0-8 modes, inline_func is not predictable, and basic block merge has some problems
        # NOTE(review): the comments here look stale -- the set below holds
        # 0-9 and still contains 3, although the original note said
        # "remove 3, it is too powerful".
        self._action_set = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
        self.action_space = spaces.Discrete(len(self._action_set))
        self.observation_space = spaces.Space(shape=(max_ops_len, ),
                                              dtype=_dtype)
        self._episode_ended = False
        self.max_iteration = max_itr
        self.test_times = test_times
        self._episode_count = 0

        self.test_info = test_info
        self.out_dir = out_dir
        if os.path.isdir(self.out_dir):
            # Pass the path as its own argument (no shell) so directories
            # containing spaces or shell metacharacters are removed correctly;
            # "--" keeps a leading "-" from being parsed as an option.
            # As before, a failed removal is not checked here.
            subprocess.run(['sudo', 'rm', '-r', '--', self.out_dir],
                           check=False)
        os.mkdir(self.out_dir)

        # set files to store running info
        self._stat_files = [
            os.path.join(self.out_dir,
                         self.test_info.id + '.perf-stat-' + str(idx))
            for idx in range(self.test_times)
        ]

        self._src_bin_export = self._get_bin_export_path(
            self.test_info.current_bin_path)
        # speed up the test process: skip the export when the file exists
        if not os.path.isfile(self._src_bin_export):
            ret = ida_bin_export_info(self.test_info.current_bin_path,
                                      self._src_bin_export)
            if ret != 0:
                raise IDAException(ret)

        if _original_cost is None:
            self._original_cycles_cost = self._get_performance(mode=0)
            assert self._original_cycles_cost > 0, "error of perf!"
        else:
            self._original_cycles_cost = _original_cost

        # idx=0 is the original binary's running clock
        self._cycles_list = [self._original_cycles_cost]
        self._reward_list = []
        self._action_list = []

        self._original_state = bin2state(self.test_info.current_bin_path)

        # The current state/cost start out equal to the original ones.
        self._state = self._original_state
        self._cycles_cost = self._original_cycles_cost
        self._similarity = 1.0

        self._reward_records_file = os.path.join(
            self.out_dir, 'reward_records_{}.txt'.format(self.test_info.id))
        self._error_log = os.path.join(
            self.out_dir, 'error_log_{}.txt'.format(self.test_info.id))
        self._error_count = 0

        self._uroboros_meet_error = False
        self._uroboros_error_count = 0
        self._diversify_output_file = os.path.join(self.out_dir,
                                                   'div_all_output.txt')

        log('environment \'%s\' initialized' % self.test_info.id, LogType.INFO)
コード例 #10
0
    def __init__(self,
                 bodies: SystemScope = SystemScope.ALL,
                 start_body: SolarSystemPlanet = None,
                 target_bodies: List[SolarSystemPlanet] = None,
                 start_time: Time = None,
                 action_step: TimeDelta = TimeDelta(1 * u.minute),
                 simulation_step: TimeDelta = TimeDelta(1 * u.second),
                 spaceship_name: SpaceShipName = SpaceShipName.DEFAULT,
                 spaceship_initial_altitude: u.km = 400 * u.km,
                 spaceship_mass: u.kg = None,
                 spaceship_propellant_mass: u.kg = None,
                 spaceship_isp: u.s = None,
                 spaceship_engine_thrust: u.N = None):
        """
        Configure the simulation: bodies to model, timing, spaceship
        parameters and the action/observation spaces.

        :param bodies: which set of bodies to model; must be a key of the
            internal body table (``SystemScope.EARTH`` or ``SystemScope.ALL``)
        :param start_body: defaults to ``Earth`` when ``None``
        :param target_bodies: defaults to ``[Mars]`` when ``None``
        :param start_time: defaults to the current time converted to TDB
            when ``None``
        :param action_step: stored as ``self.time_step``
        :param simulation_step: stored as ``self.simulation_step``
        :param spaceship_name: stored as given
        :param spaceship_initial_altitude: stored as given
        :param spaceship_mass: stored as given
        :param spaceship_propellant_mass: stored as given
        :param spaceship_isp: stored as given
        :param spaceship_engine_thrust: stored as given
        :raises KeyError: if ``bodies`` is not a key of the body table
        """
        super(SolarSystemGrav, self).__init__()

        if start_body is None:
            start_body = Earth
        if target_bodies is None:
            target_bodies = [Mars]
        if start_time is None:
            start_time = Time(datetime.now()).tdb

        # todo: enforce action_step/simulation_step is an integer?

        self.start_body = start_body
        self.target_bodies = target_bodies
        self.spaceship_initial_altitude = spaceship_initial_altitude
        self.start_time = start_time
        self.current_time = None
        self.time_step = action_step
        self.simulation_step = simulation_step
        self.done = False
        self.reward = 0

        self.spaceship_name = spaceship_name
        self.spaceship_mass = spaceship_mass
        self.spaceship_propellant_mass = spaceship_propellant_mass
        self.spaceship_isp = spaceship_isp
        self.spaceship_engine_thrust = spaceship_engine_thrust

        # set up solar system
        # Download & use JPL Ephem
        solar_system_ephemeris.set("jpl")

        # define bodies to model
        # poliastro.bodies.SolarSystemPlanet =
        #   Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn, Uranus, Neptune, Pluto
        # could also add versions for: only inner solar system, only 'major'
        # bodies, Jovian moons, Saturn's moons?
        body_dict = {
            SystemScope.EARTH: [Earth, Moon],
            SystemScope.ALL: [
                Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn,
                Uranus, Neptune, Pluto
            ]
        }

        try:
            self.body_list = body_dict[bodies]
        except KeyError as err:
            # Chain explicitly so the traceback shows this is a deliberate
            # re-raise with a clearer message.
            raise KeyError(f"bodies must be one of {body_dict.keys()}") from err

        # set up spacecraft
        self.spaceship = self._init_spaceship()

        self.current_ephem = None

        # init:
        # * which bodies are modelled
        # * what time it is
        # * what time_step to use
        # * target body
        # * spaceship pos/vel (orbit?) /fuel/thrust

        # init must define action & observation space
        # initialize model solar system
        #
        # Define action and observation space
        # They must be gym.spaces objects

        # observation ~~time~~, time_step, craft position, craft velocity, craft fuel, craft engine power,
        # bodies: position, velocity, mass

        # [time_step, [craft position, velocity, fuel, engine power],
        # [body_1_is_target, body_1_position, body_1_velocity, body_1_mass],
        # ...
        # [body_n_is_target, body_n_position, body_n_velocity, body_n_mass]]
        # The layout above is the planned one; for now the space is a bare
        # placeholder.
        self.observation_space = spaces.Space()

        # action:
        # tuple [[x,y,z], burn duration]
        self.action_space = spaces.Tuple((
            spaces.Box(low=-1.0, high=1.0,
                       shape=(3, )),  # x,y,z direction vector
            spaces.Box(low=0.0, high=1.0,
                       shape=(1, ))  # burn duration as percent of time_step
        ))