def __init__(self):
    """Set up the step counter, spec and spaces for this environment."""
    # Step counter starts at zero.
    self.steps = 0
    self._spec = EnvSpec("NestedDictEnv-v0")
    # Observations use the ``dict_space`` object defined outside this method.
    self.observation_space = dict_space
    # Two-component continuous action, each component bounded to [-1, 1].
    self.action_space = Box(low=-1.0, high=1.0, shape=(2,))
def __init__(self):
    """Attach the spec and the externally defined spaces to this env."""
    self.spec = EnvSpec("StubEnv-v0")
    # Both spaces come from names defined outside this method.
    self.observation_space = obs_space
    self.action_space = action_space
def __init__(self):
    """Set up the step counter, spec and spaces for this environment."""
    # Step counter starts at zero.
    self.steps = 0
    self._spec = EnvSpec("RepeatedSpaceEnv-v0")
    # Observations use the ``REPEATED_SPACE`` constant defined elsewhere.
    self.observation_space = REPEATED_SPACE
    # Two discrete actions.
    self.action_space = spaces.Discrete(2)
def __init__(self):
    """Set up the step counter, spec and spaces for this environment."""
    # Step counter starts at zero.
    self.steps = 0
    self._spec = EnvSpec("NestedDictEnv-v0")
    # Observations use the ``DICT_SPACE`` constant defined elsewhere.
    self.observation_space = DICT_SPACE
    # Two discrete actions.
    self.action_space = spaces.Discrete(2)
def __init__(self):
    """Set up the step counter, spec and spaces for this environment."""
    # Step counter starts at zero.
    self.steps = 0
    self._spec = EnvSpec("NestedTupleEnv-v0")
    # Observations use the ``TUPLE_SPACE`` constant defined elsewhere.
    self.observation_space = TUPLE_SPACE
    # Two discrete actions.
    self.action_space = spaces.Discrete(2)
def __init__(self, instrument, max_quantity=1, quantity_increment=1, obs_type='time', obs_size=1,
             obs_xform=None, episode_steps=None, host='localhost', port=7497, client_id=None,
             timeout_sec=5, afterhours=True, loglevel=logging.INFO):
    """
    :param str,tuple instrument: ticker string or :class:`IBroke` ``(symbol, sec_type, exchange, currency, expiry, strike, opt_type)`` tuple.
    :param int max_quantity: The number of shares/contracts that will be bought (or sold) when the action is 1 (or -1).
    :param int quantity_increment: The minimum increment in which shares/contracts will be bought (or sold).  The actual number for a given
      action is ``round(action * max_quantity / quantity_increment) * quantity_increment``, clipped to the range ``[-max_quantity, max_quantity]``.
    :param str obs_type: ``time`` for bars at regular intervals, or ``tick`` for bars at every quote change.
      Raw observations are numpy float ndarrays with the following fields::

          time, bid, bidsize, ask, asksize, last, lastsize, lasttime,
          open, high, low, close, vwap, volume, open_interest, position, unrealized_gain

      See the :class:`Obs` convenience namedtuple for detailed field descriptions.
    :param float obs_size: How often you get an observation in seconds.  Ignored for ``obs_type='tick'``.
    :param func obs_xform: Callable that takes a raw input observation array and transforms it, returning either another
      numpy array or ``None`` to indicate data is not ready yet.
    :param int,None episode_steps: Number of steps after ``reset()`` to run before returning `done`, or ``None`` to run indefinitely.
      The final step in an episode will have its action forced to close any open positions so PNL can be properly accounted.
    :param host: Passed through to :class:`IBroke` as ``host``.
    :param port: Passed through to :class:`IBroke` as ``port``.
    :param int client_id: A unique integer identifying which API client made an order.  Different instances of Sairen running at the same time must use
      different ``client_id`` values.  In order to discover and modify pre-existing open orders, you must use the same ``client_id`` the orders were created with.
    :param timeout_sec: request timeout in seconds used by IBroke library.
    :param afterhours: If True, operate during normal market and after hours trading; if False, only operate during normal market hours.
    :param int loglevel: The `logging level <https://docs.python.org/3/library/logging.html#logging-levels>`_ to use.
    """
    gym.Env.__init__(self)
    # EzPickle is supposed to (un)pickle this object by saving the args and creating a new one with them.
    # Otherwise the IBroke and maybe the queues aren't serializable.
    # The recorded keyword names must match this constructor's parameter names exactly, because they
    # are replayed as keyword arguments when the object is re-created.
    EzPickle.__init__(self, instrument=instrument, max_quantity=max_quantity,
                      quantity_increment=quantity_increment, obs_type=obs_type, obs_size=obs_size,
                      obs_xform=obs_xform, episode_steps=episode_steps, host=host, port=port,
                      client_id=client_id, timeout_sec=timeout_sec, afterhours=afterhours,
                      loglevel=loglevel)
    self.log = create_logger('sairen', loglevel)

    # Validate and normalize the quantity and episode-length settings.
    self.max_quantity = int(max_quantity)
    self.quantity_increment = int(quantity_increment)
    assert 1 <= self.quantity_increment <= self.max_quantity and self.max_quantity <= MAX_INSTRUMENT_QUANTITY, (
        self.quantity_increment, self.max_quantity)
    self.episode_steps = None if episode_steps is None else int(episode_steps)
    assert self.episode_steps is None or self.episode_steps > 0
    self.afterhours = afterhours
    self.obs_type = obs_type

    # Per-episode / per-step bookkeeping.
    self.data_q = None  # Initialized in _reset
    self.profit = 0.0  # Since last step; zeroed every step
    self.episode_profit = 0.0  # Since last reset
    self.reward = None  # Save most recent reward so we can use it in render()
    self.raw_obs = None  # Raw obs as ndarray
    self.observation = None  # Most recent transformed observation
    self.pos_desired = 0  # Action translated into target number of contracts
    self.done = True  # Start in the "please call reset()" state
    self.step_num = 0  # Count calls to step() since last reset()
    self.unrealized_gain = 0.0
    self._finish_on_next_step = False

    assert obs_xform is None or callable(obs_xform)
    self._xform = (lambda obs: obs) if obs_xform is None else obs_xform  # Default xform is identity

    # Connect to the broker and resolve the instrument.
    self.ib = IBroke(host=host, port=port, client_id=client_id, timeout_sec=timeout_sec, verbose=2)
    self.instrument = self.ib.get_instrument(instrument)
    self.log.info('Sairen %s trading %s up to %d contracts', __version__, self.instrument.tuple(), self.max_quantity)
    market_open = self.market_open()
    # market_hours(...) is indexed with int(market_open) to pick the next close (when open) or
    # the next open (when closed).
    self.log.info('Market {} ({} hours).  Next {} {}'.format(
        'open' if market_open else 'closed',
        'after' if self.afterhours else 'regular',
        'close' if market_open else 'open',
        self.ib.market_hours(self.instrument, self.afterhours)[int(market_open)]))
    self.ib.register(self.instrument, on_bar=self._on_mktdata, bar_type=obs_type, bar_size=obs_size,
                     on_order=self._on_order, on_alert=self._on_alert)

    # A transform may advertise its own observation space; otherwise fall back to the raw bounds.
    self.observation_space = getattr(
        obs_xform, 'observation_space',
        Box(low=np.zeros(len(OBS_BOUNDS)), high=np.array(OBS_BOUNDS)))  # TODO: Some bounds (pos, gain) are negative
    self.log.debug('XFORM %s', self._xform)
    self.log.debug('OBS SPACE %s', self.observation_space)
    np.set_printoptions(linewidth=9999)

    self.pos_actual = self.ib.get_position(self.instrument)  # Actual last reported number of contracts held
    self.act_start_time = None
    self.act_time = deque(maxlen=10)  # Track recent agent action times
    self.spec = EnvSpec(
        'MarketEnv-{}-v0'.format('-'.join(map(str, self.instrument.tuple()))),
        trials=10, max_episode_steps=episode_steps, nondeterministic=True)  # This is a bit of a hack for rllab
class BitFlippingEnv(GoalEnv):
    """
    Simple bit flipping env, useful to test HER.
    The goal is to flip all the bits to get a vector of ones.
    In the continuous variant, if the ith action component has a value > 0,
    then the ith bit will be flipped.

    :param n_bits: Number of bits to flip
    :param continuous: Whether to use the continuous actions version or not,
        by default, it uses the discrete one
    :param max_steps: Max number of steps, by default, equal to n_bits
    :param discrete_obs_space: Whether to use the discrete observation
        version or not, by default, it uses the MultiBinary one
    """

    spec = EnvSpec("BitFlippingEnv-v0")

    def __init__(self,
                 n_bits: int = 10,
                 continuous: bool = False,
                 max_steps: Optional[int] = None,
                 discrete_obs_space: bool = False):
        super(BitFlippingEnv, self).__init__()
        # The achieved goal is determined by the current state;
        # here, it is a special case where they are equal.
        if discrete_obs_space:
            # In the discrete case, the agent acts on the binary
            # representation of the observation.
            # n_bits bits encode the integers 0 .. 2**n_bits - 1, i.e. 2**n_bits
            # distinct values; the all-ones goal is the largest of them.
            self.observation_space = spaces.Dict({
                "observation": spaces.Discrete(2**n_bits),
                "achieved_goal": spaces.Discrete(2**n_bits),
                "desired_goal": spaces.Discrete(2**n_bits),
            })
        else:
            self.observation_space = spaces.Dict({
                "observation": spaces.MultiBinary(n_bits),
                "achieved_goal": spaces.MultiBinary(n_bits),
                "desired_goal": spaces.MultiBinary(n_bits),
            })

        # Space used to sample the internal bit-vector state in reset().
        self.obs_space = spaces.MultiBinary(n_bits)

        if continuous:
            self.action_space = spaces.Box(-1, 1, shape=(n_bits, ), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(n_bits)
        self.continuous = continuous
        self.discrete_obs_space = discrete_obs_space
        self.state = None
        self.desired_goal = np.ones((n_bits, ))
        if max_steps is None:
            max_steps = n_bits
        self.max_steps = max_steps
        self.current_step = 0

    def seed(self, seed: int) -> None:
        """Seed the space used to sample initial states."""
        self.obs_space.seed(seed)

    def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
        """
        Convert to discrete space if needed.

        :param state: Bit vector to convert.
        :return: The integer whose binary representation is ``state``
            (element ``i`` has weight ``2**i``) when using the discrete
            observation space, otherwise ``state`` unchanged.
        """
        if self.discrete_obs_space:
            # The internal state is the binary representation of the
            # observed one. Each bit is cast to a Python int first so the
            # arithmetic does not depend on the array's (possibly narrow) dtype.
            return int(sum(int(bit) * 2**i for i, bit in enumerate(state)))
        return state

    def _get_obs(self) -> Dict[str, Union[int, np.ndarray]]:
        """
        Helper to create the observation.

        :return: Ordered mapping with the ``observation``, ``achieved_goal``
            and ``desired_goal`` entries.
        """
        return OrderedDict([
            ("observation", self.convert_if_needed(self.state.copy())),
            ("achieved_goal", self.convert_if_needed(self.state.copy())),
            ("desired_goal", self.convert_if_needed(self.desired_goal.copy())),
        ])

    def reset(self) -> Dict[str, Union[int, np.ndarray]]:
        """Reset the step counter, sample a new state and return the observation."""
        self.current_step = 0
        self.state = self.obs_space.sample()
        return self._get_obs()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        """
        Flip the selected bit(s) and return ``(obs, reward, done, info)``.

        :param action: Index of the bit to flip (discrete variant), or an
            array whose positive components select the bits to flip
            (continuous variant).
        """
        if self.continuous:
            self.state[action > 0] = 1 - self.state[action > 0]
        else:
            self.state[action] = 1 - self.state[action]
        obs = self._get_obs()
        reward = float(self.compute_reward(obs["achieved_goal"], obs["desired_goal"], None))
        # A zero reward means the goal has been reached.
        done = reward == 0
        self.current_step += 1
        # Episode terminates when we reached the goal or the max number of steps
        info = {"is_success": done}
        done = done or self.current_step >= self.max_steps
        return obs, reward, done, info

    def compute_reward(self,
                       achieved_goal: Union[int, np.ndarray],
                       desired_goal: Union[int, np.ndarray],
                       _info: Optional[Dict[str, Any]]) -> np.float32:
        """
        Sparse reward: 0 when the achieved goal equals the desired goal, -1 otherwise.

        :param achieved_goal: Goal that was reached.
        :param desired_goal: Goal that should have been reached.
        :param _info: Unused.
        :return: The reward, computed along the last axis (vectorized version).
        """
        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return -(distance > 0).astype(np.float32)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """Return a copy of the state for ``rgb_array`` mode, otherwise print it."""
        if mode == "rgb_array":
            return self.state.copy()
        print(self.state)

    def close(self) -> None:
        """Nothing to clean up."""
        pass
class MinuteBarEnv(gym.Env):
    """
    Gym environment that replays price bars for one instrument per episode.

    :param prices: Dict mapping an instrument key to its price data.
    :param bars_count: Passed to the state object as its bar count.
    :param commission: Passed to the state object as the commission setting.
    :param reset_on_close: Passed to the state object.
    :param state_1d: Use ``State1D`` instead of ``State`` for the state object.
    :param random_ofs_on_reset: Pick a random starting offset on ``reset()``
        instead of starting right after the first ``bars_count`` bars.
    :param reward_on_close: Passed to the state object.
    :param volumes: Passed to the state object.
    """

    metadata = {'render.modes': ['human']}
    spec = EnvSpec("StocksEnv-v0")

    def __init__(self, prices, bars_count=AppConfig.DEFAULT_BARS_COUNT,
                 commission=AppConfig.DEFAULT_COMMISSION_PERC,
                 reset_on_close=True, state_1d=False,
                 random_ofs_on_reset=True, reward_on_close=False,
                 volumes=False):
        assert isinstance(prices, dict)
        self._prices = prices
        # Both state classes take the same constructor arguments.
        state_cls = State1D if state_1d else State
        self._state = state_cls(
            bars_count, commission, reset_on_close,
            reward_on_close=reward_on_close, volumes=volumes)
        self.action_space = gym.spaces.Discrete(n=len(AssetActions))
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=self._state.shape, dtype=np.float32)
        self.random_ofs_on_reset = random_ofs_on_reset
        self.seed()

    def reset(self):
        """Select an instrument and its offset, reset the state and return the encoded observation."""
        self._instrument = self.np_random.choice(
            list(self._prices.keys()))
        prices = self._prices[self._instrument]
        bars = self._state.bars_count
        if self.random_ofs_on_reset:
            offset = self.np_random.choice(
                prices.high.shape[0] - bars * 10) + bars
        else:
            offset = bars
        self._state.reset(prices, offset)
        return self._state.encode()

    def step(self, action_idx):
        """
        Apply the action with index ``action_idx`` and return ``(obs, reward, done, info)``.

        ``info`` carries the current instrument key and the state's offset.
        """
        action = AssetActions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {
            "instrument": self._instrument,
            "offset": self._state._offset
        }
        return obs, reward, done, info

    def render(self, mode='human', obs=None, reward=0.0, info=None, close=False):
        """Print the given observation; the other arguments are accepted but unused."""
        # The message text is runtime output and is intentionally left as-is.
        print('打印信息: {0};'.format(obs))

    def close(self):
        """Nothing to clean up."""
        pass

    def seed(self, seed=None):
        """Create the environment's random generator and return the two derived seeds."""
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
        return [seed1, seed2]

    @classmethod
    def from_dir(cls, data_dir, **kwargs):
        """
        Build an environment from every price file found under ``data_dir``.

        :param data_dir: Directory handed to ``BarData.price_files``.
        :param kwargs: Forwarded to the constructor.
        :return: A new instance of ``cls``, so subclasses get their own type.
        """
        prices = {
            file: BarData.load_relative(file)
            for file in BarData.price_files(data_dir)
        }
        return cls(prices, **kwargs)
class BitFlippingEnv(GoalEnv):
    """
    Simple bit flipping env, useful to test HER.
    The goal is to flip all the bits to get a vector of ones.
    In the continuous variant, if the ith action component has a value > 0,
    then the ith bit will be flipped.

    :param n_bits: Number of bits to flip
    :param continuous: Whether to use the continuous actions version or not,
        by default, it uses the discrete one
    :param max_steps: Max number of steps, by default, equal to n_bits
    :param discrete_obs_space: Whether to use the discrete observation
        version or not, by default, it uses the ``MultiBinary`` one
    :param image_obs_space: Use image as input instead of the ``MultiBinary`` one.
    :param channel_first: Whether to use channel-first or last image.
    """

    spec = EnvSpec("BitFlippingEnv-v0")

    def __init__(
        self,
        n_bits: int = 10,
        continuous: bool = False,
        max_steps: Optional[int] = None,
        discrete_obs_space: bool = False,
        image_obs_space: bool = False,
        channel_first: bool = True,
    ):
        super(BitFlippingEnv, self).__init__()
        # Shape of the observation when using image space
        self.image_shape = (1, 36, 36) if channel_first else (36, 36, 1)
        # The achieved goal is determined by the current state;
        # here, it is a special case where they are equal.
        if discrete_obs_space:
            # In the discrete case, the agent acts on the binary
            # representation of the observation
            self.observation_space = spaces.Dict({
                "observation": spaces.Discrete(2**n_bits),
                "achieved_goal": spaces.Discrete(2**n_bits),
                "desired_goal": spaces.Discrete(2**n_bits),
            })
        elif image_obs_space:
            # When using image as input,
            # one image contains the bits 0 -> 0, 1 -> 255
            # and the rest is filled with zeros
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
                "achieved_goal": spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
                "desired_goal": spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
            })
        else:
            self.observation_space = spaces.Dict({
                "observation": spaces.MultiBinary(n_bits),
                "achieved_goal": spaces.MultiBinary(n_bits),
                "desired_goal": spaces.MultiBinary(n_bits),
            })

        # Space used to sample the internal bit-vector state in reset().
        self.obs_space = spaces.MultiBinary(n_bits)

        if continuous:
            self.action_space = spaces.Box(-1, 1, shape=(n_bits, ), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(n_bits)
        self.continuous = continuous
        self.discrete_obs_space = discrete_obs_space
        self.image_obs_space = image_obs_space
        self.state = None
        self.desired_goal = np.ones((n_bits, ))
        if max_steps is None:
            max_steps = n_bits
        self.max_steps = max_steps
        self.current_step = 0

    def seed(self, seed: int) -> None:
        """Seed the space used to sample initial states."""
        self.obs_space.seed(seed)

    def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
        """
        Convert to discrete space if needed.

        :param state: Bit vector to convert.
        :return: An integer (discrete observation space), a ``uint8`` image of
            shape ``self.image_shape`` (image observation space), or ``state``
            unchanged.
        """
        if self.discrete_obs_space:
            # The internal state is the binary representation of the
            # observed one. Each bit is cast to a Python int first so the
            # arithmetic does not depend on the array's (possibly narrow) dtype.
            return int(sum(int(bit) * 2**i for i, bit in enumerate(state)))

        if self.image_obs_space:
            size = np.prod(self.image_shape)
            # Cast to uint8 before scaling so 255 is representable whatever the
            # dtype of ``state`` is (a small signed integer dtype cannot hold it).
            image = np.concatenate(
                (state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8)))
            return image.reshape(self.image_shape).astype(np.uint8)
        return state

    def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int) -> np.ndarray:
        """
        Convert to bit vector if needed.

        :param state: Integer, image or bit-vector representation.
        :param batch_size: Size of the leading dimension of the result.
        :return: Array reshaped to ``(batch_size, -1)``.
        """
        # Convert back to bit vector
        if isinstance(state, int):
            state = np.array(state).reshape(batch_size, -1)
            # Convert to binary representation: test each bit position of the integer.
            state = (((state[:, :] & (1 << np.arange(len(self.state))))) > 0).astype(int)
        elif self.image_obs_space:
            # Only the first len(self.state) pixels carry bits; undo the 0/255 scaling.
            state = state.reshape(batch_size, -1)[:, :len(self.state)] / 255
        else:
            state = np.array(state).reshape(batch_size, -1)

        return state

    def _get_obs(self) -> Dict[str, Union[int, np.ndarray]]:
        """
        Helper to create the observation.

        :return: The current observation.
        """
        return OrderedDict([
            ("observation", self.convert_if_needed(self.state.copy())),
            ("achieved_goal", self.convert_if_needed(self.state.copy())),
            ("desired_goal", self.convert_if_needed(self.desired_goal.copy())),
        ])

    def reset(self) -> Dict[str, Union[int, np.ndarray]]:
        """Reset the step counter, sample a new state and return the observation."""
        self.current_step = 0
        self.state = self.obs_space.sample()
        return self._get_obs()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        """
        Flip the selected bit(s) and return ``(obs, reward, done, info)``.

        :param action: Index of the bit to flip (discrete variant), or an
            array whose positive components select the bits to flip
            (continuous variant).
        """
        if self.continuous:
            self.state[action > 0] = 1 - self.state[action > 0]
        else:
            self.state[action] = 1 - self.state[action]
        obs = self._get_obs()
        reward = float(self.compute_reward(obs["achieved_goal"], obs["desired_goal"], None))
        # A zero reward means the goal has been reached.
        done = reward == 0
        self.current_step += 1
        # Episode terminates when we reached the goal or the max number of steps
        info = {"is_success": done}
        done = done or self.current_step >= self.max_steps
        return obs, reward, done, info

    def compute_reward(self,
                       achieved_goal: Union[int, np.ndarray],
                       desired_goal: Union[int, np.ndarray],
                       _info: Optional[Dict[str, Any]]) -> np.float32:
        """
        Sparse reward: 0 when the achieved goal equals the desired goal, -1 otherwise.

        :param achieved_goal: Goal that was reached.
        :param desired_goal: Goal that should have been reached.
        :param _info: Unused.
        :return: One reward per batch entry.
        """
        # As we are using a vectorized version, we need to keep track of the `batch_size`
        if isinstance(achieved_goal, int):
            batch_size = 1
        elif self.image_obs_space:
            # A single image has 3 dimensions; a batch of images has more.
            batch_size = achieved_goal.shape[0] if len(achieved_goal.shape) > 3 else 1
        else:
            batch_size = achieved_goal.shape[0] if len(achieved_goal.shape) > 1 else 1

        desired_goal = self.convert_to_bit_vector(desired_goal, batch_size)
        achieved_goal = self.convert_to_bit_vector(achieved_goal, batch_size)

        # Here we are using a vectorized version
        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return -(distance > 0).astype(np.float32)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """Return a copy of the state for ``rgb_array`` mode, otherwise print it."""
        if mode == "rgb_array":
            return self.state.copy()
        print(self.state)

    def close(self) -> None:
        """Nothing to clean up."""
        pass