Example #1
File: test_sac.py Project: tchordia/ray
 def __init__(self):
     self.action_space = Box(low=-1.0, high=1.0, shape=(2, ))
     self.observation_space = dict_space
     self._spec = EnvSpec("NestedDictEnv-v0")
     self.steps = 0
Example #2
 def __init__(self):
     self.action_space = action_space
     self.observation_space = obs_space
     self.spec = EnvSpec("StubEnv-v0")
Example #3
 def __init__(self):
     self.action_space = spaces.Discrete(2)
     self.observation_space = REPEATED_SPACE
     self._spec = EnvSpec("RepeatedSpaceEnv-v0")
     self.steps = 0
Example #4
 def __init__(self):
     self.action_space = spaces.Discrete(2)
     self.observation_space = DICT_SPACE
     self._spec = EnvSpec("NestedDictEnv-v0")
     self.steps = 0
Example #5
 def __init__(self):
     self.action_space = spaces.Discrete(2)
     self.observation_space = TUPLE_SPACE
     self._spec = EnvSpec("NestedTupleEnv-v0")
     self.steps = 0
Example #6
    def __init__(self,
                 instrument,
                 max_quantity=1,
                 quantity_increment=1,
                 obs_type='time',
                 obs_size=1,
                 obs_xform=None,
                 episode_steps=None,
                 host='localhost',
                 port=7497,
                 client_id=None,
                 timeout_sec=5,
                 afterhours=True,
                 loglevel=logging.INFO):
        """
        :param str,tuple instrument: ticker string or :class:`IBroke` ``(symbol, sec_type, exchange, currency, expiry, strike, opt_type)`` tuple.
        :param int max_quantity: The number of shares/contracts that will be bought (or sold) when the action is 1 (or -1).
        :param int quantity_increment: The minimum increment in which shares/contracts will be bought (or sold).  The actual number for a given
          action is ``round(action * max_quantity / quantity_increment) * quantity_increment``, clipped to the range ``[-max_quantity, max_quantity]``.
        :param str obs_type: ``time`` for bars at regular intervals, or ``tick`` for bars at every quote change.
          Raw observations are numpy float ndarrays with the following fields::

                time, bid, bidsize, ask, asksize, last, lastsize, lasttime,
                open, high, low, close, vwap, volume, open_interest, position, unrealized_gain

          See the :class:`Obs` convenience namedtuple for detailed field descriptions.
        :param float obs_size: Seconds between observations.  Ignored for ``obs_type='tick'``.
        :param func obs_xform: Callable that takes a raw input observation array and transforms it,
          returning either another numpy array or ``None`` to indicate data is not ready yet.
        :param int,None episode_steps: Number of steps after ``reset()`` to run before returning `done`, or ``None`` to run indefinitely.
          The final step in an episode will have its action forced to close any open positions so PNL can be properly accounted.
        :param str host: Hostname of the API gateway to connect to, passed through to :class:`IBroke`.
        :param int port: Port the API gateway listens on, passed through to :class:`IBroke`.
        :param int client_id: A unique integer identifying which API client made an order.  Different instances of Sairen running at the same time must use
          different ``client_id`` values.  In order to discover and modify pre-existing open orders, you must use the same ``client_id`` the orders were created with.
        :param timeout_sec: Request timeout in seconds used by the IBroke library.
        :param afterhours: If True, operate during normal market and after hours trading; if False, only operate during normal market hours.
        :param int loglevel: The `logging level <https://docs.python.org/3/library/logging.html#logging-levels>`_ to use.
        """
        # EzPickle is supposed to (un)pickle this object by saving the args and creating a new
        # instance with them; otherwise the IBroke and maybe the queues aren't serializable.
        gym.Env.__init__(self)
        EzPickle.__init__(self,
                          instrument=instrument,
                          max_quantity=max_quantity,
                          quantity_increment=quantity_increment,
                          obs_type=obs_type,
                          obs_size=obs_size,
                          obs_xform=obs_xform,
                          episode_steps=episode_steps,
                          host=host,
                          port=port,
                          client_id=client_id,
                          timeout_sec=timeout_sec,
                          afterhours=afterhours,
                          loglevel=loglevel)
        self.log = create_logger('sairen', loglevel)
        self.max_quantity = int(max_quantity)
        self.quantity_increment = int(quantity_increment)
        assert 1 <= self.quantity_increment <= self.max_quantity <= MAX_INSTRUMENT_QUANTITY, (
            self.quantity_increment, self.max_quantity)
        self.episode_steps = None if episode_steps is None else int(
            episode_steps)
        assert self.episode_steps is None or self.episode_steps > 0
        self.afterhours = afterhours
        self.obs_type = obs_type
        self.data_q = None  # Initialized in _reset
        self.profit = 0.0  # Since last step; zeroed every step
        self.episode_profit = 0.0  # Since last reset
        self.reward = None  # Save most recent reward so we can use it in render()
        self.raw_obs = None  # Raw obs as ndarray
        self.observation = None  # Most recent transformed observation
        self.pos_desired = 0  # Action translated into target number of contracts
        self.done = True  # Start in the "please call reset()" state
        self.step_num = 0  # Count calls to step() since last reset()
        self.unrealized_gain = 0.0
        self._finish_on_next_step = False
        assert obs_xform is None or callable(obs_xform)
        self._xform = (
            lambda obs: obs
        ) if obs_xform is None else obs_xform  # Default xform is identity

        self.ib = IBroke(host=host,
                         port=port,
                         client_id=client_id,
                         timeout_sec=timeout_sec,
                         verbose=2)
        self.instrument = self.ib.get_instrument(instrument)
        self.log.info('Sairen %s trading %s up to %d contracts', __version__,
                      self.instrument.tuple(), self.max_quantity)
        market_open = self.market_open()
        self.log.info('Market {} ({} hours).  Next {} {}'.format(
            'open' if market_open else 'closed',
            'after' if self.afterhours else 'regular',
            'close' if market_open else 'open',
            self.ib.market_hours(self.instrument,
                                 self.afterhours)[int(market_open)]))
        self.ib.register(self.instrument,
                         on_bar=self._on_mktdata,
                         bar_type=obs_type,
                         bar_size=obs_size,
                         on_order=self._on_order,
                         on_alert=self._on_alert)
        self.observation_space = getattr(
            obs_xform, 'observation_space',
            Box(low=np.zeros(len(OBS_BOUNDS)), high=np.array(
                OBS_BOUNDS)))  # TODO: Some bounds (pos, gain) are negative
        self.log.debug('XFORM %s', self._xform)
        self.log.debug('OBS SPACE %s', self.observation_space)
        np.set_printoptions(linewidth=9999)
        self.pos_actual = self.ib.get_position(
            self.instrument)  # Actual last reported number of contracts held
        self.act_start_time = None
        self.act_time = deque(maxlen=10)  # Track recent agent action times
        self.spec = EnvSpec(
            'MarketEnv-{}-v0'.format('-'.join(map(str,
                                                  self.instrument.tuple()))),
            trials=10,
            max_episode_steps=episode_steps,
            nondeterministic=True)  # This is a bit of a hack for rllab
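
The order-sizing formula in the MarketEnv docstring above is easier to follow with concrete numbers. The sketch below is illustrative only; target_quantity is a hypothetical helper written for this page, not part of Sairen.

import numpy as np

def target_quantity(action, max_quantity, quantity_increment):
    # Mirrors the docstring formula: round(action * max_quantity /
    # quantity_increment) * quantity_increment, clipped to
    # [-max_quantity, max_quantity].
    qty = round(action * max_quantity / quantity_increment) * quantity_increment
    return int(np.clip(qty, -max_quantity, max_quantity))

# action 0.3 with max_quantity=10, quantity_increment=2: round(1.5) * 2 == 4
assert target_quantity(0.3, 10, 2) == 4
# Extreme actions are clipped to the position limit
assert target_quantity(-1.0, 10, 2) == -10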
Example #7
class BitFlippingEnv(GoalEnv):
    """
    Simple bit flipping env, useful to test HER.
    The goal is to flip all the bits to get a vector of ones.
    In the continuous variant, if the ith action component has a value > 0,
    then the ith bit will be flipped.

    :param n_bits: Number of bits to flip
    :param continuous: Whether to use the continuous action version;
        by default, the discrete one is used
    :param max_steps: Max number of steps, by default equal to n_bits
    :param discrete_obs_space: Whether to use the discrete observation
        version; by default, the MultiBinary one is used
    """

    spec = EnvSpec("BitFlippingEnv-v0")

    def __init__(self,
                 n_bits: int = 10,
                 continuous: bool = False,
                 max_steps: Optional[int] = None,
                 discrete_obs_space: bool = False):
        super(BitFlippingEnv, self).__init__()
        # The achieved goal is determined by the current state
        # here, it is a special case where they are equal
        if discrete_obs_space:
            # In the discrete case, the agent acts on the binary
            # representation of the observation (all-ones encodes to
            # 2**n_bits - 1, so the space needs 2**n_bits values)
            self.observation_space = spaces.Dict({
                "observation":
                spaces.Discrete(2**n_bits),
                "achieved_goal":
                spaces.Discrete(2**n_bits),
                "desired_goal":
                spaces.Discrete(2**n_bits),
            })
        else:
            self.observation_space = spaces.Dict({
                "observation":
                spaces.MultiBinary(n_bits),
                "achieved_goal":
                spaces.MultiBinary(n_bits),
                "desired_goal":
                spaces.MultiBinary(n_bits),
            })

        self.obs_space = spaces.MultiBinary(n_bits)

        if continuous:
            self.action_space = spaces.Box(-1,
                                           1,
                                           shape=(n_bits, ),
                                           dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(n_bits)
        self.continuous = continuous
        self.discrete_obs_space = discrete_obs_space
        self.state = None
        self.desired_goal = np.ones((n_bits, ))
        if max_steps is None:
            max_steps = n_bits
        self.max_steps = max_steps
        self.current_step = 0

    def seed(self, seed: int) -> None:
        self.obs_space.seed(seed)

    def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
        """
        Convert to discrete space if needed.

        :param state: The state as a bit vector.
        :return: The state, encoded as an int if the observation space is discrete.
        """
        if self.discrete_obs_space:
            # The internal state is the binary representation of the
            # observed one
            return int(sum([state[i] * 2**i for i in range(len(state))]))
        return state

    def _get_obs(self) -> Dict[str, Union[int, np.ndarray]]:
        """
        Helper to create the observation.

        :return: The current observation.
        """
        return OrderedDict([
            ("observation", self.convert_if_needed(self.state.copy())),
            ("achieved_goal", self.convert_if_needed(self.state.copy())),
            ("desired_goal", self.convert_if_needed(self.desired_goal.copy())),
        ])

    def reset(self) -> Dict[str, Union[int, np.ndarray]]:
        self.current_step = 0
        self.state = self.obs_space.sample()
        return self._get_obs()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        if self.continuous:
            self.state[action > 0] = 1 - self.state[action > 0]
        else:
            self.state[action] = 1 - self.state[action]
        obs = self._get_obs()
        reward = float(
            self.compute_reward(obs["achieved_goal"], obs["desired_goal"],
                                None))
        done = reward == 0
        self.current_step += 1
        # The episode terminates when we reach the goal or the max number of steps
        info = {"is_success": done}
        done = done or self.current_step >= self.max_steps
        return obs, reward, done, info

    def compute_reward(self, achieved_goal: Union[int, np.ndarray],
                       desired_goal: Union[int, np.ndarray],
                       _info: Optional[Dict[str, Any]]) -> np.float32:
        # Deceptive reward: it is 0 only when the goal is achieved, -1 otherwise
        # (vectorized version)
        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return -(distance > 0).astype(np.float32)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        if mode == "rgb_array":
            return self.state.copy()
        print(self.state)

    def close(self) -> None:
        pass
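
A minimal usage sketch for the BitFlippingEnv above, assuming the class and its imports are in scope; a random agent flips bits until it succeeds or hits the step limit.

env = BitFlippingEnv(n_bits=4)
env.seed(0)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # index of the bit to flip
    obs, reward, done, info = env.step(action)
# reward is 0.0 on success and -1.0 otherwise (see compute_reward)
print(info["is_success"], reward)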
Example #8
class MinuteBarEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    spec = EnvSpec("StocksEnv-v0")

    def __init__(self, prices, bars_count=AppConfig.DEFAULT_BARS_COUNT,
                 commission=AppConfig.DEFAULT_COMMISSION_PERC,
                 reset_on_close=True, state_1d=False,
                 random_ofs_on_reset=True, reward_on_close=False,
                 volumes=False):
        assert isinstance(prices, dict)
        self._prices = prices
        if state_1d:
            self._state = State1D(
                bars_count, commission, reset_on_close,
                reward_on_close=reward_on_close, volumes=volumes)
        else:
            self._state = State(
                bars_count, commission, reset_on_close,
                reward_on_close=reward_on_close, volumes=volumes)
        self.action_space = gym.spaces.Discrete(n=len(AssetActions))
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=self._state.shape, dtype=np.float32)
        self.random_ofs_on_reset = random_ofs_on_reset
        self.seed()

    def reset(self):
        # Select an instrument and its offset, then reset the state
        self._instrument = self.np_random.choice(
            list(self._prices.keys()))
        prices = self._prices[self._instrument]
        bars = self._state.bars_count
        if self.random_ofs_on_reset:
            offset = self.np_random.choice(
                prices.high.shape[0]-bars*10) + bars
        else:
            offset = bars
        self._state.reset(prices, offset)
        return self._state.encode()

    def step(self, action_idx):
        action = AssetActions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {
            "instrument": self._instrument,
            "offset": self._state._offset
        }
        return obs, reward, done, info

    def render(self, mode='human', obs=None, reward=0.0, info=None, close=False):
        print('Render info: {0};'.format(obs))

    def close(self):
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
        return [seed1, seed2]

    @classmethod
    def from_dir(cls, data_dir, **kwargs):
        prices = {
            file: BarData.load_relative(file)
            for file in BarData.price_files(data_dir)
        }
        return cls(prices, **kwargs)
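
A hypothetical driver for the MinuteBarEnv above; the "data/" directory and the keyword values are illustrative, not taken from the original project.

env = MinuteBarEnv.from_dir("data/", bars_count=10, volumes=True)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print(info["instrument"], reward)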
Example #9
class BitFlippingEnv(GoalEnv):
    """
    Simple bit flipping env, useful to test HER.
    The goal is to flip all the bits to get a vector of ones.
    In the continuous variant, if the ith action component has a value > 0,
    then the ith bit will be flipped.

    :param n_bits: Number of bits to flip
    :param continuous: Whether to use the continuous action version;
        by default, the discrete one is used
    :param max_steps: Max number of steps, by default equal to n_bits
    :param discrete_obs_space: Whether to use the discrete observation
        version; by default, the ``MultiBinary`` one is used
    :param image_obs_space: Whether to use an image observation instead of the ``MultiBinary`` one.
    :param channel_first: Whether to use channel-first or channel-last image observations.
    """

    spec = EnvSpec("BitFlippingEnv-v0")

    def __init__(
        self,
        n_bits: int = 10,
        continuous: bool = False,
        max_steps: Optional[int] = None,
        discrete_obs_space: bool = False,
        image_obs_space: bool = False,
        channel_first: bool = True,
    ):
        super(BitFlippingEnv, self).__init__()
        # Shape of the observation when using image space
        self.image_shape = (1, 36, 36) if channel_first else (36, 36, 1)
        # The achieved goal is determined by the current state
        # here, it is a special case where they are equal
        if discrete_obs_space:
            # In the discrete case, the agent acts on the binary
            # representation of the observation
            self.observation_space = spaces.Dict({
                "observation":
                spaces.Discrete(2**n_bits),
                "achieved_goal":
                spaces.Discrete(2**n_bits),
                "desired_goal":
                spaces.Discrete(2**n_bits),
            })
        elif image_obs_space:
            # When using an image as input, one image encodes the bits
            # (0 -> 0, 1 -> 255) and the remaining pixels are zero-filled
            self.observation_space = spaces.Dict({
                "observation":
                spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
                "achieved_goal":
                spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
                "desired_goal":
                spaces.Box(
                    low=0,
                    high=255,
                    shape=self.image_shape,
                    dtype=np.uint8,
                ),
            })
        else:
            self.observation_space = spaces.Dict({
                "observation":
                spaces.MultiBinary(n_bits),
                "achieved_goal":
                spaces.MultiBinary(n_bits),
                "desired_goal":
                spaces.MultiBinary(n_bits),
            })

        self.obs_space = spaces.MultiBinary(n_bits)

        if continuous:
            self.action_space = spaces.Box(-1,
                                           1,
                                           shape=(n_bits, ),
                                           dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(n_bits)
        self.continuous = continuous
        self.discrete_obs_space = discrete_obs_space
        self.image_obs_space = image_obs_space
        self.state = None
        self.desired_goal = np.ones((n_bits, ))
        if max_steps is None:
            max_steps = n_bits
        self.max_steps = max_steps
        self.current_step = 0

    def seed(self, seed: int) -> None:
        self.obs_space.seed(seed)

    def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
        """
        Convert to discrete or image space if needed.

        :param state: The state as a bit vector.
        :return: The state, encoded as an int (discrete case) or an image (image case).
        """
        if self.discrete_obs_space:
            # The internal state is the binary representation of the
            # observed one
            return int(sum([state[i] * 2**i for i in range(len(state))]))

        if self.image_obs_space:
            size = np.prod(self.image_shape)
            image = np.concatenate(
                (state * 255, np.zeros(size - len(state), dtype=np.uint8)))
            return image.reshape(self.image_shape).astype(np.uint8)
        return state

    def convert_to_bit_vector(self, state: Union[int, np.ndarray],
                              batch_size: int) -> np.ndarray:
        """
        Convert to bit vector if needed.

        :param state: The state, possibly encoded as an int or an image.
        :param batch_size: Number of states in the batch.
        :return: The state as a ``(batch_size, n_bits)`` bit vector.
        """
        # Convert back to bit vector
        if isinstance(state, int):
            state = np.array(state).reshape(batch_size, -1)
            # Convert to binary representation
            state = (((state[:, :] &
                       (1 << np.arange(len(self.state))))) > 0).astype(int)
        elif self.image_obs_space:
            state = state.reshape(batch_size, -1)[:, :len(self.state)] / 255
        else:
            state = np.array(state).reshape(batch_size, -1)

        return state

    def _get_obs(self) -> Dict[str, Union[int, np.ndarray]]:
        """
        Helper to create the observation.

        :return: The current observation.
        """
        return OrderedDict([
            ("observation", self.convert_if_needed(self.state.copy())),
            ("achieved_goal", self.convert_if_needed(self.state.copy())),
            ("desired_goal", self.convert_if_needed(self.desired_goal.copy())),
        ])

    def reset(self) -> Dict[str, Union[int, np.ndarray]]:
        self.current_step = 0
        self.state = self.obs_space.sample()
        return self._get_obs()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        if self.continuous:
            self.state[action > 0] = 1 - self.state[action > 0]
        else:
            self.state[action] = 1 - self.state[action]
        obs = self._get_obs()
        reward = float(
            self.compute_reward(obs["achieved_goal"], obs["desired_goal"],
                                None))
        done = reward == 0
        self.current_step += 1
        # The episode terminates when we reach the goal or the max number of steps
        info = {"is_success": done}
        done = done or self.current_step >= self.max_steps
        return obs, reward, done, info

    def compute_reward(self, achieved_goal: Union[int, np.ndarray],
                       desired_goal: Union[int, np.ndarray],
                       _info: Optional[Dict[str, Any]]) -> np.float32:
        # As we are using a vectorized version, we need to keep track of the `batch_size`
        if isinstance(achieved_goal, int):
            batch_size = 1
        elif self.image_obs_space:
            batch_size = achieved_goal.shape[0] if len(
                achieved_goal.shape) > 3 else 1
        else:
            batch_size = achieved_goal.shape[0] if len(
                achieved_goal.shape) > 1 else 1

        desired_goal = self.convert_to_bit_vector(desired_goal, batch_size)
        achieved_goal = self.convert_to_bit_vector(achieved_goal, batch_size)

        # Deceptive reward: it is 0 only when the goal is achieved, -1 otherwise
        # Here we are using a vectorized version
        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return -(distance > 0).astype(np.float32)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        if mode == "rgb_array":
            return self.state.copy()
        print(self.state)

    def close(self) -> None:
        pass
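
Finally, a self-contained sketch of the integer encoding that convert_if_needed and convert_to_bit_vector implement above, to make the round trip concrete.

import numpy as np

state = np.array([1, 0, 1, 0])
# Little-endian encoding used by convert_if_needed: sum of state[i] * 2**i
encoded = int(sum(state[i] * 2**i for i in range(len(state))))
assert encoded == 5  # 1*2**0 + 1*2**2

# Decoding mirrors convert_to_bit_vector for a single int input
decoded = ((encoded & (1 << np.arange(len(state)))) > 0).astype(int)
assert (decoded == state).all()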