def test_gaussian_noise_sample_at(t, times):
    """sample_at yields one value per increment: when `times` starts at 0
    the leading point carries no increment, so one fewer value comes back."""
    noise = GaussianNoise(t)
    sampled = noise.sample_at(times)
    expected_length = len(times) - 1 if times[0] == 0 else len(times)
    assert len(sampled) == expected_length
def __init__(
    self,
    state_n,
    action_n,
    alpha_actor,
    alpha_critic,
    episodes,
    steps_per_episode,
    buffer_size,
    train_begin,
    batch_size,
    gamma,
    tau,
    epochs,
    episodes_to_print,
    episodes_to_save,
    path,
    reward_path,
    load_path_critic,
    load_path_actor,
    load_models,
    action_range,
):
    """Set up actor/critic networks with target copies, the replay buffer,
    the exploration noise source, and all training/reporting settings."""
    # Online networks; optionally warm-started from checkpoints on disk.
    self.critic_model = CriticModel(alpha_critic, tau, state_n, action_n)
    self.actor_model = ActorModel(alpha_actor, tau, state_n, action_n, action_range)
    if load_models:
        self.actor_model.load_state_dict(torch.load(load_path_actor))
        self.critic_model.load_state_dict(torch.load(load_path_critic))

    # Target networks begin as exact copies of the (possibly loaded) online ones.
    self.critic_model_target = CriticModel(alpha_critic, tau, state_n, action_n)
    self.critic_model_target.load_state_dict(self.critic_model.state_dict())
    self.actor_model_target = ActorModel(alpha_actor, tau, state_n, action_n, action_range)
    self.actor_model_target.load_state_dict(self.actor_model.state_dict())

    # Training hyper-parameters.
    self.episodes = episodes
    self.steps_per_episode = steps_per_episode
    self.train_begin = train_begin
    self.batch_size = batch_size
    self.gamma = gamma
    self.epochs = epochs
    self.best_reward = -10e5

    # Reporting / checkpointing configuration.
    self.episodes_to_print = episodes_to_print
    self.episodes_to_save = episodes_to_save
    self.path = path
    self.reward_path = reward_path

    # Experience replay storage.
    self.replay_buffer = ReplayBuffer(buffer_size)
    # Exploration noise source (default parameters).
    self.noise = GaussianNoise()
def fbm(base_price: int = 1,
        base_volume: int = 1,
        start_date: str = '2010-01-01',
        start_date_format: str = '%Y-%m-%d',
        times_to_generate: int = 1000,
        hurst: float = 0.61,
        time_frame: str = '1h') -> 'pd.DataFrame':
    """Generates price data from the FBM process.

    Parameters
    ----------
    base_price : int, default 1
        The base price to use for price generation.
    base_volume : int, default 1
        The base volume to use for volume generation.
    start_date : str, default '2010-01-01'
        The start date of the generated data
    start_date_format : str, default '%Y-%m-%d'
        The format for the start date of the generated data.
    times_to_generate : int, default 1000
        The number of bars to make.
    hurst : float, default 0.61
        The hurst parameter for the FBM process.
    time_frame : str, default '1h'
        The time frame.

    Returns
    -------
    `pd.DataFrame`
        The generated data frame containing the OHLCV bars.

    References
    ----------
    [1] https://en.wikipedia.org/wiki/Fractional_Brownian_motion
    """
    times_to_generate = scale_times_to_generate(times_to_generate, time_frame)

    price_fbm = FractionalBrownianMotion(t=times_to_generate, hurst=hurst)
    # sample(n) presumably includes the starting point, hence the -1 to get
    # exactly `times_to_generate` values — TODO confirm against the library.
    price_volatility = price_fbm.sample(times_to_generate - 1)
    prices = price_volatility + base_price

    volume_gen = GaussianNoise(times_to_generate)
    volume_volatility = volume_gen.sample(times_to_generate)
    # Volume co-moves with price volatility around the base volume.
    volumes = volume_volatility * price_volatility + base_volume

    start_date = pd.to_datetime(start_date, format=start_date_format)

    price_frame = pd.DataFrame([], columns=['date', 'price'], dtype=float)
    volume_frame = pd.DataFrame([], columns=['date', 'volume'], dtype=float)

    price_frame['date'] = pd.date_range(start=start_date, periods=times_to_generate, freq="1min")
    price_frame['price'] = abs(prices)

    volume_frame['date'] = price_frame['date'].copy()
    volume_frame['volume'] = abs(volumes)

    # BUG FIX: the previous `price_frame.set_index('date')` calls were no-ops
    # (pandas `set_index` returns a new frame unless inplace=True; the result
    # was discarded). The effective index has always been the positional
    # RangeIndex interpreted as minute offsets from `start_date`, which the
    # conversion below builds directly.
    price_frame.index = pd.to_datetime(price_frame.index, unit='m', origin=start_date)
    volume_frame.index = pd.to_datetime(volume_frame.index, unit='m', origin=start_date)

    # Resample minute-level prices into OHLC bars and sum volume per bar.
    data_frame = price_frame['price'].resample(time_frame).ohlc()
    data_frame['volume'] = volume_frame['volume'].resample(time_frame).sum()

    return data_frame
def merton(base_price: int = 1,
           base_volume: int = 1,
           start_date: str = '2010-01-01',
           start_date_format: str = '%Y-%m-%d',
           times_to_generate: int = 1000,
           time_frame: str = '1h',
           params: 'ModelParameters' = None) -> 'pd.DataFrame':
    """Generates price data from the Merton Jump Diffusion model.

    Parameters
    ----------
    base_price : int, default 1
        The base price to use for price generation.
    base_volume : int, default 1
        The base volume to use for volume generation.
    start_date : str, default '2010-01-01'
        The start date of the generated data
    start_date_format : str, default '%Y-%m-%d'
        The format for the start date of the generated data.
    times_to_generate : int, default 1000
        The number of bars to make.
    time_frame : str, default '1h'
        The time frame.
    params : `ModelParameters`, optional
        The model parameters.

    Returns
    -------
    `pd.DataFrame`
        The generated data frame containing the OHLCV bars.
    """
    delta = get_delta(time_frame)
    times_to_generate = scale_times_to_generate(times_to_generate, time_frame)

    # Fall back to default model parameters when none are supplied.
    params = params or default(base_price, times_to_generate, delta)

    prices = geometric_brownian_motion_jump_diffusion_levels(params)

    volume_gen = GaussianNoise(t=times_to_generate)
    volumes = volume_gen.sample(times_to_generate) + base_volume

    start_date = pd.to_datetime(start_date, format=start_date_format)

    price_frame = pd.DataFrame([], columns=['date', 'price'], dtype=float)
    volume_frame = pd.DataFrame([], columns=['date', 'volume'], dtype=float)

    price_frame['date'] = pd.date_range(start=start_date, periods=times_to_generate, freq="1min")
    price_frame['price'] = abs(prices)

    volume_frame['date'] = price_frame['date'].copy()
    volume_frame['volume'] = abs(volumes)

    # BUG FIX: the previous `set_index('date')` calls were no-ops (pandas
    # `set_index` returns a new frame unless inplace=True; the result was
    # discarded). The effective index has always been the positional
    # RangeIndex interpreted as minute offsets from `start_date`.
    price_frame.index = pd.to_datetime(price_frame.index, unit='m', origin=start_date)
    volume_frame.index = pd.to_datetime(volume_frame.index, unit='m', origin=start_date)

    # Resample minute-level prices into OHLC bars and sum volume per bar.
    data_frame = price_frame['price'].resample(time_frame).ohlc()
    data_frame['volume'] = volume_frame['volume'].resample(time_frame).sum()

    return data_frame
def generate(price_fn: 'Callable[[ModelParameters], np.array]',
             base_price: int = 1,
             base_volume: int = 1,
             start_date: str = '2010-01-01',
             start_date_format: str = '%Y-%m-%d',
             times_to_generate: int = 1000,
             time_frame: str = '1h',
             params: ModelParameters = None) -> 'pd.DataFrame':
    """Generates a data frame of OHLCV data based on the price model specified.

    Parameters
    ----------
    price_fn : `Callable[[ModelParameters], np.array]`
        The price function generate the prices based on the chosen model.
    base_price : int, default 1
        The base price to use for price generation.
    base_volume : int, default 1
        The base volume to use for volume generation.
    start_date : str, default '2010-01-01'
        The start date of the generated data
    start_date_format : str, default '%Y-%m-%d'
        The format for the start date of the generated data.
    times_to_generate : int, default 1000
        The number of bars to make.
    time_frame : str, default '1h'
        The time frame.
    params : `ModelParameters`, optional
        The model parameters.

    Returns
    -------
    `pd.DataFrame`
        The data frame containing the OHLCV bars.
    """
    delta = get_delta(time_frame)
    times_to_generate = scale_times_to_generate(times_to_generate, time_frame)

    # Fall back to default model parameters when none are supplied.
    params = params or default(base_price, times_to_generate, delta)

    # Delegate price generation to the caller-chosen model.
    prices = price_fn(params)

    volume_gen = GaussianNoise(t=times_to_generate)
    volumes = volume_gen.sample(times_to_generate) + base_volume

    start_date = pd.to_datetime(start_date, format=start_date_format)

    price_frame = pd.DataFrame([], columns=['date', 'price'], dtype=float)
    volume_frame = pd.DataFrame([], columns=['date', 'volume'], dtype=float)

    price_frame['date'] = pd.date_range(start=start_date, periods=times_to_generate, freq="1min")
    price_frame['price'] = abs(prices)

    volume_frame['date'] = price_frame['date'].copy()
    volume_frame['volume'] = abs(volumes)

    # BUG FIX: the previous `set_index('date')` calls were no-ops (pandas
    # `set_index` returns a new frame unless inplace=True; the result was
    # discarded). The effective index has always been the positional
    # RangeIndex interpreted as minute offsets from `start_date`.
    price_frame.index = pd.to_datetime(price_frame.index, unit='m', origin=start_date)
    volume_frame.index = pd.to_datetime(volume_frame.index, unit='m', origin=start_date)

    # Resample minute-level prices into OHLC bars and sum volume per bar.
    data_frame = price_frame['price'].resample(time_frame).ohlc()
    data_frame['volume'] = volume_frame['volume'].resample(time_frame).sum()

    return data_frame
def __init__(self, drift=0, variance=1, scale=1, t=1, rng=None):
    """Initialize process parameters and the underlying Gaussian noise source."""
    super().__init__(t=t, rng=rng)
    # These assignments go through the class's property setters, which
    # validate each value on assignment.
    self.drift = drift
    self.variance = variance
    self.scale = scale
    # Dedicated Gaussian-noise generator over the same interval [0, t].
    self.gn = GaussianNoise(t)
class VarianceGammaProcess(BaseTimeProcess):
    r"""Variance Gamma process.

    .. image:: _static/variance_gamma_process.png
        :scale: 50%

    A variance gamma process has independent increments which follow the
    variance-gamma distribution. It can be represented as a Brownian motion
    with drift subordinated by a Gamma process:

    .. math::

        \theta \Gamma(t; 1, \nu) + \sigma W(\Gamma(t; 1, \nu))

    :param float drift: the drift parameter of the Brownian motion,
        or :math:`\theta` above
    :param float variance: the variance parameter of the Gamma subordinator,
        or :math:`\nu` above
    :param float scale: the scale parameter of the Brownian motion,
        or :math:`\sigma` above
    :param float t: the right hand endpoint of the time interval :math:`[0,t]`
        for the process
    :param numpy.random.Generator rng: a custom random number generator
    """

    def __init__(self, drift=0, variance=1, scale=1, t=1, rng=None):
        super().__init__(t=t, rng=rng)
        # Assignments run through the property setters below, validating each value.
        self.drift = drift
        self.variance = variance
        self.scale = scale
        # Gaussian noise source used by both samplers for the Brownian component.
        self.gn = GaussianNoise(t)

    @property
    def drift(self):
        """Drift parameter."""
        return self._drift

    @drift.setter
    def drift(self, value):
        check_numeric(value, "Drift")
        self._drift = value

    @property
    def variance(self):
        """Variance parameter."""
        return self._variance

    @variance.setter
    def variance(self, value):
        check_positive_number(value, "Variance")
        self._variance = value

    @property
    def scale(self):
        """Scale parameter."""
        return self._scale

    @scale.setter
    def scale(self, value):
        check_positive_number(value, "Scale")
        self._scale = value

    def _sample_variance_gamma_process(self, n):
        """Generate a realization of a variance gamma process."""
        check_positive_integer(n)
        # Uniform time step over [0, t].
        delta_t = 1.0 * self.t / n
        # Gamma subordinator increments: shape * scale == delta_t, so each
        # increment has mean delta_t (unit-rate gamma clock).
        shape = delta_t / self.variance
        scale = self.variance
        gammas = self.rng.gamma(shape=shape, scale=scale, size=n)
        gn = self.gn.sample(n)
        # Brownian motion with drift evaluated under the gamma time change:
        # theta * dG + sigma * sqrt(dG) * N(0, 1).
        increments = self.drift * gammas + self.scale * np.sqrt(gammas) * gn
        samples = np.cumsum(increments)
        # Paths start at 0.
        return np.concatenate(([0], samples))

    def _sample_variance_gamma_process_at(self, times):
        """Generate a realization of a variance gamma process."""
        # Ensure the grid starts at 0 so increments are well defined; remember
        # whether the caller supplied the leading 0 so we know whether to
        # re-insert the initial sample below.
        if times[0] != 0:
            zero = False
            times = np.array([0] + list(times))
        else:
            zero = True
        # Gamma subordinator increments, one per consecutive pair of times.
        shapes = np.diff(times) / self.variance
        scale = self.variance
        gammas = np.array(
            [self.rng.gamma(shape=shape, scale=scale, size=1)[0] for shape in shapes]
        )
        # NOTE(review): assumes gn.sample_at(times) returns one value per
        # increment (len(times) - 1 here, since times[0] == 0) so lengths
        # line up with `gammas` — confirm against GaussianNoise.sample_at.
        gn = self.gn.sample_at(times)
        increments = self.drift * gammas + self.scale * np.sqrt(gammas) * gn
        samples = np.cumsum(increments)
        # Only prepend the 0-valued initial sample when the caller asked for t=0.
        if zero:
            samples = np.insert(samples, 0, [0])
        return samples

    def sample(self, n):
        """Generate a realization.

        :param int n: the number of increments to generate
        """
        return self._sample_variance_gamma_process(n)

    def sample_at(self, times):
        """Generate a realization using specified times.

        :param times: a vector of increasing time values at which to generate
            the realization
        """
        return self._sample_variance_gamma_process_at(times)
class DDPGAgent:
    """Deep Deterministic Policy Gradient agent.

    Maintains online and target actor/critic networks, a replay buffer, and a
    Gaussian exploration-noise source; `train` interacts with a gym
    environment and periodically reports losses and checkpoints models.
    """

    def __init__(
        self,
        state_n,
        action_n,
        alpha_actor,
        alpha_critic,
        episodes,
        steps_per_episode,
        buffer_size,
        train_begin,
        batch_size,
        gamma,
        tau,
        epochs,
        episodes_to_print,
        episodes_to_save,
        path,
        reward_path,
        load_path_critic,
        load_path_actor,
        load_models,
        action_range,
    ):
        """Build networks (plus frozen target copies), the replay buffer,
        the exploration noise source, and store all hyper-parameters."""
        # Online networks; optionally warm-started from checkpoints on disk.
        self.critic_model = CriticModel(alpha_critic, tau, state_n, action_n)
        self.actor_model = ActorModel(alpha_actor, tau, state_n, action_n, action_range)
        if load_models:
            self.actor_model.load_state_dict(torch.load(load_path_actor))
            self.critic_model.load_state_dict(torch.load(load_path_critic))

        # Target networks begin as exact copies of the (possibly loaded) online ones.
        self.critic_model_target = CriticModel(alpha_critic, tau, state_n, action_n)
        self.critic_model_target.load_state_dict(self.critic_model.state_dict())
        self.actor_model_target = ActorModel(alpha_actor, tau, state_n, action_n, action_range)
        self.actor_model_target.load_state_dict(self.actor_model.state_dict())

        # Training hyper-parameters.
        self.episodes = episodes
        self.steps_per_episode = steps_per_episode
        self.train_begin = train_begin
        self.batch_size = batch_size
        self.gamma = gamma
        self.epochs = epochs
        self.best_reward = -10e5  # sentinel so the first real score becomes the best

        # Reporting / checkpointing configuration.
        self.episodes_to_print = episodes_to_print
        self.episodes_to_save = episodes_to_save
        self.path = path
        self.reward_path = reward_path

        # Experience replay storage.
        self.replay_buffer = ReplayBuffer(buffer_size)
        # Exploration noise source (default parameters).
        self.noise = GaussianNoise()

    def train_models(self):
        """Run `self.epochs` gradient steps on critic and actor from replayed
        batches; returns the last (actor_loss, critic_loss)."""
        for epoch in range(self.epochs):
            # Sample a batch and build the TD target using the target networks;
            # (1 - done) zeroes the bootstrap term at episode boundaries.
            state, action, reward, state_next, done = self.replay_buffer.sample(self.batch_size)
            y_hat = reward + self.gamma * self.critic_model_target(state_next, self.actor_model_target(state_next)) * (1 - done)
            y = self.critic_model(state, action)

            # Critic step: Huber loss against the detached TD target.
            critic_loss = F.smooth_l1_loss(y, y_hat.detach())
            self.critic_model.optimizer.zero_grad()
            critic_loss.backward()
            self.critic_model.optimizer.step()

            # Actor step: maximize Q by descending its negation.
            actor_loss = -self.critic_model(state, self.actor_model(state)).mean()
            self.actor_model.optimizer.zero_grad()
            actor_loss.backward()
            self.actor_model.optimizer.step()

            # Soft-update target networks toward the online ones.
            self.critic_model_target.update_weights(self.critic_model.parameters())
            self.actor_model_target.update_weights(self.actor_model.parameters())
        return actor_loss, critic_loss

    def train(self, env: gym.wrappers.time_limit.TimeLimit):
        """Interact with `env` for `self.episodes` episodes, storing noisy
        transitions, training once the buffer reaches `train_begin`, and
        periodically printing progress and saving checkpoints."""
        score = 0
        for episode in range(self.episodes):
            state = env.reset()
            for step in range(self.steps_per_episode):
                # Deterministic policy action plus exploration noise.
                action = self.actor_model(torch.from_numpy(state).float())
                action += self.noise.sample(self.batch_size)[0]
                state_next, reward, done, _ = env.step(action.detach().numpy())
                # Rewards are scaled down by 100 before storage.
                self.replay_buffer.push((state, action.detach().numpy(), reward / 100, state_next, done))
                score += reward
                if done:
                    break
                state = state_next

            # Placeholder losses until the buffer is warm enough to train.
            actor_loss, critic_loss = 1000, 1000
            if len(self.replay_buffer) >= self.train_begin:
                actor_loss, critic_loss = self.train_models()

            if (episode + 1) % self.episodes_to_print == 0:
                print(f"For episode {episode + 1} score is {score / self.episodes_to_print}")
                # BUG FIX: the labels were swapped — the message previously
                # reported actor_loss as "Critic Loss" and vice versa.
                print(f"Actor Loss is {actor_loss}, Critic Loss is {critic_loss}")
                if score > self.best_reward:
                    self.best_reward = score
                    self.save_model(self.path + '_best')
                score = 0

            if (episode + 1) % self.episodes_to_save == 0:
                self.save_model(self.path)

    def save_model(self, path):
        """Checkpoint the *target* networks' weights under `path`-derived names."""
        torch.save(self.actor_model_target.state_dict(), path + "_actor_model")
        torch.save(self.critic_model_target.state_dict(), path + "_critic_model")

    def play(self, env: gym.wrappers.time_limit.TimeLimit):
        """Roll out the current policy in `env` with rendering; after the
        first `done`, plays 100 extra steps before exiting."""
        state = env.reset()
        while True:
            action = self.actor_model(torch.from_numpy(state).float())
            action += self.noise.sample(self.batch_size)[0]
            state_next, reward, done, _ = env.step(action.detach().numpy())
            env.render()
            state = state_next
            if done:
                # Keep rendering a fixed tail of 100 steps past termination.
                for i in range(100):
                    action = self.actor_model(torch.from_numpy(state).float())
                    action += self.noise.sample(self.batch_size)[0]
                    state_next, reward, done, _ = env.step(action.detach().numpy())
                    env.render()
                    state = state_next
                break
def test_gaussian_noise_str_repr(t):
    """Both repr() and str() of a GaussianNoise instance are plain strings."""
    noise = GaussianNoise(t)
    for rendered in (repr(noise), str(noise)):
        assert isinstance(rendered, str)
def test_gaussian_noise_sample(t, n):
    """sample(n) returns exactly n values."""
    noise = GaussianNoise(t)
    assert len(noise.sample(n)) == n