def __init__(self):
    self.num_time_steps = 300  # keep - number of sequences that will be fed into the model

    # Data cluster
    self.dataset = 'realmix'
    self.data_cluster = DataCluster(
        dataset=self.dataset,
        remove_features=['close', 'high', 'low', 'open', 'volume'],
        num_stocks=NUM_STOCKS,
        wavelet_scales=WAVELET_SCALES,
        num_time_steps=self.num_time_steps)
    self.collection = self.data_cluster.collection
    (st_shape, lt_shape) = self.data_cluster.get_model_shape()

    # Agent
    self.agent = Agent(model_shape=(st_shape, lt_shape),
                       num_time_steps=self.num_time_steps)
    self.agent.pre_train(self.collection,
                         cached_data=False,
                         epochs=PT_EPOCHS,
                         sample_size=SAMPLE_SIZE,
                         lr_preTrain=1e-3)

    # Save the model
    self.agent.model.save(Path.cwd() / 'pre_trained_models' / str(int(time.time())))
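
# A minimal reload sketch (an assumption, not original code): the model saved
# above is later consumed via Agent.load_network(PT_NETWORK). Since agent.model
# exposes .save()/.summary()/.optimizer it is assumed to be a tf.keras model,
# so the saved directory can also be opened directly for inspection, assuming
# PT_NETWORK points at a saved-model directory like the one written above.
from tensorflow import keras

pre_trained = keras.models.load_model(PT_NETWORK)
pre_trained.summary()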
class Stager:

    def __init__(self):
        self.num_time_steps = 300  # keep - number of sequences that will be fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=NUM_STOCKS,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Env
        self.env = StockTradingEnv(self.collection,
                                   look_back_window=self.num_time_steps,
                                   max_steps=MAX_STEPS,
                                   static_initial_step=0)

        # Run
        self.run()

    def run(self):
        batches = 50
        iterations_per_batch = 500

        for batch in range(batches):
            batch_dict = {'df': list(), 'obs': list()}

            for iteration in tqdm(range(iterations_per_batch), desc=f'Batch {batch}'):
                # List to save all obs for one iteration
                iteration_list = list()
                break_flag = False

                try:
                    obs, _ = self.env.reset()
                except Exception:
                    print('CONTINUED ON RESET')
                    continue

                # Append obs from reset
                iteration_list.append(obs)

                for step in range(MAX_STEPS):
                    # Step with an arbitrary action
                    try:
                        obs, _, _ = self.env.step(0)
                    except Exception:
                        print('CONTINUED DURING ITERATION')
                        break_flag = True
                        break

                    # Append obs from step
                    iteration_list.append(obs)

                # Append to batch_dict
                batch_dict['df'].append(self.env.df_reward)
                batch_dict['obs'].append(iteration_list)

                if break_flag:
                    continue

            # Pickle batch
            with open(
                    Path.cwd() / 'data' / 'staged' / f'staged_batch_{batch}.pkl',
                    'wb') as handle:
                pickle.dump(batch_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
            # print(f'Batch {batch} finished with {len(iteration_list)} iterations')

            # Free memory
            del iteration_list, batch_dict
            gc.collect()
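
# A small read-back sketch (not part of the original class): load one staged
# batch written by Stager.run() and inspect it. The path and the 'df'/'obs'
# keys follow the pickling code above; batch index 0 is just an example.
if __name__ == '__main__':
    with open(Path.cwd() / 'data' / 'staged' / 'staged_batch_0.pkl', 'rb') as handle:
        batch = pickle.load(handle)
    print(f"{len(batch['obs'])} iterations staged")
    print(f"{len(batch['obs'][0])} observations in the first iteration")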
        self.astats.loc['sellTrigger'] = round(
            self.actions.count(2) / len(self.actions), 3)
        self.astats.loc['lastReward'] = round(self.rewards[-1])

    def _save_data(self):
        pass


if __name__ == '__main__':
    import matplotlib.pyplot as plt

    num_steps = 300
    wavelet_scales = 100

    dc = DataCluster(
        dataset='realmix',
        remove_features=['close', 'high', 'low', 'open', 'volume'],
        wavelet_scales=wavelet_scales,
        num_time_steps=num_steps)
    collection = dc.collection
    (st_shape, lt_shape) = dc.get_model_shape()

    model_name = 'models/1618873083/1618873135_EPS1of800.model'
    ma = ModelAssessment(collection=collection,
                         model_shape=(st_shape, lt_shape),
                         num_time_steps=num_steps)
    ma.load_model(model_name=model_name)
    ma.sim_range = 300
    ma.simulate()

    print(ma.sim)
    print(ma.actions)
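
    # Optional visual check of the assessment run; a sketch, not part of the
    # original script. It assumes ma.rewards holds the per-step rewards (the
    # same attribute used for 'lastReward' above) and uses the matplotlib
    # import already present in this block.
    plt.plot(ma.rewards)
    plt.xlabel('step')
    plt.ylabel('reward')
    plt.title('ModelAssessment per-step rewards')
    plt.show()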
class Trader:

    def __init__(self):
        self.num_time_steps = 300  # keep - number of sequences that will be fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=num_stocks,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.load_network(PT_NETWORK)

        # Environment
        self.env = StockTradingEnv2()

        # Epsilon schedule: plateau at 1.0, linear decay, plateau at MIN_EPSILON
        self.epsilon_steps = \
            [1.0] * int(EPISODES * epsilon_plateau) + \
            list(np.linspace(1, MIN_EPSILON,
                             EPISODES - int(EPISODES * epsilon_plateau) * 2 + 1)) + \
            [MIN_EPSILON] * int(EPISODES * epsilon_plateau)

        # Statistics
        self.epsilon = epsilon  # module-level starting value for epsilon
        stats = [
            'avgReward', 'buyAndHold', 'netWorthChng', 'buyTrigger',
            'sellTrigger', 'holdTrigger', 'epsilon', 'totTrainTime',
            'amountBalance', 'amountAsset', 'holdReward', 'sellReward',
            'buyReward', 'avgAmount', 'episodeTime'
        ]
        self.estats = pd.DataFrame(np.nan,
                                   index=np.arange(1, EPISODES + 1),
                                   columns=stats)
        self.estats.index.name = 'Episode'

        astats_index = np.append(np.array([1]),
                                 np.arange(0, EPISODES + 1, EPOCH_SIZE)[1::])
        self.astats = pd.DataFrame(np.nan,
                                   index=astats_index,
                                   columns=[
                                       'lastReward', 'buyAndHold',
                                       'netWorthChng', 'buyTrigger',
                                       'sellTrigger', 'holdTrigger',
                                       'avgAmount', 'networkName'
                                   ])
        self.astats.index.name = 'Episode'

        # Create model folder and model ID
        self.model_id = int(time.time())
        self.agent.model._name = str(self.model_id)
        self.folder = Path.cwd() / 'models' / str(self.model_id)
        self.folder.mkdir(exist_ok=False)

        # Save architecture
        self.agent.model._name = str(self.model_id)
        with open(self.folder / 'arch.txt', 'w') as fh:
            self.agent.model.summary(print_fn=lambda x: fh.write(x + '\n'))

        # Save metadata
        metadata = {
            'model_id': self.model_id,
            'note': ' '.join([str(x) for x in sys.argv[1::]]),
            'date': str(datetime.now()),
            'episodes': EPISODES,
            'epoch_size': EPOCH_SIZE,
            'time_steps': self.num_time_steps,
            'data': {
                'dataset': self.dataset,
                'length': len(self.collection),
            },
            'optimizer': {
                'algorithm': K.eval(self.agent.model.optimizer._name),
                'learning_rate': float(K.eval(self.agent.model.optimizer.lr))
            },
            'agent': {
                'discount': self.agent.DISCOUNT,
                'replay_memory_size': self.agent.REPLAY_MEMORY_SIZE,
                'min_replay_memory_size': self.agent.MIN_REPLAY_MEMORY_SIZE,
                'minibatch_size': self.agent.MINIBATCH_SIZE,
                'update_target_network_every': self.agent.UPDATE_TARGET_EVERY
            },
            'pre_training': {
                'conf_mat': str(self.agent.conf_mat)
            }
        }
        with open(self.folder / 'metadata.json', 'w') as outfile:
            json.dump(metadata, outfile)

        # Run
        self.run()

    def run(self):
        # Define iterator and track elapsed time
        episode_iter = tqdm(range(1, EPISODES + 1), ascii=True, unit='episode')
        last_iteration_time = datetime.fromtimestamp(episode_iter.start_t)
        self.episode_times = list()

        # Iterate over episodes
        for episode in episode_iter:

            # Slice estats for this episode for simplicity
            self._estats = self.estats.loc[episode]

            # Timer to track train time
            self.train_time = 0

            # Save actions
            self.actions = list()

            # Update tensorboard step every episode
            self.agent.tensorboard.step = episode

            # Reset episode reward and step number
            self.episode_reward = list()
            self.reward_action = [0, 0, 0]
            step = 1

            # Reset environment and get initial state
            current_state, _ = self.env.reset()

            # Reset flag and start iterating until episode ends
            done = False
            while not done:

                # Query the model for Q values, or explore with probability epsilon
                if random.random() > self.epsilon:
                    # Get action from Q table
                    action = np.argmax(self.agent.get_qs(current_state))
                else:
                    # Get random action
                    action = np.random.randint(0, self.env.ACTION_SPACE_SIZE)

                # Step env
                new_state, reward, done = self.env.step(action)

                # Save reward and action
                self.reward_action[action] += reward
                self.actions.append(action)

                # Transform new continuous state to new discrete state and count reward
                self.episode_reward.append(reward)

                # Every step we update replay memory and train main network
                self.agent.update_replay_memory(
                    (current_state, action, reward, new_state, done))
                self.agent.train(done, step)
                self.train_time += self.agent.elapsed

                current_state = new_state
                step += 1

            # Per-episode summary of the training errors per action
            max_action_errors = {
                0: [max(self.agent.action_errors[0], default=0)],
                1: [max(self.agent.action_errors[1], default=0)],
                2: [max(self.agent.action_errors[2], default=0)]
            }
            min_action_errors = {
                0: [min(self.agent.action_errors[0], default=0)],
                1: [min(self.agent.action_errors[1], default=0)],
                2: [min(self.agent.action_errors[2], default=0)]
            }
            mean_action_errors = {
                0: [np.mean(self.agent.action_errors[0])],
                1: [np.mean(self.agent.action_errors[1])],
                2: [np.mean(self.agent.action_errors[2])]
            }
            print('max_action_errors: ', max_action_errors)
            print('min_action_errors: ', min_action_errors)
            print('mean_action_errors: ', mean_action_errors)

            # Save model
            if not episode % EPOCH_SIZE or episode == 1:
                self._save_model(episode)
            else:
                # Set default values to evaluation stats
                self._estats.loc['totTrainTime'] = round(self.train_time, 1)

            # Decay epsilon
            self.epsilon = self.epsilon_steps[episode]

            # Time tracking
            iteration_time = datetime.fromtimestamp(episode_iter.last_print_t)
            datetime_delta = iteration_time - last_iteration_time
            self.delta_time = datetime_delta.seconds + datetime_delta.microseconds / 1e6
            last_iteration_time = iteration_time

            # Render
            if not episode % AGGREGATE_STATS_EVERY:
                self._render(episode)

            # Free memory
            gc.collect()

    def _render(self, episode):
        ''' Renders stats for a certain episode '''
        print('=' * 20)

        # Env to render
        self.env.render(stats=self._estats)

        # Episode aggregated stats
        self._estats.loc['epsilon'] = round(self.epsilon, 2)
        self._estats.loc['avgReward'] = round(mean(self.episode_reward), 3)
        self._estats.loc['totTrainTime'] = round(self.train_time, 1)
        self._estats.loc['holdTrigger'] = round(
            self.actions.count(0) / len(self.actions), 3)
        self._estats.loc['buyTrigger'] = round(
            self.actions.count(1) / len(self.actions), 3)
        self._estats.loc['sellTrigger'] = round(
            self.actions.count(2) / len(self.actions), 3)
        self._estats.loc['holdReward'] = round(
            safe_div(self.reward_action[0], self.actions.count(0)), 3)
        self._estats.loc['buyReward'] = round(
            safe_div(self.reward_action[1], self.actions.count(1)), 3)
        self._estats.loc['sellReward'] = round(
            safe_div(self.reward_action[2], self.actions.count(2)), 3)
        self._estats.loc['episodeTime'] = self.delta_time

        # Print episode stats
        self.estats.loc[episode] = self._estats
        print(self.estats.loc[episode - 10:episode])
        print(
            f'Replay memory allocation: {self.agent.replay_memory_allocation * 100}%'
        )

        # Pickle and save episode stats
        self.estats.loc[1:episode].to_pickle(self.folder / 'estats.pkl')

    def _save_model(self, episode):
        ''' For each epoch save model and make a sample inference with epsilon=0 '''
        epoch_id = int(time.time())
        self.last_model_name = f'{epoch_id}_EPS{episode}of{EPISODES}.model'
        self.agent.model.save(self.folder / self.last_model_name)
        try:
            self._model_assessment(episode)
        except Exception:
            pass

    def _model_assessment(self, episode):
        ''' Make a model predict on a sample with epsilon=0 '''

        # Load data
        dc = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=1,
            verbose=False,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        collection = dc.collection
        (st_shape, lt_shape) = dc.get_model_shape()

        # Model assessment
        ma = ModelAssessment(collection=collection,
                             model_shape=(st_shape, lt_shape),
                             num_time_steps=self.num_time_steps)
        ma.astats = self.astats.loc[episode]
        ma.load_model(model_name=self.folder / self.last_model_name)
        ma.simulate()
        ma.render()
        self.astats.loc[episode] = ma.astats

        # Save the name of the last model
        self.astats.loc[episode, 'networkName'] = self.last_model_name

        # Print assessment stats
        print(self.astats.loc[episode - 10:episode])

        # Pickle and save assessment stats and simulation run
        self.astats.loc[1:episode].to_pickle(self.folder / 'astats.pkl')
        ma.sim.to_pickle(self.folder /
                         f'sim_{ma.ticker}_EPS{episode}of{EPISODES}.pkl')
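
# Entry point sketch (the original module's entry point is not shown here).
# Constructing Trader starts training directly, since __init__ ends with
# a call to self.run().
if __name__ == '__main__':
    Trader()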