Example #1
File: main.py Project: Niwood/DQL-Trader
    def _model_assessment(self, episode):
        ''' Run the model on a sample with epsilon=0 '''

        # Load data
        dc = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=1,
            verbose=False,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        collection = dc.collection
        (st_shape, lt_shape) = dc.get_model_shape()

        # Model assessment
        ma = ModelAssessment(collection=collection,
                             model_shape=(st_shape, lt_shape),
                             num_time_steps=self.num_time_steps)
        ma.astats = self.astats.loc[episode]
        ma.load_model(model_name=self.folder / self.last_model_name)
        ma.simulate()
        ma.render()
        self.astats.loc[episode] = ma.astats

        # Save stats for the last model name
        self.astats.loc[episode, 'networkName'] = self.last_model_name

        # Print assessment stats
        print(self.astats.loc[episode - 10:episode])

        # Pickle and save assessment stats and simulation run
        self.astats.loc[1:episode].to_pickle(self.folder / 'astats.pkl')
        ma.sim.to_pickle(self.folder /
                         f'sim_{ma.ticker}_EPS{episode}of{EPISODES}.pkl')
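The assessment stats and the simulation run are both written with pandas' to_pickle, so they can be inspected later with read_pickle. A minimal sketch, assuming a model folder and file names like the ones produced above (both paths below are illustrative):

from pathlib import Path
import pandas as pd

folder = Path.cwd() / 'models' / '1618873083'    # illustrative model folder
astats = pd.read_pickle(folder / 'astats.pkl')   # assessment stats per saved epoch
print(astats.tail())

# One simulation run pickled by _model_assessment (ticker and episode are illustrative)
sim = pd.read_pickle(folder / 'sim_AAPL_EPS10of800.pkl')
print(sim.head())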
Example #2
    def __init__(self):

        self.num_time_steps = 300  # keep - number of time steps in each sequence fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=NUM_STOCKS,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.pre_train(self.collection,
                             cached_data=False,
                             epochs=PT_EPOCHS,
                             sample_size=SAMPLE_SIZE,
                             lr_preTrain=1e-3)

        # Save the model
        self.agent.model.save(Path.cwd() / 'pre_trained_models' /
                              str(int(time.time())))
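The timestamped model saved under pre_trained_models is what the training examples further down load via Agent.load_network(PT_NETWORK). A minimal sketch of reusing it, assuming PT_NETWORK is simply a path to such a saved model and that Agent, st_shape and lt_shape from the example above are in scope (the timestamp below is illustrative):

from pathlib import Path

# Illustrative path to a previously saved pre-trained model
PT_NETWORK = Path.cwd() / 'pre_trained_models' / '1618873083'

agent = Agent(model_shape=(st_shape, lt_shape), num_time_steps=300)
agent.load_network(PT_NETWORK)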
Example #3
    def __init__(self):

        self.num_time_steps = 300  # keep - number of time steps in each sequence fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=NUM_STOCKS,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Env
        self.env = StockTradingEnv(self.collection,
                                   look_back_window=self.num_time_steps,
                                   max_steps=MAX_STEPS,
                                   static_initial_step=0)

        # Run
        self.run()
Example #4
class Stager:
    def __init__(self):

        self.num_time_steps = 300  # keep - number of time steps in each sequence fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=NUM_STOCKS,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Env
        self.env = StockTradingEnv(self.collection,
                                   look_back_window=self.num_time_steps,
                                   max_steps=MAX_STEPS,
                                   static_initial_step=0)

        # Run
        self.run()

    def run(self):

        batches = 50
        iterations_per_batch = 500

        for batch in range(batches):

            batch_dict = {'df': list(), 'obs': list()}

            for iteration in tqdm(range(iterations_per_batch),
                                  desc=f'Batch {batch}'):

                # List to save all obs for several iterations
                iteration_list = list()
                break_flag = False

                try:
                    obs, _ = self.env.reset()
                except:
                    print('CONTINUED ON RESET')
                    continue

                # Append obs from reset
                iteration_list.append(obs)

                for step in range(MAX_STEPS):

                    # Step with arbitrary action
                    try:
                        obs, _, _ = self.env.step(0)
                    except:
                        print('CONTINUED DURING ITERATION')
                        break_flag = True
                        break

                    # Append obs from step
                    iteration_list.append(obs)

                # Skip this iteration if the env failed mid-episode
                if break_flag:
                    continue

                # Append to batch_dict
                batch_dict['df'].append(self.env.df_reward)
                batch_dict['obs'].append(iteration_list)

            # Pickle batch
            with open(
                    Path.cwd() / 'data' / 'staged' /
                    f'staged_batch_{batch}.pkl', 'wb') as handle:
                pickle.dump(batch_dict,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)

            # print(f'Batch {batch} finished with {len(iteration_list)} iterations')

            # Free memory
            del iteration_list, batch_dict
            gc.collect()
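Each staged batch is pickled as a dict with keys 'df' (the per-iteration reward frames) and 'obs' (lists of observations). A minimal sketch of loading one batch back, mirroring the save path used above (the batch number is illustrative):

import pickle
from pathlib import Path

with open(Path.cwd() / 'data' / 'staged' / 'staged_batch_0.pkl', 'rb') as handle:
    batch_dict = pickle.load(handle)

print(len(batch_dict['df']), 'iterations in this batch')
print(len(batch_dict['obs'][0]), 'observations in the first iteration')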
Example #5
            self.astats.loc['sellTrigger'] = round(
                self.actions.count(2) / len(self.actions), 3)
            self.astats.loc['lastReward'] = round(self.rewards[-1])

    def _save_data(self):
        pass


if __name__ == '__main__':
    import matplotlib.pyplot as plt

    num_steps = 300
    wavelet_scales = 100

    dc = DataCluster(
        dataset='realmix',
        remove_features=['close', 'high', 'low', 'open', 'volume'],
        wavelet_scales=wavelet_scales,
        num_time_steps=num_steps)
    collection = dc.collection
    (st_shape, lt_shape) = dc.get_model_shape()

    model_name = 'models/1618873083/1618873135_EPS1of800.model'
    ma = ModelAssessment(collection=collection,
                         model_shape=(st_shape, lt_shape),
                         num_time_steps=num_steps)
    ma.load_model(model_name=model_name)
    ma.sim_range = 300
    ma.simulate()

    print(ma.sim)
    print(ma.actions)
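matplotlib is imported in this block but not used; a natural follow-up is plotting the simulation output, assuming ma.sim is a pandas DataFrame (it is saved with to_pickle elsewhere in the project):

    ma.sim.plot(figsize=(10, 6))
    plt.title(f'Simulation run for {ma.ticker}')
    plt.show()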
Example #6
File: main.py Project: Niwood/DQL-Trader
    def __init__(self):

        self.num_time_steps = 300  # keep - number of time steps in each sequence fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=num_stocks,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.load_network(PT_NETWORK)

        # Environment
        self.env = StockTradingEnv2()

        # Epsilon
        self.epsilon_steps = \
            [1.0] * int(EPISODES * epsilon_plateau) + \
            list(np.linspace(1,MIN_EPSILON,EPISODES - int(EPISODES * epsilon_plateau)*2+1)) + \
            [MIN_EPSILON] * int(EPISODES * epsilon_plateau)

        # Statistics
        self.epsilon = epsilon
        stats = [
            'avgReward', 'buyAndHold', 'netWorthChng', 'buyTrigger',
            'sellTrigger', 'holdTrigger', 'epsilon', 'totTrainTime',
            'amountBalance', 'amountAsset', 'holdReward', 'sellReward',
            'buyReward', 'avgAmount', 'episodeTime'
        ]
        self.estats = pd.DataFrame(np.nan,
                                   index=np.arange(1, EPISODES + 1),
                                   columns=stats)
        self.estats.index.name = 'Episode'

        astats_index = np.append(np.array([1]),
                                 np.arange(0, EPISODES + 1, EPOCH_SIZE)[1::])
        self.astats = pd.DataFrame(np.nan,
                                   index=astats_index,
                                   columns=[
                                       'lastReward', 'buyAndHold',
                                       'netWorthChng', 'buyTrigger',
                                       'sellTrigger', 'holdTrigger',
                                       'avgAmount', 'networkName'
                                   ])
        self.astats.index.name = 'Episode'

        # Create model folder and model ID
        self.model_id = int(time.time())
        self.agent.model._name = str(self.model_id)
        self.folder = Path.cwd() / 'models' / str(self.model_id)
        self.folder.mkdir(exist_ok=False)

        # Save architecture
        self.agent.model._name = str(self.model_id)
        with open(self.folder / 'arch.txt', 'w') as fh:
            self.agent.model.summary(print_fn=lambda x: fh.write(x + '\n'))

        # Save metadata
        metadata = {
            'model_id': self.model_id,
            'note': ' '.join([str(x) for x in sys.argv[1::]]),
            'date': str(datetime.now()),
            'episodes': EPISODES,
            'epoch_size': EPOCH_SIZE,
            'time_steps': self.num_time_steps,
            'data': {
                'dataset': self.dataset,
                'length': len(self.collection),
            },
            'optimizer': {
                'algorithm': K.eval(self.agent.model.optimizer._name),
                'learning_rate': float(K.eval(self.agent.model.optimizer.lr))
            },
            'agent': {
                'discount': self.agent.DISCOUNT,
                'replay_memory_size': self.agent.REPLAY_MEMORY_SIZE,
                'min_replay_memory_size': self.agent.MIN_REPLAY_MEMORY_SIZE,
                'minibatch_size': self.agent.MINIBATCH_SIZE,
                'update_target_network_every': self.agent.UPDATE_TARGET_EVERY
            },
            'pre_training': {
                'conf_mat': str(self.agent.conf_mat)
            }
        }
        with open(self.folder / 'metadata.json', 'w') as outfile:
            json.dump(metadata, outfile)

        # Run
        self.run()
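The epsilon schedule above holds epsilon at 1.0 for an initial plateau, decays linearly to MIN_EPSILON, and then holds that floor for a final plateau. Its length works out to EPISODES + 1, which is why indexing it with episode numbers 1..EPISODES (as in the run method shown in the next example) is safe. A standalone sketch with illustrative values (EPISODES, MIN_EPSILON and epsilon_plateau below are assumptions, not the project's settings):

import numpy as np

EPISODES = 800          # illustrative
MIN_EPSILON = 0.05      # illustrative
epsilon_plateau = 0.1   # illustrative fraction of episodes spent on each plateau

plateau = int(EPISODES * epsilon_plateau)
epsilon_steps = (
    [1.0] * plateau
    + list(np.linspace(1, MIN_EPSILON, EPISODES - 2 * plateau + 1))
    + [MIN_EPSILON] * plateau
)

assert len(epsilon_steps) == EPISODES + 1
print(epsilon_steps[1], epsilon_steps[EPISODES // 2], epsilon_steps[EPISODES])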
Example #7
File: main.py Project: Niwood/DQL-Trader
class Trader:
    def __init__(self):

        self.num_time_steps = 300  # keep - number of time steps in each sequence fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=num_stocks,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.load_network(PT_NETWORK)

        # Environment
        self.env = StockTradingEnv2()

        # Epsilon
        self.epsilon_steps = \
            [1.0] * int(EPISODES * epsilon_plateau) + \
            list(np.linspace(1,MIN_EPSILON,EPISODES - int(EPISODES * epsilon_plateau)*2+1)) + \
            [MIN_EPSILON] * int(EPISODES * epsilon_plateau)

        # Statistics
        self.epsilon = epsilon
        stats = [
            'avgReward', 'buyAndHold', 'netWorthChng', 'buyTrigger',
            'sellTrigger', 'holdTrigger', 'epsilon', 'totTrainTime',
            'amountBalance', 'amountAsset', 'holdReward', 'sellReward',
            'buyReward', 'avgAmount', 'episodeTime'
        ]
        self.estats = pd.DataFrame(np.nan,
                                   index=np.arange(1, EPISODES + 1),
                                   columns=stats)
        self.estats.index.name = 'Episode'

        astats_index = np.append(np.array([1]),
                                 np.arange(0, EPISODES + 1, EPOCH_SIZE)[1::])
        self.astats = pd.DataFrame(np.nan,
                                   index=astats_index,
                                   columns=[
                                       'lastReward', 'buyAndHold',
                                       'netWorthChng', 'buyTrigger',
                                       'sellTrigger', 'holdTrigger',
                                       'avgAmount', 'networkName'
                                   ])
        self.astats.index.name = 'Episode'

        # Create model folder and model ID
        self.model_id = int(time.time())
        self.agent.model._name = str(self.model_id)
        self.folder = Path.cwd() / 'models' / str(self.model_id)
        self.folder.mkdir(exist_ok=False)

        # Save architecture
        self.agent.model._name = str(self.model_id)
        with open(self.folder / 'arch.txt', 'w') as fh:
            self.agent.model.summary(print_fn=lambda x: fh.write(x + '\n'))

        # Save metadata
        metadata = {
            'model_id': self.model_id,
            'note': ' '.join([str(x) for x in sys.argv[1::]]),
            'date': str(datetime.now()),
            'episodes': EPISODES,
            'epoch_size': EPOCH_SIZE,
            'time_steps': self.num_time_steps,
            'data': {
                'dataset': self.dataset,
                'length': len(self.collection),
            },
            'optimizer': {
                'algorithm': K.eval(self.agent.model.optimizer._name),
                'learning_rate': float(K.eval(self.agent.model.optimizer.lr))
            },
            'agent': {
                'discount': self.agent.DISCOUNT,
                'replay_memory_size': self.agent.REPLAY_MEMORY_SIZE,
                'min_replay_memory_size': self.agent.MIN_REPLAY_MEMORY_SIZE,
                'minibatch_size': self.agent.MINIBATCH_SIZE,
                'update_target_network_every': self.agent.UPDATE_TARGET_EVERY
            },
            'pre_training': {
                'conf_mat': str(self.agent.conf_mat)
            }
        }
        with open(self.folder / 'metadata.json', 'w') as outfile:
            json.dump(metadata, outfile)

        # Run
        self.run()

    def run(self):

        # Define iterator and track elapsed time
        episode_iter = tqdm(range(1, EPISODES + 1), ascii=True, unit='episode')
        last_iteration_time = datetime.fromtimestamp(episode_iter.start_t)
        self.episode_times = list()

        # Iterate over episodes
        for episode in episode_iter:

            # Slice estats for this episode for simplicity
            self._estats = self.estats.loc[episode]

            # Timer to track train time
            self.train_time = 0

            # Save actions
            self.actions = list()

            # Update tensorboard step every episode
            self.agent.tensorboard.step = episode

            # Reset episode reward and step number
            self.episode_reward = list()
            self.reward_action = [0, 0, 0]
            step = 1

            # Reset environment and get initial state
            current_state, _ = self.env.reset()

            # Reset flag and start iterating until episode ends
            done = False

            while not done:

                # Epsilon-greedy: exploit the model's Q values or explore with a random action
                if random.random() > self.epsilon:
                    # Get action from Q table
                    action = np.argmax(self.agent.get_qs(current_state))
                else:
                    # Get random action
                    action = np.random.randint(0, self.env.ACTION_SPACE_SIZE)

                # STEP ENV
                new_state, reward, done = self.env.step(action)

                # Save reward and action
                self.reward_action[action] += reward
                self.actions.append(action)

                # Accumulate the reward for this step
                self.episode_reward.append(reward)

                # Every step we update replay memory and train main network
                self.agent.update_replay_memory(
                    (current_state, action, reward, new_state, done))
                self.agent.train(done, step)
                self.train_time += self.agent.elapsed

                current_state = new_state
                step += 1

            max_action_errors = {
                0: [max(self.agent.action_errors[0], default=0)],
                1: [max(self.agent.action_errors[1], default=0)],
                2: [max(self.agent.action_errors[2], default=0)]
            }
            min_action_errors = {
                0: [min(self.agent.action_errors[0], default=0)],
                1: [min(self.agent.action_errors[1], default=0)],
                2: [min(self.agent.action_errors[2], default=0)]
            }
            mean_action_errors = {
                0: [np.mean(self.agent.action_errors[0])],
                1: [np.mean(self.agent.action_errors[1])],
                2: [np.mean(self.agent.action_errors[2])]
            }
            print('max_action_errors: ', max_action_errors)
            print('min_action_errors: ', min_action_errors)
            print('mean_action_errors: ', mean_action_errors)

            # Save model
            if not episode % EPOCH_SIZE or episode == 1:
                self._save_model(episode)
            else:
                # Set default values to evaluation stats
                self._estats.loc['totTrainTime'] = round(self.train_time, 1)

            # Decay epsilon
            self.epsilon = self.epsilon_steps[episode]

            # Time tracking
            iteration_time = datetime.fromtimestamp(episode_iter.last_print_t)
            datetime_delta = iteration_time - last_iteration_time
            self.delta_time = datetime_delta.seconds + datetime_delta.microseconds / 1e6
            last_iteration_time = iteration_time

            # Render
            if not episode % AGGREGATE_STATS_EVERY:
                self._render(episode)

            # Free memory
            gc.collect()

    def _render(self, episode):
        ''' Render stats for a given episode '''
        print('=' * 20)

        # Env to render
        self.env.render(stats=self._estats)

        # Episode aggregated stats
        self._estats.loc['epsilon'] = round(self.epsilon, 2)
        self._estats.loc['avgReward'] = round(mean(self.episode_reward), 3)
        self._estats.loc['totTrainTime'] = round(self.train_time, 1)
        self._estats.loc['holdTrigger'] = round(
            self.actions.count(0) / len(self.actions), 3)
        self._estats.loc['buyTrigger'] = round(
            self.actions.count(1) / len(self.actions), 3)
        self._estats.loc['sellTrigger'] = round(
            self.actions.count(2) / len(self.actions), 3)
        self._estats.loc['holdReward'] = round(
            safe_div(self.reward_action[0], self.actions.count(0)), 3)
        self._estats.loc['buyReward'] = round(
            safe_div(self.reward_action[1], self.actions.count(1)), 3)
        self._estats.loc['sellReward'] = round(
            safe_div(self.reward_action[2], self.actions.count(2)), 3)
        self._estats.loc['episodeTime'] = self.delta_time

        # Print episode stats
        self.estats.loc[episode] = self._estats
        print(self.estats.loc[episode - 10:episode])
        print(
            f'Replay memory allocation: {self.agent.replay_memory_allocation * 100}%'
        )

        # Pickle and save episode stats
        self.estats.loc[1:episode].to_pickle(self.folder / 'estats.pkl')

    def _save_model(self, episode):
        ''' For each epoch save model and make a sample inference with epsilon=0 '''
        epoch_id = int(time.time())
        self.last_model_name = f'{epoch_id}_EPS{episode}of{EPISODES}.model'
        self.agent.model.save(self.folder / self.last_model_name)
        try:
            self._model_assessment(episode)
        except:
            pass

    def _model_assessment(self, episode):
        ''' Run the model on a sample with epsilon=0 '''

        # Load data
        dc = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=1,
            verbose=False,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        collection = dc.collection
        (st_shape, lt_shape) = dc.get_model_shape()

        # Model assessment
        ma = ModelAssessment(collection=collection,
                             model_shape=(st_shape, lt_shape),
                             num_time_steps=self.num_time_steps)
        ma.astats = self.astats.loc[episode]
        ma.load_model(model_name=self.folder / self.last_model_name)
        ma.simulate()
        ma.render()
        self.astats.loc[episode] = ma.astats

        # Save stats for the last model name
        self.astats.loc[episode, 'networkName'] = self.last_model_name

        # Print assessment stats
        print(self.astats.loc[episode - 10:episode])

        # Pickle and save assessment stats and simulation run
        self.astats.loc[1:episode].to_pickle(self.folder / 'astats.pkl')
        ma.sim.to_pickle(self.folder /
                         f'sim_{ma.ticker}_EPS{episode}of{EPISODES}.pkl')
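safe_div, used in _render above, is a project helper that does not appear in these snippets. A minimal sketch of what it presumably does (zero-safe division, so an action that was never taken yields a reward of 0 instead of a ZeroDivisionError); this definition is an assumption, not the project's code:

def safe_div(numerator, denominator):
    # Assumed behaviour: return 0 when the denominator is zero
    # (e.g. an action that was never triggered during the episode).
    return numerator / denominator if denominator else 0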