    def __init__(self, expert_a: IExpert, expert_b: IExpert, load_trained_model: bool = True,
                 train_while_trading: bool = False, color: str = 'black', name: str = 'dql_trader'):
        """
        Constructor
        Args:
            expert_a: Expert for stock A
            expert_b: Expert for stock B
            load_trained_model: Flag to trigger loading an already trained neural network
            train_while_trading: Flag to trigger on-the-fly training while trading
        """
        # Save experts, training mode and name
        super().__init__(color, name)
        assert expert_a is not None and expert_b is not None
        self.expert_a = expert_a
        self.expert_b = expert_b
        self.train_while_trading = train_while_trading

        # Parameters for neural network
        self.state_size = 2
        self.action_size = 4
        self.hidden_size = 50

        # Parameters for deep Q-learning
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_decay = 0.90
        self.epsilon_min = 0.01
        self.batch_size = 64
        self.min_size_of_memory_before_training = 1000  # should be way bigger than batch_size, but smaller than memory
        self.memory = deque(maxlen=2000)

        # Attributes necessary to remember our last actions and fill our memory with experiences
        self.last_state = None
        self.last_action_a = None
        self.last_action_b = None
        self.last_portfolio_value = None
        self.last_price_a = None
        self.last_price_b = None

        # Create main model, either as trained model (from file) or as untrained model (from scratch)
        self.model = None
        if load_trained_model:
            self.model = load_keras_sequential(self.RELATIVE_DATA_DIRECTORY, self.get_name())
            logger.info(f"DQL Trader: Loaded trained model")
        if self.model is None:  # loading failed or we didn't want to use a trained model
            self.model = Sequential()
            self.model.add(Dense(self.hidden_size * 2, input_dim=self.state_size, activation='relu'))
            self.model.add(Dense(self.hidden_size, activation='relu'))
            self.model.add(Dense(self.action_size, activation='linear'))
            logger.info(f"DQL Trader: Created new untrained model")
        assert self.model is not None
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))  # 'lr' is the deprecated alias

        self.vote_map = {
            Vote.SELL: 0,
            Vote.HOLD: 1,
            Vote.BUY: 2,
        }

        self.vote_map_inverse = {v: k for k, v in self.vote_map.items()}
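
# --- Illustrative sketch (not part of the original class): how the attributes above are
# --- typically combined into an epsilon-greedy action choice. The function name, the
# --- state construction from vote_map and the predict() call shape are assumptions
# --- about the surrounding trade() logic, not the author's actual API.
import random

import numpy as np


def choose_action_sketch(trader, vote_a, vote_b) -> int:
    """Epsilon-greedy choice over the action_size Q-values predicted by trader.model."""
    state = np.array([[trader.vote_map[vote_a], trader.vote_map[vote_b]]])  # shape (1, state_size)
    if trader.train_while_trading and random.random() < trader.epsilon:
        action = random.randrange(trader.action_size)  # explore
    else:
        action = int(np.argmax(trader.model.predict(state)[0]))  # exploit
    # decay the exploration rate towards its floor
    trader.epsilon = max(trader.epsilon_min, trader.epsilon * trader.epsilon_decay)
    return action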
Example #2
    def __init__(self, expert_a: IExpert, expert_b: IExpert, load_trained_model: bool = True,
                 train_while_trading: bool = False, color: str = 'black', name: str = 'dql_trader',
                 other_data_directory: typing.Optional[str] = None,
                 plot_name: typing.Optional[str] = None):
        """
        Constructor
        Args:
            expert_a: Expert for stock A
            expert_b: Expert for stock B
            load_trained_model: Flag to trigger loading an already trained neural network
            train_while_trading: Flag to trigger on-the-fly training while trading
            other_data_directory: relative directory path from project root to .json and .h5 files of model
            plot_name: name to use in a plot for this trader, overriding parameter "name" (which also determines
                       path where the model gets saved)
        """
        # Save experts, training mode and name
        super().__init__(color, name)
        assert expert_a is not None and expert_b is not None
        self.experts = [expert_a, expert_b]
        self.train_while_trading = train_while_trading
        self.other_data_directory = other_data_directory
        self.plot_name = plot_name

        # Parameters for neural network
        self.state_size = State.get_number_of_input_neurons()
        self.action_size = Action.get_number_of_output_neurons()
        self.hidden_size = 30

        # Parameters for deep Q-learning
        self.learning_rate = 0.001  # default for Adam: 0.001
        self.epsilon = 0.9995  # per-step decay factor: determines how quickly the random-action probability decreases to random_action_min_probability
        self.random_action_min_probability = 0.01  # minimum probability of choosing a random action
        self.train_each_n_days = 128  # how many trading days pass between each training
        self.batch_size = 128  # how many experience samples from memory to train on each training occasion
        self.min_size_of_memory_before_training = 1000  # should be way bigger than batch_size, but smaller than memory
        self.memory: typing.Deque[Experience] = deque(maxlen=2000)
        # discount factor: how quickly we expect an action to pay off
        # (0 -> only evaluate immediate effect on portfolio value on next day;
        #  near 1 -> also include development of portfolio value on future days, where each day has a weight of
        #  discount_factor ^ #days_ahead; higher values allow to factor in payoffs at later time while also
        #  making it harder to attribute the effects to the actions from a concrete day)
        self.discount_factor = 0
        assert 0 <= self.discount_factor < 1.0
        # min_horizon: number of subsequent experiences required until discount_factor ** days_ahead drops to 1%
        if 0 < self.discount_factor < 1:
            self.min_horizon = min(100, math.ceil(math.log(0.01, self.discount_factor)))
        else:
            self.min_horizon = 1
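        # Worked example (illustrative): with discount_factor = 0.95,
        # math.log(0.01, 0.95) = ln(0.01) / ln(0.95) ≈ -4.605 / -0.0513 ≈ 89.8,
        # so min_horizon = min(100, ceil(89.8)) = 90; with the default
        # discount_factor = 0, the else-branch keeps min_horizon at 1.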
        self.q_value_cap = 1.0  # limit Q-value to this magnitude; 0.0 to deactivate
        self.is_evolved_model = False  # set to True when loading model from file
        self.days_passed = 0
        self.training_occasions = 0

        # Create main model, either as trained model (from file) or as untrained model (from scratch)
        self.model = None
        if load_trained_model:
            data_dir = self.RELATIVE_DATA_DIRECTORY if self.other_data_directory is None else self.other_data_directory
            self.model = load_keras_sequential(data_dir, 'dql_trader')
            # logger.info(f"DQL Trader: Loaded trained model")
            self.is_evolved_model = True
        if self.model is None:  # loading failed or we didn't want to use a trained model
            self.model = Sequential()
            # original code:
            # self.model.add(Dense(self.hidden_size * 2, input_dim=self.state_size, activation='relu'))
            # self.model.add(Dense(self.hidden_size, activation='relu'))
            # self.model.add(Dense(self.action_size, activation='linear'))
            # modified code:
            # relu -> elu: avoids the "dying ReLU" problem of dead nodes
            # lecun_normal: Gaussian initialization scaled by the number of input nodes
            # zeros: initialize all biases with zero
            self.model.add(Dense(self.hidden_size * 2, input_dim=self.state_size, activation='elu',
                                 kernel_initializer='lecun_normal', bias_initializer='zeros'))
            self.model.add(Dense(self.hidden_size, activation='elu',
                                 kernel_initializer='lecun_normal', bias_initializer='zeros'))
            self.model.add(Dense(self.action_size, activation='linear',
                                 kernel_initializer='lecun_normal', bias_initializer='zeros'))
            # logger.info(f"DQL Trader: Created new untrained model")
        assert self.model is not None
        # mean-squared-error loss on the Q-values, optimized with Adam
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))  # 'lr' is the deprecated alias
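
# --- Illustrative sketch (not part of the original class): one way discount_factor and
# --- q_value_cap could enter the Q-learning update. The Experience field names
# --- (state, action, reward, next_state) and this helper are assumptions, not the
# --- author's code; a batch would come from e.g. random.sample(memory, batch_size).
import numpy as np


def build_training_batch_sketch(trader, batch):
    """Builds (states, targets) arrays for trader.model.fit from sampled experiences."""
    states = np.array([e.state for e in batch])
    next_states = np.array([e.next_state for e in batch])
    targets = trader.model.predict(states)      # current Q-values, shape (len(batch), action_size)
    next_q = trader.model.predict(next_states)  # Q-values of the successor states
    for i, e in enumerate(batch):
        # one-step temporal-difference target: r + discount_factor * max_a' Q(s', a')
        q = e.reward + trader.discount_factor * float(np.max(next_q[i]))
        if trader.q_value_cap > 0.0:  # 0.0 deactivates the cap, as noted above
            q = float(np.clip(q, -trader.q_value_cap, trader.q_value_cap))
        targets[i][e.action] = q
    return states, targets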