def get_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    current_portfolio_value = portfolio.get_value(stock_market_data)
    if self.last_portfolio_value < current_portfolio_value:
        return 100 * (current_portfolio_value / self.last_portfolio_value)
    elif self.last_portfolio_value > current_portfolio_value:
        return -100
    return -20

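# The helper below is not part of the framework; it is a minimal, self-contained
# sketch that restates the reward rule above on plain floats, to make the scale
# explicit (illustrative name, runnable as-is).
def reward_example(last_value: float, current_value: float) -> float:
    if last_value < current_value:
        return 100 * (current_value / last_value)  # gains scale with the ratio
    elif last_value > current_value:
        return -100                                # every loss costs the same
    return -20                                     # standing still is mildly penalized

assert reward_example(1000.0, 1250.0) == 125.0  # 25% gain -> reward 125
assert reward_example(1000.0, 900.0) == -100    # any loss -> flat -100
assert reward_example(1000.0, 1000.0) == -20    # no change -> -20
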
def test_get_value_with_date(self):
    stock_market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
    date = Date(2012, 1, 3)

    portfolio = Portfolio(100.0)
    self.assertEqual(portfolio.get_value(stock_market_data, date), 100.0)

    portfolio = Portfolio(100.0, {Company.A: 10})
    self.assertEqual(portfolio.get_value(stock_market_data, date), 455.54107999999997)

    portfolio = Portfolio(100.0, {Company.A: 10, Company.B: 10})
    self.assertEqual(portfolio.get_value(stock_market_data, date), 2046.9924999999998)

def test_get_value_without_date(self):
    stock_market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])

    portfolio = Portfolio()
    self.assertEqual(portfolio.get_value(stock_market_data), 0)

    portfolio = Portfolio(100.0)
    self.assertEqual(portfolio.get_value(stock_market_data), 100.0)

    portfolio = Portfolio(100.0, {Company.A: 10})
    self.assertEqual(portfolio.get_value(stock_market_data), 1127.59895)

    portfolio = Portfolio(100.0, {Company.A: 10, Company.B: 10})
    self.assertEqual(portfolio.get_value(stock_market_data), 2416.5398400000004)

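# Note (a suggestion, not part of the original suite): the exact float literals above
# only hold while get_value() accumulates the same terms in the same order on every
# platform; a more robust variant would pin the expectations with a tolerance, e.g.
#     self.assertAlmostEqual(portfolio.get_value(stock_market_data), 2416.53984, places=4)
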
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    order_list = []
    vote_a = self.expert_a.vote(stock_market_data[Company.A])
    vote_b = self.expert_b.vote(stock_market_data[Company.B])

    # convert the expert votes into a state vector
    state = np.array([self.vote2num[vote_a], self.vote2num[vote_b]])

    if self.train_while_trading:
        if len(self.memory) > self.min_size_of_memory_before_training:
            # helper function that executes experience replay
            self._replay()

    # act
    action = self._act(state, stock_market_data, portfolio, order_list)

    if self.last_portfolio_value is not None:
        # Reward function R1
        if self.last_portfolio_value <= portfolio.get_value(stock_market_data):
            reward = 1
        else:
            reward = 0

        # Reward function R2
        # reward = (portfolio.get_value(stock_market_data)
        #           - self.last_portfolio_value) / self.last_portfolio_value

        # helper that fills up the queue for experience replay
        self._remember(self.last_state, action, reward, state)

    # save the state and portfolio value
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_state = state

    return order_list

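# _remember() and _replay() are referenced above but not shown here. A minimal sketch
# of what they could look like, assuming a deque-based self.memory, a Keras-style
# self.model over the 2-element vote state, and the standard Q-learning target
# r + gamma * max_a' Q(s', a') (imports assumed: random, numpy as np):
def _remember(self, state, action, reward, next_state):
    # store one transition <s, a, r, s'> for experience replay
    self.memory.append((state, action, reward, next_state))

def _replay(self):
    # sample a random mini-batch to break the temporal correlation of transitions
    batch = random.sample(self.memory, self.batch_size)
    for state, action, reward, next_state in batch:
        target = reward + self.gamma * np.max(
            self.model.predict(next_state.reshape(1, -1))[0])
        q_values = self.model.predict(state.reshape(1, -1))
        q_values[0][action] = target  # only the executed action is corrected
        self.model.fit(state.reshape(1, -1), q_values, epochs=1, verbose=0)
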
def gen_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    print('gen_reward')
    current_portfolio_value = portfolio.get_value(stock_market_data)
    if self.last_portfolio_value < current_portfolio_value:
        return self.reward_factor * (current_portfolio_value / self.last_portfolio_value)
    elif self.last_portfolio_value > current_portfolio_value:
        return -self.reward_factor * (current_portfolio_value / self.last_portfolio_value)
    else:
        return -self.reward_factor / 5

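# Note the asymmetry in gen_reward(): on a loss the ratio current/last is below 1,
# so the penalty -reward_factor * ratio is smaller in magnitude than the bonus for
# an equally sized gain, while an unchanged portfolio costs a flat reward_factor / 5.
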
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    current_state = get_state(self, stock_market_data)

    if self.train_while_trading and self.last_state is not None:
        reward = get_reward(self, portfolio, stock_market_data)
        self.memory.append((self.last_state, self.last_action, reward, current_state))
        train_neural_net(self)

    action_index = get_index_for_action_to_execute(self, current_state)

    self.last_state = current_state
    self.last_action = action_index
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return get_order_list(self, portfolio, stock_market_data)

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # calculate the current state
    state_ = state_maker(self.expert_a, self.expert_b, stock_market_data, portfolio)
    curr_state = state_.create_numerical_state(self.state_size)

    # calculate the current portfolio value
    curr_portfolio_value = portfolio.get_value(stock_market_data)

    # training or testing mode
    if self.train_while_trading:
        # Store the state as experience (memory) and train the neural network,
        # but only if trade() was called at least once before
        if self.last_state is not None:
            reward = self.get_rewards(self.last_portfolio_value, curr_portfolio_value)
            self.remember(self.last_state, self.last_action_a, reward, curr_state)
            if len(self.memory) > self.min_size_of_memory_before_training:
                self.replay()

        # Create actions for the current state and decrease epsilon for fewer random actions
        curr_action_a = self.get_best_action(curr_state)
        final_order = self.create_order_list(curr_action_a, portfolio, stock_market_data)
        self.decrease_epsilon()
    else:
        # predict the best action from the neural net
        curr_action_a = self.model.predict(curr_state)
        curr_action_a = np.argmax(curr_action_a[0])
        final_order = self.create_order_list(curr_action_a, portfolio, stock_market_data)

    # Save the created state, action and portfolio value for the next call of trade() --> done
    self.last_state = curr_state
    self.last_action_a = curr_action_a
    self.last_portfolio_value = curr_portfolio_value

    return final_order

def create_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData, state_now: State):
    new_portfolio_value = portfolio.get_value(stock_market_data)
    index_of_action = Action.get_id_from_action(self.last_action)

    reward = -1 if (self.last_portfolio_value - new_portfolio_value) > 0 else 1
    reward = 0 if (self.last_portfolio_value - new_portfolio_value) == 0 else reward

    reward_array = np.zeros([9])
    reward_array[index_of_action] = reward

    q_next = self.run_model(state_now)
    weighted_q_next = q_next * self.gamma
    reward_array = np.sum([reward_array, weighted_q_next], axis=0)
    # reward_array[index_of_action] += self.gamma * q_next[index_of_action]

    return reward_array

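# How the target vector above is assembled, with illustrative numbers: with
# gamma = 0.9, last action index 2, a portfolio gain (reward = +1), and
# q_next = [0.1, 0.0, 0.5, ...], every entry becomes gamma * q_next[a], and the
# executed action additionally receives the observed reward:
#     reward_array[2] = 1 + 0.9 * 0.5 = 1.45,  reward_array[0] = 0.09,  ...
# i.e. each action is regressed toward its discounted next-state estimate.
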
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    state = State(portfolio,
                  self.expert_a.vote(stock_market_data[Company.A]),
                  self.expert_b.vote(stock_market_data[Company.B]),
                  stock_market_data[Company.A],
                  stock_market_data[Company.B])
    param = state.get_nn_input_state()

    if self.train_while_trading and self.last_state is not None:
        current_reward = self.gen_reward(portfolio, stock_market_data)
        self.update_memory(current_reward, param)
        self.train_model()

    self.last_action = self.get_action_idx(param)
    self.last_state = param
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    orders = self.get_orders(stock_market_data, portfolio)
    return orders

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    current_state = self.current_state(stock_market_data)
    current_portfolio_value = portfolio.get_value(stock_market_data)

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before
    if self.last_action is not None and self.train_while_trading:
        reward = self.reward(current_portfolio_value)
        self.memory.append((self.last_state, self.last_action, reward, current_state))
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experience_replay()

    # Create the action for the current state and decrease epsilon for fewer random actions
    action = self.get_action(current_state)
    self.epsilon = max([self.epsilon_min, self.epsilon * self.epsilon_decay])

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_state = current_state
    self.last_action = action
    self.last_portfolio_value = current_portfolio_value

    # convert the action to an order list
    return self.mapping_action_order(action, portfolio, stock_market_data)

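# With typical values (assumption: epsilon = 1.0, epsilon_decay = 0.999,
# epsilon_min = 0.01), the max(...) update above anneals exploration smoothly:
# after n calls, epsilon = max(0.01, 0.999 ** n), which hits the floor after
# about log(0.01) / log(0.999), i.e. roughly 4600 trading days.
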
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    stock_data_a = None
    stock_data_b = None
    last_stock_data_a = None
    last_stock_data_b = None
    company_list = stock_market_data.get_companies()
    for company in company_list:
        if company == Company.A:
            stock_data_a = stock_market_data[Company.A]
            last_stock_data_a = stock_data_a.get_from_offset(-2)
        elif company == Company.B:
            stock_data_b = stock_market_data[Company.B]
            last_stock_data_b = stock_data_b.get_from_offset(-2)
        else:
            assert False

    vote_a = self.expert_a.vote(stock_data_a)
    vote_b = self.expert_b.vote(stock_data_b)
    state = State(last_stock_data_a, last_stock_data_b, vote_a, vote_b)

    # Q-learning: predict the action values for the current state
    nn_input = np.array(
        [np.array([state.aDiff, state.vote_a, state.bDiff, state.vote_b])])
    action_vals = self.model.predict(nn_input)

    # The five possible action combinations, expressed as order lists
    actions = [
        [Order(OrderType.BUY, Company.A,
               int((portfolio.cash / 2) // stock_data_a.get_last()[-1])),
         Order(OrderType.BUY, Company.B,
               int((portfolio.cash / 2) // stock_data_b.get_last()[-1]))],
        [Order(OrderType.BUY, Company.A,
               int(portfolio.cash // stock_data_a.get_last()[-1])),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.BUY, Company.B,
               int(portfolio.cash // stock_data_b.get_last()[-1]))],
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        [Order(OrderType.SELL, Company.A, 0),
         Order(OrderType.SELL, Company.B, 0)],
    ]

    # decrease epsilon for fewer random actions; no exploration in testing mode
    if not self.train_while_trading:
        self.epsilon = 0.0
    else:
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        else:
            self.epsilon = self.epsilon_min

    # randomize the action with probability epsilon
    if random.random() < self.epsilon:
        next_action = random.choice(list(range(self.action_size)))
    else:
        next_action = np.argmax(action_vals[0])
    order_list = actions[next_action]

    portfolio_value = portfolio.get_value(
        stock_market_data, stock_market_data.get_most_recent_trade_day())

    # Store the experience and train the network only if trade() was called before
    if self.last_state is not None and self.train_while_trading:

        def reward(old_val, new_val):
            neg = -100.0
            pos = 100.0
            q = new_val / old_val
            if q < 1:
                return neg
            elif q == 1:
                return -10
            else:
                print("Q: ", q)
                return pos / 2 * old_val / new_val

        r = reward(self.last_portfolio_value, portfolio_value)
        action_vals[0][self.last_order] = r
        self.memory.append([self.last_input, action_vals])

        if len(self.memory) > self.min_size_of_memory_before_training:
            sample = random.sample(self.memory, self.batch_size)
            train_inputs = list()
            train_targets = list()
            for [sample_in, sample_out] in sample:
                train_inputs.append(sample_in[0])
                train_targets.append(sample_out[0])
            self.model.fit(np.array(train_inputs), np.array(train_targets),
                           self.batch_size)

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_input = nn_input
    self.last_state = state
    self.last_order = next_action
    self.last_portfolio_value = portfolio_value

    print(next_action, action_vals, portfolio.cash,
          portfolio.get_stock(Company.A), portfolio.get_stock(Company.B))
    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (summed expert votes for A and for B)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)
    '''
    if random.random() < self.epsilon:
        # use random actions for A and B
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use predicted actions
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    '''
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: per-stock price change as immediate reward, clipped future rewards
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1

        fut_reward_a_buy = max(predictions[0][0], 0)
        fut_reward_a_sell = max(predictions[0][1], 0)
        fut_reward_b_buy = max(predictions[0][2], 0)
        fut_reward_b_sell = max(predictions[0][3], 0)

        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a_buy,
            -diff_a + self.gamma * fut_reward_a_sell,
            diff_b + self.gamma * fut_reward_b_buy,
            -diff_b + self.gamma * fut_reward_b_sell
        ]])
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, reward_vec, verbose=0)

    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # 1. Compute the current state
    state = self.compute_state(stock_market_data)

    # 1.2 If training is turned off, just predict the next action and return the orders
    if not self.train_while_trading:
        self.last_state = state
        action_space = self.model.predict(state)
        action = np.argmax(action_space[0])
        orders = self.action_to_order(action, portfolio, stock_market_data)
        return orders

    # 2. Take a random action with probability epsilon, otherwise predict it via the ANN
    if np.random.rand() <= self.epsilon:
        action = np.random.randint(self.action_size, size=1)[0]
    else:
        action_space = self.model.predict(state)
        action = np.argmax(action_space[0])

    # 3. Reduce epsilon while it is bigger than epsilon_min
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # 4. Train the ANN
    if self.last_state is not None:
        # 4.1 Get the reward
        reward = self.get_reward(portfolio.get_value(stock_market_data),
                                 self.last_portfolio_value)
        # 4.2 Store the memory
        self.memory.append([self.last_state, self.last_action, reward, state])
        # 4.3 Actual training via experience replay
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experienceReplay(self.batch_size)

    # 5. Map the action to orders
    orders = self.action_to_order(action, portfolio, stock_market_data)

    # 6. Save the values for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return orders

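# experienceReplay() is referenced above but not shown here. A batched sketch under
# the assumptions that self.memory holds [state, action, reward, next_state] entries
# with states shaped (1, state_size) and self.model is a Keras-style network
# (imports assumed: random, numpy as np; the name is illustrative):
def experience_replay_sketch(self, batch_size):
    batch = random.sample(self.memory, batch_size)
    states = np.vstack([entry[0] for entry in batch])
    next_states = np.vstack([entry[3] for entry in batch])
    targets = self.model.predict(states)      # current Q-values as the baseline
    next_q = self.model.predict(next_states)  # bootstrapped next-state values
    for i, (_, action, reward, _) in enumerate(batch):
        targets[i][action] = reward + self.gamma * np.max(next_q[i])
    self.model.fit(states, targets, epochs=1, verbose=0)
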
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (summed expert votes for A and for B)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    most_recent_price_A = stock_market_data.get_most_recent_price(Company.A)
    most_recent_price_B = stock_market_data.get_most_recent_price(Company.B)
    order_list = []
    money_to_spend = portfolio.cash

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // most_recent_price_A
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * most_recent_price_A
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // most_recent_price_B
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: reward the previously taken actions with the portfolio value change
        diff = portfolio.get_value(stock_market_data) / self.last_portfolio_value - 1
        rec_vec = np.array([[-diff, -diff, -diff, -diff]])
        rec_vec[0][self.last_action_a] = diff
        rec_vec[0][2 + self.last_action_b] = diff
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, rec_vec)

    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    expert_votes = [
        self.experts[i].vote(stock_market_data[company])
        for i, company in enumerate(stock_market_data.get_companies())
    ]
    shares_owned = [
        portfolio.get_stock(company)
        for company in stock_market_data.get_companies()
    ]
    if State is StateExpertsOnly:
        state = StateExpertsOnly(expert_votes, portfolio.get_value(stock_market_data))
    elif State is StateExpertsCashShares:
        state = StateExpertsCashShares(expert_votes, portfolio.cash, shares_owned,
                                       portfolio.get_value(stock_market_data))
    else:
        raise RuntimeError

    if self.train_while_trading:
        # store the state as experience in memory
        if len(self.memory) > 0:
            self.memory[-1].state2 = state
        experience = Experience(state1=state)
        self.memory.append(experience)

        # train
        if len(self.memory) >= self.min_size_of_memory_before_training:
            if self.days_passed % self.train_each_n_days == 0:
                self.train()

        # determine the probability for random actions
        if not self.is_evolved_model:
            # first training episode: exponentially decaying exploration
            random_action_probability = (
                (self.epsilon ** self.days_passed)
                * (1.0 - self.random_action_min_probability)
                + self.random_action_min_probability
            )
        else:
            # subsequent training episodes
            random_action_probability = self.random_action_min_probability

        if self.training_occasions == 0 or random.random() < random_action_probability:
            actions = [Action.get_random(), Action.get_random()]
        else:
            # choose the actions by querying the network
            x = state.to_input()
            y = self.model.predict(numpy.array([x]))
            assert y.shape == (1, self.action_size)
            actions = Action.from_model_prediction(y[0])
        experience.actions = actions
    else:
        # not training -> always choose the actions by querying the network
        actions = Action.from_model_prediction(
            self.model.predict(numpy.array([state.to_input()]))[0])

    # translate the actions into orders
    orders: typing.List[Order] = []
    companies_with_actions_and_magnitudes = list(
        zip(list(Company), actions, Action.get_action_magnitudes(actions)))
    for comp, action, mag in companies_with_actions_and_magnitudes:
        if action.is_buy():
            cash_limit = portfolio.cash * mag
            date, stock_price = stock_market_data[comp].get_last()
            shares_amount = int(cash_limit // stock_price)  # whole shares only
            if shares_amount > 0:
                orders.append(Order(OrderType.BUY, comp, shares_amount))
        elif action.is_sell():
            shares_amount = portfolio.get_stock(comp) * mag
            if shares_amount > 0:
                orders.append(Order(OrderType.SELL, comp, shares_amount))

    self.days_passed += 1
    return orders

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    self.day += 1

    # Compute the current state
    order_list = []
    stock_data_a = stock_market_data[Company.A]
    stock_data_b = stock_market_data[Company.B]

    # Expert A votes on stock A, expert B on stock B
    expert_a = self.expert_a.vote(stock_data_a)
    expert_b = self.expert_b.vote(stock_data_b)

    state = np.array([[
        self.vote_num[expert_a],
        self.vote_num[expert_b],
    ]])

    action = self.decide_action(state)

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before
    if self.last_state is not None:
        reward = (portfolio.get_value(stock_market_data)
                  - self.last_portfolio_value) / self.last_portfolio_value
        self.memory.append((self.last_state, self.last_action, reward, state))
        self.train_network(self.batch_size)

    # Create orders for the current action; see the action table after this function.
    # What fraction of the stocks should be bought or sold
    percent_buy = 1
    percent_sell = 1

    if action == 0:
        # Buy A
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int((portfolio.cash * percent_buy / 2) // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
        # Buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int((portfolio.cash * percent_buy / 2) // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 1:
        # Buy A
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int(portfolio.cash * percent_buy // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
        # Sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 2:
        # Sell A
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
        # Buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int(portfolio.cash * percent_buy // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 3:
        # Sell A
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
        # Sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 4:
        # Sell A, hold B
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
    elif action == 5:
        # Hold A, sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 6:
        # Buy A, hold B
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int((portfolio.cash * percent_buy) // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
    elif action == 7:
        # Hold A, buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int((portfolio.cash * percent_buy) // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 8:
        # Hold A, hold B (zero-amount order keeps the order list non-empty)
        order_list.append(Order(OrderType.BUY, Company.B, 0))
    else:
        print("undefined action called: " + str(action))

    # Decrease epsilon for fewer random actions
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return order_list

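# The 3 x 3 action space used by the trader above, for reference
# (percent_buy = percent_sell = 1, i.e. every move is all-in):
#   0: buy A,  buy B      1: buy A,  sell B     2: sell A, buy B
#   3: sell A, sell B     4: sell A, hold B     5: hold A, sell B
#   6: buy A,  hold B     7: hold A, buy B      8: hold A, hold B
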
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    state_now = State(stock_market_data, self.expert_a, self.expert_b, self.last_state)
    if self.last_state is None:
        state_now.prev_state = state_now

    # Create actions and let probability decide between them
    rand_action = Action.create_random_action()
    model_action = self.action_by_model(state_now)
    action = np.random.choice([rand_action, model_action], 1,
                              p=[self.epsilon, 1 - self.epsilon])[0]
    if self.epsilon > self.epsilon_min:
        self.epsilon -= self.epsilon_decay

    # if training is deactivated, always follow the model
    if not self.train_while_trading:
        self.last_state = state_now
        return model_action.create_order_list(portfolio, stock_market_data)

    # for the first call of trade()
    if self.last_state is None:
        self.epsilon = 1.0
        self.last_state = state_now
        self.last_portfolio_value = portfolio.get_value(stock_market_data)
        self.last_action = action
        return action.create_order_list(portfolio, stock_market_data)

    # calculate the reward and create a tuple for memory
    reward = self.create_reward(portfolio, stock_market_data, state_now)
    memory_unit = (self.last_state, self.last_action, reward, state_now)
    self.memory.append(memory_unit)

    # train if there is enough experience
    if len(self.memory) > self.min_size_of_memory_before_training:
        # start training with a random batch
        batch = random.sample(self.memory, self.batch_size)
        x = np.empty((self.batch_size, self.state_size))
        y = np.empty((self.batch_size, self.action_size))
        for i, (s1, a, r, s2) in enumerate(batch):
            x[i] = s1.create_array_diff()
            y[i] = r
        self.model.train_on_batch(x, y)

    # save the old values for the next run
    self.last_state = state_now
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_action = action

    return action.create_order_list(portfolio, stock_market_data)

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    stock_data_a = stock_market_data[Company.A]
    vote_a = self.expert_a.vote(stock_data_a)
    stock_data_b = stock_market_data[Company.B]
    vote_b = self.expert_b.vote(stock_data_b)  # expert B votes on stock B

    curr_state = np.array([[
        # portfolio.cash,
        # portfolio.stocks[Company.A],
        # portfolio.stocks[Company.B],
        # stock_market_data.get_most_recent_price(Company.A),
        # stock_market_data.get_most_recent_price(Company.B),
        self.vote_map[vote_a],
        self.vote_map[vote_b],
    ]])

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before: <s, a, r, s'>
    trade_called_once_before = (self.last_state is not None)
    experience = (self.last_state, self.last_action_a, self.last_action_b,
                  self.last_portfolio_value, curr_state)
    self.memory.append(experience)

    if trade_called_once_before and self.min_size_of_memory_before_training <= len(self.memory):
        # draw self.batch_size random indices into the queue
        selected_mems_ind = random.sample(range(0, len(self.memory)), self.batch_size)
        selected_mems = [self.memory[i] for i in selected_mems_ind]

        # delete in descending index order so the remaining indices stay valid
        selected_mems_ind = sorted(selected_mems_ind)[::-1]
        for i in selected_mems_ind:
            del self.memory[i]

        # mem[:3] -> s, a_a, a_b; mem[3] -> portfolio value used as the target
        X = [np.asarray(mem[:3]) for mem in selected_mems]
        Y = [np.asarray(mem[3]) for mem in selected_mems]
        self.model.fit(X, Y, batch_size=self.batch_size)

    # Create actions for the current state and decrease epsilon for fewer random actions
    predicted_actions_matrix = self.model.predict(curr_state)
    model_choice = np.argmax(predicted_actions_matrix)

    order_list = []
    curr_action_a, curr_action_b, order_list = self.choose_actions(
        stock_data_a, stock_data_b, portfolio, order_list,
        epsilon=self.epsilon, model_choice=model_choice)

    curr_portfolio_value = portfolio.get_value(stock_market_data)
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # Save the created state, actions and portfolio value for the next call of trade()
    self.last_state = curr_state
    self.last_action_a = curr_action_a
    self.last_action_b = curr_action_b
    self.last_portfolio_value = curr_portfolio_value

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    def get_state():
        expert_vote_dict = {"BUY": 1, "SELL": 2, "HOLD": 0}
        stock_data_a = stock_market_data[Company.A]
        stock_data_b = stock_market_data[Company.B]
        vote_a = str(self.expert_a.vote(stock_data_a)).split(".")[1]
        vote_b = str(self.expert_b.vote(stock_data_b)).split(".")[1]
        vote_a = expert_vote_dict[vote_a]
        vote_b = expert_vote_dict[vote_b]
        state = np.array([portfolio.get_stock(Company.A),
                          portfolio.get_stock(Company.B),
                          portfolio.cash,
                          portfolio.get_value(stock_market_data),
                          vote_a,
                          vote_b])
        state = state.reshape((1, self.state_size))
        return state

    # Train the neural network on a single transition (no experience replay)
    def train_without_experience_replay(current_state, next_state, reward, action_index):
        expected_value = reward + self.gamma * np.amax(self.model.predict(next_state))
        expected_value_array = self.model.predict(current_state)
        expected_value_array[0][action_index] = expected_value
        self.model.fit(current_state, expected_value_array, epochs=1, verbose=0)

    # Train the neural network on a random mini-batch from memory
    def experience_replay():
        batch = random.sample(self.memory, self.batch_size)
        for state, action_index, reward, next_state in batch:
            expected_value_array = self.model.predict(state)
            expected_value = reward + self.gamma * np.amax(self.model.predict(next_state))
            expected_value_array[0][action_index] = expected_value
            self.model.fit(state, expected_value_array, epochs=1, verbose=0)

    # Create the action for the current state and decrease epsilon for fewer random actions
    def get_order_index(state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return random.randrange(self.action_size)
        return np.argmax(self.model.predict(state)[0])

    def get_orders(order_index):
        order_combination_dict = {0: ["BUY", "BUY"], 1: ["SELL", "SELL"],
                                  2: ["BUY", "SELL"], 3: ["SELL", "BUY"]}
        order_dict = {"BUY": 1, "SELL": 2}
        action_combination = order_combination_dict[order_index]
        order_a = order_dict[action_combination[0]]
        order_b = order_dict[action_combination[1]]
        order_a = Order(OrderType(order_a), Company.A, portfolio.get_stock(Company.A))
        order_b = Order(OrderType(order_b), Company.B, portfolio.get_stock(Company.B))
        order_list = [order_a, order_b]
        return order_list

    def follow_orders(orders):
        order_list = []
        company_list = stock_market_data.get_companies()
        for company, order in zip(company_list, orders):
            stock_data = stock_market_data[company]
            if order.type == OrderType.BUY:
                # buy as many stocks as possible
                stock_price = stock_data.get_last()[-1]
                amount_to_buy = int(portfolio.cash // stock_price)
                logger.debug(f"{self.get_name()}: Got order to buy {company}: "
                             f"{amount_to_buy} shares a {stock_price}")
                if amount_to_buy > 0:
                    order_list.append(Order(OrderType.BUY, company, amount_to_buy))
            elif order.type == OrderType.SELL:
                # sell as many stocks as possible of the company in this iteration
                amount_to_sell = portfolio.get_stock(company)
                logger.debug(f"{self.get_name()}: Got order to sell {company}: "
                             f"{amount_to_sell} shares available")
                if amount_to_sell > 0:
                    order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        return order_list

    def get_reward(current_portfolio_value, next_portfolio_value):
        if next_portfolio_value > current_portfolio_value:
            return 1
        elif next_portfolio_value < current_portfolio_value:
            return -2
        else:
            return -1

    self.last_state = get_state()
    self.memory_state.append(self.last_state)
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.memory_portfolio.append(self.last_portfolio_value)

    # Store the experience and train only if trade() was called at least once before
    if self.called_once:
        current_state = self.memory_state[0]
        next_state = self.memory_state[1]
        reward = get_reward(self.memory_portfolio[0], self.memory_portfolio[1])
        self.memory.append((current_state, self.memory_action, reward, next_state))
        if len(self.memory) >= self.min_size_of_memory_before_training:
            experience_replay()
        self.last_state = next_state

    # Save the created state and action for the next call of trade()
    action_index = get_order_index(self.last_state)
    action_list = get_orders(action_index)
    action_list = follow_orders(action_list)
    self.memory_action = action_index
    self.called_once = True

    return action_list

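# Note: get_orders() above leans on the framework's OrderType enum numbering
# (assumed here: OrderType(1) == OrderType.BUY and OrderType(2) == OrderType.SELL);
# constructing Order(OrderType.BUY, ...) directly would drop that hidden coupling.
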
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (one expert vote per company)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA],
        self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)

    # Create actions for the current state and decrease epsilon for fewer random actions
    if random.random() < self.epsilon:
        # use random actions for A and B
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use predicted actions
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    # train the neural network only if trade() was called at least once before
    if self.last_state is not None:
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1
        fut_reward_a = np.max(predictions[0][0:2])
        fut_reward_b = np.max(predictions[0][2:4])
        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a,
            -diff_a + self.gamma * fut_reward_a,
            diff_b + self.gamma * fut_reward_b,
            -diff_b + self.gamma * fut_reward_b
        ]])

        # Store the state as experience (memory) and replay
        # slides: <s, a, r, s'>
        # mine: <s, r>
        if self.min_size_of_memory_before_training <= len(self.memory):
            # take self.batch_size - 1 transitions from memory
            batch = random.sample(self.memory, self.batch_size - 1)
            # append the current (state, reward)
            batch.append((self.last_state, reward_vec))
            for x, y in batch:
                self.model.fit(x, y, batch_size=self.batch_size, verbose=0)
        else:
            # only train with the current (state, reward)
            self.model.fit(self.last_state, reward_vec, batch_size=1, verbose=0)
        self.memory.append((self.last_state, reward_vec))

    # Save the created state, actions and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 1 (summed expert votes for A -> buy or sell A?)
    # OUTPUT layer: 2 ([buy_A, sell_A])

    # Compute the current state
    stock_data_a = stock_market_data[Company.A]
    vote_a_for_a = self.expert_a.vote(stock_data_a)
    vote_b_for_a = self.expert_b.vote(stock_data_a)
    # stock_data_b = stock_market_data[Company.B]
    # vote_b = self.expert_a.vote(stock_data_b)

    state = np.array([[
        self.vote_map[vote_a_for_a] + self.vote_map[vote_b_for_a]
    ]])  # , self.vote_map[vote_b]]])

    # choose action 0 or 1
    predictions = self.model.predict(state)
    action = np.argmax(predictions)

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    order_list = []
    if action == 0:
        # buy all A
        amount_to_buy = portfolio.cash // current_price_a
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: the target vector is the same for both previous actions,
        # since BUY profits from a rising price and SELL from a falling one
        diff_a = current_price_a / self.last_price_a - 1
        assert self.last_action_a in (0, 1)
        rec_vec = np.array([[diff_a, -diff_a]])
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, rec_vec)

    self.last_state = state
    self.last_action_a = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a

    return order_list

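# The target [[diff_a, -diff_a]] encodes that BUY pays off exactly when the price
# rose (diff_a > 0) and SELL exactly when it fell, independent of the action that
# was actually taken; this is why the previous action only needs validating,
# not branching on.
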