예제 #1
0
def get_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    """
    Compute the reward for the previous action from the change in portfolio value.

    Args:
      self : object providing ``last_portfolio_value``
      portfolio : current Portfolio, valued via ``get_value``
      stock_market_data : StockMarketData passed to ``get_value``

    Returns:
      100 times the ratio of current to previous value if the value rose,
      -100 if it fell, and -20 if it is unchanged
    """
    # Value the portfolio once and reuse the result for both comparisons.
    current_portfolio_value = portfolio.get_value(stock_market_data)
    previous_portfolio_value = self.last_portfolio_value

    if previous_portfolio_value < current_portfolio_value:
        return 100 * (current_portfolio_value / previous_portfolio_value)
    if previous_portfolio_value > current_portfolio_value:
        return -100
    return -20
예제 #2
0
def get_order_list(self, portfolio: Portfolio,
                   stock_market_data: StockMarketData):
    """
    Translate the most recently selected action into orders for companies A and B.

    The entry ``self.actions[self.last_action]`` holds one value per company:
    a positive value buys with that fraction of the cash, a negative value
    sells the whole position, and zero produces no order.

    Args:
      self : object providing ``actions`` and ``last_action``
      portfolio : current Portfolio (cash and held stocks are read)
      stock_market_data : StockMarketData supplying the latest prices

    Returns:
      A list with at most one order per company
    """
    # Both latest prices are looked up before any order is built.
    latest_prices = (
        stock_market_data[Company.A].get_last()[-1],
        stock_market_data[Company.B].get_last()[-1],
    )

    orders = []
    for slot, company in enumerate((Company.A, Company.B)):
        fraction = self.actions[self.last_action][slot]
        if fraction > 0:
            # Every buy is sized against the full cash; an earlier buy is not deducted.
            share_count = int(portfolio.cash * fraction // latest_prices[slot])
            orders.append(Order(OrderType.BUY, company, share_count))
        elif fraction < 0:
            # Negative actions always mean "sell the whole position".
            orders.append(
                Order(OrderType.SELL, company, portfolio.get_stock(company)))

    return orders
예제 #3
0
    def test_update_sufficient_cash_reserve(self):
        """A buy order that the cash reserve can cover must be executed."""
        market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
        portfolio = Portfolio(20000, {Company.A: 200})
        buy_orders = [Order(OrderType.BUY, Company.A, 100)]

        # The cash reserve covers the trade volume, so the purchase goes through
        portfolio.update_with_order_list(market_data, buy_orders)

        expected_cash = 9724.0105
        expected_shares_a = 300
        self.assertEqual(portfolio.cash, expected_cash)
        self.assertEqual(portfolio.stocks[Company.A], expected_shares_a)
예제 #4
0
    def test_update_no_sufficient_cash_reserve(self):
        """A buy order that the cash reserve cannot cover must be ignored."""
        market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
        portfolio = Portfolio(0, {Company.A: 200})
        buy_orders = [Order(OrderType.BUY, Company.A, 100)]

        # With no cash at all the trade volume is too high; the portfolio stays untouched
        portfolio.update_with_order_list(market_data, buy_orders)

        unchanged_cash = 0
        unchanged_shares_a = 200
        self.assertEqual(portfolio.cash, unchanged_cash)
        self.assertEqual(portfolio.stocks[Company.A], unchanged_shares_a)
예제 #5
0
    def test_update_do_not_drop_below_cash_0(self):
        """Orders that are affordable one by one but not in total must not overdraw the cash."""
        market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
        portfolio = Portfolio(110)

        # Two single-share buys of A: each fits into 110 cash, both together do not
        # (the most recent price of stock A is 102.759895)
        single_share_buys = [
            Order(OrderType.BUY, Company.A, 1) for _ in range(2)
        ]
        portfolio.update_with_order_list(market_data, single_share_buys)

        # Only one of the two purchases is expected to be executed
        self.assertEqual(portfolio.cash, 7.240105)
        self.assertEqual(portfolio.stocks[Company.A], 1)
예제 #6
0
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Decide on the orders for the current trading day.

        The votes of both experts are encoded as the state, an action is
        selected through ``_act`` and, once a previous portfolio value exists,
        the transition from the previous call is stored through ``_remember``.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          The order list handed to ``_act``; may be empty, never None
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        orders = []
        opinion_a = self.expert_a.vote(stock_market_data[Company.A])
        opinion_b = self.expert_b.vote(stock_market_data[Company.B])

        # Encode both votes numerically to obtain the state vector
        state = np.array([self.vote2num[opinion_a], self.vote2num[opinion_b]])

        # Experience replay runs only in training mode and once the memory is large enough
        if self.train_while_trading and len(
                self.memory) > self.min_size_of_memory_before_training:
            self._replay()

        # `orders` is passed along so that `_act` can work on it (presumably it appends the orders)
        chosen_action = self._act(state, stock_market_data, portfolio, orders)

        if self.last_portfolio_value is not None:
            # Reward function R1: 1 if the portfolio value did not drop, otherwise 0.
            # (A relative-change reward "R2" was left disabled in the original code.)
            value_now = portfolio.get_value(stock_market_data)
            reward = 1 if self.last_portfolio_value <= value_now else 0

            # NOTE(review): the action stored together with last_state is the one
            # chosen just now for the current state - confirm this pairing is intended.
            self._remember(self.last_state, chosen_action, reward, state)

        # Keep state and portfolio value for the next call
        self.last_state = state
        self.last_portfolio_value = portfolio.get_value(stock_market_data)

        return orders
 def gen_reward(self, portfolio: Portfolio,
                stock_market_data: StockMarketData):
     """
     Compute the reward from the change in portfolio value since the last call.

     Args:
       portfolio : current Portfolio, valued via ``get_value``
       stock_market_data : StockMarketData passed to ``get_value``

     Returns:
       ``reward_factor`` times the ratio of current to previous value if the
       value rose, the negated product if it fell, and ``-reward_factor / 5``
       if it is unchanged
     """
     print('gen_reward')
     # Value the portfolio once instead of once per comparison.
     current_value = portfolio.get_value(stock_market_data)
     previous_value = self.last_portfolio_value

     if previous_value < current_value:
         return self.reward_factor * (current_value / previous_value)
     if previous_value > current_value:
         return -self.reward_factor * (current_value / previous_value)
     return -self.reward_factor / 5
예제 #8
0
    def test_get_value_with_date(self):
        """The portfolio value on a given date is the cash plus the held stocks valued on that date."""
        market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
        valuation_date = Date(2012, 1, 3)

        # (constructor arguments, expected value on the valuation date)
        cases = [
            ((100.0,), 100.0),
            ((100.0, {Company.A: 10}), 455.54107999999997),
            ((100.0, {Company.A: 10, Company.B: 10}), 2046.9924999999998),
        ]
        for constructor_args, expected_value in cases:
            portfolio = Portfolio(*constructor_args)
            self.assertEqual(
                portfolio.get_value(market_data, valuation_date),
                expected_value)
    def test_update_action_order_does_not_matter(self):
        """The result of an order list must not depend on the order of its entries."""
        market_data = StockMarketData([Company.A, Company.B],
                                      [Period.TESTING])

        # Two identically set up portfolios: no cash, 100 shares of A
        buy_first_portfolio = Portfolio(0, {Company.A: 100})
        sell_first_portfolio = Portfolio(0, {Company.A: 100})

        # The same two orders, once buy-then-sell and once sell-then-buy
        buy_then_sell = [
            Order(OrderType.BUY, Company.A, 50),
            Order(OrderType.SELL, Company.A, 100)
        ]
        sell_then_buy = [
            Order(OrderType.SELL, Company.A, 100),
            Order(OrderType.BUY, Company.A, 50)
        ]

        # Expected outcome for both: the 100 shares are sold, the buy is skipped for lack of cash
        buy_first_portfolio.update_with_order_list(market_data, buy_then_sell)
        sell_first_portfolio.update_with_order_list(market_data, sell_then_buy)

        # Both portfolios must end up in the same state
        self.assertEqual(buy_first_portfolio.cash, 10275.9895)
        self.assertEqual(buy_first_portfolio.cash, sell_first_portfolio.cash)
        self.assertEqual(buy_first_portfolio.stocks[Company.A], 0)
        self.assertEqual(buy_first_portfolio.stocks,
                         sell_first_portfolio.stocks)
예제 #10
0
    def __init__(self, expert_a, expert_b, stock_market_data: StockMarketData,
                 portfolio: Portfolio):
        """
        Capture the portfolio composition, the buy threshold and both expert votes.

        Args:
          expert_a : expert asked to vote on the stock data of company A
          expert_b : expert asked to vote on the stock data of company B
          stock_market_data : StockMarketData for evaluation
          portfolio : current Portfolio of this trader
        """
        # Composition of the current portfolio
        self.noStockA = portfolio.get_stock(Company.A)
        self.noStockB = portfolio.get_stock(Company.B)
        self.Cash = portfolio.cash

        # Minimum cash needed to buy one additional share: the higher of the two
        # most recent prices. According to the original author, a fixed value such
        # as 100 causes a zig-zag curve on the evaluation set, while the maximum does not.
        latest_price_a = stock_market_data.get_most_recent_price(Company.A)
        latest_price_b = stock_market_data.get_most_recent_price(Company.B)
        self.min_cash_to_buy = max(latest_price_a, latest_price_b)

        # Ask each expert for a vote on its company; any other company is rejected
        for company in stock_market_data.get_companies():
            if company == Company.A:
                vote_a = expert_a.vote(stock_market_data[Company.A])
                continue
            if company == Company.B:
                vote_b = expert_b.vote(stock_market_data[Company.B])
                continue
            assert False

        self.expertOpinionA = vote_a
        self.expertOpinionB = vote_b
예제 #11
0
    def make_order(self, company: Company, orderTyp: OrderType, percentage,
                   portfolio: Portfolio,
                   stock_market_data: StockMarketData) -> Order:
        """
        Create an order for one company.

        Args:
          company : the company for the order
          orderTyp : OrderType.BUY or OrderType.SELL
          percentage : factor applied to the cash (BUY) or to the number of held shares (SELL)
          portfolio : current Portfolio (cash and held stocks are read)
          stock_market_data : StockMarketData supplying the most recent price

        Returns:
          An Order over a whole number of shares, or None if that number is not positive

        Raises:
          AssertionError : if orderTyp is neither BUY nor SELL
        """
        if orderTyp == OrderType.BUY:
            stock_price = stock_market_data.get_most_recent_price(company)
            budget = portfolio.cash * percentage
            amount_to_buy = int(budget // stock_price)
            logger.debug(
                f"{self.get_name()}: Got best action to buy {company}: and bought {amount_to_buy}"
            )
            return Order(OrderType.BUY, company,
                         amount_to_buy) if amount_to_buy > 0 else None

        if orderTyp == OrderType.SELL:
            # Truncate to whole shares, as the BUY branch does; multiplying by a
            # fractional percentage would otherwise order fractions of a share.
            amount_to_sell = int(portfolio.get_stock(company) * percentage)
            logger.debug(
                f"{self.get_name()}: Got best action to sell {company}: and sold {amount_to_sell}"
            )
            return Order(OrderType.SELL, company,
                         amount_to_sell) if amount_to_sell > 0 else None

        # Raised explicitly so the check also holds when assertions are disabled (-O)
        raise AssertionError(f"Unsupported order type: {orderTyp}")
예제 #12
0
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Decide on the orders for the current trading day.

        In training mode, and once a previous state exists, the transition from
        the previous call is appended to the memory and the network is trained
        before the next action is selected.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          The result of ``get_order_list`` for the newly selected action
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        state_now = get_state(self, stock_market_data)

        can_learn = self.train_while_trading and self.last_state is not None
        if can_learn:
            # The reward is computed while last_portfolio_value still holds the previous value
            observed_reward = get_reward(self, portfolio, stock_market_data)
            transition = (self.last_state, self.last_action, observed_reward,
                          state_now)
            self.memory.append(transition)
            train_neural_net(self)

        chosen_index = get_index_for_action_to_execute(self, state_now)

        # Remember state, action and portfolio value for the next call
        self.last_state = state_now
        self.last_action = chosen_index
        self.last_portfolio_value = portfolio.get_value(stock_market_data)

        return get_order_list(self, portfolio, stock_market_data)
예제 #13
0
    def __follow_expert_vote(self, company: Company, stock_data: StockData,
                             vote: Vote, buy_weight: float,
                             portfolio: Portfolio, order_list: List[Order]):
        """
        Turn an expert vote for one company into an order appended to order_list.

        :param company: Company the vote refers to
        :param stock_data: StockData supplying the latest price
        :param vote: Vote of the expert
        :param buy_weight: fraction of the cash to spend on a buy; must be in (0, 1]
        :param portfolio: Portfolio (cash and held stocks are read)
        :param order_list: List[Order] that receives the resulting order
        :return: None (writes result to order_list)
        """
        assert company is not None
        assert stock_data is not None
        assert vote is not None
        assert portfolio is not None
        assert order_list is not None

        # NOTE(review): HOLD is handled exactly like BUY here, so the final
        # "hold" branch is only reached for an unexpected vote value - confirm intent.
        wants_to_buy = vote is Vote.BUY or vote is Vote.HOLD
        if wants_to_buy:
            assert buy_weight is not None and 0 < buy_weight <= 1.0
            stock_price = stock_data.get_last()[-1]
            # Whole shares affordable with the weighted share of the cash
            amount_to_buy = int(buy_weight * portfolio.cash // stock_price)
            logger.debug(
                f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
            )
            if amount_to_buy > 0:
                order_list.append(Order(OrderType.BUY, company, amount_to_buy))
            return

        if vote == Vote.SELL:
            # Sell the complete position
            amount_to_sell = portfolio.get_stock(company)
            logger.debug(
                f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
            )
            if amount_to_sell > 0:
                order_list.append(
                    Order(OrderType.SELL, company, amount_to_sell))
            return

        # Nothing to trade
        assert vote == Vote.HOLD
        logger.debug(f"{self.get_name()}: Got vote to hold {company}")
    def get_orders(self, stock_market_data: StockMarketData,
                   portfolio: Portfolio):
        """
        Build one order item per company from the most recently selected action.

        For each company the latest price, the held shares and the action value
        are gathered and passed to ``get_order_item``; its result is collected
        as returned.

        :param stock_market_data: StockMarketData supplying the latest prices
        :param portfolio: Portfolio (cash and held stocks are read)
        :return: list with the two results of ``get_order_item``, company A first
        """
        collected = []
        for slot, company in enumerate((Company.A, Company.B)):
            latest_price = stock_market_data[company].get_last()[-1]
            held_shares = portfolio.get_stock(company)
            action_value = self.actions[self.last_action][slot]
            # Whole shares affordable with the action's share of the cash
            affordable = int(action_value * portfolio.cash // latest_price)
            collected.append(
                self.get_order_item(action_value, affordable, held_shares,
                                    company))
        return collected
예제 #15
0
    def test_trader_no_stock(self):
        """With no company in the market data the trader must return an empty order list."""
        buy_and_hold = BuyAndHoldTrader('test_color', 'test_name')

        cash_only_portfolio = Portfolio(1000)
        empty_market = StockMarketData([], [Period.TESTING])
        orders = buy_and_hold.trade(cash_only_portfolio, empty_market)

        # The result is a list, never None, and contains no order
        self.assertIsNotNone(orders)
        self.assertEqual(len(orders), 0)
    def test_create_portfolio(self):
        """Portfolios can be created empty, with cash only, and with cash plus stocks."""
        # No arguments: no cash, no stocks
        empty = Portfolio()
        self.assertIsNotNone(empty)
        self.assertEqual(empty.cash, 0)
        self.assertEqual(empty.stocks, {})

        # Cash only
        cash_only = Portfolio(1000.0)
        self.assertIsNotNone(cash_only)
        self.assertEqual(cash_only.cash, 1000.0)
        self.assertEqual(cash_only.stocks, {})

        # Cash and two stock positions
        holdings = {Company.A: 10, Company.B: 50}
        mixed = Portfolio(1000.0, holdings)
        self.assertIsNotNone(mixed)
        self.assertEqual(mixed.cash, 1000.0)
        self.assertEqual(len(mixed.stocks.keys()), 2)
        self.assertEqual(mixed.stocks[Company.A], 10)
        self.assertEqual(mixed.stocks[Company.B], 50)
예제 #17
0
    def test_trade_one_stock(self):
        """With a single company the trader must place exactly one buy order of 9 shares."""
        buy_and_hold = BuyAndHoldTrader('test_color', 'test_name')

        cash_only_portfolio = Portfolio(1000)
        single_company_market = StockMarketData([Company.A], [Period.TESTING])
        orders = buy_and_hold.trade(cash_only_portfolio, single_company_market)

        self.assertIsNotNone(orders)
        self.assertEqual(len(orders), 1)

        # The only order buys 9 shares of company A
        only_order = orders[0]
        self.assertEqual(only_order.type, OrderType.BUY)
        self.assertEqual(only_order.company, Company.A)
        self.assertEqual(only_order.amount, 9)
예제 #18
0
    def run(self,
            data: StockMarketData,
            traders: List[ITrader],
            offset: int = 0) -> Dict[ITrader, Dict[Date, Portfolio]]:
        """
        Simulates the stock exchange day by day for the given traders.

        Every trader starts on the first traded day with a portfolio built from the exchange's cash value. On each
        trade day except the last one, the trader's orders are applied to a copy of that day's portfolio and the
        result is stored as the portfolio of the following trade day.

        :param data: The complete stock market data
        :param traders: A list of all traders
        :param offset: The number of trading days which are skipped before trading starts
        :return: The main data structure, which stores one portfolio per trade day for each trader
        """
        assert data is not None
        assert traders is not None

        trade_dates = data.get_trade_days()
        assert trade_dates  # must not be empty
        assert 0 <= offset < len(trade_dates)  # offset must be feasible
        self.__complete_stock_market_data = data

        # Main data structure of type Dict[ITrader, Dict[Date, Portfolio]]:
        # per trader, one portfolio per day, seeded with the first traded day
        portfolios_by_trader = {}
        for trader in traders:
            portfolios_by_trader[trader] = {
                trade_dates[offset]: Portfolio(self.__cash)
            }
        self.__trader_portfolios = portfolios_by_trader

        # The last day is excluded because no trading happens on it
        final_tick = len(trade_dates) - 1
        for tick in range(offset, final_tick):
            logger.debug(
                f"Stock Exchange: Current tick '{tick}' means today is '{trade_dates[tick]}'"
            )
            if tick % 365 == 1:
                print(trade_dates[tick])

            # Traders only get to see the market data up to and including today
            visible_market_data = data.deepcopy_first_n_items(tick + 1)
            today = trade_dates[tick]
            tomorrow = trade_dates[tick + 1]

            for trader in traders:
                history = self.__trader_portfolios[trader]
                portfolio_today = history[today]

                # The trader works on a copy of today's portfolio (to prevent cheating)
                orders = trader.trade(
                    copy.deepcopy(portfolio_today), visible_market_data)

                # Applying the orders to another copy yields tomorrow's portfolio
                portfolio_tomorrow = copy.deepcopy(portfolio_today)
                portfolio_tomorrow.update_with_order_list(
                    visible_market_data, orders)
                history[tomorrow] = portfolio_tomorrow

        return self.__trader_portfolios
예제 #19
0
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Generate the orders to be placed on the "stock market".

        In training mode the transition from the previous call is stored and,
        once the memory is large enough, replayed before the next action is
        chosen via ``get_best_action``; epsilon is decreased afterwards. In
        testing mode the action with the highest model prediction is used.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          The result of ``create_order_list`` for the chosen action
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        # Current state and current portfolio value
        state_ = state_maker(self.expert_a, self.expert_b, stock_market_data,
                             portfolio)
        curr_state = state_.create_numerical_state(self.state_size)
        curr_portfolio_value = portfolio.get_value(stock_market_data)

        # Training or testing mode (plain truth test instead of `== True`)
        if self.train_while_trading:
            # Store the experience and train only if trade() was called at least once before
            if self.last_state is not None:
                reward = self.get_rewards(self.last_portfolio_value,
                                          curr_portfolio_value)
                self.remember(self.last_state, self.last_action_a, reward,
                              curr_state)
                if len(self.memory) > self.min_size_of_memory_before_training:
                    self.replay()
            # Choose the action for the current state, then decrease epsilon for fewer random actions
            curr_action_a = self.get_best_action(curr_state)
            final_order = self.create_order_list(curr_action_a, portfolio,
                                                 stock_market_data)
            self.decrease_epsilon()
        else:
            # Take the action with the highest predicted value from the neural net
            curr_action_a = np.argmax(self.model.predict(curr_state)[0])
            final_order = self.create_order_list(curr_action_a, portfolio,
                                                 stock_market_data)

        # Save state, action and portfolio value for the next call of trade()
        self.last_state = curr_state
        self.last_action_a = curr_action_a
        self.last_portfolio_value = curr_portfolio_value

        return final_order
예제 #20
0
    def test_trade_vote_down_stock_a(self):
        """On the first four items of the test data the trusting trader must sell both positions completely."""
        perfect_a = PerfectExpert(Company.A)
        perfect_b = PerfectExpert(Company.B)
        trusting = TrustingTrader(perfect_a, perfect_b, 'test_color', 'test_name')

        portfolio = Portfolio(1000.0, {Company.A: 10, Company.B: 10})
        full_market = StockMarketData([Company.A, Company.B], [Period.TESTING])
        first_four_items = full_market.deepcopy_first_n_items(4)
        orders = trusting.trade(portfolio, first_four_items)

        self.assertIsNotNone(orders)
        self.assertEqual(len(orders), 2)

        # One sell order over all 10 shares per company, company A first
        for position, company in enumerate((Company.A, Company.B)):
            self.assertEqual(orders[position].type, OrderType.SELL)
            self.assertEqual(orders[position].company, company)
            self.assertEqual(orders[position].amount, 10.0)
예제 #21
0
    def create_reward(self, portfolio: Portfolio,
                      stock_market_data: StockMarketData, state_now: State):
        """
        Build the reward vector for the last action.

        The entry of the last action is -1 if the portfolio value dropped, 1 if
        it rose and 0 if it is unchanged; all other entries start at 0. The
        model output for ``state_now``, scaled by ``gamma``, is then added to
        every entry.

        :param portfolio: Portfolio valued via ``get_value``
        :param stock_market_data: StockMarketData passed to ``get_value``
        :param state_now: State passed to ``run_model``
        :return: element-wise sum of the reward vector and the scaled model output
        """
        new_portfolio_value = portfolio.get_value(stock_market_data)
        index_of_action = Action.get_id_from_action(self.last_action)

        # Sign of the value change, expressed as loss = old value - new value
        loss = self.last_portfolio_value - new_portfolio_value
        if loss == 0:
            reward = 0
        elif loss > 0:
            reward = -1
        else:
            reward = 1

        # One slot per action (9 in total); only the last action receives the reward
        reward_array = np.zeros(9)
        reward_array[index_of_action] = reward

        # Add the scaled model output for the current state to all slots.
        # (The original also kept a disabled variant that updated only the last action's slot.)
        q_next = self.run_model(state_now)
        weighted_q_next = q_next * self.gamma
        return np.sum([reward_array, weighted_q_next], axis=0)
예제 #22
0
    def __follow_action(self, company: Company, stock_data: StockData,
                        vote: Vote, portfolio: Portfolio,
                        order_list: List[Order]):
        """
        Helper that converts a vote for one company into an order.

        A BUY vote spends all cash on whole shares at the latest price, a SELL
        vote sells the complete position, and a HOLD vote only logs a message.

        :param company: Company the vote refers to
        :param stock_data: StockData supplying the latest price
        :param vote: Vote to follow
        :param portfolio: Portfolio (cash and held stocks are read)
        :param order_list: List[Order] that receives the resulting order
        :return: None (writes result to order_list)
        """
        assert company is not None
        assert stock_data is not None
        assert vote is not None
        assert portfolio is not None
        assert order_list is not None

        if vote == Vote.BUY:
            # Buy as many whole shares as the cash allows
            stock_price = stock_data.get_last()[-1]
            amount_to_buy = int(portfolio.cash // stock_price)
            logger.debug(
                f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
            )
            if amount_to_buy > 0:
                buy_order = Order(OrderType.BUY, company, amount_to_buy)
                order_list.append(buy_order)
            return

        if vote == Vote.SELL:
            # Sell every share currently held
            amount_to_sell = portfolio.get_stock(company)
            logger.debug(
                f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
            )
            if amount_to_sell > 0:
                sell_order = Order(OrderType.SELL, company, amount_to_sell)
                order_list.append(sell_order)
            return

        # Anything else must be HOLD, which places no order
        assert vote == Vote.HOLD
        logger.debug(f"{self.get_name()}: Got vote to hold {company}")
예제 #23
0
    def __create_order_for_company(
            self, company: Company, portfolio: Portfolio, vote: Vote,
            stock_market_data: StockMarketData) -> Order:
        """
        Create the order for one company according to the given vote.

        :param company: Company to trade
        :param portfolio: Portfolio (cash and held stocks are read)
        :param vote: Vote deciding between selling, buying and doing nothing
        :param stock_market_data: StockMarketData supplying the most recent price
        :return: a SELL order over the whole position, a BUY order over the affordable
                 whole shares, or None if there is nothing to trade
        """
        if vote == Vote.SELL:
            held_shares = portfolio.get_stock(company)
            if held_shares > 0:
                return Order(OrderType.SELL, company, held_shares)
            return None

        if vote == Vote.BUY:
            stock_price = stock_market_data.get_most_recent_price(company)
            # When type_a equals type_b only half of the cash (floored) is used
            # for this company; otherwise all of it
            if self.type_a == self.type_b:
                budget = portfolio.cash // 2
            else:
                budget = portfolio.cash
            affordable_shares = int(budget // stock_price)
            if affordable_shares > 0:
                return Order(OrderType.BUY, company, affordable_shares)

        return None
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Decide on the orders for the current trading day.

        A State is built from the portfolio, both expert votes and both stock
        data series. In training mode, once a previous state exists, the reward
        for the previous action is stored and the model is trained before the
        next action is selected.

        :param portfolio: current Portfolio of this trader
        :param stock_market_data: StockMarketData for evaluation
        :return: the result of ``get_orders`` for the newly selected action
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        vote_a = self.expert_a.vote(stock_market_data[Company.A])
        vote_b = self.expert_b.vote(stock_market_data[Company.B])
        state = State(portfolio, vote_a, vote_b,
                      stock_market_data[Company.A],
                      stock_market_data[Company.B])
        nn_input = state.get_nn_input_state()

        can_learn = self.train_while_trading and self.last_state is not None
        if can_learn:
            # The reward is computed while last_portfolio_value still holds the previous value
            self.update_memory(
                self.gen_reward(portfolio, stock_market_data), nn_input)
            self.train_model()

        # Remember action, state and portfolio value for the next call
        self.last_action = self.get_action_idx(nn_input)
        self.last_state = nn_input
        self.last_portfolio_value = portfolio.get_value(stock_market_data)

        return self.get_orders(stock_market_data, portfolio)
예제 #25
0
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Decide on the orders for the current trading day.

        In training mode, once a previous action exists, the transition from
        the previous call is appended to the memory and experience replay runs
        as soon as the memory is large enough. Epsilon is decayed on every call,
        bounded below by ``epsilon_min``.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation
        Returns:
          The result of ``mapping_action_order`` for the selected action
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        state_now = self.current_state(stock_market_data)
        value_now = portfolio.get_value(stock_market_data)

        # Experience is only stored if trade() was called before and training is enabled
        if self.last_action is not None and self.train_while_trading:
            observed_reward = self.reward(value_now)
            transition = (self.last_state, self.last_action, observed_reward,
                          state_now)
            self.memory.append(transition)

            if len(self.memory) > self.min_size_of_memory_before_training:
                self.experience_replay()

        # Select the action, then decay epsilon for fewer random actions
        chosen_action = self.get_action(state_now)
        decayed_epsilon = self.epsilon * self.epsilon_decay
        self.epsilon = max(self.epsilon_min, decayed_epsilon)

        # Remember state, action and portfolio value for the next call of trade()
        self.last_state = state_now
        self.last_action = chosen_action
        self.last_portfolio_value = value_now

        # Convert the action into an order list
        return self.mapping_action_order(chosen_action, portfolio,
                                         stock_market_data)
예제 #26
0
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Generate the orders to be placed on the "stock market" and train the model.

        The state is a 1x2 array holding, per company, the sum of both experts'
        mapped votes. The first four entries of the model's prediction row are
        read as two pairs: entries 0-1 decide the action for company A, entries
        2-3 the action for company B (index 0 = buy, index 1 = sell). From the
        second call on, the model is fitted on the previous state with a target
        derived from the relative change in portfolio value.

        Args:
          portfolio : current Portfolio of this traders
          stock_market_data : StockMarketData for evaluation

        Returns:
          A list of orders, may be empty never None
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        # Compute the current state: both experts vote on both companies' stock data
        stock_data_A = stock_market_data[Company.A]
        expertA_voteA = self.expert_a.vote(stock_data_A)
        expertB_voteA = self.expert_b.vote(stock_data_A)
        stock_data_B = stock_market_data[Company.B]
        expertA_voteB = self.expert_a.vote(stock_data_B)
        expertB_voteB = self.expert_b.vote(stock_data_B)

        # One row with two features: summed mapped votes for A, summed mapped votes for B
        state = np.array([[
            self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
            self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
        ]])

        # Per company, pick the index (0 or 1) with the larger predicted value
        predictions = self.model.predict(state)
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])

        most_recent_price_A = stock_market_data.get_most_recent_price(
            Company.A)
        most_recent_price_B = stock_market_data.get_most_recent_price(
            Company.B)
        order_list = []

        # Cash budget shared by both companies; a buy of A reduces what is left for B
        money_to_spend = portfolio.cash

        # Company A
        if action_A == 0:
            # Buy as many shares of A as the budget allows.
            # NOTE(review): the result of `//` is not converted to int, so the
            # amount may be a float if cash or price are floats - confirm Order accepts that.
            amount_to_buy = money_to_spend // most_recent_price_A
            if amount_to_buy > 0:
                money_to_spend -= amount_to_buy * most_recent_price_A
                order_list.append(
                    Order(OrderType.BUY, Company.A, amount_to_buy))
        elif action_A == 1:
            # Sell all shares of A
            amount_to_sell = portfolio.get_stock(Company.A)
            if amount_to_sell > 0:
                order_list.append(
                    Order(OrderType.SELL, Company.A, amount_to_sell))
        else:
            assert False

        # Company B
        if action_B == 0:
            # Buy as many shares of B as the remaining budget allows
            amount_to_buy = money_to_spend // most_recent_price_B
            if amount_to_buy > 0:
                order_list.append(
                    Order(OrderType.BUY, Company.B, amount_to_buy))
        elif action_B == 1:
            # Sell all shares of B
            amount_to_sell = portfolio.get_stock(Company.B)
            if amount_to_sell > 0:
                order_list.append(
                    Order(OrderType.SELL, Company.B, amount_to_sell))
        else:
            assert False

        if self.last_state is not None:
            # Train on the previous step: diff is the relative change of the
            # portfolio value since the previous call
            diff = (portfolio.get_value(stock_market_data) /
                    self.last_portfolio_value - 1)
            # Target row: +diff at the two previously chosen actions, -diff at the other two
            rec_vec = np.array([[-diff, -diff, -diff, -diff]])
            rec_vec[0][self.last_action_a] = diff
            rec_vec[0][2 + self.last_action_b] = diff

            self.model.fit(self.last_state, rec_vec)

        # Remember state, actions and portfolio value for the next call
        self.last_state = state
        self.last_action_a = action_A
        self.last_action_b = action_B
        self.last_portfolio_value = portfolio.get_value(stock_market_data)

        return order_list
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Generate this round's orders and train the model on the previous round.

        The state is the pair of mapped expert votes (expert_a on company A,
        expert_b on company B). The model output is read as four values:
        indices 0-1 are [buy_A, sell_A] and indices 2-3 are [buy_B, sell_B].
        Actions are picked epsilon-greedily, and epsilon is multiplied by
        epsilon_decay on every call while it is above epsilon_min.

        From the second call on, a training target is built for the previous
        state, fitted (together with a random replay batch once the memory is
        large enough) and stored in the replay memory.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          A list of Order instances, may be empty, never None
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        # State: one mapped expert vote per company.
        vote_a = self.expert_a.vote(stock_market_data[Company.A])
        vote_b = self.expert_b.vote(stock_market_data[Company.B])
        state = np.array([[self.vote_map[vote_a], self.vote_map[vote_b]]])

        # Output layout: [buy_A, sell_A, buy_B, sell_B].
        predictions = self.model.predict(state)

        # Epsilon-greedy choice per company: 0 = buy, 1 = sell.
        if random.random() < self.epsilon:
            action_a = random.randrange(2)
            action_b = random.randrange(2)
        else:
            action_a = np.argmax(predictions[0][0:2])
            action_b = np.argmax(predictions[0][2:4])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        current_price_a = stock_market_data.get_most_recent_price(Company.A)
        current_price_b = stock_market_data.get_most_recent_price(Company.B)

        # Company A is handled first: cash spent on A is no longer available
        # for B. Proceeds of a sell order are not added to the budget.
        money_to_spend = portfolio.cash
        order_list = []
        decisions = (
            (Company.A, action_a, current_price_a),
            (Company.B, action_b, current_price_b),
        )
        for company, action, price in decisions:
            if action == 0:
                # Buy as many shares as the remaining budget allows.
                amount_to_buy = money_to_spend // price
                if amount_to_buy > 0:
                    money_to_spend -= amount_to_buy * price
                    order_list.append(
                        Order(OrderType.BUY, company, amount_to_buy))
            elif action == 1:
                # Sell the whole position.
                amount_to_sell = portfolio.get_stock(company)
                if amount_to_sell > 0:
                    order_list.append(
                        Order(OrderType.SELL, company, amount_to_sell))
            else:
                assert False, "action must be 0 (buy) or 1 (sell)"

        # Train only if trade() has been called at least once before.
        if self.last_state is not None:
            # Relative price change since the previous call.
            diff_a = current_price_a / self.last_price_a - 1
            diff_b = current_price_b / self.last_price_b - 1
            # Best predicted value per company for the current state.
            fut_reward_a = np.max(predictions[0][0:2])
            fut_reward_b = np.max(predictions[0][2:4])
            # Target for the previous state: buying is rewarded with the
            # price change, selling with its negation, each plus the
            # discounted best predicted value.
            reward_vec = np.array([[
                diff_a + self.gamma * fut_reward_a,
                -diff_a + self.gamma * fut_reward_a,
                diff_b + self.gamma * fut_reward_b,
                -diff_b + self.gamma * fut_reward_b,
            ]])

            # Experience replay over stored (state, target) pairs.
            # random.sample raises ValueError for a negative size or one
            # larger than the memory, so the size is checked as well and the
            # single-sample path is used when replay is not possible.
            sample_size = self.batch_size - 1
            memory_size = len(self.memory)
            can_replay = (
                self.min_size_of_memory_before_training <= memory_size
                and 0 <= sample_size <= memory_size)
            if can_replay:
                batch = random.sample(self.memory, sample_size)
                batch.append((self.last_state, reward_vec))
                for x, y in batch:
                    self.model.fit(x, y, batch_size=self.batch_size, verbose=0)
            else:
                # Only train with the current (state, target) pair.
                self.model.fit(self.last_state,
                               reward_vec,
                               batch_size=1,
                               verbose=0)

            self.memory.append((self.last_state, reward_vec))

        # Remember state, actions, portfolio value and prices for the next call.
        self.last_state = state
        self.last_action_a = action_a
        self.last_action_b = action_b
        self.last_portfolio_value = portfolio.get_value(stock_market_data)
        self.last_price_a = current_price_a
        self.last_price_b = current_price_b
        return order_list
    def test_get_value_without_date(self):
        # Checks Portfolio.get_value when only the market data is passed,
        # using market data built for the TESTING period.
        market = StockMarketData([Company.A, Company.B], [Period.TESTING])

        # Each case: (Portfolio constructor arguments, expected value).
        cases = [
            ((), 0),
            ((100.0, ), 100.0),
            ((100.0, {Company.A: 10}), 1127.59895),
            ((100.0, {Company.A: 10, Company.B: 10}), 2416.5398400000004),
        ]
        for ctor_args, expected_value in cases:
            holdings = Portfolio(*ctor_args)
            self.assertEqual(holdings.get_value(market), expected_value)
    def trade(self, portfolio: Portfolio,
              stock_market_data: StockMarketData) -> List[Order]:
        """
        Pick one action for the current market state and turn it into orders.

        When train_while_trading is off, the action with the highest model
        output is used, only last_state is updated, and nothing is learned.
        Otherwise the action is chosen epsilon-greedily, epsilon is decayed
        while it is above epsilon_min, the previous transition is stored in
        the replay memory and experience replay runs once the memory is
        larger than min_size_of_memory_before_training.

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          The result of action_to_order for the chosen action
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        state = self.compute_state(stock_market_data)

        # Inference only: act greedily and return right away.
        if not self.train_while_trading:
            self.last_state = state
            q_values = self.model.predict(state)
            greedy_action = np.argmax(q_values[0])
            return self.action_to_order(greedy_action, portfolio,
                                        stock_market_data)

        # From here on training is enabled.
        # Explore with probability epsilon, otherwise exploit the model.
        explore = np.random.rand() <= self.epsilon
        if explore:
            action = np.random.randint(self.action_size, size=1)[0]
        else:
            q_values = self.model.predict(state)
            action = np.argmax(q_values[0])

        # Decay epsilon until it reaches its floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # Learn from the previous step, if there was one.
        if self.last_state is not None:
            current_value = portfolio.get_value(stock_market_data)
            reward = self.get_reward(current_value, self.last_portfolio_value)

            # Transition: <last state, last action, reward, current state>.
            transition = [self.last_state, self.last_action, reward, state]
            self.memory.append(transition)

            if len(self.memory) > self.min_size_of_memory_before_training:
                self.experienceReplay(self.batch_size)

        orders = self.action_to_order(action, portfolio, stock_market_data)

        # Remember this step for the next call.
        self.last_state = state
        self.last_action = action
        self.last_portfolio_value = portfolio.get_value(stock_market_data)

        return orders
예제 #30
0
    def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
        """
        Generate this round's orders greedily and train on the previous round.

        The state holds two values: the sum of both experts' mapped votes for
        company A and the same sum for company B. The model output is read as
        four values: indices 0-1 are [buy_A, sell_A] and indices 2-3 are
        [buy_B, sell_B]. The action with the larger value is always taken per
        company; there is no random exploration in this method.

        From the second call on, the model is fitted once on the previous
        state with a target built from the relative price changes and the
        current predictions (non-positive predictions count as 0).

        Args:
          portfolio : current Portfolio of this trader
          stock_market_data : StockMarketData for evaluation

        Returns:
          A list of Order instances, may be empty, never None
        """
        assert portfolio is not None
        assert stock_market_data is not None
        assert stock_market_data.get_companies() == [Company.A, Company.B]

        # State: per company, the sum of both experts' mapped votes.
        stock_data_a = stock_market_data[Company.A]
        vote_a_on_a = self.expert_a.vote(stock_data_a)
        vote_b_on_a = self.expert_b.vote(stock_data_a)
        stock_data_b = stock_market_data[Company.B]
        vote_a_on_b = self.expert_a.vote(stock_data_b)
        vote_b_on_b = self.expert_b.vote(stock_data_b)

        state = np.array([[
            self.vote_map[vote_a_on_a] + self.vote_map[vote_b_on_a],
            self.vote_map[vote_a_on_b] + self.vote_map[vote_b_on_b],
        ]])

        # Output layout: [buy_A, sell_A, buy_B, sell_B]; 0 = buy, 1 = sell.
        predictions = self.model.predict(state)
        action_a = np.argmax(predictions[0][0:2])
        action_b = np.argmax(predictions[0][2:4])

        current_price_a = stock_market_data.get_most_recent_price(Company.A)
        current_price_b = stock_market_data.get_most_recent_price(Company.B)

        # Company A is handled first: cash spent on A is no longer available
        # for B. Proceeds of a sell order are not added to the budget.
        money_to_spend = portfolio.cash
        order_list = []
        decisions = (
            (Company.A, action_a, current_price_a),
            (Company.B, action_b, current_price_b),
        )
        for company, action, price in decisions:
            if action == 0:
                # Buy as many shares as the remaining budget allows.
                amount_to_buy = money_to_spend // price
                if amount_to_buy > 0:
                    money_to_spend -= amount_to_buy * price
                    order_list.append(Order(OrderType.BUY, company, amount_to_buy))
            elif action == 1:
                # Sell the whole position.
                amount_to_sell = portfolio.get_stock(company)
                if amount_to_sell > 0:
                    order_list.append(Order(OrderType.SELL, company, amount_to_sell))
            else:
                assert False, "action must be 0 (buy) or 1 (sell)"

        # Train only if trade() has been called at least once before.
        if self.last_state is not None:
            # Relative price change since the previous call.
            diff_a = current_price_a / self.last_price_a - 1
            diff_b = current_price_b / self.last_price_b - 1

            # Each output contributes its own current prediction as future
            # reward; anything that is not strictly positive counts as 0.
            q_values = predictions[0]
            fut_rewards = [
                q_values[i] if q_values[i] > 0 else 0 for i in range(4)
            ]

            # Buying is rewarded with the price change, selling with its
            # negation, each plus the discounted future reward.
            reward_vec = np.array([[
                diff_a + self.gamma * fut_rewards[0],
                -diff_a + self.gamma * fut_rewards[1],
                diff_b + self.gamma * fut_rewards[2],
                -diff_b + self.gamma * fut_rewards[3],
            ]])
            self.model.fit(self.last_state, reward_vec, verbose=0)

        # Remember state, actions, portfolio value and prices for the next call.
        self.last_state = state
        self.last_action_a = action_a
        self.last_action_b = action_b
        self.last_portfolio_value = portfolio.get_value(stock_market_data)
        self.last_price_a = current_price_a
        self.last_price_b = current_price_b

        return order_list