def get_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    current_portfolio_value = portfolio.get_value(stock_market_data)
    if self.last_portfolio_value < current_portfolio_value:
        return 100 * (current_portfolio_value / self.last_portfolio_value)
    elif self.last_portfolio_value > current_portfolio_value:
        return -100
    return -20

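# The helper below is not part of the framework; it is a minimal, self-contained
# sketch that restates the reward rule above on plain floats, to make the scale
# explicit (illustrative name, runnable as-is).
def reward_example(last_value: float, current_value: float) -> float:
    if last_value < current_value:
        return 100 * (current_value / last_value)  # gains scale with the ratio
    elif last_value > current_value:
        return -100                                # every loss costs the same
    return -20                                     # standing still is mildly penalized

assert reward_example(1000.0, 1250.0) == 125.0  # 25% gain -> reward 125
assert reward_example(1000.0, 900.0) == -100    # any loss -> flat -100
assert reward_example(1000.0, 1000.0) == -20    # no change -> -20
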
def test_get_value_with_date(self):
    stock_market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
    date = Date(2012, 1, 3)

    portfolio = Portfolio(100.0)
    self.assertEqual(portfolio.get_value(stock_market_data, date), 100.0)

    portfolio = Portfolio(100.0, {Company.A: 10})
    self.assertEqual(portfolio.get_value(stock_market_data, date), 455.54107999999997)

    portfolio = Portfolio(100.0, {Company.A: 10, Company.B: 10})
    self.assertEqual(portfolio.get_value(stock_market_data, date), 2046.9924999999998)

def test_get_value_without_date(self):
    stock_market_data = StockMarketData([Company.A, Company.B], [Period.TESTING])

    portfolio = Portfolio()
    self.assertEqual(portfolio.get_value(stock_market_data), 0)

    portfolio = Portfolio(100.0)
    self.assertEqual(portfolio.get_value(stock_market_data), 100.0)

    portfolio = Portfolio(100.0, {Company.A: 10})
    self.assertEqual(portfolio.get_value(stock_market_data), 1127.59895)

    portfolio = Portfolio(100.0, {Company.A: 10, Company.B: 10})
    self.assertEqual(portfolio.get_value(stock_market_data), 2416.5398400000004)

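# Note (a suggestion, not part of the original suite): the exact float literals above
# only hold while get_value() accumulates the same terms in the same order on every
# platform; a more robust variant would pin the expectations with a tolerance, e.g.
#     self.assertAlmostEqual(portfolio.get_value(stock_market_data), 2416.53984, places=4)
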
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    order_list = []
    vote_a = self.expert_a.vote(stock_market_data[Company.A])
    vote_b = self.expert_b.vote(stock_market_data[Company.B])

    # convert the expert votes into a state vector
    state = np.array([self.vote2num[vote_a], self.vote2num[vote_b]])

    if self.train_while_trading:
        if len(self.memory) > self.min_size_of_memory_before_training:
            # helper function that executes experience replay
            self._replay()

    # act
    action = self._act(state, stock_market_data, portfolio, order_list)

    if self.last_portfolio_value is not None:
        # Reward function R1
        if self.last_portfolio_value <= portfolio.get_value(stock_market_data):
            reward = 1
        else:
            reward = 0

        # Reward function R2
        # reward = (portfolio.get_value(stock_market_data)
        #           - self.last_portfolio_value) / self.last_portfolio_value

        # helper that fills up the queue for experience replay
        self._remember(self.last_state, action, reward, state)

    # save the state and portfolio value
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_state = state

    return order_list

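# _remember() and _replay() are referenced above but not shown here. A minimal sketch
# of what they could look like, assuming a deque-based self.memory, a Keras-style
# self.model over the 2-element vote state, and the standard Q-learning target
# r + gamma * max_a' Q(s', a') (imports assumed: random, numpy as np):
def _remember(self, state, action, reward, next_state):
    # store one transition <s, a, r, s'> for experience replay
    self.memory.append((state, action, reward, next_state))

def _replay(self):
    # sample a random mini-batch to break the temporal correlation of transitions
    batch = random.sample(self.memory, self.batch_size)
    for state, action, reward, next_state in batch:
        target = reward + self.gamma * np.max(
            self.model.predict(next_state.reshape(1, -1))[0])
        q_values = self.model.predict(state.reshape(1, -1))
        q_values[0][action] = target  # only the executed action is corrected
        self.model.fit(state.reshape(1, -1), q_values, epochs=1, verbose=0)
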
def gen_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    print('gen_reward')
    current_portfolio_value = portfolio.get_value(stock_market_data)
    if self.last_portfolio_value < current_portfolio_value:
        return self.reward_factor * (current_portfolio_value / self.last_portfolio_value)
    elif self.last_portfolio_value > current_portfolio_value:
        return -self.reward_factor * (current_portfolio_value / self.last_portfolio_value)
    else:
        return -self.reward_factor / 5

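# Note the asymmetry in gen_reward(): on a loss the ratio current/last is below 1,
# so the penalty -reward_factor * ratio is smaller in magnitude than the bonus for
# an equally sized gain, while an unchanged portfolio costs a flat reward_factor / 5.
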
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    current_state = get_state(self, stock_market_data)

    if self.train_while_trading and self.last_state is not None:
        reward = get_reward(self, portfolio, stock_market_data)
        self.memory.append((self.last_state, self.last_action, reward, current_state))
        train_neural_net(self)

    action_index = get_index_for_action_to_execute(self, current_state)

    self.last_state = current_state
    self.last_action = action_index
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return get_order_list(self, portfolio, stock_market_data)

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # calculate the current state
    state_ = state_maker(self.expert_a, self.expert_b, stock_market_data, portfolio)
    curr_state = state_.create_numerical_state(self.state_size)

    # calculate the current portfolio value
    curr_portfolio_value = portfolio.get_value(stock_market_data)

    # training or testing mode
    if self.train_while_trading:
        # Store the state as experience (memory) and train the neural network,
        # but only if trade() was called at least once before
        if self.last_state is not None:
            reward = self.get_rewards(self.last_portfolio_value, curr_portfolio_value)
            self.remember(self.last_state, self.last_action_a, reward, curr_state)
            if len(self.memory) > self.min_size_of_memory_before_training:
                self.replay()

        # Create actions for the current state and decrease epsilon for fewer random actions
        curr_action_a = self.get_best_action(curr_state)
        final_order = self.create_order_list(curr_action_a, portfolio, stock_market_data)
        self.decrease_epsilon()
    else:
        # predict the best action from the neural net
        curr_action_a = self.model.predict(curr_state)
        curr_action_a = np.argmax(curr_action_a[0])
        final_order = self.create_order_list(curr_action_a, portfolio, stock_market_data)

    # Save the created state, action and portfolio value for the next call of trade() --> done
    self.last_state = curr_state
    self.last_action_a = curr_action_a
    self.last_portfolio_value = curr_portfolio_value

    return final_order

def create_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData, state_now: State):
    new_portfolio_value = portfolio.get_value(stock_market_data)
    index_of_action = Action.get_id_from_action(self.last_action)

    reward = -1 if (self.last_portfolio_value - new_portfolio_value) > 0 else 1
    reward = 0 if (self.last_portfolio_value - new_portfolio_value) == 0 else reward

    reward_array = np.zeros([9])
    reward_array[index_of_action] = reward

    q_next = self.run_model(state_now)
    weighted_q_next = q_next * self.gamma
    reward_array = np.sum([reward_array, weighted_q_next], axis=0)
    # reward_array[index_of_action] += self.gamma * q_next[index_of_action]

    return reward_array

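# How the target vector above is assembled, with illustrative numbers: with
# gamma = 0.9, last action index 2, a portfolio gain (reward = +1), and
# q_next = [0.1, 0.0, 0.5, ...], every entry becomes gamma * q_next[a], and the
# executed action additionally receives the observed reward:
#     reward_array[2] = 1 + 0.9 * 0.5 = 1.45,  reward_array[0] = 0.09,  ...
# i.e. each action is regressed toward its discounted next-state estimate.
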
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    state = State(portfolio,
                  self.expert_a.vote(stock_market_data[Company.A]),
                  self.expert_b.vote(stock_market_data[Company.B]),
                  stock_market_data[Company.A],
                  stock_market_data[Company.B])
    param = state.get_nn_input_state()

    if self.train_while_trading and self.last_state is not None:
        current_reward = self.gen_reward(portfolio, stock_market_data)
        self.update_memory(current_reward, param)
        self.train_model()

    self.last_action = self.get_action_idx(param)
    self.last_state = param
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    orders = self.get_orders(stock_market_data, portfolio)
    return orders

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    current_state = self.current_state(stock_market_data)
    current_portfolio_value = portfolio.get_value(stock_market_data)

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before
    if self.last_action is not None and self.train_while_trading:
        reward = self.reward(current_portfolio_value)
        self.memory.append((self.last_state, self.last_action, reward, current_state))
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experience_replay()

    # Create the action for the current state and decrease epsilon for fewer random actions
    action = self.get_action(current_state)
    self.epsilon = max([self.epsilon_min, self.epsilon * self.epsilon_decay])

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_state = current_state
    self.last_action = action
    self.last_portfolio_value = current_portfolio_value

    # convert the action to an order list
    return self.mapping_action_order(action, portfolio, stock_market_data)

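# With typical values (assumption: epsilon = 1.0, epsilon_decay = 0.999,
# epsilon_min = 0.01), the max(...) update above anneals exploration smoothly:
# after n calls, epsilon = max(0.01, 0.999 ** n), which hits the floor after
# about log(0.01) / log(0.999), i.e. roughly 4600 trading days.
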
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    stock_data_a = None
    stock_data_b = None
    last_stock_data_a = None
    last_stock_data_b = None
    company_list = stock_market_data.get_companies()
    for company in company_list:
        if company == Company.A:
            stock_data_a = stock_market_data[Company.A]
            last_stock_data_a = stock_data_a.get_from_offset(-2)
        elif company == Company.B:
            stock_data_b = stock_market_data[Company.B]
            last_stock_data_b = stock_data_b.get_from_offset(-2)
        else:
            assert False

    vote_a = self.expert_a.vote(stock_data_a)
    vote_b = self.expert_b.vote(stock_data_b)
    state = State(last_stock_data_a, last_stock_data_b, vote_a, vote_b)

    # Q-learning: predict the action values for the current state
    nn_input = np.array(
        [np.array([state.aDiff, state.vote_a, state.bDiff, state.vote_b])])
    action_vals = self.model.predict(nn_input)

    # The five possible action combinations, expressed as order lists
    actions = [
        [Order(OrderType.BUY, Company.A,
               int((portfolio.cash / 2) // stock_data_a.get_last()[-1])),
         Order(OrderType.BUY, Company.B,
               int((portfolio.cash / 2) // stock_data_b.get_last()[-1]))],
        [Order(OrderType.BUY, Company.A,
               int(portfolio.cash // stock_data_a.get_last()[-1])),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.BUY, Company.B,
               int(portfolio.cash // stock_data_b.get_last()[-1]))],
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        [Order(OrderType.SELL, Company.A, 0),
         Order(OrderType.SELL, Company.B, 0)],
    ]

    # decrease epsilon for fewer random actions; no exploration in testing mode
    if not self.train_while_trading:
        self.epsilon = 0.0
    else:
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        else:
            self.epsilon = self.epsilon_min

    # randomize the action with probability epsilon
    if random.random() < self.epsilon:
        next_action = random.choice(list(range(self.action_size)))
    else:
        next_action = np.argmax(action_vals[0])
    order_list = actions[next_action]

    portfolio_value = portfolio.get_value(
        stock_market_data, stock_market_data.get_most_recent_trade_day())

    # Store the experience and train the network only if trade() was called before
    if self.last_state is not None and self.train_while_trading:

        def reward(old_val, new_val):
            neg = -100.0
            pos = 100.0
            q = new_val / old_val
            if q < 1:
                return neg
            elif q == 1:
                return -10
            else:
                print("Q: ", q)
                return pos / 2 * old_val / new_val

        r = reward(self.last_portfolio_value, portfolio_value)
        action_vals[0][self.last_order] = r
        self.memory.append([self.last_input, action_vals])

        if len(self.memory) > self.min_size_of_memory_before_training:
            sample = random.sample(self.memory, self.batch_size)
            train_inputs = list()
            train_targets = list()
            for [sample_in, sample_out] in sample:
                train_inputs.append(sample_in[0])
                train_targets.append(sample_out[0])
            self.model.fit(np.array(train_inputs), np.array(train_targets),
                           self.batch_size)

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_input = nn_input
    self.last_state = state
    self.last_order = next_action
    self.last_portfolio_value = portfolio_value

    print(next_action, action_vals, portfolio.cash,
          portfolio.get_stock(Company.A), portfolio.get_stock(Company.B))
    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (summed expert votes for A and for B)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)
    '''
    if random.random() < self.epsilon:
        # use random actions for A and B
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use predicted actions
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    '''
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: per-stock price change as immediate reward, clipped future rewards
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1

        fut_reward_a_buy = max(predictions[0][0], 0)
        fut_reward_a_sell = max(predictions[0][1], 0)
        fut_reward_b_buy = max(predictions[0][2], 0)
        fut_reward_b_sell = max(predictions[0][3], 0)

        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a_buy,
            -diff_a + self.gamma * fut_reward_a_sell,
            diff_b + self.gamma * fut_reward_b_buy,
            -diff_b + self.gamma * fut_reward_b_sell
        ]])
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, reward_vec, verbose=0)

    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # 1. Compute the current state
    state = self.compute_state(stock_market_data)

    # 1.2 If training is turned off, just predict the next action and return the orders
    if not self.train_while_trading:
        self.last_state = state
        action_space = self.model.predict(state)
        action = np.argmax(action_space[0])
        orders = self.action_to_order(action, portfolio, stock_market_data)
        return orders

    # 2. Take a random action with probability epsilon, otherwise predict it via the ANN
    if np.random.rand() <= self.epsilon:
        action = np.random.randint(self.action_size, size=1)[0]
    else:
        action_space = self.model.predict(state)
        action = np.argmax(action_space[0])

    # 3. Reduce epsilon while it is bigger than epsilon_min
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # 4. Train the ANN
    if self.last_state is not None:
        # 4.1 Get the reward
        reward = self.get_reward(portfolio.get_value(stock_market_data),
                                 self.last_portfolio_value)
        # 4.2 Store the memory
        self.memory.append([self.last_state, self.last_action, reward, state])
        # 4.3 Actual training via experience replay
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experienceReplay(self.batch_size)

    # 5. Map the action to orders
    orders = self.action_to_order(action, portfolio, stock_market_data)

    # 6. Save the values for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return orders

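# experienceReplay() is referenced above but not shown here. A batched sketch under
# the assumptions that self.memory holds [state, action, reward, next_state] entries
# with states shaped (1, state_size) and self.model is a Keras-style network
# (imports assumed: random, numpy as np; the name is illustrative):
def experience_replay_sketch(self, batch_size):
    batch = random.sample(self.memory, batch_size)
    states = np.vstack([entry[0] for entry in batch])
    next_states = np.vstack([entry[3] for entry in batch])
    targets = self.model.predict(states)      # current Q-values as the baseline
    next_q = self.model.predict(next_states)  # bootstrapped next-state values
    for i, (_, action, reward, _) in enumerate(batch):
        targets[i][action] = reward + self.gamma * np.max(next_q[i])
    self.model.fit(states, targets, epochs=1, verbose=0)
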
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (summed expert votes for A and for B)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    most_recent_price_A = stock_market_data.get_most_recent_price(Company.A)
    most_recent_price_B = stock_market_data.get_most_recent_price(Company.B)
    order_list = []
    money_to_spend = portfolio.cash

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // most_recent_price_A
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * most_recent_price_A
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // most_recent_price_B
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: reward the previously taken actions with the portfolio value change
        diff = portfolio.get_value(stock_market_data) / self.last_portfolio_value - 1
        rec_vec = np.array([[-diff, -diff, -diff, -diff]])
        rec_vec[0][self.last_action_a] = diff
        rec_vec[0][2 + self.last_action_b] = diff
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, rec_vec)

    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    expert_votes = [
        self.experts[i].vote(stock_market_data[company])
        for i, company in enumerate(stock_market_data.get_companies())
    ]
    shares_owned = [
        portfolio.get_stock(company)
        for company in stock_market_data.get_companies()
    ]
    if State is StateExpertsOnly:
        state = StateExpertsOnly(expert_votes, portfolio.get_value(stock_market_data))
    elif State is StateExpertsCashShares:
        state = StateExpertsCashShares(expert_votes, portfolio.cash, shares_owned,
                                       portfolio.get_value(stock_market_data))
    else:
        raise RuntimeError

    if self.train_while_trading:
        # store the state as experience in memory
        if len(self.memory) > 0:
            self.memory[-1].state2 = state
        experience = Experience(state1=state)
        self.memory.append(experience)

        # train
        if len(self.memory) >= self.min_size_of_memory_before_training:
            if self.days_passed % self.train_each_n_days == 0:
                self.train()

        # determine the probability for random actions
        if not self.is_evolved_model:
            # first training episode: exponentially decaying exploration
            random_action_probability = (
                (self.epsilon ** self.days_passed)
                * (1.0 - self.random_action_min_probability)
                + self.random_action_min_probability
            )
        else:
            # subsequent training episodes
            random_action_probability = self.random_action_min_probability

        if self.training_occasions == 0 or random.random() < random_action_probability:
            actions = [Action.get_random(), Action.get_random()]
        else:
            # choose the actions by querying the network
            x = state.to_input()
            y = self.model.predict(numpy.array([x]))
            assert y.shape == (1, self.action_size)
            actions = Action.from_model_prediction(y[0])
        experience.actions = actions
    else:
        # not training -> always choose the actions by querying the network
        actions = Action.from_model_prediction(
            self.model.predict(numpy.array([state.to_input()]))[0])

    # translate the actions into orders
    orders: typing.List[Order] = []
    companies_with_actions_and_magnitudes = list(
        zip(list(Company), actions, Action.get_action_magnitudes(actions)))
    for comp, action, mag in companies_with_actions_and_magnitudes:
        if action.is_buy():
            cash_limit = portfolio.cash * mag
            date, stock_price = stock_market_data[comp].get_last()
            shares_amount = int(cash_limit // stock_price)  # whole shares only
            if shares_amount > 0:
                orders.append(Order(OrderType.BUY, comp, shares_amount))
        elif action.is_sell():
            shares_amount = portfolio.get_stock(comp) * mag
            if shares_amount > 0:
                orders.append(Order(OrderType.SELL, comp, shares_amount))

    self.days_passed += 1
    return orders

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    self.day += 1

    # Compute the current state
    order_list = []
    stock_data_a = stock_market_data[Company.A]
    stock_data_b = stock_market_data[Company.B]

    # Expert A votes on stock A, expert B on stock B
    expert_a = self.expert_a.vote(stock_data_a)
    expert_b = self.expert_b.vote(stock_data_b)

    state = np.array([[
        self.vote_num[expert_a],
        self.vote_num[expert_b],
    ]])

    action = self.decide_action(state)

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before
    if self.last_state is not None:
        reward = (portfolio.get_value(stock_market_data)
                  - self.last_portfolio_value) / self.last_portfolio_value
        self.memory.append((self.last_state, self.last_action, reward, state))
        self.train_network(self.batch_size)

    # Create orders for the current action; see the action table after this function.
    # What fraction of the stocks should be bought or sold
    percent_buy = 1
    percent_sell = 1

    if action == 0:
        # Buy A
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int((portfolio.cash * percent_buy / 2) // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
        # Buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int((portfolio.cash * percent_buy / 2) // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 1:
        # Buy A
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int(portfolio.cash * percent_buy // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
        # Sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 2:
        # Sell A
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
        # Buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int(portfolio.cash * percent_buy // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 3:
        # Sell A
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
        # Sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 4:
        # Sell A, hold B
        amount_to_sell_a = int(portfolio.get_stock(Company.A) * percent_sell)
        if amount_to_sell_a > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell_a))
    elif action == 5:
        # Hold A, sell B
        amount_to_sell_b = int(portfolio.get_stock(Company.B) * percent_sell)
        if amount_to_sell_b > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell_b))
    elif action == 6:
        # Buy A, hold B
        stock_price_a = stock_market_data.get_most_recent_price(Company.A)
        amount_to_buy_a = int((portfolio.cash * percent_buy) // stock_price_a)
        if amount_to_buy_a > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy_a))
    elif action == 7:
        # Hold A, buy B
        stock_price_b = stock_market_data.get_most_recent_price(Company.B)
        amount_to_buy_b = int((portfolio.cash * percent_buy) // stock_price_b)
        if amount_to_buy_b > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy_b))
    elif action == 8:
        # Hold A, hold B (zero-amount order keeps the order list non-empty)
        order_list.append(Order(OrderType.BUY, Company.B, 0))
    else:
        print("undefined action called: " + str(action))

    # Decrease epsilon for fewer random actions
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # Save the created state, action and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return order_list

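# The 3 x 3 action space used by the trader above, for reference
# (percent_buy = percent_sell = 1, i.e. every move is all-in):
#   0: buy A,  buy B      1: buy A,  sell B     2: sell A, buy B
#   3: sell A, sell B     4: sell A, hold B     5: hold A, sell B
#   6: buy A,  hold B     7: hold A, buy B      8: hold A, hold B
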
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    state_now = State(stock_market_data, self.expert_a, self.expert_b, self.last_state)
    if self.last_state is None:
        state_now.prev_state = state_now

    # Create actions and let probability decide between them
    rand_action = Action.create_random_action()
    model_action = self.action_by_model(state_now)
    action = np.random.choice([rand_action, model_action], 1,
                              p=[self.epsilon, 1 - self.epsilon])[0]
    if self.epsilon > self.epsilon_min:
        self.epsilon -= self.epsilon_decay

    # if training is deactivated, always follow the model
    if not self.train_while_trading:
        self.last_state = state_now
        return model_action.create_order_list(portfolio, stock_market_data)

    # for the first call of trade()
    if self.last_state is None:
        self.epsilon = 1.0
        self.last_state = state_now
        self.last_portfolio_value = portfolio.get_value(stock_market_data)
        self.last_action = action
        return action.create_order_list(portfolio, stock_market_data)

    # calculate the reward and create a tuple for memory
    reward = self.create_reward(portfolio, stock_market_data, state_now)
    memory_unit = (self.last_state, self.last_action, reward, state_now)
    self.memory.append(memory_unit)

    # train if there is enough experience
    if len(self.memory) > self.min_size_of_memory_before_training:
        # start training with a random batch
        batch = random.sample(self.memory, self.batch_size)
        x = np.empty((self.batch_size, self.state_size))
        y = np.empty((self.batch_size, self.action_size))
        for i, (s1, a, r, s2) in enumerate(batch):
            x[i] = s1.create_array_diff()
            y[i] = r
        self.model.train_on_batch(x, y)

    # save the old values for the next run
    self.last_state = state_now
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_action = action

    return action.create_order_list(portfolio, stock_market_data)

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    stock_data_a = stock_market_data[Company.A]
    vote_a = self.expert_a.vote(stock_data_a)
    stock_data_b = stock_market_data[Company.B]
    vote_b = self.expert_b.vote(stock_data_b)  # expert B votes on stock B

    curr_state = np.array([[
        # portfolio.cash,
        # portfolio.stocks[Company.A],
        # portfolio.stocks[Company.B],
        # stock_market_data.get_most_recent_price(Company.A),
        # stock_market_data.get_most_recent_price(Company.B),
        self.vote_map[vote_a],
        self.vote_map[vote_b],
    ]])

    # Store the state as experience (memory) and train the neural network,
    # but only if trade() was called at least once before: <s, a, r, s'>
    trade_called_once_before = (self.last_state is not None)
    experience = (self.last_state, self.last_action_a, self.last_action_b,
                  self.last_portfolio_value, curr_state)
    self.memory.append(experience)

    if trade_called_once_before and self.min_size_of_memory_before_training <= len(self.memory):
        # draw self.batch_size random indices into the queue
        selected_mems_ind = random.sample(range(0, len(self.memory)), self.batch_size)
        selected_mems = [self.memory[i] for i in selected_mems_ind]

        # delete in descending index order so the remaining indices stay valid
        selected_mems_ind = sorted(selected_mems_ind)[::-1]
        for i in selected_mems_ind:
            del self.memory[i]

        # mem[:3] -> s, a_a, a_b; mem[3] -> portfolio value used as the target
        X = [np.asarray(mem[:3]) for mem in selected_mems]
        Y = [np.asarray(mem[3]) for mem in selected_mems]
        self.model.fit(X, Y, batch_size=self.batch_size)

    # Create actions for the current state and decrease epsilon for fewer random actions
    predicted_actions_matrix = self.model.predict(curr_state)
    model_choice = np.argmax(predicted_actions_matrix)

    order_list = []
    curr_action_a, curr_action_b, order_list = self.choose_actions(
        stock_data_a, stock_data_b, portfolio, order_list,
        epsilon=self.epsilon, model_choice=model_choice)

    curr_portfolio_value = portfolio.get_value(stock_market_data)
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # Save the created state, actions and portfolio value for the next call of trade()
    self.last_state = curr_state
    self.last_action_a = curr_action_a
    self.last_action_b = curr_action_b
    self.last_portfolio_value = curr_portfolio_value

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state
    def get_state():
        expert_vote_dict = {"BUY": 1, "SELL": 2, "HOLD": 0}
        stock_data_a = stock_market_data[Company.A]
        stock_data_b = stock_market_data[Company.B]
        vote_a = str(self.expert_a.vote(stock_data_a)).split(".")[1]
        vote_b = str(self.expert_b.vote(stock_data_b)).split(".")[1]
        vote_a = expert_vote_dict[vote_a]
        vote_b = expert_vote_dict[vote_b]
        state = np.array([portfolio.get_stock(Company.A),
                          portfolio.get_stock(Company.B),
                          portfolio.cash,
                          portfolio.get_value(stock_market_data),
                          vote_a,
                          vote_b])
        state = state.reshape((1, self.state_size))
        return state

    # Train the neural network on a single transition (no experience replay)
    def train_without_experience_replay(current_state, next_state, reward, action_index):
        expected_value = reward + self.gamma * np.amax(self.model.predict(next_state))
        expected_value_array = self.model.predict(current_state)
        expected_value_array[0][action_index] = expected_value
        self.model.fit(current_state, expected_value_array, epochs=1, verbose=0)

    # Train the neural network on a random mini-batch from memory
    def experience_replay():
        batch = random.sample(self.memory, self.batch_size)
        for state, action_index, reward, next_state in batch:
            expected_value_array = self.model.predict(state)
            expected_value = reward + self.gamma * np.amax(self.model.predict(next_state))
            expected_value_array[0][action_index] = expected_value
            self.model.fit(state, expected_value_array, epochs=1, verbose=0)

    # Create the action for the current state and decrease epsilon for fewer random actions
    def get_order_index(state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return random.randrange(self.action_size)
        return np.argmax(self.model.predict(state)[0])

    def get_orders(order_index):
        order_combination_dict = {0: ["BUY", "BUY"], 1: ["SELL", "SELL"],
                                  2: ["BUY", "SELL"], 3: ["SELL", "BUY"]}
        order_dict = {"BUY": 1, "SELL": 2}
        action_combination = order_combination_dict[order_index]
        order_a = order_dict[action_combination[0]]
        order_b = order_dict[action_combination[1]]
        order_a = Order(OrderType(order_a), Company.A, portfolio.get_stock(Company.A))
        order_b = Order(OrderType(order_b), Company.B, portfolio.get_stock(Company.B))
        order_list = [order_a, order_b]
        return order_list

    def follow_orders(orders):
        order_list = []
        company_list = stock_market_data.get_companies()
        for company, order in zip(company_list, orders):
            stock_data = stock_market_data[company]
            if order.type == OrderType.BUY:
                # buy as many stocks as possible
                stock_price = stock_data.get_last()[-1]
                amount_to_buy = int(portfolio.cash // stock_price)
                logger.debug(f"{self.get_name()}: Got order to buy {company}: "
                             f"{amount_to_buy} shares a {stock_price}")
                if amount_to_buy > 0:
                    order_list.append(Order(OrderType.BUY, company, amount_to_buy))
            elif order.type == OrderType.SELL:
                # sell as many stocks as possible of the company in this iteration
                amount_to_sell = portfolio.get_stock(company)
                logger.debug(f"{self.get_name()}: Got order to sell {company}: "
                             f"{amount_to_sell} shares available")
                if amount_to_sell > 0:
                    order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        return order_list

    def get_reward(current_portfolio_value, next_portfolio_value):
        if next_portfolio_value > current_portfolio_value:
            return 1
        elif next_portfolio_value < current_portfolio_value:
            return -2
        else:
            return -1

    self.last_state = get_state()
    self.memory_state.append(self.last_state)
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.memory_portfolio.append(self.last_portfolio_value)

    # Store the experience and train only if trade() was called at least once before
    if self.called_once:
        current_state = self.memory_state[0]
        next_state = self.memory_state[1]
        reward = get_reward(self.memory_portfolio[0], self.memory_portfolio[1])
        self.memory.append((current_state, self.memory_action, reward, next_state))
        if len(self.memory) >= self.min_size_of_memory_before_training:
            experience_replay()
        self.last_state = next_state

    # Save the created state and action for the next call of trade()
    action_index = get_order_index(self.last_state)
    action_list = get_orders(action_index)
    action_list = follow_orders(action_list)
    self.memory_action = action_index
    self.called_once = True

    return action_list

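# Note: get_orders() above leans on the framework's OrderType enum numbering
# (assumed here: OrderType(1) == OrderType.BUY and OrderType(2) == OrderType.SELL);
# constructing Order(OrderType.BUY, ...) directly would drop that hidden coupling.
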
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 2 (one expert vote per company)
    # OUTPUT layer: 4 ([buy_A, sell_A, buy_B, sell_B])

    # Compute the current state
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA],
        self.vote_map[expertB_voteB],
    ]])

    # choose action 0 or 1 per company
    predictions = self.model.predict(state)

    # Create actions for the current state and decrease epsilon for fewer random actions
    if random.random() < self.epsilon:
        # use random actions for A and B
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use predicted actions
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # handle A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # handle B
    if action_B == 0:
        # buy all B
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    # train the neural network only if trade() was called at least once before
    if self.last_state is not None:
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1
        fut_reward_a = np.max(predictions[0][0:2])
        fut_reward_b = np.max(predictions[0][2:4])
        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a,
            -diff_a + self.gamma * fut_reward_a,
            diff_b + self.gamma * fut_reward_b,
            -diff_b + self.gamma * fut_reward_b
        ]])

        # Store the state as experience (memory) and replay
        # slides: <s, a, r, s'>
        # mine: <s, r>
        if self.min_size_of_memory_before_training <= len(self.memory):
            # take self.batch_size - 1 transitions from memory
            batch = random.sample(self.memory, self.batch_size - 1)
            # append the current (state, reward)
            batch.append((self.last_state, reward_vec))
            for x, y in batch:
                self.model.fit(x, y, batch_size=self.batch_size, verbose=0)
        else:
            # only train with the current (state, reward)
            self.model.fit(self.last_state, reward_vec, batch_size=1, verbose=0)
        self.memory.append((self.last_state, reward_vec))

    # Save the created state, actions and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b

    return order_list

def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Args:
        portfolio: current Portfolio of this trader
        stock_market_data: StockMarketData for evaluation
    Returns:
        A list of orders, may be empty but never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 1 (summed expert votes for A -> buy or sell A?)
    # OUTPUT layer: 2 ([buy_A, sell_A])

    # Compute the current state
    stock_data_a = stock_market_data[Company.A]
    vote_a_for_a = self.expert_a.vote(stock_data_a)
    vote_b_for_a = self.expert_b.vote(stock_data_a)
    # stock_data_b = stock_market_data[Company.B]
    # vote_b = self.expert_a.vote(stock_data_b)

    state = np.array([[
        self.vote_map[vote_a_for_a] + self.vote_map[vote_b_for_a]
    ]])  # , self.vote_map[vote_b]]])

    # choose action 0 or 1
    predictions = self.model.predict(state)
    action = np.argmax(predictions)

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    order_list = []
    if action == 0:
        # buy all A
        amount_to_buy = portfolio.cash // current_price_a
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: the target vector is the same for both previous actions,
        # since BUY profits from a rising price and SELL from a falling one
        diff_a = current_price_a / self.last_price_a - 1
        assert self.last_action_a in (0, 1)
        rec_vec = np.array([[diff_a, -diff_a]])
        # reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, rec_vec)

    self.last_state = state
    self.last_action_a = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a

    return order_list

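# The target [[diff_a, -diff_a]] encodes that BUY pays off exactly when the price
# rose (diff_a > 0) and SELL exactly when it fell, independent of the action that
# was actually taken; this is why the previous action only needs validating,
# not branching on.
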