def test_model(episode_count, data_test, data_test_open, start_balance, model_name): # Define arrays to store per episode values Act_datasize = len(data_test) Act_Bench_Stock1_Bal = int( np.floor((start_balance / 2) / data_test_open[0])) Act_Bench_Open_cash = start_balance / 2 model = load_model("models/" + model_name) # Actual run episode_count = 0 # Define arrays to store per episode values total_Prof = [] total_stock1bal = [] total_open_cash = [] total_port_value = [] total_days_played = [] Act_total_Prof = [] Act_total_stock1bal = [] Act_total_open_cash = [] Act_total_port_value = [] Act_total_days_played = [] actions_done_perday = [] portfolio_value = [] for e in range(1): # here we run only for 1 episode, as it is Test run Bal_stock1_t2 = Act_Bench_Stock1_Bal done = False open_cash_t2 = Act_Bench_Open_cash total_profit = 0 reward = 0 # Initialize Agent agent_test = Agent(8, is_eval=True, model_name=model_name) # agent = Agent(8) agent_test.inventory1 = [] for i in range(Bal_stock1_t2): agent_test.inventory1.append(data_test_open[0]) # Timestep delta to make sure that with time reward increases for taking action timestep_delta = 0 # Running episode over all days in the datasize for t in range(Act_datasize): print("..........") print(data_test.iloc[t, 0]) state_class_obj = State(data_test_open, Bal_stock1_t2, open_cash_t2, t) state_array_obj = state_class_obj.getState() action = agent_test.act(state_array_obj) print("Total portfolio value: " + str(state_class_obj.portfolio_value) + " stock 1 number: " + str(len(agent_test.inventory1)) + " open cash" + str(state_class_obj.open_cash)) # reward should be more as time goes further. We will remove reward_timedelta from actual reward # reward_timedelta=(datasize-t)*timestep_delta change_percent_stock1 = (state_class_obj.Stock1Price - state_class_obj.fiveday_stock1 ) / state_class_obj.fiveday_stock1 * 100 # print("change_percent_stock1: "+str(change_percent_stock1)) # print("change_percent_stock2: "+str(change_percent_stock2)) if action == 0: # buy stock 1 if state_class_obj.Stock1Price > state_class_obj.open_cash: ''' print("Buy stock 1 when it did not have cash, so bankrupt, end of episode") reward=-reward_timedelta*10 done = True ''' done = True # end episode else: # print("In Buy stock 1") agent_test.inventory1.append(data_test_open[t]) Bal_stock1_t2 = len(agent_test.inventory1) open_cash_t2 = state_class_obj.open_cash - state_class_obj.Stock1Price # Here we are buying 1 stock if action == 1: # sell stock 1 if state_class_obj.Stock1Blnc < 1: # print("sold stock 2 when it did not have stock 2, so bankrupt, end of episode") done = True # end episode else: # print("In sell stock 1") agent_test.inventory1.pop(0) Bal_stock1_t2 = len(agent_test.inventory1) # Bal_stock2_t2 = len(agent_test.inventory2) open_cash_t2 = state_class_obj.open_cash + state_class_obj.Stock1Price # State[0] is the price of stock 1. Here we are buying 1 stoc if action == 2: # Do nothing action Bal_stock1_t2 = len(agent_test.inventory1) # Bal_stock2_t2 = len(agent_test.inventory2) # print("Do nothing") if t == Act_datasize - 1: # print("t==datasize") done = True next_state_class_obj = State(data_test_open, Bal_stock1_t2, open_cash_t2, t) next_state_array_obj = next_state_class_obj.getState() else: # print("t!=datasize"+str(open_cash_t2)) next_state_class_obj = State(data_test_open, Bal_stock1_t2, open_cash_t2, t + 1) next_state_array_obj = next_state_class_obj.getState() # print("Action is "+str(action)+" reward is" + str(reward)) actions_done_perday.append(action) portfolio_value.append(next_state_class_obj.portfolio_value) if done == True: print("--------------------------------") print("Total Profit: " + formatPrice(next_state_class_obj.portfolio_value - start_balance)) print("Total No. of days played: " + str(t) + " out of overall days: " + str(Act_datasize)) print("Total portfolio value: " + str(next_state_class_obj.portfolio_value) + " stock 1 number: " + str(len(agent_test.inventory1)) + " open cash" + str(next_state_class_obj.open_cash)) # + " stock 2 number: " + str(len(agent_test.inventory2)) Act_total_Prof.append(total_profit) Act_total_stock1bal.append(len(agent_test.inventory1)) # Act_total_stock2bal.append(len(agent_test.inventory2)) Act_total_open_cash.append(state_class_obj.open_cash) Act_total_port_value.append(state_class_obj.portfolio_value) Act_total_days_played.append(t) print("--------------------------------") state_class_obj.reset() break opencash = state_class_obj.open_cash return total_profit, portfolio_value, opencash, Act_total_days_played
def train_model(episode_count, start_balance, data_train, training, date): from os import path # Define arrays to store per episode values total_Prof = [] total_stock1bal = [] total_open_cash = [] total_port_value = [] total_days_played = [] batch_size = 64 # Training run for e in range(episode_count + 1): print("..........") print("Episode " + str(e) + "/" + str(episode_count)) Bal_stock1 = int(np.floor((start_balance / 2) / data_train[0])) open_cash = start_balance / 2 datasize = training done = False total_profit = 0 reward = 0 max = 0 # Initialize Agent agent = Agent(5) agent.inventory1 = [] for i in range(Bal_stock1): agent.inventory1.append(data_train[0]) # Timestep delta to make sure that with time reward increases for taking action # timestep_delta=0 # Running episode over all days in the datasize for t in range(datasize): # print("..........") # print(pd_data1_train.iloc[t,0]) state_class_obj = State(data_train, Bal_stock1, open_cash, t) state_array_obj = state_class_obj.getState() action = agent.act(state_array_obj) change_percent_stock1 = (state_class_obj.Stock1Price - state_class_obj.fiveday_stock1 ) / state_class_obj.fiveday_stock1 * 100 # profit=data1_train[t]-agent.inventory1(-1) # print("change_percent_stock1: "+str(change_percent_stock1)) # if action not in [0,1,2]: # reward= reward-1000 # decide_reward(action,data_train) if action == 0: # buy stock 1 if state_class_obj.Stock1Price > state_class_obj.open_cash: ''' print("Buy stock 1 when it did not have cash, so bankrupt, end of episode") reward=-reward_timedelta*10 done = True ''' reward = reward - 4000 # done = True # end episode else: # print("In Buy stock 1") agent.inventory1.append(data_train[t]) Bal_stock1_t1 = len(agent.inventory1) # Bal_stock2_t1 = len(agent.inventory2) open_cash_t1 = state_class_obj.open_cash - state_class_obj.Stock1Price # Here we are buying 1 stock # needs to be reviewed if (state_class_obj.open_cash < 500): reward = reward - 2000 elif (0.1 * Bal_stock1_t1 > Bal_stock1): reward = reward - (1000 * Bal_stock1_t1) # elif (abs(change_percent_stock1) <= 2): # reward = reward-2000 else: reward = reward - (change_percent_stock1 * 1000) if action == 1: # sell stock 1 if state_class_obj.Stock1Blnc < 1: # print("sold stock 2 when it did not have stock 2, so bankrupt, end of episode") reward = reward - 4000 # done = True # end episode else: # print("In sell stock 1") bought_price1 = agent.inventory1.pop(0) Bal_stock1_t1 = len(agent.inventory1) total_profit += data_train[t] - bought_price1 # Bal_stock2_t1 = len(agent.inventory2) open_cash_t1 = state_class_obj.open_cash + state_class_obj.Stock1Price # State[0] is the price of stock 1. Here we are selling 1 stoc if (0.1 * Bal_stock1_t1 > Bal_stock1): reward = reward - (1000 * Bal_stock1_t1) # elif (abs(change_percent_stock1) <= 2): # reward = -1000 elif total_profit > 200: reward = reward + (2000 * total_profit) else: reward = reward + ( change_percent_stock1 * 100 ) # State[0] is the price of stock 1. Here we are selling 1 stock # total_profit += data1_train[t] - bought_price1 # print("reward for sell stock1 " + str(reward)) if action == 2: # Do nothing action # if (abs(change_percent_stock1) <= 2): # reward = 100 if (state_class_obj.open_cash < 0.05 * start_balance): reward += 2000 else: reward = reward - 2000 Bal_stock1_t1 = len(agent.inventory1) # Bal_stock2_t1 = len(agent.inventory2) open_cash_t1 = open_cash # print("Do nothing") if t == datasize - 1: # print("t==datasize") done = True next_state_class_obj = State(data_train, Bal_stock1_t1, open_cash_t1, t) next_state_array_obj = next_state_class_obj.getState() else: next_state_class_obj = State(data_train, Bal_stock1_t1, open_cash_t1, t + 1) next_state_array_obj = next_state_class_obj.getState() agent.memory.append( (state_array_obj, action, reward, next_state_array_obj, done)) # print("Action is "+str(action)+" reward is" + str(reward)) Bal_stock1 = Bal_stock1_t1 # Bal_stock2 = Bal_stock2_t1 open_cash = open_cash_t1 if done == True: total_Prof.append(total_profit) total_stock1bal.append(len(agent.inventory1)) # total_stock2bal.append(len(agent.inventory2)) total_open_cash.append(state_class_obj.open_cash) total_port_value.append(state_class_obj.portfolio_value) total_days_played.append(t) print("--------------------------------") state_class_obj.reset() break if len(agent.memory) > batch_size: agent.expReplay(batch_size) print(reward) if reward > max: max = reward agent.model.save("models/model_" + date + "-max") if e % 30 == 0: agent.model.save("models/model_" + date + "-" + str(e)) if path.exists("models/model_" + date + "-max"): model_name = "model_" + date + "-max" else: model_name = "model_" + date + "-" + str(episode_count) return model_name