def Trainer(stock_name, window_size, episode_count):
    """Train a DQN trading agent on the named stock's price series.

    Parameters:
        stock_name: key passed to getStockDataVec to load the price vector.
        window_size: sliding-window size for the Agent's state representation.
        episode_count: number of the last episode (episodes 0..episode_count run).

    Side effects:
        Prints per-trade and per-episode summaries; saves the model to
        "models/model_ep<e>" every 10 episodes.
    """
    agent = Agent(window_size)
    data = getStockDataVec(stock_name)
    last_t = len(data) - 1  # final usable timestep (renamed from ambiguous `l`)
    batch_size = 32

    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        state = getState(data, 0, window_size + 1)
        total_profit = 0
        agent.inventory = []

        for t in range(last_t):
            action = agent.act(state)  # presumably 0 = sit, 1 = buy, 2 = sell
            next_state = getState(data, t + 1, window_size + 1)
            reward = 0

            if action == 1:  # buy: hold one share bought at today's price
                agent.inventory.append(data[t])
                print("Buy: " + formatPrice(data[t]))
            elif action == 2 and len(agent.inventory) > 0:  # sell oldest share (FIFO)
                bought_price = agent.inventory.pop(0)
                # Clamp the reward at zero so losing trades are not reinforced,
                # while total_profit still tracks the true (possibly negative) P&L.
                reward = max(data[t] - bought_price, 0)
                total_profit += data[t] - bought_price
                print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

            done = t == last_t - 1  # simplified from `True if ... else False`
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state

            if done:
                print("--------------------------------")
                print("Total Profit: " + formatPrice(total_profit))
                print("--------------------------------")

            # Replay once the buffer holds more than one batch of transitions.
            if len(agent.memory) > batch_size:
                agent.expReplay(batch_size)

        if e % 10 == 0:
            agent.model.save("models/model_ep" + str(e))
# sit action next_state = getState(data, t + 1, window_size + 1) reward = 0 # buy action if action == 1: agent.inventory.append(data[t]) print("buy: " + formatPrice(data[t])) # sell action and if we have more than 0 stock elif action == 2 and len(agent.inventory) > 0: bought_price = agent.inventory.pop(0) reward = max(data[t] - bought_price, 0) total_profit += data[t] - bought_price print("sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price)) # we save if we almost end loop if t == data_len - 1: done = True else: done = False # we send dtata to agent agent.memory.append((state, action, reward, next_state, done)) state = next_state # we print how much we earn if done: print(stock_name + " Total Profit: " + formatPrice(total_profit)) if len(agent.memory) > batch_size: agent.expReplay(batch_size) # we save model every 10 episodes if episode % 10 == 0: agent.model.save("models/model_ep" + str(episode))
def train_model(episode_count, start_balance, data_train, training, date):
    """Train the trading agent over ``episode_count + 1`` episodes.

    Parameters:
        episode_count: number of the last episode (episodes 0..episode_count run).
        start_balance: starting capital; half is kept as cash and half is
            converted into stock-1 shares at the first price in data_train.
        data_train: sequence of stock-1 prices indexed by timestep.
        training: number of timesteps played per episode.
        date: string used to tag saved model files.

    Returns:
        Name of the saved model to use: "model_<date>-max" when a best-reward
        snapshot exists, otherwise the last periodic checkpoint name.

    Side effects:
        Saves models under "models/"; prints episode progress and rewards.
    """
    from os import path

    # Per-episode bookkeeping (appended to each episode; retained for analysis).
    total_Prof = []
    total_stock1bal = []
    total_open_cash = []
    total_port_value = []
    total_days_played = []
    batch_size = 64

    for e in range(episode_count + 1):
        print("..........")
        print("Episode " + str(e) + "/" + str(episode_count))

        # Split the starting balance: half into cash, half into whole shares
        # bought at the first training price.
        Bal_stock1 = int(np.floor((start_balance / 2) / data_train[0]))
        open_cash = start_balance / 2
        datasize = training
        done = False
        total_profit = 0
        reward = 0
        max_reward = 0  # renamed from `max`, which shadowed the builtin

        # Fresh agent each episode; 5 is the state-vector size fed to the model.
        agent = Agent(5)
        agent.inventory1 = []
        for i in range(Bal_stock1):
            agent.inventory1.append(data_train[0])

        # Run one episode over all timesteps in the training window.
        for t in range(datasize):
            # Default next-step portfolio: unchanged.  An invalid action
            # (buy without cash / sell without stock) then leaves the holdings
            # as-is.  This also fixes a NameError the original risked at t == 0
            # when the very first action was invalid, since Bal_stock1_t1 /
            # open_cash_t1 were only assigned inside the valid branches.
            Bal_stock1_t1 = Bal_stock1
            open_cash_t1 = open_cash

            state_class_obj = State(data_train, Bal_stock1, open_cash, t)
            state_array_obj = state_class_obj.getState()
            action = agent.act(state_array_obj)

            # Percent change of today's price against the five-day reference.
            change_percent_stock1 = (
                state_class_obj.Stock1Price - state_class_obj.fiveday_stock1
            ) / state_class_obj.fiveday_stock1 * 100

            if action == 0:  # buy stock 1
                if state_class_obj.Stock1Price > state_class_obj.open_cash:
                    # Cannot afford a share: heavy penalty, portfolio unchanged.
                    reward = reward - 4000
                else:
                    # Buy exactly one share at today's price.
                    agent.inventory1.append(data_train[t])
                    Bal_stock1_t1 = len(agent.inventory1)
                    open_cash_t1 = state_class_obj.open_cash - state_class_obj.Stock1Price
                    # Reward shaping.  NOTE(review): the 500-cash floor and the
                    # 10x-growth test are magic numbers from the original —
                    # confirm their intent before tuning.
                    if state_class_obj.open_cash < 500:
                        reward = reward - 2000
                    elif 0.1 * Bal_stock1_t1 > Bal_stock1:
                        reward = reward - (1000 * Bal_stock1_t1)
                    else:
                        reward = reward - (change_percent_stock1 * 1000)

            if action == 1:  # sell stock 1
                if state_class_obj.Stock1Blnc < 1:
                    # Nothing to sell: heavy penalty, portfolio unchanged.
                    reward = reward - 4000
                else:
                    # Sell the oldest held share (FIFO) and realize its P&L.
                    bought_price1 = agent.inventory1.pop(0)
                    Bal_stock1_t1 = len(agent.inventory1)
                    total_profit += data_train[t] - bought_price1
                    open_cash_t1 = state_class_obj.open_cash + state_class_obj.Stock1Price
                    if 0.1 * Bal_stock1_t1 > Bal_stock1:
                        reward = reward - (1000 * Bal_stock1_t1)
                    elif total_profit > 200:
                        reward = reward + (2000 * total_profit)
                    else:
                        reward = reward + (change_percent_stock1 * 100)

            if action == 2:  # hold / do nothing
                if state_class_obj.open_cash < 0.05 * start_balance:
                    # Holding is rewarded when cash is nearly exhausted.
                    reward += 2000
                else:
                    reward = reward - 2000
                Bal_stock1_t1 = len(agent.inventory1)
                open_cash_t1 = open_cash

            # Build the successor state; the time index is clamped on the
            # final step so State never reads past the data.
            if t == datasize - 1:
                done = True
                next_state_class_obj = State(data_train, Bal_stock1_t1, open_cash_t1, t)
            else:
                next_state_class_obj = State(data_train, Bal_stock1_t1, open_cash_t1, t + 1)
            next_state_array_obj = next_state_class_obj.getState()

            agent.memory.append(
                (state_array_obj, action, reward, next_state_array_obj, done))

            # Roll the portfolio forward to the next timestep.
            Bal_stock1 = Bal_stock1_t1
            open_cash = open_cash_t1

            if done:
                # Episode finished: record summary statistics and stop.
                total_Prof.append(total_profit)
                total_stock1bal.append(len(agent.inventory1))
                total_open_cash.append(state_class_obj.open_cash)
                total_port_value.append(state_class_obj.portfolio_value)
                total_days_played.append(t)
                print("--------------------------------")
                state_class_obj.reset()
                break

            if len(agent.memory) > batch_size:
                agent.expReplay(batch_size)

        print(reward)
        # Snapshot the model whenever this episode's final reward beats the
        # per-episode best seen so far.
        if reward > max_reward:
            max_reward = reward
            agent.model.save("models/model_" + date + "-max")
        # Periodic checkpoint every 30 episodes.
        if e % 30 == 0:
            agent.model.save("models/model_" + date + "-" + str(e))

    # Prefer the best-reward snapshot; fall back to the final periodic save.
    if path.exists("models/model_" + date + "-max"):
        model_name = "model_" + date + "-max"
    else:
        model_name = "model_" + date + "-" + str(episode_count)
    return model_name
action = agent.act(state) # sit next_state = getState(data, t + 1, window_size + 1) reward = 0 if action == 1: # buy agent.inventory.append(data[t]) print("Buy: " + formatPrice(data[t])) elif action == 2 and len(agent.inventory) > 0: # sell bought_price = agent.inventory.pop(0) reward = max(data[t] - bought_price, 0) total_profit += data[t] - bought_price print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price)) done = True if t == l - 1 else False agent.memory.append((state, action, reward, next_state, done)) state = next_state if done: print("--------------------------------") print("Total Profit: " + formatPrice(total_profit)) print("--------------------------------") if len(agent.memory) > batch_size: agent.expReplay(batch_size) if e % 10 == 0: agent.model.save("models/model_ep" + str(e))
# Append the possible actions to replay memory # next_state[4:7] = np.zeros(3) # hold_state = np.copy(next_state) # hold_state[4] = 1 # agent.memory.append((cur_state, 0, rewards[0], hold_state)) # buy_state = np.copy(next_state) # buy_state[5] = 1 # agent.memory.append((cur_state, 1, rewards[1], buy_state)) # sell_state = np.copy(next_state) # sell_state[6] = 1 # agent.memory.append((cur_state, 2, rewards[2], sell_state)) agent.memory.append((cur_state, action, reward, next_state)) if num_steps % 96 == 0 and len(agent.memory) == 480: agent.expReplay() agent.targetUpdate() if num_steps % 1000 == 0: print("Training checkpoint (", num_steps, ") ", sep="", end="") try: agent.critic_model.save( path.join(model_dir, "critic_model_" + str(num_steps))) print(".") agent.actor_model.save( path.join(model_dir, "actor_model_" + str(num_steps))) except NotImplementedError: print("x") print(str(env)) print(str(agent))