Example #1
0
def Trainer(stock_name, window_size, episode_count):
    """Train a Q-learning trading agent on a single stock's price series.

    Args:
        stock_name: name of the price series loaded via getStockDataVec.
        window_size: number of past prices forming the agent's state.
        episode_count: number of training episodes (runs episode_count + 1).

    Side effects: prints buy/sell/profit logs and saves the model to
    "models/model_ep<e>" every 10 episodes.
    """
    agent = Agent(window_size)
    data = getStockDataVec(stock_name)
    last_step = len(data) - 1  # final timestep index of each episode
    batch_size = 32

    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        state = getState(data, 0, window_size + 1)

        total_profit = 0
        agent.inventory = []

        for t in range(last_step):
            action = agent.act(state)

            # action 0 (sit) leaves reward at 0
            next_state = getState(data, t + 1, window_size + 1)
            reward = 0

            if action == 1:  # buy one share at today's price
                agent.inventory.append(data[t])
                print("Buy: " + formatPrice(data[t]))

            elif action == 2 and len(agent.inventory) > 0:  # sell oldest share
                bought_price = agent.inventory.pop(0)
                # only positive gains are rewarded; losses give reward 0
                reward = max(data[t] - bought_price, 0)
                total_profit += data[t] - bought_price
                print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

            done = t == last_step - 1
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state

            if done:
                print("--------------------------------")
                print("Total Profit: " + formatPrice(total_profit))
                print("--------------------------------")

            # train on a minibatch once enough transitions are buffered
            if len(agent.memory) > batch_size:
                agent.expReplay(batch_size)

        # checkpoint the model every 10 episodes
        if e % 10 == 0:
            agent.model.save("models/model_ep" + str(e))
def train_model(episode_count, start_balance, data_train, training, date):
    """Train a DQN trading agent and return the name of the saved model.

    Args:
        episode_count: number of training episodes (runs episode_count + 1).
        start_balance: initial cash; half is spent on stock at data_train[0].
        data_train: sequence of prices indexed by timestep.
        training: number of timesteps played per episode.
        date: string tag embedded in the saved model filenames.

    Returns:
        "model_<date>-max" if a best-reward checkpoint was ever written,
        otherwise "model_<date>-<episode_count>".
    """
    from os import path

    # Per-episode summary stats, recorded when each episode ends.
    total_Prof = []
    total_stock1bal = []
    total_open_cash = []
    total_port_value = []
    total_days_played = []
    batch_size = 64

    # Best final reward across episodes; renamed from `max` (which shadowed
    # the builtin) and hoisted out of the loop so it actually tracks a
    # running maximum instead of being reset to 0 every episode.
    best_reward = 0

    for e in range(episode_count + 1):
        print("..........")
        print("Episode " + str(e) + "/" + str(episode_count))

        # Spend half of the starting balance on stock 1 at the first price.
        Bal_stock1 = int(np.floor((start_balance / 2) / data_train[0]))
        open_cash = start_balance / 2

        datasize = training
        done = False
        total_profit = 0
        reward = 0

        # NOTE(review): a fresh Agent is built every episode, so learned
        # weights are discarded between episodes — confirm this is intended.
        agent = Agent(5)
        agent.inventory1 = []
        for i in range(Bal_stock1):
            agent.inventory1.append(data_train[0])

        # Run one episode over all days in datasize.
        for t in range(datasize):
            state_class_obj = State(data_train, Bal_stock1, open_cash, t)
            state_array_obj = state_class_obj.getState()
            action = agent.act(state_array_obj)

            # Percent change of today's price vs. the 5-day reference price.
            change_percent_stock1 = (state_class_obj.Stock1Price -
                                     state_class_obj.fiveday_stock1
                                     ) / state_class_obj.fiveday_stock1 * 100

            if action == 0:  # buy stock 1
                if state_class_obj.Stock1Price > state_class_obj.open_cash:
                    # Cannot afford the share: heavy penalty; episode continues.
                    reward = reward - 4000
                else:
                    agent.inventory1.append(data_train[t])
                    Bal_stock1_t1 = len(agent.inventory1)
                    # Buying exactly one share.
                    open_cash_t1 = state_class_obj.open_cash - state_class_obj.Stock1Price

                    if (state_class_obj.open_cash < 500):
                        # Buying while nearly out of cash.
                        reward = reward - 2000
                    elif (0.1 * Bal_stock1_t1 > Bal_stock1):
                        # Position grew more than 10x since episode start.
                        reward = reward - (1000 * Bal_stock1_t1)
                    else:
                        reward = reward - (change_percent_stock1 * 1000)

            if action == 1:  # sell stock 1
                if state_class_obj.Stock1Blnc < 1:
                    # Nothing to sell: heavy penalty; episode continues.
                    reward = reward - 4000
                else:
                    bought_price1 = agent.inventory1.pop(0)
                    Bal_stock1_t1 = len(agent.inventory1)
                    total_profit += data_train[t] - bought_price1
                    # Selling exactly one share.
                    open_cash_t1 = state_class_obj.open_cash + state_class_obj.Stock1Price

                    if (0.1 * Bal_stock1_t1 > Bal_stock1):
                        reward = reward - (1000 * Bal_stock1_t1)
                    elif total_profit > 200:
                        reward = reward + (2000 * total_profit)
                    else:
                        reward = reward + (change_percent_stock1 * 100)

            if action == 2:  # do nothing
                # NOTE(review): rewards holding only when cash is below 5% of
                # the start balance, penalizes idling otherwise — confirm intent.
                if (state_class_obj.open_cash < 0.05 * start_balance):
                    reward += 2000
                else:
                    reward = reward - 2000

                Bal_stock1_t1 = len(agent.inventory1)
                open_cash_t1 = open_cash

            # Build the successor state (clamped to the last day at episode end).
            if t == datasize - 1:
                done = True
                next_state_class_obj = State(data_train, Bal_stock1_t1,
                                             open_cash_t1, t)
            else:
                next_state_class_obj = State(data_train, Bal_stock1_t1,
                                             open_cash_t1, t + 1)
            next_state_array_obj = next_state_class_obj.getState()

            agent.memory.append(
                (state_array_obj, action, reward, next_state_array_obj, done))

            Bal_stock1 = Bal_stock1_t1
            open_cash = open_cash_t1

            if done:
                # Record episode summary and stop the episode.
                total_Prof.append(total_profit)
                total_stock1bal.append(len(agent.inventory1))
                total_open_cash.append(state_class_obj.open_cash)
                total_port_value.append(state_class_obj.portfolio_value)
                total_days_played.append(t)
                print("--------------------------------")
                state_class_obj.reset()
                break

            # Train on a minibatch once enough transitions are buffered.
            if len(agent.memory) > batch_size:
                agent.expReplay(batch_size)

        print(reward)
        # Checkpoint whenever an episode's final reward beats the best so far.
        if reward > best_reward:
            best_reward = reward
            agent.model.save("models/model_" + date + "-max")

        if e % 30 == 0:
            agent.model.save("models/model_" + date + "-" + str(e))

    # Prefer the best-reward model if one was ever saved.
    if path.exists("models/model_" + date + "-max"):
        model_name = "model_" + date + "-max"
    else:
        model_name = "model_" + date + "-" + str(episode_count)
    return model_name
		# NOTE(review): orphaned fragment — the body of an inner training loop
		# pasted without its enclosing function; `agent`, `state`, `data`, `t`,
		# `l`, `window_size`, `total_profit`, `batch_size` and `e` all come
		# from the missing enclosing scope.
		action = agent.act(state)

		# sit: action 0 keeps reward at 0
		next_state = getState(data, t + 1, window_size + 1)
		reward = 0

		if action == 1: # buy one share at today's price
			agent.inventory.append(data[t])
			print("Buy: " + formatPrice(data[t]))

		elif action == 2 and len(agent.inventory) > 0: # sell oldest share
			bought_price = agent.inventory.pop(0)
			# only positive gains are rewarded; losses give reward 0
			reward = max(data[t] - bought_price, 0)
			total_profit += data[t] - bought_price
			print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

		# episode ends on the last timestep
		done = True if t == l - 1 else False
		agent.memory.append((state, action, reward, next_state, done))
		state = next_state

		if done:
			print("--------------------------------")
			print("Total Profit: " + formatPrice(total_profit))
			print("--------------------------------")

		# train on a minibatch once enough transitions are buffered
		if len(agent.memory) > batch_size:
			agent.expReplay(batch_size)

	# checkpoint the model every 10 episodes
	if e % 10 == 0:
		agent.model.save("models/model_ep" + str(e))
Example #5
0
    # NOTE(review): orphaned fragment — one training step of an actor-critic
    # agent pasted without its enclosing loop; `agent`, `cur_state`, `action`,
    # `reward`, `next_state`, `num_steps`, `model_dir` and `env` come from the
    # missing enclosing scope.
    # Append the possible actions to replay memory
    # next_state[4:7] = np.zeros(3)
    # hold_state = np.copy(next_state)
    # hold_state[4] = 1
    # agent.memory.append((cur_state, 0, rewards[0], hold_state))
    # buy_state = np.copy(next_state)
    # buy_state[5] = 1
    # agent.memory.append((cur_state, 1, rewards[1], buy_state))
    # sell_state = np.copy(next_state)
    # sell_state[6] = 1
    # agent.memory.append((cur_state, 2, rewards[2], sell_state))
    agent.memory.append((cur_state, action, reward, next_state))

    # replay every 96 steps, but only while the buffer holds exactly 480
    # transitions (presumably its fixed capacity — confirm in Agent)
    if num_steps % 96 == 0 and len(agent.memory) == 480:
        agent.expReplay()

    # presumably syncs the target network with the online one — confirm in Agent
    agent.targetUpdate()

    # checkpoint both models every 1000 steps; "." marks success, "x" a
    # backend that cannot save (NotImplementedError), training continues
    if num_steps % 1000 == 0:
        print("Training checkpoint (", num_steps, ") ", sep="", end="")
        try:
            agent.critic_model.save(
                path.join(model_dir, "critic_model_" + str(num_steps)))
            print(".")
            agent.actor_model.save(
                path.join(model_dir, "actor_model_" + str(num_steps)))
        except NotImplementedError:
            print("x")
        print(str(env))
        print(str(agent))