def step(self, action):
    """Advance the environment by one time step.

    ``action`` must be 0 (sit), 1 (buy) or 2 (sell).

    * Buy appends the price at the current index to the agent's inventory.
    * Sell pops the oldest inventory entry, adds the resulting profit to
      ``self.total_profit`` and rewards ``max(profit, 0)``.
    * Sitting, buying, and selling with an empty inventory are all
      rewarded with 0.

    Returns:
        The tuple ``(next_state, reward, done, {})``. ``done`` is true when
        the step was taken at index ``len(self.data) - 2``.
    """
    assert (action in (0, 1, 2))

    # The successor state is built before any bookkeeping is modified.
    next_state = getState(self.data, self.t + 1, self.window_size + 1)

    # Only a successful sale can change the reward from its default.
    reward = 0
    if action == 1:
        price = self.data[self.t]
        self.agent.inventory.append(price)
        print("Buy: " + formatPrice(price))
    elif action == 2 and self.agent.inventory:
        # Oldest purchase is sold first.
        bought_price = self.agent.inventory.pop(0)
        price = self.data[self.t]
        profit = price - bought_price
        reward = max(profit, 0)
        self.total_profit += profit
        print("Sell: " + formatPrice(price) + " | Profit: " + formatPrice(profit))

    # Evaluate termination against the index used for this step, then move on.
    final_step = len(self.data) - 2
    done = bool(self.t == final_step)
    self.t += 1

    return next_state, reward, done, {}
def render(self, mode='human', close=False):
    """Print a one-line account summary for the current trading day.

    Nothing happens once ``self.current_step`` has reached
    ``self.price_history.get_length()``. When ``close`` is true no summary
    is printed; instead the viewer process, if one is set, is sent
    ``SIGKILL``. ``mode`` is accepted but not used in this method.
    """
    if self.current_step >= self.price_history.get_length():
        return

    if close:
        if self.viewer is not None:
            os.kill(self.viewer.pid, signal.SIGKILL)
        return

    today = self.trading_days[self.current_step]
    close_price = today.get_close()

    # Build the pieces in the same order they appear in the printed line.
    date_text = today.get_date().strftime('%m/%d/%Y')
    cash_text = formatPrice(self.cash)
    holdings_text = formatPriceOfHoldings(self.inventory, close_price)
    balance_text = formatPrice(self.balance)

    print(date_text + " Cash " + cash_text
          + " | Holdings: " + holdings_text
          + " | Balance: " + balance_text)
agent.inventory = [] for t in range(l): action = agent.ActRbot(state) print('****************************') print('Action is {}'.format(f.decipheraction(action))) # hold next_state = f.getState(data, t + 1, window_size + 1) reward = 0 if action == 1: # buy agent.inventory.append(data[t + 1]) print("Buy: " + f.formatPrice(data[t + 1])) print('****************************') elif action == 2 and len(agent.inventory) > 0: # sell bought_price = agent.inventory.pop(0) reward = max(data[t + 1] - bought_price, 0) total_profit += data[t + 1] - bought_price print("Sell: " + f.formatPrice(data[t + 1]) + " | Profit: " + f.formatPrice(data[t + 1] - bought_price)) print('****************************') done = True if t == l - 1 else False agent.MemoryRbot(state, action, reward, next_state, done) state = next_state if done:
# Per-run bookkeeping; the loop below stops one index short of len(data).
data_size = len(data) - 1
batch_size = 100
gain = 0
reward = 0
broker.trades_list = []
state = getState(data, 0, state_size + 1)

for t in range(data_size):
    action = broker.act(state)

    if action == 1 and broker.portfolio >= data[t]:
        # Buy: only when the portfolio can cover the current price.
        broker.trades_list.append(data[t])
        broker.portfolio -= data[t]
        print(
            "Bought: " + formatPrice(data[t])
            + "| Portfolio Value: " + formatPrice(broker.portfolio)
        )
        reward = 0
    elif action == 2 and broker.trades_list:
        # Sell: the oldest open trade is closed first.
        buying_price = broker.trades_list.pop(0)
        gain += data[t] - buying_price
        # Reward follows the running total gain, not this single trade.
        reward = 1 if gain > 0 else 0
        broker.portfolio += data[t]
        print(
            "Sold: " + formatPrice(data[t])
            + " | Financial Gain: " + formatPrice(data[t] - buying_price)
            + "| Portfolio Value: " + formatPrice(broker.portfolio)
        )
for e in range(nb_epochs + 1): print("Epoch Number:" + str(e) + "/" + str(nb_epochs)) state = getState(data, 0, state_size + 1) reward = 0 gain = 0 broker.trades_list = [] broker.portfolio = 100000 for t in range(data_size - state_size + 1): action = broker.act(state) if action == 1 and broker.portfolio >= data[t]: # buy broker.trades_list.append(data[t]) broker.portfolio = broker.portfolio - data[t] print("Buy: " + formatPrice(data[t]) + "| Portfolio Value: " + formatPrice(broker.portfolio) + "| Inventory Size:" + str(len(broker.trades_list))) reward = 0 elif action == 2 and len(broker.trades_list) > 0: # sell buying_price = broker.trades_list.pop(0) broker.portfolio = broker.portfolio + data[t] #reward = max(data[t] - buying_price, 0) #gain += data[t] - buying_price if (gain > 0): reward = 1 else: reward = 0 print("Sell: " + formatPrice(data[t]) + " | Profit: " +