class Environnement:
    """Trading environment that steps through a price series for an agent.

    The instance keeps the price data, the wallet, the open-order inventory
    and per-day / per-episode statistics.  An agent drives it through
    ``reset()`` (start an episode) and ``execute(action)`` (advance one
    step); ``daily_processing`` handles end-of-day bookkeeping.

    NOTE(review): date strings in ``self._date`` are indexed at positions
    3, 5, 7 and 11, which presumably are the last digits of the year, month,
    day and minute of a ``YYYYMMDDHHMM`` stamp -- confirm against
    ``getStockDataVec``.
    """

    def __init__(self, gui):
        """Build the environment, load the stock data and scan its dates.

        Args:
            gui: flag read elsewhere in the class; ``0`` enables a tqdm
                progress bar in ``check_dates`` and ``1`` enables chart
                preprocessing in ``execute``.
        """
        # Software identity.
        self._name = os.path.basename(__file__).replace(".py", "")
        self.name = "TradzQAI"
        self.version = "v0.2"
        self.v_state = "Alpha"
        self._platform = sys.platform
        self.agents = self.src_agents()
        self.gui = gui

        # Model / dataset selection.
        self.model = None
        self.model_name = "PPO"
        self.mode = ""
        self.stock_name = "DAX30_TICK_2018_03_15"
        self.model_dir = self.model_name + "_" + self.stock_name.split("_")[0]
        self.episode_count = 500
        self.window_size = 10
        self.batch_size = 32

        # Running counters.
        self.daily_trade = dict(
            win = 0,
            loss = 0,
            draw = 0,
            total = 0
        )
        self.reward = dict(
            current = 0,
            daily = 0,
            total = 0
        )
        self.current_step = dict(
            order = "",
            episode = 0,
            step = -1
        )
        self.trade = dict(
            win = 0,
            loss = 0,
            draw = 0,
            total = 0,
        )
        self.date = dict(
            day = 1,
            month = 1,
            year = 1,
            total_minutes = 1,
            total_day = 1,
            total_month = 1,
            total_year = 1
        )
        self.price = dict(
            buy = 0,
            sell = 0
        )
        self.last_closed_order = dict(
            current_price = 0,
            current_pos = ""
        )

        self.step_left = 0
        self.max_return = 0
        self.max_drawdown = 0
        self.pause = 0
        self.action = None
        self.data = None
        # -1 means "no position event on this step" (see manage_orders).
        self.POS_BUY = -1
        self.POS_SELL = -1
        self.start_t = 0
        self.loop_t = 0
        self._date = None
        self.mod_ordr = False
        self.day_changed = False
        self.new_episode = False

        # History buffers.
        self.lst_data = []
        self.lst_inventory_len = []
        self.lst_return = deque(maxlen=1000)
        self.lst_mean_return = []
        self.lst_sharp_ratio = []
        self.lst_drawdown = []
        self.lst_win_order = []
        self.lst_loose_order = []
        self.lst_draw_order = []
        self.lst_capital = []
        self.lst_act = deque(maxlen=1000)
        self.lst_reward = deque(maxlen=1000)
        self.lst_state = deque(maxlen=1000)
        self.train_in = []
        self.train_out = []

        # Chart (candle) state.
        self.time = None
        self.lst_data_full = deque(maxlen=100)
        self.lst_data_preprocessed = []
        self.offset = 0
        self.tensorOCHL = [[] for _ in range(4)]
        self.lst_reward_daily = []
        self.readed = None

        # Order matters from here on: the inventory needs the wallet, and the
        # settings builders need both the wallet and the loaded data.
        self.wallet = Wallet()
        self.inventory = Inventory(self.wallet.risk_managment['stop_loss'])
        self.data, self.raw, self._date = getStockDataVec(self.stock_name)
        self.settings = dict(
            network = self.get_network(),
            agent = self.get_agent_settings(),
            env = self.get_env_settings()
        )
        self.logger = Logger()
        self.logger._load_conf(self)
        self.check_dates()

    def get_network(self):
        """Return the network specification as a list of layer dicts."""
        network = [dict(type='dense', size=64, activation='relu'),
                   dict(type='dense', size=32, activation='relu'),
                   dict(type='dense', size=8, activation='relu')]
        # Alternative convolutional specification, kept for reference:
        # network = [dict(type="conv1d", size=64, window=4, stride=1,
        #                 padding="SAME"),
        #            dict(type="conv1d", size=64, window=2, stride=1,
        #                 padding="SAME"),
        #            dict(type="flatten"),
        #            dict(type="dense", size=128, activation="relu")]
        return network

    def get_agent_settings(self):
        """Populate the agent-related setting dicts and return them as a list.

        Returns:
            ``[hyperparameters, exploration, update_mode, summarizer,
            memory, optimizer]``; each is also stored on ``self``.
        """
        self.update_mode = dict(
            unit = 'timesteps',
            batch_size = self.batch_size,
            frequency = self.batch_size // 8
        )
        self.summarizer = dict(
            directory="./board/",
            steps=1000,
            labels=['configuration',
                    'gradients_scalar',
                    'regularization',
                    'inputs',
                    'losses',
                    'variables']
        )
        self.memory = dict(
            type='latest',
            include_next_states=True,
            capacity=((len(self.data) - 1) * self.batch_size)
        )
        self.hyperparameters = dict(
            update_rate = 1e-3,
            learning_rate = 1e-3,
            gamma = 0.97,
            epsilon = 1.0,
            epsilon_min = 1e-2,
            epsilon_decay = 0.995
        )
        self.exploration = dict(
            type = 'epsilon_anneal',
            initial_epsilon = self.hyperparameters['epsilon'],
            final_epsilon = self.hyperparameters['epsilon_min']
        )
        self.optimizer = dict(
            type='adam',
            learning_rate=self.hyperparameters['learning_rate']
        )
        agent = [self.hyperparameters,
                 self.exploration,
                 self.update_mode,
                 self.summarizer,
                 self.memory,
                 self.optimizer]
        return agent

    def get_env_settings(self):
        """Populate the environment setting dicts and return them as a list.

        Returns:
            ``[contract_settings, wallet.settings, wallet.risk_managment,
            meta]``.
        """
        self.contract_settings = dict(
            pip_value = 5,
            contract_price = 125,
            spread = 1,
            allow_short = False
        )
        self.meta = dict(
            window_size = self.window_size,
            batch_size = self.batch_size
        )
        env = [self.contract_settings,
               self.wallet.settings,
               self.wallet.risk_managment,
               self.meta]
        return env

    def _pause(self):
        """Set the pause flag."""
        self.pause = 1

    def _resume(self):
        """Clear the pause flag."""
        self.pause = 0

    def init_logger(self):
        """Initialise the logger's saver and load it."""
        self.logger.init_saver(self)
        self.logger._load()
        #self.logger.new_logs(self._name)

    def def_act(self):
        """Translate ``self.action`` into a label and record it in ``lst_act``.

        Action 1 is "BUY" (+1), 2 is "SELL" (-1), anything else "HOLD" (0).
        """
        if self.action == 1:
            self.act = "BUY"
            self.lst_act.append(1)
        elif self.action == 2:
            self.act = "SELL"
            self.lst_act.append(-1)
        else:
            self.act = "HOLD"
            self.lst_act.append(0)

    def manage_orders(self, ordr):
        """Append a summary of the last trade to the orders table.

        Args:
            ordr: DataFrame with an ``'Orders'`` column.

        Returns:
            The (possibly new) DataFrame.  ``self.mod_ordr`` tells whether a
            row was added on this call.
        """
        if self.POS_BUY > -1 or self.POS_SELL > -1:
            last_trade = self.inventory.get_last_trade()
            new = [str(last_trade['open']['pos'])
                   + " : "
                   + '{:.2f}'.format(last_trade['open']['price'])
                   + " -> "
                   + str(last_trade['close']['pos'])
                   + " : "
                   + '{:.2f}'.format(last_trade['close']['price'])
                   + " | Profit : "
                   + '{:.2f}'.format(self.wallet.profit['current'])]
            # Drop the oldest row once the table exceeds 37 entries.
            if len(ordr['Orders']) > 37:
                ordr = (ordr.drop(0)).reset_index(drop=True)
            tmp = pd.DataFrame(new, columns = ['Orders'])
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that works on old and new versions alike.
            ordr = pd.concat([ordr, tmp], ignore_index=True)
            self.mod_ordr = True
        else:
            self.mod_ordr = False
        return ordr

    def src_agents(self):
        """List the entries of ``agents/`` with ``.py`` removed, minus boilerplate."""
        ignore = ['agent.py', '__init__.py', '__pycache__']
        return [f.replace(".py", "")
                for f in os.listdir("agents")
                if f not in ignore]

    def check_dates(self):
        """Normalise the date strings and count minute/day/month/year changes.

        Spaces are stripped from every date and it is cut to 12 characters.
        Consecutive dates are then compared digit-wise and the ``total_*``
        counters in ``self.date`` are incremented.  Finally ``self.time`` is
        set to ``"1M"`` when every row counted as a new minute, else
        ``"Tick"``.
        """
        self._date = self._date.apply(lambda x: x.replace(" ", "")[:12])
        if self.gui == 0:
            ldate = tqdm(range(1, len(self._date) - 1), desc = "Checking dates ")
        else:
            ldate = range(1, len(self._date) - 1)
        for r in ldate:
            date_c = self._date[r]
            date_p = self._date[r - 1]
            if date_p[11] != date_c[11]:
                self.date['total_minutes'] += 1
            if date_p[7] != date_c[7]:
                self.date['total_day'] += 1
            if date_p[5] != date_c[5]:
                self.date['total_month'] += 1
            if date_p[3] != date_c[3]:
                # Fixed: a year change used to bump 'total_day' a second time
                # and 'total_year' was never updated.
                self.date['total_year'] += 1
        if self.date['total_minutes'] != len(self._date) - 1:
            self.time = "Tick"
        else:
            self.time = "1M"

    def check_time_before_closing(self):
        """Compute ``self.step_left`` by scanning ahead for the next day change.

        The scan stops at the first index whose day digit differs from the
        previous row, or at the end of the range.
        """
        # Fallback for an empty scan range (already at the last usable step);
        # previously ``idx`` was unbound there and raised NameError.
        idx = self.current_step['step']
        for idx in range(self.current_step['step'] + 1, len(self._date) - 1):
            if self._date[idx - 1][7] != self._date[idx][7]:
                break
        self.step_left = idx - self.current_step['step'] + 1

    def manage_date(self):
        """Advance the day/month/year counters for the current step.

        Returns:
            1 if the calendar day changed relative to the previous step
            (``self.day_changed`` is set accordingly), else 0.
        """
        self.day_changed = False
        step = self.current_step['step']
        if step > 0:
            previous = self._date[step - 1]
            current = self._date[step]
            if previous[3] != current[3]:
                self.date['year'] += 1
                self.date['month'] = 1
                self.date['day'] = 1
                self.day_changed = True
            elif previous[5] != current[5]:
                self.date['month'] += 1
                self.date['day'] = 1
                self.day_changed = True
            elif previous[7] != current[7]:
                self.date['day'] += 1
                self.day_changed = True
        return 1 if self.day_changed else 0

    def get3DState(self):
        """Build a ``(window_size, 4)`` array from the current state window.

        Each row holds four values derived from ``state[i]`` and, depending
        on the date digit and the stored ``tensorOCHL`` history, from the
        previous row.
        NOTE(review): the column meaning (presumably open/close/low/high)
        and the index arithmetic are kept exactly as found -- confirm intent.
        """
        for idx, value in enumerate(self.lst_data_preprocessed):
            self.tensorOCHL[idx].append(value)

        step = self.current_step['step']
        state = getState(self.raw, step + 1, self.window_size + 1)
        d = step - self.window_size + 1
        tensorState = []
        for i in range(self.window_size):
            if d + i > 0 and i > 0:
                pos = step - (d + i)
                if self._date[pos][11] == "0" or self._date[pos + 1][11] == "5":
                    tensorState.append([state[i], state[i], state[i], state[i]])
                elif step and self.tensorOCHL[2][pos] > self.tensorOCHL[2][pos + 1]:
                    tensorState.append([state[i],
                                        tensorState[i - 1][1],
                                        state[i],
                                        tensorState[i - 1][3]])
                elif step and self.tensorOCHL[3][pos] < self.tensorOCHL[3][pos + 1]:
                    tensorState.append([state[i],
                                        tensorState[i - 1][1],
                                        tensorState[i - 1][2],
                                        state[i]])
                else:
                    tensorState.append([tensorState[i - 1][0],
                                        state[i],
                                        tensorState[i - 1][2],
                                        tensorState[i - 1][3]])
            else:
                tensorState.append([state[i], state[i], state[i], state[i]])
        return np.array(tensorState)

    def _candle_row(self, index):
        """Return the chart tuple (index, open, close, min, high, last action)."""
        return (index,
                self.lst_data_preprocessed[0],  # open
                self.lst_data_preprocessed[1],  # close
                self.lst_data_preprocessed[2],  # min
                self.lst_data_preprocessed[3],  # high
                self.lst_act[len(self.lst_act) - 1])

    def _open_candle(self, data, index):
        """Start a new candle at price ``data`` and append it to the chart."""
        self.lst_data_preprocessed = [data, data, data, data]
        self.lst_data_full.append(self._candle_row(index))

    def _extend_candle(self, data):
        """Fold price ``data`` into the current candle and overwrite its row."""
        if self.current_step['step'] > 0:
            self.offset += 1
        if self.lst_data_preprocessed[2] > data:
            self.lst_data_preprocessed[2] = data
        if self.lst_data_preprocessed[3] < data:
            self.lst_data_preprocessed[3] = data
        self.lst_data_preprocessed[1] = data
        self.lst_data_full[len(self.lst_data_full) - 1] = self._candle_row(
            int(self.current_step['step'] - self.offset))

    def chart_preprocessing(self, data):
        """Aggregate the price ``data`` of the current step into chart candles.

        A new entry is added every 1 or 5 minutes; within that interval the
        latest entry is updated in place.  Tick data is grouped per minute
        digit; 1-minute data is grouped on minute digits "0" and "5".
        """
        step = self.current_step['step']
        if step == 0:
            self._open_candle(data, 0)

        if self.time == "Tick":  # aggregate to 1M
            if step > 0 and self._date[step - 1][11] != self._date[step][11]:
                self._open_candle(data, int(step - self.offset))
            else:
                self._extend_candle(data)
        elif self.time == "1M":  # aggregate to 5M
            if self._date[step][11] == "0" or self._date[step][11] == "5":
                self._open_candle(data, int(step - self.offset))
            else:
                self._extend_candle(data)

    def daily_processing(self, terminal):
        """Run end-of-day bookkeeping when the day changed or on the last step.

        Args:
            terminal: ``True`` forces the processing regardless of the date.
        """
        if self.manage_date() == 1 or terminal is True:
            self.lst_reward_daily.append(self.reward['daily'])
            self.wallet.episode_process(self.trade)
            # Disabled daily logging, kept for reference:
            # self.logger._add("Daily reward : " + str(self.reward['daily']), self._name)
            # self.logger._add("Daily average rewards : " + str(self.avg_reward(env.lst_reward, 0)), self._name)
            # self.logger._add("Daily profit : " + str(self.wallet.profit['daily']), self._name)
            # self.logger._add("Daily trade : " + str(self.daily_trade['loss'] + self.daily_trade['win'] + self.daily_trade['draw']), self._name)
            # if self.daily_trade['win'] + self.daily_trade['loss'] > 1:
            #     self.logger._add("Daily W/L : " + str('{:.3f}'.format(self.daily_trade['win'] / (self.daily_trade['loss'] + self.daily_trade['win']))), self._name)
            # else:
            #     self.logger._add("Daily W/L : " + str('{:.3f}'.format(self.daily_trade['win'] / 1)), self._name)
            if self.wallet.profit['daily'] > 0:
                # self.logger._add("Saving training data with " + str(self.wallet.profit['daily']) + " daily profit", self._name)
                self.logger.save_training_data(self.train_in, self.train_out)
            self.daily_reset()

    def execute(self, action):
        """Advance the environment by one step using ``action``.

        Args:
            action: agent decision; 1 and 2 are treated as BUY and SELL by
                ``def_act``, anything else as HOLD.

        Returns:
            ``(state, done, reward)`` -- the next state from ``getState``,
            the termination flag and the reward of this step.
        """
        self.current_step['step'] += 1
        step = self.current_step['step']
        self.action = action
        self.POS_BUY = -1
        self.POS_SELL = -1
        if self.step_left == 0:
            self.check_time_before_closing()
        self.step_left -= 1

        half_spread = self.contract_settings['spread'] / 2
        self.price['buy'] = self.data[step] - half_spread
        self.price['sell'] = self.data[step] + half_spread
        #self.lst_state.append(self.state[0])
        self.reward['current'] = 0
        self.wallet.profit['current'] = 0
        self.wallet.manage_exposure(self.contract_settings)

        # `== False` is kept on purpose: the helpers' return types are not
        # visible here, and `not x` would treat None differently.
        stopped = self.inventory.stop_loss(self)
        if stopped == False:
            force_closing = self.inventory.trade_closing(self)
            if force_closing == False:
                self.inventory.inventory_managment(self)
            else:
                self.POS_SELL = 0
                if self.inventory.get_last_trade()['close']['pos'] == "SELL":
                    self.action = 2
                else:
                    self.action = 1
        else:
            if self.action == 1:
                self.POS_BUY = 0
                self.action = 2
            elif self.action == 2:
                self.POS_SELL = 0
                self.action = 1

        self.train_in.append(self.state)
        self.train_out.append(act_processing(self.action))
        self.wallet.profit['daily'] += self.wallet.profit['current']
        self.wallet.profit['total'] += self.wallet.profit['current']
        self.reward['daily'] += self.reward['current']
        self.reward['total'] += self.reward['current']
        self.lst_reward.append(self.reward['current'])
        self.def_act()
        self.wallet.manage_wallet(self.inventory.get_inventory(),
                                  self.price,
                                  self.contract_settings)
        if self.gui == 1:
            self.chart_preprocessing(self.data[step])
        self.state = getState(self.raw, step + 1, self.window_size + 1)
        self.wallet.daily_process()

        done = len(self.data) - 2 == step
        current_max_pos = self.wallet.risk_managment['current_max_pos']
        if (current_max_pos < 1
                or current_max_pos <= int(self.wallet.risk_managment['max_pos'] // 2)):
            # Position budget exhausted: restore the saved capital and end
            # the episode.
            self.wallet.settings['capital'] = self.wallet.settings['saved_capital']
            done = True
        return self.state, done, self.reward['current']

    def avg_reward(self, reward, n):
        """Average ``reward``, or only its tail when ``n`` is non-zero.

        Args:
            reward: sequence of numbers (list or deque).
            n: 0 averages everything; otherwise the slice starting at index
                ``len(reward) - 1 - n`` is averaged.

        Returns:
            The average as computed by ``np.average``.
        """
        values = np.array(reward)
        if n == 0:
            return np.average(values)
        # Slice the array rather than `reward` itself: deques (the initial
        # type of lst_reward) do not support slicing.
        return np.average(values[(len(values) - 1) - n:])

    def daily_reset(self):
        """Reset the wallet's daily state and every per-day counter/buffer."""
        self.wallet.daily_reset()
        self.lst_reward = []
        self.daily_trade['win'] = 0
        self.daily_trade['loss'] = 0
        self.daily_trade['draw'] = 0
        self.daily_trade['total'] = 0
        self.price['buy'] = 0
        self.price['sell'] = 0
        self.reward['current'] = 0
        self.reward['daily'] = 0
        self.train_in = []
        self.train_out = []

    def reset(self):
        """Start a new episode and return its initial state.

        Episode totals are archived in the ``h_lst_*`` histories; on the
        very first call those histories do not exist yet and are created
        empty instead.
        """
        self.daily_reset()
        self.wallet.reset()
        self.inventory.reset()
        try:
            self.h_lst_reward.append(self.reward['total'])
            self.h_lst_profit.append(self.wallet.profit['total'])
            self.h_lst_win_order.append(self.trade['win'])
            self.h_lst_loose_order.append(self.trade['loss'])
            self.h_lst_draw_order.append(self.trade['draw'])
        except AttributeError:
            # First reset: histories not created yet.  (Was a bare `except:`
            # that also hid unrelated errors and KeyboardInterrupt.)
            self.h_lst_reward = []
            self.h_lst_profit = []
            self.h_lst_win_order = []
            self.h_lst_loose_order = []
            self.h_lst_draw_order = []

        self.tensorOCHL = [[] for _ in range(4)]
        self.lst_reward_daily = []
        self.lst_data_full = deque(maxlen=100)
        self.date['day'] = 1
        self.date['month'] = 1
        self.date['year'] = 1
        self.date['total_minutes'] = 1
        self.date['total_day'] = 1
        self.date['total_month'] = 1
        self.date['total_year'] = 1
        self.trade['win'] = 0
        self.trade['loss'] = 0
        self.trade['draw'] = 0
        self.trade['total'] = 0
        self.current_step['order'] = ""
        self.current_step['step'] = -1
        self.reward['total'] = 0
        self.new_episode = True
        self.state = getState(self.raw, 0, self.window_size + 1)
        self.current_step['episode'] += 1
        return self.state