def __load_dir(dir_path):
    """
    Load all the daily documents in "dir_path", and complement the data in the
    dates on which no transaction happened.

    Every file named "doc_<date>.json" is one day of transactions. All the days
    between the earliest and the latest file are traversed: a day that has a file
    contributes its bonds, a holiday (according to date.is_holiday) contributes
    nothing, and any other day without a file contributes an empty list.

    :param dir_path: (str) directory that contains the "doc_<date>.json" files
    :return data: (list) empty if the directory holds no daily document, else e.g.
        [   # include transactions happen in many days
            ['bond_a', 'bond_b', ...], # represent transaction happen in one day
            ['bond_a', 'bond_b', ...],
            ...
        ]
    """
    prefix = 'doc_'
    suffix = '.json'

    # keep only the daily documents: os.listdir returns every entry of the directory,
    # and a stray file would otherwise be sliced into a bogus start / end date
    date_list = sorted(
        name for name in os.listdir(dir_path)
        if name.startswith(prefix) and name.endswith(suffix)
    )

    # without any document there is no date range to traverse
    if not date_list:
        return []

    # generate a date dict so that we can check whether there is transaction happens in that date
    date_dict = utils.list_2_dict(date_list)

    # find out the start and end date of all the transactions
    start_date = date_list[0][len(prefix):-len(suffix)]
    end_date = date_list[-1][len(prefix):-len(suffix)]

    # convert the date to timestamp
    cur_timestamp = utils.date_2_timestamp(start_date)
    # NOTE(review): the offset is 86000 rather than a full day (86400); presumably it only
    # has to make the end date itself pass the "<" test below - confirm before changing it
    end_timestamp = utils.date_2_timestamp(end_date) + 86000

    data = []

    # traverse all the date between the start date and the end date, but skip the holidays
    while cur_timestamp < end_timestamp:
        _date = utils.timestamp_2_date(cur_timestamp)
        file_name = f'{prefix}{_date}{suffix}'

        if file_name in date_dict:
            doc = utils.load_json(os.path.join(dir_path, file_name))
            # remove nan (any non-string entry) and empty strings in a single pass
            data.append([bond for bond in doc if isinstance(bond, str) and bond != ''])

        # a holiday is skipped; any other day without a file is a day with no transaction
        elif not date.is_holiday(_date):
            data.append([])

        # move to the next day
        cur_timestamp += 86400

    return data
def __generate_date_structure(len_bonds, start_date='2015-01-02', end_date='2015-12-31',
                              with_day_off=True, buy_sell_plan=2):
    """
    Build the per-date input matrix, its mask and the date -> row lookup.

    Every day from start_date to end_date is visited in steps of 86400 seconds.
    A non-holiday day always gets a row; a holiday (according to date.is_holiday)
    gets a row only when with_day_off is True and is never put in the lookup dict.

    The row width is (len_bonds * 2 if buy_sell_plan == 2 else len_bonds) plus
    3 extra token slots when with_day_off is True, otherwise 2.
    Presumably the extra slots stand for <start> <end> (<day-off>), while <pad>
    is all zeros and needs no slot - confirm against the vocabulary.

    :param len_bonds: (int) number of bonds
    :param start_date: (str) first date to visit
    :param end_date: (str) last date to visit; converted with
        utils.date_2_timestamp(end_date, True)
    :param with_day_off: (bool) whether holidays get a row of their own
    :param buy_sell_plan: (int) 2 doubles the number of bond slots
    :return:
        date_matrix: np.array, shape: (date_num, width), every row is 0, 1, ..., width - 1
        date_mask: np.array, shape: (date_num, width), all zeros except a 1 in the
            last column of every holiday row
        dict_date_2_input_m_index: dict, map a non-holiday date to its row index
    """
    seconds_per_day = 86400

    # convert the boundaries to timestamps
    first_timestamp = utils.date_2_timestamp(start_date)
    last_timestamp = utils.date_2_timestamp(end_date, True)

    # one flag per generated row: True for a regular day, False for a day off
    row_is_regular_day = []
    dict_date_2_input_m_index = {}

    timestamp = first_timestamp
    while timestamp <= last_timestamp:
        day = utils.timestamp_2_date(timestamp)
        timestamp += seconds_per_day

        if not date.is_holiday(day):
            dict_date_2_input_m_index[day] = len(row_is_regular_day)
            row_is_regular_day.append(True)
        elif with_day_off:
            row_is_regular_day.append(False)

    # calculate the shape
    date_num = len(row_is_regular_day)
    special_token_num = 3 if with_day_off else 2
    bond_slot_num = len_bonds * 2 if buy_sell_plan == 2 else len_bonds
    width = bond_slot_num + special_token_num

    # every row of the matrix enumerates the column indices; the mask starts empty
    date_matrix = np.tile(np.arange(width, dtype=np.float64), (date_num, 1))
    date_mask = np.zeros((date_num, width))

    # change the value in day off pos to be 1
    day_off_rows = np.flatnonzero(~np.array(row_is_regular_day, dtype=bool))
    date_mask[day_off_rows, -1] = 1

    return date_matrix, date_mask, dict_date_2_input_m_index
def is_holiday_timestamp(timestamp):
    """
    Check whether the date that "timestamp" converts to is a holiday.

    :param timestamp: timestamp handed to timestamp_2_date
    :return: the result of is_holiday for the converted date
    """
    return is_holiday(timestamp_2_date(timestamp))