def __load_dir(dir_path):
    """
    Load all the daily documents in "dir_path", and complement the data in the dates that no transaction happened

    Only files named "doc_<date>.json" are considered; any other file in the directory is ignored.
    Every day from the earliest to the latest document date is visited in steps of 86400 seconds:
        - a day that has a document contributes its non-empty string entries
        - a day without a document that `date.is_holiday` reports as a holiday contributes nothing
        - any other day without a document contributes an empty list

    :param
        dir_path: (str) directory that holds the "doc_<date>.json" files
    :return
        data: (list)
        e.g. [ # include transactions happen in many days
            ['bond_a', 'bond_b', ...], # represent transaction happen in one day
            [], # a non-holiday date without any document
            ...
        ]
        An empty list is returned when the directory holds no "doc_<date>.json" file.
    """
    prefix, suffix = 'doc_', '.json'

    # keep only the daily documents, so that a stray file cannot corrupt the start / end date
    date_list = sorted(
        name for name in os.listdir(dir_path)
        if name.startswith(prefix) and name.endswith(suffix))

    # nothing to load (also avoids indexing into an empty list below)
    if not date_list:
        return []

    # generate a date dict so that we can check whether there is transaction happens in that date
    date_dict = utils.list_2_dict(date_list)

    # the names are sorted, so the first / last names carry the start / end date
    # NOTE(review): this relies on the date part sorting chronologically as a string - confirm the format
    start_date = date_list[0][len(prefix):-len(suffix)]
    end_date = date_list[-1][len(prefix):-len(suffix)]

    # convert the date to timestamp
    cur_timestamp = utils.date_2_timestamp(start_date)
    # the positive margin (kept at the original 86000, a little under one day) lets the strict "<"
    # below still visit the end date itself
    end_timestamp = utils.date_2_timestamp(end_date) + 86000

    data = []

    # traverse all the date between the start date and the end date, but skip the holidays
    while cur_timestamp < end_timestamp:
        _date = utils.timestamp_2_date(cur_timestamp)
        file_name = f'{prefix}{_date}{suffix}'

        # check if there is any transaction
        if file_name in date_dict:
            file_path = os.path.join(dir_path, file_name)

            # keep only the non-empty strings (drops nan and any other non-string entry) in one pass
            data.append([
                item for item in utils.load_json(file_path)
                if isinstance(item, str) and item != ''
            ])

        # no transaction and not a holiday: record an empty day (holidays are skipped entirely)
        elif not date.is_holiday(_date):
            data.append([])

        # move to the next day
        cur_timestamp += 86400

    return data
def __generate_date_structure(len_bonds,
                              start_date='2015-01-02',
                              end_date='2015-12-31',
                              with_day_off=True,
                              buy_sell_plan=2):
    """
    Build the per-date input matrix, its mask and the date -> row lookup for a date range.

    The range is walked in steps of 86400 seconds from "start_date" to "end_date". A date that
    `date.is_holiday` reports as a holiday only gets a row when "with_day_off" is True; every other
    date gets a row and an entry in the lookup dict.

    The row width is
        len_bonds * 2 + extra   if buy_sell_plan == 2
        len_bonds + extra       otherwise
    where extra is 3 when "with_day_off" is True, else 2.
    (presumably the extra columns stand for <start> <end> and, optionally, <day-off> - verify with caller)

    return:
        date_matrix: np.array, shape: (date_num, width), every row is 0, 1, ..., width - 1.
                    Therefore, a mask for the input is needed.
        date_mask: np.array, shape: (date_num, width), all zeros except the last column,
                    which is 1 on the rows that belong to a day off.
        dict_date_2_input_m_index: dict, map each non-holiday date to its row index in date_matrix
    """
    first_timestamp = utils.date_2_timestamp(start_date)
    last_timestamp = utils.date_2_timestamp(end_date, True)

    dict_date_2_input_m_index = {}
    day_off_rows = []  # row indices that were allocated to holidays
    date_num = 0       # number of rows allocated so far

    timestamp = first_timestamp
    while timestamp <= last_timestamp:
        _date = utils.timestamp_2_date(timestamp)
        timestamp += 86400

        if not date.is_holiday(_date):
            # a regular date: remember which row it owns
            dict_date_2_input_m_index[_date] = date_num
            date_num += 1
        elif with_day_off:
            # a holiday still occupies a row, but it is not addressable by date
            day_off_rows.append(date_num)
            date_num += 1

    # calculate the row width
    if with_day_off:
        extra_token_num = 3
    else:
        extra_token_num = 2

    if buy_sell_plan == 2:
        width = len_bonds * 2 + extra_token_num
    else:
        width = len_bonds + extra_token_num

    # broadcasting a column of ones against the index row repeats 0..width-1 on every row
    date_matrix = np.ones((date_num, 1)) * np.arange(width)
    date_mask = np.zeros((date_num, width))

    # change the value in day off pos to be 1
    date_mask[np.array(day_off_rows, dtype=np.intp), -1] = 1

    return date_matrix, date_mask, dict_date_2_input_m_index
Exemple #3
0
def is_holiday_timestamp(timestamp):
    """Return what `is_holiday` reports for the date that "timestamp" converts to."""
    return is_holiday(timestamp_2_date(timestamp))