コード例 #1
0
ファイル: regression_support.py プロジェクト: sondber/krypto
def AR_matrix(y, y_time, order, hours_to_remove=[]):
    """Build the matrix of autoregressive (lagged) regressors for ``y``.

    For every lag ``1..order`` the time stamps in ``y_time`` are passed
    through ``supp.fix_time_list(..., move_n_hours=-lag)`` and
    ``supp.make_time_list`` (presumably shifting them ``lag`` hours back --
    confirm against ``supp``).  For each row the shifted stamp is searched
    for among the original stamps at the same or an earlier position; when
    found, the matching ``y`` value becomes that row's regressor.

    Args:
        y: Sequence of observations.
        y_time: Time stamps belonging to ``y``.
        order: Number of lags (columns) to build.
        hours_to_remove: Row indices already marked for removal.

    Returns:
        ``(x_ar, hours_to_remove)``: ``x_ar`` has shape ``(len(y), order)``
        (``(len(y), 1)`` when ``order == 1``) and holds zeros where no lagged
        value was found; ``hours_to_remove`` is the incoming list merged (via
        ``add_two_remove_lists``) with the indices that
        ``binary_missing_to_indeces`` derives from the found/not-found mask.
    """
    n_obs = len(y)

    def shifted_stamps(hours_back):
        # Time stamps after moving them `hours_back` hours.
        year, month, day, hour, minute = supp.fix_time_list(
            y_time, single_time_stamp=0, move_n_hours=-hours_back)
        return supp.make_time_list(year, month, day, hour, minute)

    def find_source_row(row, moved):
        # Walk backwards from `row` looking for the original stamp that
        # equals the shifted stamp of `row`; None when there is none.
        for candidate in range(row, -1, -1):
            if moved[row] == y_time[candidate]:
                return candidate
        return None

    if order == 1:
        # Single lag: the found/not-found mask is one-dimensional.
        value_exists_binary = np.zeros(n_obs)
        x_ar = np.zeros([n_obs, 1])
        moved = shifted_stamps(1)
        for row in range(n_obs - 1, -1, -1):
            source = find_source_row(row, moved)
            if source is None:
                value_exists_binary[row] = 0
            else:
                value_exists_binary[row] = 1
                x_ar[row, 0] = y[source]
    else:
        value_exists_binary = np.zeros([n_obs, order])
        x_ar = np.zeros([n_obs, order])
        for lag_col in range(order):
            moved = shifted_stamps(lag_col + 1)
            # The first `order` rows are never filled, so they stay flagged
            # as missing in every column.
            for row in range(n_obs - 1, order - 1, -1):
                source = find_source_row(row, moved)
                if source is None:
                    value_exists_binary[row, lag_col] = 0
                else:
                    value_exists_binary[row, lag_col] = 1
                    x_ar[row, lag_col] = y[source]

    indeces = binary_missing_to_indeces(value_exists_binary)
    hours_to_remove = add_two_remove_lists(hours_to_remove, indeces)
    return x_ar, hours_to_remove
コード例 #2
0
ファイル: regression_support.py プロジェクト: sondber/krypto
def get_last_day_average(data,
                         time_list,
                         index_list_prev_lag,
                         freq="h",
                         lag=24,
                         hours_to_remove_prev=[]):
    """Average ``data`` over the window preceding each observation.

    For row ``i`` the window is ``data[start:i]``.  ``start`` is
    ``index_list_prev_lag[i]`` when that entry is not ``-1``; otherwise it is
    the first of the preceding ``lag`` rows that lies less than ``lag * 60``
    minutes before row ``i``.  Rows with no usable start get the sentinel
    ``-1`` and are reported for removal.

    Args:
        data: Sequence of values to average.
        time_list: Time stamps belonging to ``data``.
        index_list_prev_lag: Per-row start index, or ``-1`` when unknown.
        freq: Unused by this function.
        lag: Window length in rows, and in hours for the time comparison.
        hours_to_remove_prev: Row indices already marked for removal.

    Returns:
        ``(last_day_average, hours_to_remove)`` where ``hours_to_remove`` is
        the sentinel rows merged with ``hours_to_remove_prev`` through
        ``add_two_remove_lists``.
    """

    def minute_index(stamp):
        # Monotone "minutes since year 0" key; every month counts as 31 days,
        # so it is only meant for ordering and differences of close stamps.
        yr, mon, dy, hr, mnt = fix_time_list(stamp, single_time_stamp=1)
        return (yr * (365 * 31 * 24 * 60) + mon * (31 * 24 * 60) +
                dy * (24 * 60) + hr * 60 + mnt)

    n_rows = len(data)
    last_day_average = np.zeros(n_rows)

    for row in range(n_rows):
        now = minute_index(time_list[row])

        if index_list_prev_lag[row] == -1:
            # No lagged index known: look for the earliest of the previous
            # `lag` rows that is still inside the time window.
            window_start = -1
            for earlier in range(max(0, row - lag), row):
                if now - minute_index(time_list[earlier]) < lag * 60:
                    window_start = earlier
                    break
        else:
            window_start = int(index_list_prev_lag[row])

        if window_start == -1:
            last_day_average[row] = -1
            continue

        window = [data[pos] for pos in range(window_start, row)]
        last_day_average[row] = sum(window) / len(window)

    hours_to_remove = [
        row for row in range(len(last_day_average))
        if last_day_average[row] == -1
    ]
    hours_to_remove = add_two_remove_lists(hours_to_remove,
                                           hours_to_remove_prev)

    return last_day_average, hours_to_remove
コード例 #3
0
def week_vars(time_list, move_n_hours=0):
    """Return seven 0/1 weekday indicator arrays for ``time_list``.

    The time stamps are first decomposed by ``fix_time_list`` (with
    ``move_n_hours`` forwarded to it); the weekday of each resulting date
    selects which indicator is set to 1 for that row.

    Returns:
        Tuple ``(mon, tue, wed, thu, fri, sat, sun)`` of float arrays, each
        of length ``len(time_list)``.
    """
    year, month, day, hour, minute = fix_time_list(time_list,
                                                   move_n_hours=move_n_hours)
    n_entries = len(time_list)

    # One independent array per weekday, Monday first.
    indicators = [np.zeros(n_entries) for _ in range(7)]
    for row in range(n_entries):
        # isoweekday() is 1 (Monday) .. 7 (Sunday); shift to 0-based.
        weekday = int(date(year[row], month[row], day[row]).isoweekday()) - 1
        indicators[weekday][row] = 1

    return tuple(indicators)
コード例 #4
0
ファイル: regression_support.py プロジェクト: sondber/krypto
def time_of_day_dummies(time_list, hours_in_period=4):
    """Build time-of-day dummy variables.

    The day is cut into consecutive periods of ``hours_in_period`` hours and
    each row gets a 1 in the column of the period its hour falls in.

    When ``hours_in_period`` does not divide 24 the last period is shorter
    and covers the remaining hours.  (Previously the column count was
    truncated, so hours in that trailing period raised ``IndexError``.)

    Args:
        time_list: Time stamps; their hour component is taken from element 3
            of ``supp.fix_time_list(time_list)``.
        hours_in_period: Length of each period in hours.

    Returns:
        ``(X_dummies, n_dummies)``: a ``(len(time_list), n_dummies)`` array of
        0/1 values and the number of dummy columns (explanatory variables).
    """
    hour = supp.fix_time_list(time_list)[3]
    n_rows = len(time_list)

    # Round up so a trailing partial period still gets its own column.
    n_dummies = int(math.ceil(24 / hours_in_period))
    X_dummies = np.zeros([n_rows, n_dummies])
    for i in range(n_rows):
        j = int(math.floor(float(hour[i] / hours_in_period)))
        X_dummies[i, j] = 1

    return X_dummies, n_dummies
コード例 #5
0
def currency_converter(prices_time, prices, xrate_times, xrate):
    """Convert ``prices`` by dividing each by the exchange rate of its date.

    Prices and rates are matched on calendar date (year, month, day) as
    produced by ``supp.fix_time_list``.  When several rates share a date the
    first one is used.

    A price whose date has no rate is skipped and a warning is printed after
    the loop.  (Previously the search ran past the end of the rate list and
    raised ``IndexError``, so the warning could never be reached.)  In that
    case ``prices_converted`` is shorter than ``prices_time`` and the two are
    no longer aligned element by element.

    Args:
        prices_time: Time stamps of ``prices``.
        prices: Prices to convert.
        xrate_times: Time stamps of ``xrate``.
        xrate: Exchange rates.

    Returns:
        ``(prices_time, prices_converted)``.
    """
    prices_year, prices_month, prices_day, _, _ = supp.fix_time_list(
        prices_time)
    xrate_year, xrate_month, xrate_day, _, _ = supp.fix_time_list(
        xrate_times)

    # Index of the first rate for every calendar date: one pass instead of
    # rescanning the rate list for every price.
    first_rate_index = {}
    for j, date_key in enumerate(zip(xrate_year, xrate_month, xrate_day)):
        first_rate_index.setdefault(date_key, j)

    prices_converted = []
    for i in range(len(prices)):
        j = first_rate_index.get(
            (prices_year[i], prices_month[i], prices_day[i]))
        if j is None:
            # No rate for this date; reported collectively below.
            continue
        prices_converted.append(prices[i] / xrate[j])

    if len(prices_time) != len(prices_converted):
        print("For some reason, all prices could not be converted")

    return prices_time, prices_converted
コード例 #6
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def convert_to_day(time_stamps, prices, volumes):
    """Aggregate minute data into daily volumes and prices.

    ``prices`` / ``volumes`` are treated as 2-D (one row per exchange) when
    ``np.size(prices, 1)`` succeeds and there is more than one row; otherwise
    they are indexed as flat sequences.

    The first output day covers the minutes up to the first midnight, as
    derived from the hour and minute of ``time_stamps[0]``; later days are
    taken in steps of 1440 minutes.

    Returns:
        ``(time_stamps_out, prices_out, volumes_out)``.  The last two are
        ``(n_exc, n_days)`` arrays in the multi-exchange case and plain lists
        otherwise.

    NOTE(review): the two branches are not symmetric -- see the inline notes.
    Confirm which indexing is intended before relying on the multi-exchange
    output.
    """
    print("  \033[32;0;0mConverting to daily data...\033[0;0;0m")
    year, month, day, hour, minute = supp.fix_time_list(time_stamps)
    n_mins = len(time_stamps)

    # np.size(prices, 1) raises IndexError when prices has no second axis;
    # that is used here to detect the single-exchange case.
    try:
        np.size(prices, 1)
        n_exc = np.size(prices, 0)
    except IndexError:
        n_exc = 1

    # Minutes elapsed since midnight at the first stamp, and minutes left
    # until the first midnight.
    start_minute = hour[0] * 60 + minute[0]
    minutes_first_day = 1440 - start_minute

    n_days = int((n_mins) / 1440)  # THIS IS A POSSIBLE BUG WITHOUT +1

    time_stamps_out = []
    time_stamps_out.append(time_stamps[0])
    if n_exc > 1:
        volumes_out = np.zeros([n_exc, n_days])
        prices_out = np.zeros([n_exc, n_days])
        for exc in range(n_exc):

            # First (possibly partial) day: total volume and the last price
            # before midnight.
            for t in range(0, minutes_first_day):
                volumes_out[exc, 0] += volumes[exc, t]
            prices_out[exc, 0] = prices[exc, minutes_first_day - 1]

            k = 1
            # NOTE(review): this sums the 1440 minutes *before* t and takes
            # the price *at* t, whereas the single-exchange branch below sums
            # the 1440 minutes *from* t and takes the price at t + 1439.
            # NOTE(review): time_stamps_out is appended to inside the
            # per-exchange loop, so these stamps are added once per exchange.
            for t in range(minutes_first_day,
                           min(n_mins, n_mins - start_minute), 1440):
                volumes_out[exc, k] = sum(volumes[exc, t - 1440:t])
                prices_out[exc, k] = prices[exc, t]
                time_stamps_out.append(time_stamps[t])
                k += 1
    else:
        volumes_out = [0]
        prices_out = [0]

        # First (possibly partial) day.
        for t in range(0, minutes_first_day):
            volumes_out[0] += volumes[t]
        prices_out[0] = prices[minutes_first_day - 1]

        # Remaining days: volume over [t, t + 1440) and the price of the
        # last minute in that span.
        for t in range(minutes_first_day, min(n_mins, n_mins - start_minute),
                       1440):
            volumes_out.append(sum(volumes[t:t + 1440]))
            prices_out.append(prices[t + 1439])
            time_stamps_out.append(time_stamps[t])
    print("  \033[32;0;0mConversion complete...\033[0;0;0m")
    return time_stamps_out, prices_out, volumes_out
コード例 #7
0
def opening_hours_w_weekends(in_excel_stamps, in_prices, in_volumes):
    """Keep only the columns whose time of day is between 14:30 and 20:59.

    Every day of the week is kept.  ``in_prices`` and ``in_volumes`` are
    indexed as ``[:, i]``, i.e. one column per time stamp.

    Returns:
        ``(out_excel_stamps, out_prices, out_volumes)``: the surviving stamps
        and the transposed ``np.matrix`` of the matching price and volume
        columns.
    """
    year, month, day, hour, minute = supp.fix_time_list(in_excel_stamps)

    out_excel_stamps = []
    price_columns = []
    volume_columns = []

    # A holiday check could be added here.

    for col in range(len(in_excel_stamps)):
        # The weekday is not used for filtering in this variant; the call is
        # kept so the date is still constructed exactly as before.
        int(date(year[col], month[col], day[col]).isoweekday())

        late_half_hour = hour[col] == 14 and minute[col] >= 30
        if not (15 <= hour[col] <= 20 or late_half_hour):
            continue

        out_excel_stamps.append(in_excel_stamps[col])
        price_columns.append(in_prices[:, col])
        volume_columns.append(in_volumes[:, col])

    out_prices = np.matrix(price_columns).T
    out_volumes = np.matrix(volume_columns).T
    return out_excel_stamps, out_prices, out_volumes
コード例 #8
0
ファイル: realized_volatility.py プロジェクト: sondber/krypto
def RVol(time_series_minutes, prices_list_minutes, daily=1, annualize=1):
    """Compute realized volatility from minute prices.

    Each period (1440 minutes when ``daily == 1``, otherwise 60) is split
    into 15-minute windows.  For every window the relative price change
    between its first and last minute is squared; the square root of the sum
    over the period is that period's volatility.

    Args:
        time_series_minutes: Time stamps of the minute prices.
        prices_list_minutes: Minute prices.
        daily: 1 for daily periods, anything else for hourly periods.
        annualize: 1 to scale by sqrt(365) (daily) or sqrt(365 * 24) (hourly).

    Returns:
        ``(rvol, time_list_rvol)``: the volatility array and the time stamp
        recorded for each computed period.

    NOTE(review): with ``daily == 1`` the main loop stops at
    ``n_entries - 2``, so the last element of ``rvol`` stays 0 and
    ``time_list_rvol`` has one entry fewer than ``rvol``.  With
    ``daily == 0`` the first stamp is appended both before the loop and in
    its first pass, so ``time_list_rvol`` has one entry more than ``rvol``.
    Confirm whether callers depend on this.
    """
    year, month, day, hour, minute = supp.fix_time_list(time_series_minutes)

    if daily == 1:
        mins = int(1440)
    else:
        mins = int(60)

    n_entries = int(len(prices_list_minutes) / mins)
    # Minutes until the first midnight, from the hour of the first stamp
    # only (the minute component is not used); 0 when daily == 0.
    minutes_in_first_period = daily * (1440 - (hour[0] * 60))
    window = 15
    rvol = np.zeros(n_entries)
    time_list_rvol = []

    # first iteration
    for k in range(0, minutes_in_first_period):  # this only loops if minutes_in_first_period is positive
        if (k % window == 0):
            try:
                rvol[0] += ((prices_list_minutes[k + window-1] - prices_list_minutes[k]) /
                            prices_list_minutes[k]) ** 2
            except IndexError:
                # A window reaching past the end of the data is reported and
                # skipped rather than aborting.
                print("ERROR at index =", k + window)
    rvol[0] = math.sqrt(rvol[0])
    time_list_rvol.append(time_series_minutes[0])

    # remaining iterations
    for i in range(daily * 1, n_entries-daily):  # starts at index 1 if daily, 0 otherwise
        for j in range(0, mins):
            if (j % window == 0):
                try:
                    # Offset of period i: (i - daily) full periods after the
                    # first (partial) period.
                    rvol[i] += ((prices_list_minutes[(i-daily) * mins + j + window - 1 + minutes_in_first_period] -
                                 prices_list_minutes[(i-daily) * mins + j + minutes_in_first_period]) /
                                 prices_list_minutes[(i-daily) * mins + j + minutes_in_first_period]) ** 2
                except IndexError:
                    print("index =", i * mins + j + window + minutes_in_first_period)
        rvol[i] = math.sqrt(rvol[i])
        # NOTE(review): this stamp uses i * mins while the prices above use
        # (i - daily) * mins -- confirm the intended alignment.
        time_list_rvol.append(time_series_minutes[(i * mins) + minutes_in_first_period])
    if annualize == 1:
        if daily == 1:
            rvol = np.multiply(rvol, math.sqrt(365))
        else:
            rvol = np.multiply(rvol, math.sqrt(365 * 24))

    return rvol, time_list_rvol
コード例 #9
0
def opening_hours(in_excel_stamps, in_matrix1, in_matrix2):
    """Keep only weekday columns whose time of day is 14:30 to 20:59.

    Saturdays and Sundays are dropped.  ``in_matrix1`` and ``in_matrix2`` are
    indexed as ``[:, i]``, i.e. one column per time stamp.

    Returns:
        ``(out_excel_stamps, out_matrix1, out_matrix2)``: the surviving
        stamps and the transposed ``np.matrix`` of the matching columns.
    """
    year, month, day, hour, minute = supp.fix_time_list(in_excel_stamps)

    out_excel_stamps = []
    kept_columns1 = []
    kept_columns2 = []

    # A holiday check could be added here.

    for col in range(len(in_excel_stamps)):
        w_day = int(date(year[col], month[col], day[col]).isoweekday())
        if w_day in (6, 7):
            # isoweekday(): 6 is Saturday, 7 is Sunday.
            continue

        late_half_hour = hour[col] == 14 and minute[col] >= 30
        if not (15 <= hour[col] <= 20 or late_half_hour):
            continue

        out_excel_stamps.append(in_excel_stamps[col])
        kept_columns1.append(in_matrix1[:, col])
        kept_columns2.append(in_matrix2[:, col])

    out_matrix1 = np.matrix(kept_columns1).T
    out_matrix2 = np.matrix(kept_columns2).T
    return out_excel_stamps, out_matrix1, out_matrix2
コード例 #10
0
ファイル: regression_support.py プロジェクト: sondber/krypto
def get_lagged_list(data,
                    time_list,
                    freq="h",
                    lag=24,
                    hours_to_remove_prev=None):
    """Return ``data`` lagged by ``lag`` hours, matched on time stamps.

    For each row the time stamp moved ``lag`` hours (through
    ``fix_time_list(..., move_n_hours=-lag)`` and ``make_time_list``) is
    looked up among the original stamps at positions ``i`` down to
    ``i - lag``.  Rows without a match get ``-1`` in both outputs and are
    reported for removal.

    The search never goes below index 0.  (Previously the window ran into
    negative indices, which wrap around to the end of the list and raise
    ``IndexError`` when the series is shorter than ``lag``.)

    Args:
        data: Values to lag.
        time_list: Time stamps belonging to ``data``.
        freq: Only ``"h"`` is supported; anything else prints a warning.
        lag: Lag in hours, also the maximum number of rows searched.
        hours_to_remove_prev: Row indices already marked for removal
            (``None`` means none).

    Returns:
        ``(lagged_list, index_list, hours_to_remove)``: lagged values, the
        source index of each lagged value, and the unmatched rows merged with
        ``hours_to_remove_prev`` through ``add_two_remove_lists``.
    """
    if freq != "h":
        print("FUNCTIONALITY ONLY WRITTEN FOR HOURLY FREQUENCY")
    if hours_to_remove_prev is None:
        hours_to_remove_prev = []

    n_entries = len(data)
    lagged_list = np.zeros(n_entries)
    index_list = np.zeros(n_entries)

    y, mo, d, h, mi = fix_time_list(time_list,
                                    single_time_stamp=0,
                                    move_n_hours=-lag)
    time_stamp = make_time_list(y, mo, d, h, mi)

    for i in range(len(time_stamp) - 1, -1, -1):
        match = -1
        lowest = max(i - lag, 0)  # clamp: no wrap-around into negative indices
        for j in range(i, lowest - 1, -1):
            if time_stamp[i] == time_list[j]:
                # Keep scanning so that, as before, the earliest matching
                # position wins if a stamp occurs more than once.
                match = j
        if match == -1:
            lagged_list[i] = -1
            index_list[i] = -1
        else:
            lagged_list[i] = data[match]
            index_list[i] = match

    hours_to_remove = [i for i, src in enumerate(index_list) if src == -1]
    hours_to_remove = add_two_remove_lists(hours_to_remove,
                                           hours_to_remove_prev)

    return lagged_list, index_list, hours_to_remove
コード例 #11
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def convert_to_hour(time_stamps, prices, volumes):
    """Aggregate minute data into hourly volumes and closing prices.

    Minutes are grouped in consecutive blocks of 60 starting at index 0; a
    trailing incomplete block is dropped with a warning.  ``prices`` /
    ``volumes`` are treated as 2-D (one row per exchange) when
    ``np.size(prices, 1)`` succeeds and there is more than one row.

    Returns:
        ``(time_stamps_out, prices_out, volumes_out)``.  The last two are
        ``(n_exc, n_hours)`` arrays in the multi-exchange case and plain
        lists otherwise.

    NOTE(review): the multi-exchange branch stamps each hour with its last
    minute (``t + 59``) while the single-exchange branch uses the first
    minute (``t``).  Kept as is; confirm which is intended.
    """
    print("  \033[32;0;0mConverting to hourly data...\033[0;0;0m")
    _year, _month, _day, _hour, _minute = supp.fix_time_list(time_stamps)

    n_mins = len(time_stamps)
    leftover = n_mins % 60
    if leftover != 0:
        print(
            "WARNING: convert_to_hour found an uneven number of minutes, with %i to spare"
            % leftover)
        n_mins -= leftover

    # A missing second axis (IndexError) means a single flat series.
    try:
        np.size(prices, 1)
    except IndexError:
        n_exc = 1
    else:
        n_exc = np.size(prices, 0)

    hour_starts = range(0, n_mins, 60)
    time_stamps_out = []

    if n_exc > 1:
        n_hours = n_mins // 60
        volumes_out = np.zeros([n_exc, n_hours])
        prices_out = np.zeros([n_exc, n_hours])
        for exc in range(n_exc):
            for k, t in enumerate(hour_starts):
                volumes_out[exc, k] = sum(volumes[exc, t:t + 60])
                prices_out[exc, k] = prices[exc, t + 59]
                if exc == 0:
                    # Stamps are collected once, from the first exchange.
                    time_stamps_out.append(time_stamps[t + 59])
    else:
        volumes_out = [sum(volumes[t:t + 60]) for t in hour_starts]
        time_stamps_out = [time_stamps[t] for t in hour_starts]
        prices_out = [prices[t + 59] for t in hour_starts]

    print("  \033[32;0;0mConversion complete...\033[0;0;0m")
    return time_stamps_out, prices_out, volumes_out
コード例 #12
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def clean_series_days(time_listM,
                      pricesM,
                      volumesM,
                      exc=0,
                      print_days_excluded=0,
                      convert_time_zones=1,
                      plot_for_extreme=0):
    """Build cleaned daily series from one exchange's minute data.

    Steps, in order: optional time-zone shift of the minute stamps,
    conversion to daily data, trimming to the exchange's sample window,
    computation of the daily measures (Roll spread, realized volatility, log
    returns, Amihud ILLIQ), removal of zero-volume days, removal of extreme
    days using per-exchange thresholds, removal of days with zero spread or
    zero volatility, and finally the log transforms.

    Args:
        time_listM: Minute time stamps.
        pricesM: Minute prices.
        volumesM: Minute volumes.
        exc: Exchange id (0-5); any other value selects the test-set window
            and skips the extreme-value filter.
        print_days_excluded: Unused by this function.
        convert_time_zones: When truthy, shift the stamps by the exchange's
            hour offset before anything else.
        plot_for_extreme: 1 to plot the series before and after the
            extreme-value filter.

    Returns:
        ``(time_listD, returnsD, volumesD, log_volumesD, spreadD, illiqD,
        log_illiqD, rvolD, log_rvolD)``.
    """
    print(" \033[32;0;0mRunning 'clean_series_days' ...\033[0;0;0m")

    # Hour offset applied to each exchange's stamps.
    n_hours = 0
    if convert_time_zones:
        n_hours = {0: 1, 5: 1, 1: 9, 2: 8, 3: -5, 4: 9}.get(exc, 0)
        print("  Converting time zones: moving series %i hour(s)" % n_hours)

    if n_hours != 0:
        year, month, day, hour, minute = supp.fix_time_list(
            time_listM, move_n_hours=n_hours)
        time_listM = supp.make_time_list(year, month, day, hour, minute)

    time_listD, pricesD, volumesD = convert_to_day(time_listM, pricesM,
                                                   volumesM)

    # Per exchange: (daily cutoff, minute cutoff, start of the period used
    # for the mean volume).
    sample_windows = {
        0: ("01.01.2013 00:00", "01.01.2013 01:00", "01.01.2012 00:00"),
        1: ("01.01.2016 00:00", "01.01.2016 09:00", "30.10.2014 00:00"),
        2: ("01.01.2013 00:00", "01.01.2013 08:00", "01.01.2012 00:00"),
        3: ("01.01.2015 00:00", "01.01.2015 19:00", "02.12.2014 00:00"),
        4: ("01.01.2014 00:00", "01.01.2014 09:00", "01.10.2013 00:00"),
        5: ("01.01.2015 00:00", "01.01.2015 01:00", "01.03.2014 00:00"),
    }
    end_time_D = ""
    if exc in sample_windows:
        cutoff_date, cutoff_min_date, start_averaging_date = sample_windows[
            exc]
        if exc == 2:
            # Only this exchange has an explicit end of sample.
            end_time_D = "01.01.2017 00:00"
            end_time_M = "01.01.2017 08:00"
    else:
        print("  TEST SET")
        cutoff_date = "01.01.2017 00:00"
        cutoff_min_date = "01.01.2017 00:00"
        start_averaging_date = "01.01.2017 00:00"

    cutoff_day = supp.find_date_index(cutoff_date, time_listD, next_date=1)
    cutoff_min = supp.find_date_index(cutoff_min_date, time_listM, next_date=1)
    start_averaging_day = supp.find_date_index(start_averaging_date,
                                               time_listD,
                                               next_date=1)
    mean_volume_prev_year = np.average(
        volumesD[start_averaging_day:cutoff_day])

    if len(end_time_D) > 1:
        cutoff_endD = supp.find_date_index(end_time_D, time_listD)
        cutoff_endM = supp.find_date_index(end_time_M, time_listM)
    else:
        cutoff_endD = len(time_listD)
        cutoff_endM = len(time_listM)

    # Trim every series to the sample window.
    time_listM = time_listM[cutoff_min:cutoff_endM]
    print("  Time period:", time_listM[0], "to", time_listM[-1])
    pricesM = pricesM[cutoff_min:cutoff_endM]
    volumesM = volumesM[cutoff_min:cutoff_endM]
    pricesD = pricesD[cutoff_day:cutoff_endD]
    volumesD = volumesD[cutoff_day:cutoff_endD]
    time_listD = time_listD[cutoff_day:cutoff_endD]

    # Rolls
    spread_abs, spreadD, time_list_rolls, count_value_error = rolls.rolls(
        pricesM, time_listM, calc_basis="d", kill_output=1)
    # Realized volatility
    rvolD, RVol_time = realized_volatility.RVol(time_listM,
                                                pricesM,
                                                daily=1,
                                                annualize=1)
    # Returns
    returnsM = jake_supp.logreturn(pricesM)
    returnsD = jake_supp.logreturn(pricesD)
    # Amihud's ILLIQ
    illiq_timeD, illiqD = ILLIQ.illiq(time_listM,
                                      returnsM,
                                      volumesM,
                                      threshold=0)  # Already clean

    if plot_for_extreme == 1:
        raw_panels = (
            (rvolD, "Raw rvol"),
            (spreadD, "Raw spreadH"),
            (volumesD, "Raw volume"),
            (illiqD, "Raw illiq"),
            (returnsD, "Raw returnsH"),
        )
        for series, title in raw_panels:
            plt.plot(series)
            plt.title(title)
            plt.figure()

    time_list_removed = []
    # Removing all days where Volume is zero
    time_listD, time_list_removed, volumesD, spreadD, returnsD, rvolD = supp.remove_list1_zeros_from_all_lists(
        time_listD, time_list_removed, volumesD, spreadD, returnsD, rvolD)

    # --------------------------------------------
    # Extreme-value filter.  Exchanges 0-2 use a looser ILLIQ limit;
    # exchanges 3-5 additionally cap the volume and use a tighter limit.
    days_to_remove = []
    if exc in (0, 1, 2, 3, 4, 5):
        days_to_remove = supp.remove_extremes(days_to_remove,
                                              returnsD,
                                              0.1,
                                              threshold_lower=-0.1)
        days_to_remove = supp.remove_extremes(days_to_remove, rvolD, 2)
        days_to_remove = supp.remove_extremes(days_to_remove, spreadD, 0.01)
        if exc in (3, 4, 5):
            days_to_remove = supp.remove_extremes(days_to_remove, volumesD,
                                                  50000)
            days_to_remove = supp.remove_extremes(days_to_remove, illiqD,
                                                  0.01)
        else:
            days_to_remove = supp.remove_extremes(days_to_remove, illiqD, 0.1)

    for d in days_to_remove:
        time_list_removed = np.append(time_list_removed, time_listD[d])
    time_listD = np.delete(time_listD, days_to_remove)
    returnsD = np.delete(returnsD, days_to_remove)
    volumesD = np.delete(volumesD, days_to_remove)
    spreadD = np.delete(spreadD, days_to_remove)
    rvolD = np.delete(rvolD, days_to_remove)
    illiqD = np.delete(illiqD, days_to_remove)
    illiq_timeD = np.delete(illiq_timeD, days_to_remove)

    if plot_for_extreme == 1:
        filtered_panels = (
            (rvolD, "rvol"),
            (spreadD, "spreadH"),
            (volumesD, "volume"),
            (illiqD, "illiq"),
            (returnsD, "returnsH"),
        )
        last_panel = len(filtered_panels) - 1
        for position, (series, title) in enumerate(filtered_panels):
            plt.plot(series)
            plt.title(title)
            if position != last_panel:
                plt.figure()
        plt.show()

    # Removing all days where Roll is zero
    (time_listD, time_list_removed, spreadD, volumesD, returnsD, rvolD,
     illiqD) = supp.remove_list1_zeros_from_all_lists(
         time_listD, time_list_removed, spreadD, volumesD, returnsD, rvolD,
         illiqD)

    # Removing all days where Volatility is zero
    (time_listD, time_list_removed, rvolD, volumesD, returnsD, spreadD,
     illiqD) = supp.remove_list1_zeros_from_all_lists(
         time_listD, time_list_removed, rvolD, volumesD, returnsD, spreadD,
         illiqD)

    # Turning ILLIQ, Volume and RVol into log
    log_illiqD = np.log(illiqD)
    log_rvolD = np.log(rvolD)
    log_volumesD = volume_transformation(volumesD, mean_volume_prev_year)

    print(
        "  dis.%i: Length of time %i, spread %i, rvol %i, illiq %i, and log_illiq %i"
        % (gf(cf()).lineno, len(time_listD), len(spreadD), len(rvolD),
           len(illiqD), len(log_illiqD)))
    print(" \033[32;0;0m Finished running 'clean_series_days' ...\033[0;0;0m")

    return time_listD, returnsD, volumesD, log_volumesD, spreadD, illiqD, log_illiqD, rvolD, log_rvolD
コード例 #13
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def cyclical_average_hour(time_list,
                          data,
                          print_n_entries=0,
                          print_val_tab=0,
                          hours_per_basket=1):
    """Average ``data`` by time-of-day basket, with a 95% t-interval.

    The day is divided into ``int(24 / hours_per_basket)`` baskets and every
    observation is assigned to the basket ``floor(hour / hours_per_basket)``;
    ``hours_per_basket`` is therefore expected to divide 24.

    The mean and the confidence interval of a basket are both computed from
    the observations actually in it.  (Previously the interval was computed
    over a zero-padded matrix column, so its degrees of freedom and standard
    error included the padding, and a basket holding more than
    ``2 * n / n_baskets`` observations overflowed the matrix.)

    Args:
        time_list: Time stamps; the hour is element 3 of
            ``supp.fix_time_list(time_list)``.
        data: One value per time stamp.
        print_n_entries: 1 to print the number of observations per basket.
        print_val_tab: 1 to print the observations of (at most) the first
            seven baskets as a zero-padded table.
        hours_per_basket: Basket width in hours.

    Returns:
        ``(day_time, data_average, lower, upper)``: basket labels
        (``"HH:00"``), basket means and interval bounds.  A basket without
        observations yields ``nan`` for all three; one with a single
        observation yields ``nan`` bounds.
    """
    hour = supp.fix_time_list(time_list)[3]
    n_entries = len(time_list)
    n_out = int(24 / hours_per_basket)

    # Basket labels: starting hour of each basket, zero-padded to two digits.
    day_time = []
    for h in range(n_out):
        basket = h * hours_per_basket
        label = str(basket)
        if basket < 10:
            label = "0" + label
        day_time.append(label + ":" + "00")

    # Group the observations per basket.
    baskets = [[] for _ in range(n_out)]
    for i in range(n_entries):
        index = int(np.floor(float(hour[i] / hours_per_basket)))
        baskets[index].append(data[i])

    if print_val_tab == 1:
        n_rows = max((len(values) for values in baskets), default=0)
        for row in range(n_rows):
            for col in range(min(7, n_out)):
                value = baskets[col][row] if row < len(baskets[col]) else 0.0
                print('{0:.3f}'.format(value), end='   ')
            print()

    if print_n_entries == 1:
        print(np.array([len(values) for values in baskets], dtype=float))

    confidence = 0.95
    data_average = np.full(n_out, np.nan)
    lower = np.full(n_out, np.nan)
    upper = np.full(n_out, np.nan)
    for i, values in enumerate(baskets):
        observations = np.asarray(values, dtype=float)
        count = len(observations)
        if count == 0:
            continue
        data_average[i] = observations.sum() / count
        if count > 1:
            # The standard error needs at least two observations.
            lower[i], upper[i] = st.t.interval(confidence,
                                               count - 1,
                                               loc=data_average[i],
                                               scale=st.sem(observations))

    return day_time, data_average, lower, upper
コード例 #14
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def cyclical_average_legacy(time_list,
                            data,
                            frequency="h",
                            print_n_entries=0,
                            print_val_tab=0,
                            incl_zeros=0,
                            hours_per_basket=1):
    """Average ``data`` by hour of day or by weekday, with a 95% t-interval.

    The mean and the confidence interval of a basket are both computed from
    the observations actually in it.  (Previously the interval was computed
    over a zero-padded matrix column, so its degrees of freedom and standard
    error included the padding, and a basket holding more than
    ``2 * n / n_baskets`` observations overflowed the matrix.)

    Args:
        time_list: Time stamps, decomposed with ``supp.fix_time_list``.
        data: One value per time stamp.
        frequency: ``"h"`` for 24 hourly baskets, ``"d"`` for 7 weekday
            baskets (Monday first).
        print_n_entries: 1 to print the number of observations per basket.
        print_val_tab: 1 to print the observations of the first seven baskets
            as a zero-padded table.
        incl_zeros: 0 to ignore observations equal to zero, anything else to
            include them.
        hours_per_basket: Unused by this function.

    Returns:
        ``(day_time, data_average, lower, upper)``: basket labels, basket
        means and interval bounds.  A basket without observations yields
        ``nan`` for all three; one with a single observation yields ``nan``
        bounds.

    Raises:
        ValueError: If ``frequency`` is neither ``"h"`` nor ``"d"``
            (previously this ended in a ``ZeroDivisionError``).
    """
    year, month, day, hour, minute = supp.fix_time_list(time_list)
    n_entries = len(time_list)

    if frequency == "h":
        day_time = ["%02d:00" % h for h in range(24)]
    elif frequency == "d":
        day_time = ["Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun"]
    else:
        raise ValueError("frequency must be 'h' or 'd', got %r" %
                         (frequency, ))
    n_out = len(day_time)

    # Group the observations per basket.
    baskets = [[] for _ in range(n_out)]
    for i in range(n_entries):
        if incl_zeros == 0 and data[i] == 0:
            continue
        if frequency == "h":
            index = int(hour[i])
        else:
            # isoweekday() is 1 (Monday) .. 7 (Sunday); shift to 0-based.
            index = int(date(year[i], month[i], day[i]).isoweekday()) - 1
        baskets[index].append(data[i])

    if print_val_tab == 1:
        n_rows = max((len(values) for values in baskets), default=0)
        for row in range(n_rows):
            for col in range(min(7, n_out)):
                value = baskets[col][row] if row < len(baskets[col]) else 0.0
                print('{0:.3f}'.format(value), end='   ')
            print()

    if print_n_entries == 1:
        print(np.array([len(values) for values in baskets], dtype=float))

    confidence = 0.95
    data_average = np.full(n_out, np.nan)
    lower = np.full(n_out, np.nan)
    upper = np.full(n_out, np.nan)
    for i, values in enumerate(baskets):
        observations = np.asarray(values, dtype=float)
        count = len(observations)
        if count == 0:
            continue
        data_average[i] = observations.sum() / count
        if count > 1:
            # The standard error needs at least two observations.
            lower[i], upper[i] = st.t.interval(confidence,
                                               count - 1,
                                               loc=data_average[i],
                                               scale=st.sem(observations))

    return day_time, data_average, lower, upper
コード例 #15
0
def get_list(exc=0, freq="m", local_time=0):  # testtest
    """Load the stored series of one exchange.

    Args:
        exc: Exchange id (0-5, -1 for the test set) or its name, with or
            without the currency suffix (e.g. ``"bitstamp"`` or
            ``"bitstampusd"``).  Anything else falls back to bitstamp with a
            printed notice.
        freq: ``"m"``/``0`` for raw minute data, ``"h"``/``1`` for cleaned
            hourly data, anything else for cleaned daily data.
        local_time: 1 to shift the hourly time stamps by the exchange's hour
            offset; only used for hourly data.

    Returns:
        Minute data: ``(exc_name, time_list, prices, volumes)``.
        Hourly / daily data: ``(exc_name, time_list, returns, spread,
        volumes, log_volumes, illiq, log_illiq, rvol, log_rvol)`` as read by
        ``read_clean_csv``.

    Changes from the previous version:
      * A malformed minute row is now skipped as a whole.  Before, the time
        stamp (and possibly the price) was appended before the failing
        ``float()``, leaving the three lists misaligned.  Rows with too few
        columns are reported the same way instead of raising ``IndexError``.
      * The bitstamp fallback also resets ``exc``, so the ``local_time``
        shift matches the data that is actually returned.
      * The ``freq`` argument is no longer overwritten by the hour list.
    """
    # (id, full name, short name) of every known exchange.
    known_exchanges = (
        (0, "bitstampusd", "bitstamp"),
        (1, "coincheckjpy", "coincheck"),
        (2, "btcncny", "btcn"),
        (3, "coinbaseusd", "coinbase"),
        (4, "korbitkrw", "korbit"),
        (5, "krakeneur", "kraken"),
        (-1, "test", "test"),
    )
    for exc_id, exc_name, short_name in known_exchanges:
        if exc == exc_id or exc == exc_name or exc == short_name:
            exc = exc_id
            break
    else:
        print(
            "\033[31;0;0mYou did not choose an exchange, so I am returning bitstamp\033[0;0;0m"
        )
        exc_name = "bitstampusd"
        exc = 0

    if freq == "m" or freq == 0:
        file_name = "data/export_csv/" + exc_name + "_edit.csv"
        time_listM = []
        priceM = []
        volumeM = []

        with open(file_name, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=';', quotechar='|')
            print("\033[0;32;0m Reading file '%s'...\033[0;0;0m" % file_name)
            # Skip the three header rows (tolerating a shorter file).
            for _ in range(3):
                next(reader, None)
            for i, row in enumerate(reader):
                # Parse the whole row first so that a bad field cannot leave
                # the three lists with different lengths.
                try:
                    stamp = str(row[0])
                    price = float(row[1])
                    volume = float(row[2])
                except (ValueError, IndexError):
                    print(
                        "\033[0;31;0m There was an error on row %i in '%s'\033[0;0;0m"
                        % (i + 1, file_name))
                    continue
                time_listM.append(stamp)
                priceM.append(price)
                volumeM.append(volume)

        return exc_name, time_listM, priceM, volumeM

    elif freq == "h" or freq == 1:
        file_name = "data/export_csv/" + exc_name + "_global_time_hourly.csv"

        time_listH, returnsH, spreadH, volumesH, log_volumesH, illiqH, log_illiqH, rvolH, log_rvolH = read_clean_csv(
            file_name)

        if local_time == 1:
            # Hour offset applied to each exchange's stamps.
            n_hours = {0: 1, 5: 1, 1: 9, 2: 8, 3: -5, 4: 9}.get(exc, 0)
            print("  Converting time zones: moving series %i hour(s)" %
                  n_hours)
        else:
            n_hours = 0

        if n_hours != 0:
            year, month, day, hour, minute = supp.fix_time_list(
                time_listH, move_n_hours=n_hours)
            # Build a new time list from the shifted components.
            time_listH = supp.make_time_list(year, month, day, hour, minute)
        return exc_name, time_listH, returnsH, spreadH, volumesH, log_volumesH, illiqH, log_illiqH, rvolH, log_rvolH

    else:
        file_name = "data/export_csv/" + exc_name + "_daily.csv"
        time_listD, returnsD, spreadD, volumesD, log_volumesD, illiqD, log_illiqD, rvolD, log_rvolD = read_clean_csv(
            file_name)

        return exc_name, time_listD, returnsD, spreadD, volumesD, log_volumesD, illiqD, log_illiqD, rvolD, log_rvolD
0
def hi_lo_spread(timestamps, highs, lows, prices,
                 kill_output=0, hour_yesno=0):
    """Estimate one high/low bid-ask spread per day from intraday bars.

    The series is cut into consecutive day-long windows. Within a window,
    every pair of adjacent bars is passed through ``determine_hilo``,
    ``gamma_calc``, ``beta_calc``, ``alpha_calc`` and ``spread_calc``
    (the helper names suggest the Corwin-Schultz high/low estimator --
    confirm against those helpers). Negative pair estimates are floored at
    zero and the pair estimates are averaged into one value for the day.
    Pairs whose gamma or beta is zero are counted as value errors and left
    out of the average.

    Args:
        timestamps: One time stamp per bar, parsed by ``supp.fix_time_list``.
        highs: Bar highs.
        lows: Bar lows.
        prices: Bar prices; the last price of each window turns the spread
            into a relative spread.
        kill_output: 0 prints progress and a summary, any other value is
            silent.
        hour_yesno: 0 when each bar is one minute, otherwise one hour.

    Returns:
        ``(time_list, spreads, rel_spreads)``: the first time stamp of each
        window, the averaged spread and the spread divided by the window's
        last price. Windows without a single usable pair get 0 for both
        spread values.
    """
    _, _, _, hour, _ = supp.fix_time_list(timestamps)

    spreads = []
    time_list = []
    rel_spreads = []
    value_errors = 0
    na_spread = 0

    freq_desc = "daily"
    resolution_desc = "minute" if hour_yesno == 0 else "hour"

    n = len(highs)  # number of bars in the data set

    # A series starting at hour 0 is treated as 24-hour data; anything else
    # is treated as a 6.5-hour trading day.
    if hour[0] == 0:
        hours_in_day = 24
        day_desc = "full day"
    else:
        hours_in_day = 6.5
        day_desc = "trading day"

    if hour_yesno == 0:
        window = int(hours_in_day * 60)
    else:
        # Hourly bars: 24 stays 24, a 6.5-hour day is rounded up to 7 bars.
        window = int(hours_in_day + 0.5)

    if kill_output == 0:
        print("Calculating Hi/Lo-spreadH on a/an", freq_desc, "basis using", day_desc, "data, with", resolution_desc)

    for i in range(0, n, window):  # one iteration per day
        # Clamp a trailing partial day to the data that actually exists, so
        # neither the pair slices nor the price lookup run past the end.
        stop = min(i + window, n)
        averager_adjusted = stop - i - 1  # number of adjacent bar pairs
        partsum = 0
        for j in range(i, stop - 1):
            # The pair must be anchored at j: anchoring it at i (as the
            # code did before) re-evaluates the day's first pair every time.
            two_highs = highs[j:j + 2]
            two_lows = lows[j:j + 2]
            hilo = determine_hilo(two_highs, two_lows)
            gamma = gamma_calc(hilo[0], hilo[1])
            if gamma == 0:
                value_errors += 1
                averager_adjusted -= 1
                continue
            beta = beta_calc(two_highs, two_lows)
            if beta == 0:
                value_errors += 1
                averager_adjusted -= 1
                continue
            spread = spread_calc(alpha_calc(beta, gamma))
            if spread < 0:
                spread = 0
            partsum += spread

        time_list.append(timestamps[i])
        if averager_adjusted == 0:
            # No usable pair in this window: report zero, as for Roll spreads.
            na_spread += 1
            spreads.append(0)
            rel_spreads.append(0)
        else:
            spread_averaged = partsum / averager_adjusted
            spreads.append(spread_averaged)
            rel_spreads.append(spread_averaged / prices[stop - 1])

    if kill_output == 0:
        print("Hi/Low spreadH-calculation is finished")
        print("The length of the spreadH-vector is", len(spreads))
        print("The length of the time-vector is", len(time_list))
        print("The length of the relative spreadH-vector is", len(rel_spreads))
        print("Number of value errors:", value_errors)
        print("Number of days set to zero due to lack of data:", na_spread)

    return time_list, spreads, rel_spreads
コード例 #17
0
ファイル: rolls.py プロジェクト: sondber/krypto
def _roll_window_spread(autocov_sum, divisor):
    """Return ``(spread, biased)`` for one window of the Roll-type estimator.

    The spread is ``2 * sqrt(-autocov_sum / divisor)``. When the expression
    under the root is negative the spread is reported as 0 and ``biased``
    is 1; otherwise ``biased`` is 0.
    """
    try:
        return 2 * math.sqrt(-autocov_sum / divisor), 0
    except ValueError:
        return 0, 1


def rolls(prices_minute,
          time_list_minute,
          calc_basis="h",
          kill_output=0,
          bias_indicator=0):  # calc_basis "h"/"d"
    """Compute Roll-type bid-ask spreads per hour or per day from minute prices.

    For every window the products of consecutive first price differences
    are summed and the spread is ``2 * sqrt(-sum / (window_length - 2))``.
    Windows where the root is undefined get a spread of 0 and are counted as
    value errors.

    Args:
        prices_minute: Minute prices.
        time_list_minute: Time stamps matching ``prices_minute``, parsed by
            ``supp.fix_time_list``.
        calc_basis: ``"h"`` or ``0`` for hourly windows, ``"d"`` or ``1``
            for daily windows. Any other value produces no windows.
        kill_output: 0 prints progress and a summary, any other value is
            silent.
        bias_indicator: 1 additionally returns the per-window 0/1 list that
            marks windows whose spread was forced to 0.

    Returns:
        ``(spread, spread_rel, time_list, count_value_error)`` and, when
        ``bias_indicator == 1``, ``bias_indicator_list`` as a fifth element.
        ``spread_rel`` is the spread divided by the last price of the window.

        NOTE(review): when ``kill_output == 0`` the fourth element is the
        value-error *percentage*, otherwise it is the raw count. This
        historical behaviour is kept so existing callers see no change.
    """
    year, month, day, hour, minute = supp.fix_time_list(
        time_list_minute)  # gives 5 equal length lists

    spread = []
    spread_rel = []
    time_list = []
    bias_indicator_list = []
    count_value_error = 0

    if kill_output == 0:
        print("Calculating first price differences ...")

    price_differences = first_price_differences(prices_minute)

    if kill_output == 0:
        print("Price differences-calculation finished. ")

    hourly = calc_basis == "h" or calc_basis == 0
    daily = calc_basis == "d" or calc_basis == 1

    if hourly:
        minutes_in_window = 60
        freq_desc = "hourly"
    else:  # daily with full data
        minutes_in_window = 60 * 24
        freq_desc = "daily"

    if kill_output == 0:
        print("Calculating spreads on a/an", freq_desc, "basis")

    def record_window(autocov_sum, divisor, time_index, price_index):
        """Store the results of one finished window in the output lists."""
        nonlocal count_value_error
        ba_calc, biased = _roll_window_spread(autocov_sum, divisor)
        count_value_error += biased
        bias_indicator_list.append(biased)
        spread.append(ba_calc)
        time_list.append(time_list_minute[time_index])
        spread_rel.append(ba_calc / prices_minute[price_index])

    if hourly:
        for i in range(0, len(price_differences), minutes_in_window):
            sum_inside = 0
            for y in range(i + 1, i + minutes_in_window):
                try:
                    sum_inside += (price_differences[y] *
                                   price_differences[y - 1])
                except IndexError:
                    # Best effort: report and skip products past the end of
                    # the difference series instead of aborting the run.
                    print("There is an error when i = %i and y = %i" % (i, y))
            record_window(sum_inside, minutes_in_window - 2, i,
                          i + minutes_in_window - 1)
    elif daily:
        start_index = 0
        if hour[0] != 0:
            # The series does not start at midnight: the first, shorter day
            # runs up to the next midnight and is handled on its own.
            start_index = (24 * 60) - (hour[0] * 60)
            sum_inside = 0
            for j in range(1, start_index):
                sum_inside += (price_differences[j] * price_differences[j - 1])
            record_window(sum_inside, start_index - 2, 0, start_index - 1)
        # hour[0] is never negative, so the old min(len, len - hour[0] * 60)
        # always picked the second term; it drops the overshooting last day.
        last_start = len(price_differences) - hour[0] * 60
        for i in range(start_index, last_start, minutes_in_window):
            sum_inside = 0
            for y in range(i + 1, i + minutes_in_window):
                sum_inside += price_differences[y] * price_differences[y - 1]
            record_window(sum_inside, minutes_in_window - 2, i,
                          i + minutes_in_window - 1)

    if kill_output == 0:
        # Guard the percentage: no windows (empty input or an unrecognised
        # calc_basis) previously raised ZeroDivisionError here.
        if spread_rel:
            error_pct = round(100 * (count_value_error / len(spread_rel)), 2)
        else:
            error_pct = 0.0
        print("Spreads-calculation is finished")
        print("The length of the spreadH-vector is", len(spread_rel))
        print("The length of the time-vector is", len(time_list))
        print(count_value_error, "(", error_pct, "%)",
              "value errors were counted when calculating Roll-spreads")
        count_value_error = error_pct

    if bias_indicator == 1:
        return spread, spread_rel, time_list, count_value_error, bias_indicator_list
    else:
        return spread, spread_rel, time_list, count_value_error
コード例 #18
0
ファイル: data_import_support.py プロジェクト: sondber/krypto
def clean_series_hour(time_listM,
                      pricesM,
                      volumesM,
                      exc=0,
                      convert_time_zones=1,
                      plot_for_extreme=0):
    """Build cleaned hourly return/spread/volume/ILLIQ/volatility series.

    Steps, in order: optionally shift the minute time stamps by an
    exchange-specific number of hours, aggregate to hours, compute hourly
    Roll spreads, ILLIQ and realized volatility, drop zero-volume hours, cut
    the series to an exchange-specific period, drop hours flagged by
    ``supp.remove_extremes``, drop hours where spread, volatility or ILLIQ is
    zero, and finally add log-transformed ILLIQ, volume and volatility.

    Args:
        time_listM: Minute time stamps.
        pricesM: Minute prices.
        volumesM: Minute volumes.
        exc: Exchange code (0-5); any other value uses the "TEST SET" period
            and no extreme-value rules.
        convert_time_zones: Truthy to apply the per-exchange hour shift.
        plot_for_extreme: 1 plots every series before and after the
            extreme-value removal.

    Returns:
        ``(time_listH, returnsH, spreadH, volumesH, log_volumesH, illiqH,
        log_illiqH, rvolH, log_rvolH)``.
    """
    remove_extremes = 1
    print(" \033[32;0;0mRunning 'clean_series_hour' ...\033[0;0;0m")

    # Currently moves e.g. Coincheck nine hours; unknown codes are not moved.
    n_hours = 0
    if convert_time_zones:
        n_hours = {0: 1, 5: 1, 1: 9, 2: 8, 3: -5, 4: 9}.get(exc, 0)
        print("  Converting time zones: moving series %i hour(s)" % n_hours)

    if n_hours != 0:
        # Rebuild the time list from the shifted components.
        yr, mon, dy, hr, mnt = supp.fix_time_list(time_listM,
                                                  move_n_hours=n_hours)
        time_listM = supp.make_time_list(yr, mon, dy, hr, mnt)

    returnsM = jake_supp.logreturn(pricesM)
    time_listH, pricesH, volumesH = convert_to_hour(time_listM, pricesM,
                                                    volumesM)
    returnsH = jake_supp.logreturn(pricesH)

    _, spreadH, _, _ = rolls.rolls(pricesM,
                                   time_listM,
                                   calc_basis="h",
                                   kill_output=1)
    _, illiqH = ILLIQ.illiq(time_listM,
                            returnsM,
                            volumesM,
                            hourly_or_daily="h",
                            threshold=0)
    rvolH, _ = realized_volatility.RVol(time_listM,
                                        pricesM,
                                        daily=0,
                                        annualize=1)

    # Drop every hour with zero volume.
    time_list_removed = []
    (time_listH, time_list_removed, volumesH, spreadH, returnsH,
     rvolH) = supp.remove_list1_zeros_from_all_lists(time_listH,
                                                     time_list_removed,
                                                     volumesH, spreadH,
                                                     returnsH, rvolH)

    print("  dis.%i: Number of hours removed due to zero-volume: %i" %
          (gf(cf()).lineno, len(time_list_removed)))

    # exchange code -> (first kept date, start of volume averaging, end date)
    periods = {
        0: ("01.01.2013 00:00", "01.01.2012 00:00", ""),
        1: ("01.06.2016 00:00", "30.10.2014 00:00", ""),
        2: ("01.01.2013 00:00", "01.01.2012 00:00", "29.09.2017 00:00"),
        3: ("01.01.2015 00:00", "02.12.2014 00:00", ""),
        4: ("01.01.2014 00:00", "01.10.2013 00:00", ""),
        5: ("01.01.2015 00:00", "01.01.2014 00:00", ""),
    }
    if exc in periods:
        cutoff_date, start_averaging_date, end_time = periods[exc]
    else:
        print("  TEST SET")
        cutoff_date = "01.01.2017 00:00"
        start_averaging_date = "01.01.2017 00:00"
        end_time = ""

    cutoff_hour = supp.find_date_index(cutoff_date, time_listH, next_date=1)
    start_averaging_hour = supp.find_date_index(start_averaging_date,
                                                time_listH,
                                                next_date=1)
    if len(end_time) > 1:
        end_hour = supp.find_date_index(end_time, time_listH)
    else:
        end_hour = len(time_listH) - 1

    # Average volume of the run-up period, used later to scale volumes.
    mean_volume_prev_year = np.average(
        volumesH[start_averaging_hour:cutoff_hour])

    kept = slice(cutoff_hour, end_hour)
    time_listH = time_listH[kept]
    print("  Time period:", time_listH[0], "to", time_listH[-1])
    returnsH = returnsH[kept]
    volumesH = volumesH[kept]
    spreadH = spreadH[kept]
    illiqH = illiqH[kept]
    rvolH = rvolH[kept]

    if plot_for_extreme == 1:
        raw_panels = ((rvolH, "Raw rvol"), (spreadH, "Raw spreadH"),
                      (volumesH, "Raw volume"), (illiqH, "Raw illiq"),
                      (returnsH, "Raw returnsH"))
        for series, title in raw_panels:
            plt.plot(series)
            plt.title(title)
            plt.figure()

    hours_to_remove = []
    if remove_extremes == 1:
        symmetric_wide = {"threshold_lower": -0.1}
        symmetric_narrow = {"threshold_lower": -0.075}
        # Each rule: (series, upper threshold, extra keyword arguments).
        rules_first = [(returnsH, 0.1, symmetric_wide), (rvolH, 2, {}),
                       (spreadH, 0.01, {}), (illiqH, 0.1, {})]
        rules_plain = [(returnsH, 0.075, symmetric_narrow), (rvolH, 2, {}),
                       (spreadH, 0.1, {}), (illiqH, 0.1, {})]
        rules_with_volume = [(returnsH, 0.075, symmetric_narrow),
                             (rvolH, 2, {}), (volumesH, 15000, {}),
                             (spreadH, 0.1, {}), (illiqH, 0.02, {})]
        rules_by_exchange = {
            0: rules_first,
            1: rules_plain,
            2: rules_plain,
            3: rules_with_volume,
            4: rules_with_volume,
        }
        for series, upper, extra in rules_by_exchange.get(exc, ()):
            hours_to_remove = supp.remove_extremes(hours_to_remove, series,
                                                   upper, **extra)

    time_listH = np.delete(time_listH, hours_to_remove)
    returnsH = np.delete(returnsH, hours_to_remove)
    volumesH = np.delete(volumesH, hours_to_remove)
    spreadH = np.delete(spreadH, hours_to_remove)
    illiqH = np.delete(illiqH, hours_to_remove)
    rvolH = np.delete(rvolH, hours_to_remove)

    if plot_for_extreme == 1:
        cleaned_panels = ((rvolH, "rvol"), (spreadH, "spreadH"),
                          (volumesH, "volume"), (illiqH, "illiq"))
        for series, title in cleaned_panels:
            plt.plot(series)
            plt.title(title)
            plt.figure()
        plt.plot(returnsH)
        plt.title("returnsH")
        plt.show()

    # Drop every hour where the Roll spread is zero.
    (time_listH, time_list_removed, spreadH, volumesH, returnsH, illiqH,
     rvolH) = supp.remove_list1_zeros_from_all_lists(time_listH,
                                                     time_list_removed,
                                                     spreadH, volumesH,
                                                     returnsH, illiqH, rvolH)

    # Drop every hour where realized volatility is zero.
    (time_listH, time_list_removed, rvolH, spreadH, volumesH, returnsH,
     illiqH) = supp.remove_list1_zeros_from_all_lists(time_listH,
                                                      time_list_removed,
                                                      rvolH, spreadH,
                                                      volumesH, returnsH,
                                                      illiqH)

    # Drop every hour where ILLIQ is zero.
    (time_listH, time_list_removed, illiqH, rvolH, spreadH, volumesH,
     returnsH) = supp.remove_list1_zeros_from_all_lists(time_listH,
                                                        time_list_removed,
                                                        illiqH, rvolH,
                                                        spreadH, volumesH,
                                                        returnsH)

    # Log versions of ILLIQ, volume and realized volatility.
    log_illiqH = np.log(illiqH)
    log_volumesH = volume_transformation(volumesH, mean_volume_prev_year)
    log_rvolH = np.log(rvolH)

    print(" \033[32;0;0mFinished running 'clean_series_hour' ...\033[0;0;0m")
    return time_listH, returnsH, spreadH, volumesH, log_volumesH, illiqH, log_illiqH, rvolH, log_rvolH
コード例 #19
0
ファイル: testing.py プロジェクト: sondber/krypto
import os
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import realized_volatility
import rolls
import ILLIQ

# Ad-hoc test script: loads a small minute-level test set and runs it through
# the hourly cleaning pipeline and the individual hourly measures.
file_name = "data/test_set.csv"
n_exc = 1  # passed to fetch_aggregate_csv below
# The four flags below are not read anywhere in the visible part of the script.
intraday = 0
intraweek = 0
plots = 0
print_table = 0

time_list_minutes, prices, volumes = data_import.fetch_aggregate_csv(file_name, n_exc)
# Split the time stamps into components and rebuild them without any shift
# (move_n_hours=0).
y, mo, d, h, mi = supp.fix_time_list(time_list_minutes, move_n_hours=0)
time_list_minutes = supp.make_time_list(y, mo, d, h, mi )

# NOTE(review): eight names are unpacked here; confirm that
# dis.clean_series_hour returns exactly eight values in the version this
# script targets.
time_listH, returnsH, spreadH, log_volumesH, illiqH, log_illiqH, rvolH, log_rvolH = dis.clean_series_hour(time_list_minutes, prices, volumes)


# Row 0 of the 2-D price/volume arrays is used as the single minute series.
prices_minutes = prices[0, :]
volumes_minutes = volumes[0, :]
returns_minutes = jake_supp.logreturn(prices_minutes)

time_list_hours, prices_hours, volumes_hours = dis.convert_to_hour(time_list_minutes, prices_minutes, volumes_minutes)
# Element [1] of the rolls() result is kept as the hourly spread.
spread_hours = rolls.rolls(prices_minutes, time_list_minutes, calc_basis="h", kill_output=1)[1]

illiq_hours_time, illiq_hours = ILLIQ.illiq(time_list_minutes, returns_minutes, volumes_minutes, hourly_or_daily="h", threshold=0)
rvol_hours, time_list_hours_rvol = realized_volatility.RVol(time_list_minutes, prices_minutes, daily=0, annualize=1)
コード例 #20
0
ファイル: sondre_test_script.py プロジェクト: sondber/krypto
    X = np.transpose(np.matrix(volume_nativeH))
    volume_indexH = np.transpose(np.matrix(volume_indexH))
    X = np.append(X, volume_indexH, axis=1)

    linreg.reg_multiple(spreadH, X, prints=1)

    plot.time_series_single(time_list_combined,volumes_combined,"global_volumes_index")
    plot.time_series_single(time_listD,volumesD,"actual_global_volumes")

# Import raw minute data for selected exchanges, report gaps in the minute
# grid and compute hourly Roll spreads.
# NOTE(review): nothing is written to disk in the visible part of this block;
# the excerpt may end before the CSV export.
if make_real_spread_csv:

    for exc_name in ["bitstamp", "korbit"]:
        file_name= "data/long_raw_data/" + exc_name + "_new_minutes.csv"
        time_listM, pricesM, volumesM = dis.price_volume_from_raw(file_name, [], [], [], semi=1, unix=0, price_col=4)
        y, mo, d, h, mi = supp.fix_time_list(time_listM)

        # The empty list is overwritten immediately by the conversion below.
        unixM = []
        unixM = supp.timestamp_to_unix(time_listM)

        # Print every position where consecutive stamps are not exactly
        # 60 apart, i.e. where the minute grid has a gap or a jump.
        for i in range(1, len(time_listM)):
            if unixM[i] - unixM[i-1] != 60:
                print(i, time_listM[i])

        pricesM = supp.fill_blanks(pricesM)
        # Sanity output: series lengths and the last ten entries of each.
        print("finihed importing")
        print(len(time_listM), time_listM[-10:])
        print(len(pricesM),(pricesM[-10:]))
        print(len(volumesM), (volumesM[-10:]))
        spread_abs, spreadH, time_listH, count_value_error = rolls.rolls(pricesM, time_listM, calc_basis="h", kill_output=1)
コード例 #21
0
def _illiq_window_sum(minute_returns, minute_volumes, start, stop, threshold):
    """Sum ``|return| / volume`` over minutes ``start`` .. ``stop - 1``.

    Minutes whose volume is at or below ``threshold`` are left out.
    Returns ``(partsum, skipped)`` where ``skipped`` counts those minutes.
    """
    partsum = 0
    skipped = 0
    for j in range(start, stop):
        if minute_volumes[j] <= threshold:
            skipped += 1
        else:
            partsum += abs(minute_returns[j]) / minute_volumes[j]
    return partsum, skipped


def illiq(timestamps,
          minute_returns,
          minute_volumes,
          hourly_or_daily="d",
          kill_output=1,
          threshold=0.05):
    """Compute an ILLIQ (Amihud-style) illiquidity value per hour or per day.

    For every window the ratio ``|return| / volume`` is averaged over the
    minutes whose volume exceeds ``threshold``. The divisor is the nominal
    window length minus the number of skipped minutes. Windows in which no
    minute qualifies are left out of the result entirely (no entry in either
    returned list).

    In daily mode the first window runs from the first observation to the
    next midnight, and the range of window starts is shortened by the same
    offset at the end of the series. In hourly mode windows are consecutive
    blocks of 60 minutes starting at the first observation.

    Args:
        timestamps: Minute time stamps, parsed by ``supp.fix_time_list``.
        minute_returns: Minute returns.
        minute_volumes: Minute volumes.
        hourly_or_daily: ``"d"`` for daily windows, ``"h"`` for hourly.
        kill_output: 0 prints progress and a summary, any other value is
            silent.
        threshold: Minutes with volume at or below this value are skipped.

    Returns:
        ``(time_list_illiq, illiq)``: the first time stamp of every reported
        window and the matching illiquidity values.

    Raises:
        ValueError: If ``hourly_or_daily`` is neither ``"d"`` nor ``"h"``.
    """
    # Validate up front; an unknown mode used to surface later as an
    # UnboundLocalError on the window variables.
    if hourly_or_daily not in ("d", "h"):
        raise ValueError(
            "hourly_or_daily must be 'd' or 'h', got %r" % (hourly_or_daily,))

    year, month, day, hour, minute = supp.fix_time_list(timestamps)
    illiq = []
    time_list_illiq = []
    value_errors = 0
    zero_count_window = 0
    n_entries = len(timestamps)
    day_desc = "full day"

    daily = hourly_or_daily == "d"
    if daily:
        window = 24 * 60
        freq_desc = "daily"
        # Minutes already elapsed on the first day when the series starts.
        start_minute = hour[0] * 60 + minute[0]
    else:
        window = 60
        freq_desc = "hourly"
        start_minute = 0

    if kill_output == 0:
        print("Calculating ILLIQ on a/an", freq_desc, "basis using", day_desc,
              "data")

    if daily:
        # First (possibly shortened) day: from the first row to midnight.
        first_length = window - start_minute
        partsum, skipped = _illiq_window_sum(minute_returns, minute_volumes,
                                             0, first_length, threshold)
        value_errors += skipped
        window_adjusted = first_length - skipped
        if window_adjusted > 0:
            illiq.append(partsum / window_adjusted)
            time_list_illiq.append(timestamps[0])
        else:
            zero_count_window += 1
        second_iteration_start = first_length
    else:
        second_iteration_start = 0

    # start_minute is never negative, so the upper bound is simply
    # n_entries - start_minute (the former min() always chose this term).
    for i in range(second_iteration_start, n_entries - start_minute, window):
        # min() keeps a truncated final window inside the data.
        partsum, skipped = _illiq_window_sum(minute_returns, minute_volumes,
                                             i, min(i + window, n_entries),
                                             threshold)
        value_errors += skipped
        window_adjusted = window - skipped
        if window_adjusted > 0:
            illiq.append(partsum / window_adjusted)
            time_list_illiq.append(timestamps[i])
        else:
            zero_count_window += 1

    if kill_output == 0:
        print("ILLIQ-calculation is finished")
        print("The length of the ILLIQ-vector is", len(illiq))
        print("The length of the time-vector is", len(time_list_illiq))
        print("Number of value errors:", value_errors)
        print("Number of zero-count windows", zero_count_window)

    return time_list_illiq, illiq
コード例 #22
0
import plot

# Settings for the intraday regression script.
exchanges = ["korbit"]
hours_in_window = [
    1, 2, 4
]  # Keep this as a list of the window sizes the analysis should be run for
convert_coeffs_to_percentage = 1  # Convert coeffs and std.errs. of returnsH and spreadH to percentage
convert_logs = 0  # Convert coeffs and std.errs. of rvol and illiq to percentage, i.e. 100*exp(coeff) NB! Doesn't work
subtract_means = 1
log_illiqs = True

for exc in exchanges:
    exc_name, time_listH, returnsH, spreadH, volumesH, log_volumesH, illiqH, log_illiqH, rvolH, log_rvolH = di.get_list(
        exc=exc, freq=1, local_time=1)

    hour = supp.fix_time_list(time_listH)[3]

    print()
    print("------------------------ INTRADAY REGRESSION FOR",
          exc_name.upper()[0:-3], "-------------------------")
    print()

    print()
    print(
        "     ------------------------------------------------Regression table for Intraday seasonality-----------------------------------------"
    )

    n = len(hour)
    for h in hours_in_window:  # Itererer over de forskjellige vinduene
        single_hour, two_hours, three_hours, four_hours = [0, 0, 0,
                                                           0]  # initialize