Ejemplo n.º 1
0
def returns_data(dates: List[str], time_step: str) -> None:
    """Computes the returns of the time series.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-01']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', 'wk',
     '1mo').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = returns_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, "", "")

    try:
        # Load data. A context manager guarantees the file handle is closed
        # even if unpickling raises.
        with open(
            f"../data/original_data/original_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Simple returns; pct_change leaves NaN in the first row, so drop it.
        returns_df: pd.DataFrame = data.pct_change().dropna()
        # Keep only the first 200 companies (columns).
        returns_df = returns_df.iloc[:, :200]

        # Saving data
        epochs_tools.save_data(returns_df, function_name, dates, time_step, "", "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
Ejemplo n.º 2
0
def normalized_returns_data(dates: List[str], time_step: str) -> None:
    """Normalizes the returns of the time series to mean zero and std deviation one.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', 'wk',
     '1mo').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = normalized_returns_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, "", "")

    try:
        # Load data. A context manager guarantees the file handle is closed
        # even if unpickling raises.
        with open(
            f"../data/epochs/returns_data_{dates[0]}_{dates[1]}_step_{time_step}"
            + "_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Column-wise z-score: subtract each column's mean, divide by its std.
        normalized_df: pd.DataFrame = (data - data.mean()) / data.std()

        # Saving data
        epochs_tools.save_data(normalized_df, function_name, dates, time_step, "", "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
Ejemplo n.º 3
0
def epochs_rolling_avg_correlation_matrix_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Computes the rolling correlation matrix of the normalized returns and
       averages each matrix into a single value per date.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_rolling_avg_correlation_matrix_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, window, "")

    try:
        # Load data. A context manager guarantees the file handle is closed
        # even if unpickling raises.
        with open(
            f"../data/epochs/normalized_returns_data_{dates[0]}"
            + f"_{dates[1]}_step_{time_step}_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # k = number of stocks (columns); the rolling corr for each date is a
        # k x k matrix, so the mean over its entries divides by k * k.
        _, k = data.shape

        corr_matrix_df: pd.DataFrame = data.rolling(window=int(window)).corr().dropna()

        avg_data = []
        dates_series = []

        # The rolling corr result has a MultiIndex (date, stock); level 0
        # identifies one k x k correlation matrix per date.
        for idx in set(corr_matrix_df.index.get_level_values(0)):
            avg_data.append(corr_matrix_df.loc[idx].values.sum() / (k * k))
            dates_series.append(idx)

        series = pd.Series(data=avg_data, index=dates_series)
        # Sets are unordered, so restore chronological order explicitly.
        series = series.sort_index()

        # Saving data
        epochs_tools.save_data(series, function_name, dates, time_step, window, "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()

    except TypeError as error:
        # Fixed typo in the original message ("to stocks").
        print("To compute the correlation at least two stocks are needed")
        print(error)
        print()
Ejemplo n.º 4
0
def epochs_correlation_matrix_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Uses local normalization to compute the correlation matrix of the
       normalized returns.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_correlation_matrix_data.__name__
    # NOTE(review): sibling functions pass a trailing "" (K_value) argument to
    # function_header_print_data and save_data; this one omits it — confirm
    # the helpers accept the shorter call or have a default for that argument.
    epochs_tools.function_header_print_data(function_name, dates, time_step, window)

    try:
        # Load data. A context manager guarantees the file handle is closed
        # even if unpickling raises.
        with open(
            f"../data/epochs/epochs_normalized_returns_data_{dates[0]}"
            + f"_{dates[1]}_step_{time_step}_win_{window}.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Pairwise Pearson correlation over the whole (locally normalized) series.
        corr_matrix_df: pd.DataFrame = data.corr()

        # Saving data
        epochs_tools.save_data(corr_matrix_df, function_name, dates, time_step, window)

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()

    except TypeError as error:
        # Fixed typo in the original message ("to stocks").
        print("To compute the correlation at least two stocks are needed")
        print(error)
        print()
Ejemplo n.º 5
0
def epochs_rolling_volatility_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Uses local normalization to compute the volatility of the time series.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_rolling_volatility_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, window, "")

    try:
        # Load data. A context manager guarantees the file handle is closed
        # even if unpickling raises.
        with open(
            f"../data/epochs/normalized_returns_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Volatility proxy: rolling standard deviation; the first window-1
        # rows are NaN, so drop them.
        std_df: pd.DataFrame = data.rolling(window=int(window)).std().dropna()

        # Saving data
        epochs_tools.save_data(std_df, function_name, dates, time_step, window, "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
Ejemplo n.º 6
0
def epochs_aggregated_dist_returns_market_data(
    dates: List[str], time_step: str, window: str, K_value: str, norm: str = "long"
) -> None:
    """Computes the aggregated distribution of returns for a market.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :param K_value: number of companies to be used (i.e. '80', 'all').
    :param norm: define if the normalization is made in the complete time
     series or in each epoch. Default 'long', 'short' is the other option.
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_aggregated_dist_returns_market_data.__name__
    epochs_tools.function_header_print_data(
        function_name, dates, time_step, window, K_value
    )

    try:
        # Load the returns frame; iterating a DataFrame yields column names,
        # so only the stock (column) labels are consumed downstream.
        # A context manager guarantees the file handle is closed.
        returns_path = (
            f"../data/epochs/returns_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}_win__K_.pickle"
        )
        if K_value == "all":
            with open(returns_path, "rb") as data_file:
                # NOTE(review): [:200] slices the first 200 ROWS, while the
                # sibling returns_data clips to 200 COLUMNS with
                # iloc[:, :200] — confirm which was intended.
                stocks_name: pd.DataFrame = pickle.load(data_file)[:200]

        else:
            with open(returns_path, "rb") as data_file:
                # Random subset of K_value companies.
                stocks_name = pickle.load(data_file).sample(
                    n=int(K_value), axis="columns"
                )

        agg_ret_mkt_list: List[List[float]] = []

        # Combination of stock pairs
        stocks_comb: Iterable[Tuple[Any, ...]] = icomb(stocks_name, 2)
        args_prod: Iterable[Any] = iprod(
            [dates], [time_step], stocks_comb, [window], [norm]
        )

        # Parallel computing: one task per stock pair.
        with mp.Pool(processes=mp.cpu_count()) as pool:
            agg_ret_mkt_list.extend(
                pool.starmap(epochs_aggregated_dist_returns_pair_data, args_prod)
            )

        # Flatten the list of per-pair return lists into one series.
        agg_ret_mkt_list_flat: List[float] = [
            val for sublist in agg_ret_mkt_list for val in sublist
        ]
        agg_ret_mkt_series: pd.Series = pd.Series(agg_ret_mkt_list_flat)

        print(f"mean = {agg_ret_mkt_series.mean()}")
        print(f"std  = {agg_ret_mkt_series.std()}")

        # Saving data
        epochs_tools.save_data(
            agg_ret_mkt_series,
            function_name + "_" + norm,
            dates,
            time_step,
            window,
            K_value,
        )

        # Release the large intermediates before returning to keep peak
        # memory down when this is called in a loop.
        del agg_ret_mkt_list
        del agg_ret_mkt_series

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()