def returns_data(dates: List[str], time_step: str) -> None:
    """Computes the returns of the time series.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-01']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', 'wk',
     '1mo').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = returns_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, "", "")

    try:
        # Load data. A context manager guarantees the file handle is closed
        # (the original pickle.load(open(...)) never closed it).
        with open(
            f"../data/original_data/original_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Percentage change between consecutive observations; the first row
        # is NaN and is dropped.
        returns_df: pd.DataFrame = data.pct_change().dropna()
        # Keep only the first 200 stocks (columns).
        returns_df = returns_df.iloc[:, :200]

        # Saving data
        epochs_tools.save_data(returns_df, function_name, dates, time_step, "", "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
def normalized_returns_data(dates: List[str], time_step: str) -> None:
    """Normalizes the returns of the time series to mean zero and std
    deviation one.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', 'wk',
     '1mo').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = normalized_returns_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, "", "")

    try:
        # Load data. Context manager closes the file handle (the original
        # pickle.load(open(...)) leaked it).
        with open(
            f"../data/epochs/returns_data_{dates[0]}_{dates[1]}_step_{time_step}"
            + "_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Column-wise standardization: subtract each column's mean and divide
        # by its standard deviation.
        normalized_df: pd.DataFrame = (data - data.mean()) / data.std()

        # Saving data
        epochs_tools.save_data(normalized_df, function_name, dates, time_step, "", "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
def epochs_rolling_avg_correlation_matrix_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Computes the correlation matrix of the normalized returns and average
    each column.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_rolling_avg_correlation_matrix_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, window, "")

    try:
        # Load data. Context manager closes the file handle (the original
        # pickle.load(open(...)) leaked it).
        with open(
            f"../data/epochs/normalized_returns_data_{dates[0]}"
            + f"_{dates[1]}_step_{time_step}_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # k = number of stocks (columns); used to normalize the sum of every
        # k x k correlation matrix into an average.
        _, k = data.shape

        # Rolling correlation produces a MultiIndex frame: level 0 is the
        # date of each window, level 1 the stock.
        corr_matrix_df: pd.DataFrame = data.rolling(window=int(window)).corr().dropna()

        avg_data = []
        dates_series = []
        for idx in set(corr_matrix_df.index.get_level_values(0)):
            # Average of all k*k entries of the window's correlation matrix.
            avg_data.append(corr_matrix_df.loc[idx].values.sum() / (k * k))
            dates_series.append(idx)

        # The set iteration above is unordered, so sort by date index.
        series = pd.Series(data=avg_data, index=dates_series)
        series = series.sort_index()

        # Saving data
        epochs_tools.save_data(series, function_name, dates, time_step, window, "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()

    except TypeError as error:
        print("To compute the correlation at least two stocks are needed")
        print(error)
        print()
def epochs_correlation_matrix_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Uses local normalization to compute the correlation matrix of the
    normalized returns.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_correlation_matrix_data.__name__
    # NOTE(review): sibling functions pass a trailing "" argument to
    # function_header_print_data and save_data; this one passes only four /
    # five — confirm against the epochs_tools signatures.
    epochs_tools.function_header_print_data(function_name, dates, time_step, window)

    try:
        # Load data. Context manager closes the file handle (the original
        # pickle.load(open(...)) leaked it).
        with open(
            f"../data/epochs/epochs_normalized_returns_data_{dates[0]}"
            + f"_{dates[1]}_step_{time_step}_win_{window}.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Pairwise Pearson correlation of all columns.
        corr_matrix_df: pd.DataFrame = data.corr()

        # Saving data
        epochs_tools.save_data(corr_matrix_df, function_name, dates, time_step, window)

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()

    except TypeError as error:
        print("To compute the correlation at least two stocks are needed")
        print(error)
        print()
def epochs_rolling_volatility_data(
    dates: List[str], time_step: str, window: str
) -> None:
    """Uses local normalization to compute the volatility of the time series.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_rolling_volatility_data.__name__
    epochs_tools.function_header_print_data(function_name, dates, time_step, window, "")

    try:
        # Load data. Context manager closes the file handle (the original
        # pickle.load(open(...)) leaked it).
        with open(
            f"../data/epochs/normalized_returns_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}_win__K_.pickle",
            "rb",
        ) as data_file:
            data: pd.DataFrame = pickle.load(data_file)

        # Rolling standard deviation as the volatility proxy; the first
        # window-1 rows are NaN and are dropped.
        std_df: pd.DataFrame = data.rolling(window=int(window)).std().dropna()

        # Saving data
        epochs_tools.save_data(std_df, function_name, dates, time_step, window, "")

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()
def epochs_aggregated_dist_returns_market_data(
    dates: List[str], time_step: str, window: str, K_value: str, norm: str = "long"
) -> None:
    """Computes the aggregated distribution of returns for a market.

    :param dates: List of the interval of dates to be analyzed
     (i.e. ['1980-01-01', '2020-12-31']).
    :param time_step: time step of the data (i.e. '1m', '1h', '1d', '1wk',
     '1mo').
    :param window: window time to compute the volatility (i.e. '25').
    :param K_value: number of companies to be used (i.e. '80', 'all').
    :param norm: define if the normalization is made in the complete time
     series or in each epoch. Default 'long', 'short' is the other option.
    :return: None -- The function saves the data in a file and does not return
     a value.
    """

    function_name: str = epochs_aggregated_dist_returns_market_data.__name__
    epochs_tools.function_header_print_data(
        function_name, dates, time_step, window, K_value
    )

    try:
        # Both branches below read the same returns file, so load it once
        # inside a context manager (the original opened it in each branch and
        # never closed the handle).
        with open(
            f"../data/epochs/returns_data_{dates[0]}_{dates[1]}_step"
            + f"_{time_step}_win__K_.pickle",
            "rb",
        ) as returns_file:
            returns_df: pd.DataFrame = pickle.load(returns_file)

        # Select the stocks whose pairs will be aggregated.
        if K_value == "all":
            # NOTE(review): [:200] slices the first 200 ROWS of a DataFrame,
            # yet iterating the frame below yields COLUMN names — confirm
            # whether .iloc[:, :200] (first 200 stocks) was intended, as in
            # returns_data.
            stocks_name: pd.DataFrame = returns_df[:200]
        else:
            # Random subset of K_value columns (stocks).
            stocks_name = returns_df.sample(n=int(K_value), axis="columns")

        agg_ret_mkt_list: List[List[float]] = []

        # All unordered pairs of stock names, each combined with the shared
        # parameters for the worker function.
        stocks_comb: Iterable[Tuple[Any, ...]] = icomb(stocks_name, 2)
        args_prod: Iterable[Any] = iprod(
            [dates], [time_step], stocks_comb, [window], [norm]
        )

        # Parallel computing: one task per stock pair.
        with mp.Pool(processes=mp.cpu_count()) as pool:
            agg_ret_mkt_list.extend(
                pool.starmap(epochs_aggregated_dist_returns_pair_data, args_prod)
            )

        # Flatten the per-pair lists into one series of returns.
        agg_ret_mkt_list_flat: List[float] = [
            val for sublist in agg_ret_mkt_list for val in sublist
        ]
        agg_ret_mkt_series: pd.Series = pd.Series(agg_ret_mkt_list_flat)
        print(f"mean = {agg_ret_mkt_series.mean()}")
        print(f"std = {agg_ret_mkt_series.std()}")

        # Saving data
        epochs_tools.save_data(
            agg_ret_mkt_series,
            function_name + "_" + norm,
            dates,
            time_step,
            window,
            K_value,
        )

        # Release the large intermediates before returning.
        del agg_ret_mkt_list
        del agg_ret_mkt_series

    except FileNotFoundError as error:
        print("No data")
        print(error)
        print()