def get_rolling_estimate(returns, func_name, value_index=0, monte_length=1000):
    """
    Bootstrap 10%/90% confidence bands for a statistic, re-estimated on an
    expanding window whose end point advances in 12-month steps.

    :param returns: pd.DataFrame of returns with a datetime index
    :param func_name: statistic passed through to distribution_of_statistic
    :param value_index: column of the bootstrap distribution to extract
    :param monte_length: number of bootstrap draws per window
    :returns: pd.DataFrame with 'lower' and 'upper' columns, indexed by window end
    """
    period_ends = pd.date_range(returns.index[1], returns.index[-1], freq="12M")
    progress = progressBar(len(period_ends))

    lower = []
    upper = []
    for cutoff in period_ends:
        # expanding window: everything up to and including this cutoff
        distribution = distribution_of_statistic(
            returns[:cutoff], func_name, monte_length=monte_length
        )
        lower.append(distribution.quantile(0.1)[value_index])
        upper.append(distribution.quantile(0.9)[value_index])
        progress.iterate()

    return pd.DataFrame(dict(upper=upper, lower=lower), index=period_ends)
def _get_portfolio_risk_given_weights(self, portfolio_weights: pd.DataFrame) -> pd.Series:
    """
    Calculate portfolio risk on each date of the common index, given a frame
    of portfolio weights and the covariance matrix for that date.

    :param portfolio_weights: pd.DataFrame of weights, one column per instrument
    :returns: pd.Series of risk values indexed by date
    """
    dates = self.common_index()
    progress = progressBar(len(dates), show_timings=True, show_each_time=False)

    risk_values = []
    for date in dates:
        progress.iterate()
        weights_today = portfolioWeights(
            get_row_of_df_aligned_to_weights_as_dict(portfolio_weights, date)
        )
        covariance_today = self.get_covariance_matrix(date)
        risk_values.append(
            calculate_risk(weights=weights_today, covariance=covariance_today)
        )
    progress.finished()

    return pd.Series(risk_values, dates)
def calc_historic_confidence(perc, function_to_use, rollperiods=250):
    """
    Apply a confidence-interval function over rolling fitting windows.

    :param perc: returns data passed to generate_fitting_dates and function_to_use
    :param function_to_use: callable(perc, fit_date) -> (lower, upper) pair
    :param rollperiods: window length for the rolling fit
    :returns: pd.DataFrame with 'lower'/'upper' columns indexed by fit_end dates
    """
    fitting_dates = generate_fitting_dates(perc, "rolling", rollperiods=rollperiods)

    # skip the first fitting period, which has no history to fit on
    usable_dates = fitting_dates[1:]
    progress = progressBar(len(fitting_dates) - 1)

    confidence_rows = []
    for fit_date in usable_dates:
        confidence_rows.append(function_to_use(perc, fit_date))
        progress.iterate()
    progress.finished()

    result = pd.DataFrame(
        confidence_rows,
        index=[fit_date.fit_end for fit_date in usable_dates],
    )
    result.columns = ["lower", "upper"]

    return result
def get_liquidity_data_df(data: dataBlob):
    """
    Build a DataFrame of liquidity statistics, one row per instrument that
    has contract prices.

    :param data: dataBlob giving access to stored prices
    :returns: pd.DataFrame indexed by instrument code
    """
    diag_prices = diagPrices(data)
    instrument_list = diag_prices.get_list_of_instruments_with_contract_prices()

    print("Getting data... patience")
    p = progressBar(len(instrument_list))

    all_liquidity = []
    for instrument_code in instrument_list:
        p.iterate()
        all_liquidity.append(
            get_liquidity_dict_for_instrument_code(data, instrument_code)
        )
    # FIX: close the progress bar — every other progress loop in this file
    # calls finished(); the original omitted it here
    p.finished()

    all_liquidity_df = pd.DataFrame(all_liquidity)
    all_liquidity_df.index = instrument_list

    return all_liquidity_df
def weights(self) -> pd.DataFrame:
    """
    Estimate weights for every fitting period and stack them into a frame.

    :returns: pd.DataFrame of weights, one row per fitting period, indexed
              by each period's starting date
    """
    fit_dates = self.fit_dates
    optimiser = self.optimiser

    progress = progressBar(len(fit_dates), "Optimising weights")

    weight_list = []
    # Now for each time period, estimate weights
    for fit_period in fit_dates:
        progress.iterate()
        weight_dict = optimiser.calculate_weights_for_period(fit_period)
        weight_list.append(weight_dict)
    # FIX: close the progress bar — sibling progress loops in this file
    # call finished(); the original omitted it here
    progress.finished()

    weight_index = fit_dates.list_of_starting_periods()
    weights = pd.DataFrame(weight_list, index=weight_index)

    return weights
def get_instrument_risk_table(data, only_held_instruments=True):
    """
    Build a per-instrument risk table: daily/annual %, return-space values,
    base-currency-per-contract values and positions, one column per
    instrument, sorted and cleaned on annual risk as % of capital.

    :param data: dataBlob-style data source
    :param only_held_instruments: restrict to instruments with open positions
    :returns: pd.DataFrame, instruments as columns
    """
    if only_held_instruments:
        instrument_list = get_instruments_with_positions_all_strategies(data)
    else:
        instrument_list = get_list_of_instruments()

    progress = progressBar(len(instrument_list))

    risk_rows = []
    for code in instrument_list:
        risk_rows.append(get_risk_data_for_instrument(data, code))
        progress.iterate()
    progress.finished()

    risk_df = pd.DataFrame(risk_rows, index=instrument_list).transpose()

    return sorted_clean_df(risk_df, "annual_risk_perc_capital")
def get_optimised_weights_df(self) -> pd.DataFrame:
    """
    Run the position optimiser date by date over the common index, feeding
    each date's optimal weights in as the previous weights for the next.

    :returns: pd.DataFrame of optimal weights indexed by date
    """
    self.log.msg("Optimising positions for small capital: may take a while!")

    dates = list(self.common_index())
    progress = progressBar(len(dates), show_timings=True, show_each_time=True)

    # start from all-zero weights on the first date
    prior_weights = portfolioWeights.allzeros(self.instrument_list())

    rows = []
    for date in dates:
        optimal = self.get_optimal_weights_with_fixed_contract_values(
            date, previous_weights=prior_weights
        )
        rows.append(optimal)
        # copy so the stored row is not aliased to next iteration's input
        prior_weights = copy(optimal)
        progress.iterate()
    progress.finished()

    return pd.DataFrame(rows, index=dates)
def get_trading_hours_for_all_instruments(data=arg_not_supplied):
    """
    Collect trading hours for every instrument with contract prices.

    Instruments whose trading hours resolve to missing_contract are reported
    on stdout and left out of the result.

    :param data: dataBlob; a fresh one is created if not supplied
    :returns: dict of instrument_code -> trading hours
    """
    if data is arg_not_supplied:
        data = dataBlob()

    diag_prices = diagPrices(data)
    instruments = diag_prices.get_list_of_instruments_with_contract_prices()

    progress = progressBar(len(instruments))

    all_trading_hours = {}
    for instrument_code in instruments:
        progress.iterate()
        trading_hours = get_trading_hours_for_instrument(data, instrument_code)
        if trading_hours is missing_contract:
            print("*** NO EXPIRY FOR %s ***" % instrument_code)
            continue

        # will have several days use first one
        check_trading_hours(trading_hours, instrument_code)
        all_trading_hours[instrument_code] = trading_hours
    progress.finished()

    return all_trading_hours
def distribution_of_statistic(returns, stat_function, monte_length=1000, horizon=None, colnames=None):
    """
    Bootstrap the sampling distribution of a statistic over returns data.

    :param returns: pd.DataFrame of returns
    :param stat_function: statistic passed to statistic_from_bootstrap
    :param monte_length: number of bootstrap draws
    :param horizon: resample length; defaults to the full length of returns
    :param colnames: column names for the result; defaults to returns' columns
    :returns: pd.DataFrame, one row per draw, one column per statistic value
    """
    # FIX: `is None`, not `== None` — identity test is the correct idiom
    # (and avoids element-wise comparison surprises with array-likes)
    if colnames is None:
        colnames = list(returns.columns)
    if horizon is None:
        horizon = len(returns.index)

    bootstrap_stats = []
    thing = progressBar(monte_length)
    for _ in range(monte_length):
        bootstrap_stats.append(
            statistic_from_bootstrap(returns, stat_function, horizon=horizon)
        )
        thing.iterate()

    # FIX: single DataFrame construction — the original redundantly wrapped
    # the DataFrame in pd.DataFrame a second time
    ans = pd.DataFrame(np.array(bootstrap_stats))
    ans.columns = colnames

    return ans
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", rollyears=20, dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

    Its important that forward filling, or index / ffill / diff has been done before we begin

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """
    # normalise cleaning to a real bool (may arrive as a "True"/"False" string)
    cleaning = str2Bool(cleaning)

    # grouping dictionary, convert to faster, algo friendly, form
    # NOTE(review): group_dict is not referenced again in this visible body —
    # confirm whether group_dict_from_natural has side effects or this is dead
    group_dict = group_dict_from_natural(dict_group)

    # pool a list of frames into one frame; then downsample before fitting
    data = df_from_list(data)
    column_names = list(data.columns)
    data = data.resample(frequency).last()

    # Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

    size = len(column_names)
    # fallback matrix with boring_offdiag everywhere off the diagonal,
    # used by clean_correlation when a period has unusable estimates
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")
    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()
        if fit_period.no_data:
            # no data to fit with: emit an all-NaN matrix for this period
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            # columns that actually have data in this period
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, frequency="W", date_method="expanding", rollyears=20, **kwargs):
    """
    Build a list of per-period correlation matrices from returns data.

    Forward filling (or index / ffill / diff) must already have been done.

    :param data: pd.DataFrame, or a list of them when pooling
    :param frequency: downsampling frequency, "D", "W" or bigger
    :param date_method: passed to generate_fitting_dates
    :param rollyears: window length in years when date_method is "rolling"
    :param **kwargs: passed to correlationSinglePeriod
    """
    if isinstance(data, list):
        # Pooling: resample each frame, then merge into a single frame with
        # a unique time series (df_from_list adds microsecond offsets)
        length_of_data = len(data)
        resampled = [item.resample(frequency).last() for item in data]
        data_as_df = df_from_list(resampled)
    else:
        length_of_data = 1
        data_as_df = data.resample(frequency).last()

    column_names = list(data_as_df.columns)

    # Generate time periods
    fit_dates = generate_fitting_dates(
        data_as_df, date_method=date_method, rollyears=rollyears
    )

    # single-period estimator, reused for every fitting window
    one_period_estimator = correlationSinglePeriod(
        data_as_df, length_of_data=length_of_data, **kwargs
    )

    progress = progressBar(len(fit_dates), "Estimating correlations")

    corr_list = []
    for fit_period in fit_dates:
        progress.iterate()
        corr_list.append(one_period_estimator.calculate(fit_period))

    self.corr_list = corr_list
    self.columns = column_names
    self.fit_dates = fit_dates
def optimise(self):
    """
    Optimise weights over the returns data prepared by .set_up_data().

    Stores results on self: .fit_dates, .results (per-period optimisation
    objects), .raw_weights and .weights (cost-weighted if configured).
    """
    log = self.log

    data = getattr(self, "data", None)
    if data is None:
        log.critical("You need to run .set_up_data() before .optimise()")

    fit_dates = generate_fitting_dates(
        data, date_method=self.date_method, rollyears=self.rollyears
    )
    setattr(self, "fit_dates", fit_dates)

    optimiser = self.optimiser
    cleaning = self.cleaning

    one_day = datetime.timedelta(days=1)
    weight_frames = []  # one two-row frame per fitting period
    opt_results = []    # one optSinglePeriod object per fitting period

    progress = progressBar(len(fit_dates), "Optimising")
    for fit_period in fit_dates:
        # run the optimisation for a single period with the configured
        # optimiser instance
        period_result = optSinglePeriod(self, data, fit_period, optimiser, cleaning)
        opt_results.append(period_result)

        # nudge the period edges inwards by a day so consecutive periods
        # never overlap, then emit a start-row and an end-row delineating
        # the test period
        dindex = [
            fit_period.period_start + one_day,
            fit_period.period_end - one_day,
        ]
        weight_frames.append(
            pd.DataFrame(
                [period_result.weights] * 2, index=dindex, columns=data.columns
            )
        )
        progress.iterate()

    # Stack everything up
    raw_weight_df = pd.concat(weight_frames, axis=0)

    if self.apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")

        # ann_SR_costs must be calculated before a cost multiplier is applied
        ann_SR_costs = self.calculate_ann_SR_costs()
        weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def optimise(self):
    """
    Run the per-period weight optimisation over the prepared returns data.

    Side effects on self: sets .fit_dates, .results, .raw_weights and
    .weights (the latter cost-weighted when apply_cost_weight is set).
    """
    log = self.log
    date_method = self.date_method
    rollyears = self.rollyears
    optimiser = self.optimiser
    cleaning = self.cleaning
    apply_cost_weight = self.apply_cost_weight

    data = getattr(self, "data", None)
    if data is None:
        log.critical("You need to run .set_up_data() before .optimise()")

    fit_dates = generate_fitting_dates(
        data, date_method=date_method, rollyears=rollyears
    )
    self.fit_dates = fit_dates

    weight_rows = []   # two-row frames, one per period
    opt_results = []   # the optSinglePeriod object for each period

    progress = progressBar(len(fit_dates), "Optimising")

    for fit_period in fit_dates:
        # single-period optimisation using the configured optimiser instance
        results_this_period = optSinglePeriod(
            self, data, fit_period, optimiser, cleaning
        )
        opt_results.append(results_this_period)

        # pull the period edges in by one day each so adjacent periods do
        # not overlap; duplicate the weight vector at both edges to mark
        # the start and end of the test period
        start = fit_period.period_start + datetime.timedelta(days=1)
        end = fit_period.period_end - datetime.timedelta(days=1)
        weight_rows.append(
            pd.DataFrame(
                [results_this_period.weights] * 2,
                index=[start, end],
                columns=data.columns,
            )
        )
        progress.iterate()

    raw_weight_df = pd.concat(weight_rows, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        # ann_SR_costs must be calculated before a cost multiplier is applied
        ann_SR_costs = self.calculate_ann_SR_costs()
        weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
    else:
        weight_df = raw_weight_df

    self.results = opt_results
    self.weights = weight_df
    self.raw_weights = raw_weight_df
def __init__(self, data, frequency="W", date_method="expanding", rollyears=20, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

    Its important that forward filling, or index / ffill / diff has been done before we begin

    :param data: simData to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param **kwargs: passed to correlationSinglePeriod

    :returns: CorrelationList
    """
    # FIX: isinstance() instead of `type(data) is list` — the idiomatic
    # check (handles subclasses), and consistent with the sibling version
    # of this constructor elsewhere in the file
    if isinstance(data, list):
        # turn the list of data into a single dataframe. This will have a unique time series, which we manage
        # through adding a small offset of a few microseconds
        length_of_data = len(data)
        data_resampled = [
            data_item.resample(frequency).last() for data_item in data
        ]
        data_as_df = df_from_list(data_resampled)
    else:
        length_of_data = 1
        data_as_df = data.resample(frequency).last()

    column_names = list(data_as_df.columns)

    # Generate time periods
    fit_dates = generate_fitting_dates(
        data_as_df, date_method=date_method, rollyears=rollyears
    )

    # create a single period correlation estimator
    correlation_estimator_for_one_period = correlationSinglePeriod(
        data_as_df, length_of_data=length_of_data, **kwargs
    )

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()
        corrmat = correlation_estimator_for_one_period.calculate(fit_period)
        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
corrmatrix.iloc[0][0] = 1.0 corrmatrix.iloc[0][1] = corrvec[1] corrmatrix.iloc[0][2] = corrvec[0] corrmatrix.iloc[1][0] = corrvec[1] corrmatrix.iloc[1][1] = 1.0 corrmatrix.iloc[1][2] = corrvec[2] corrmatrix.iloc[2][0] = corrvec[0] corrmatrix.iloc[2][1] = corrvec[2] corrmatrix.iloc[2][2] = 1.0 return corrmatrix monte_length_inside = 5000 weights = [] thing = progressBar(monte_length_inside) for notUsed in range(monte_length_inside): corrvec = corr_dist.iloc[int(random.uniform(0, len(corr_dist)))] srlist = SR_distr.iloc[int(random.uniform(0, len(corr_dist)))] stdev_list = stdev_distr.iloc[int(random.uniform(0, len(corr_dist)))] corrmatrix = from_cor_vec_to_matrix(corrvec) meanlist = srlist * stdev_list weights.append( optimisation_with_data(corrmatrix.values, list(meanlist), list(stdev_list))) thing.iterate() weights = pd.DataFrame(weights, columns=['SP500', 'US10', 'US5'])
base_config = base_system.config ## run all possible combinations of TF to get base performance instruments = base_system.get_instrument_list() results = dict() wlist = [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200, 250 ] #wlist = [1,250] instrument_list = base_system.get_instrument_list() from syscore.genutils import progressBar thing = progressBar(len(wlist) * len(wlist) * len(instrument_list)) for Aspeed in wlist: for Bspeed in wlist: if Aspeed == Bspeed: continue config = copy(base_config) trading_rules = dict(rule=dict( function='systems.provided.futures_chapter15.rules.ewmac', data=[ 'rawdata.get_daily_prices', 'rawdata.daily_returns_volatility' ], other_args=dict(Lfast=Aspeed, Lslow=Bspeed)))