def forecast_turnover(self, instrument_code, rule_variation_name):
    """
    Get the annualised turnover for a forecast/rule combination

    :param instrument_code: instrument to get values for
    :type instrument_code: str

    :param rule_variation_name: rule to get values for
    :type rule_variation_name: str

    :returns: float
    """
    use_pooled_turnover = str2Bool(
        self.parent.config.forecast_cost_estimates['use_pooled_turnover'])

    if use_pooled_turnover:
        instrument_code_list = self.has_same_rules_as_code(instrument_code)
    else:
        instrument_code_list = [instrument_code]

    turnover_for_SR = self.forecast_turnover_for_list(
        instrument_code_list, rule_variation_name)

    return turnover_for_SR
def _use_fixed_weights(self):
    # fixed scaling applies only when estimation is switched off
    if str2Bool(self.parent.config.use_forecast_scale_estimates):
        fixed_flavour = False
    else:
        fixed_flavour = True

    return fixed_flavour
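# Every snippet in this section funnels YAML-sourced flags through str2Bool,
# which is imported from the library rather than defined here. The sketch
# below is an assumption about how such a coercion helper behaves, not the
# library's actual implementation.
def str2Bool(x):
    # config values may arrive as real booleans or as text such as "True"
    if isinstance(x, bool):
        return x
    return str(x).lower() in ("true", "t", "yes", "y", "1")

print(str2Bool("True"), str2Bool(False))  # True False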
def forecast_scalar(cs_forecasts, window=250000, min_periods=500,
                    backfill=True):
    """
    Work out the scaling factor for cs_forecasts such that T*x has an abs
    value of 10 (or whatever the average absolute forecast is)

    :param cs_forecasts: forecasts, cross sectionally
    :type cs_forecasts: pd.DataFrame TxN

    :param window: rolling window for the time series average
    :type window: int

    :param min_periods: minimum periods before we estimate a scalar
    :type min_periods: int

    :param backfill: backfill the first estimate?
    :type backfill: bool

    :returns: pd.Series
    """
    backfill = str2Bool(backfill)  # in yaml will come in as text

    # We don't allow this to be changed in config
    target_abs_forecast = system_defaults['average_absolute_forecast']

    # Take CS average first
    # we do this before we get the final TS average otherwise get jumps in
    # scalar when new markets introduced
    if cs_forecasts.shape[1] == 1:
        x = cs_forecasts.abs().iloc[:, 0]
    else:
        x = cs_forecasts.ffill().abs().median(axis=1)

    # now the TS
    avg_abs_value = x.rolling(window=window, min_periods=min_periods).mean()
    scaling_factor = target_abs_forecast / avg_abs_value

    if backfill:
        scaling_factor = scaling_factor.fillna(method="bfill")

    return scaling_factor
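# A self-contained toy run of the scalar arithmetic above, with invented
# data: forecasts drawn from N(0, 5) have a cross-sectional absolute median
# of about 3.4, so the estimated scalar settles near 10 / 3.4, roughly 3.
import numpy as np
import pandas as pd

target_abs_forecast = 10.0
forecasts = pd.DataFrame(np.random.normal(0, 5, size=(2000, 3)))
cs_median = forecasts.ffill().abs().median(axis=1)
avg_abs_value = cs_median.rolling(window=250000, min_periods=500).mean()
scaling_factor = target_abs_forecast / avg_abs_value
print(scaling_factor.dropna().iloc[-1])  # roughly 3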
def __init__(self, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, method="bootstrap",
             cleaning=True, cost_multiplier=1.0, apply_cost_weight=True,
             ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
             **passed_params):

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)
    period_target_SR = ann_target_SR / (annualisation ** .5)

    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)
    optimiser = optimiserWithParams(method, optimise_params,
                                    moments_estimator)

    setattr(self, "optimiser", optimiser)
    setattr(self, "log", log)
    setattr(self, "frequency", frequency)
    setattr(self, "method", method)
    setattr(self, "equalise_gross", equalise_gross)
    setattr(self, "cost_multiplier", cost_multiplier)
    setattr(self, "annualisation", annualisation)
    setattr(self, "period_target_SR", period_target_SR)
    setattr(self, "date_method", date_method)
    setattr(self, "rollyears", rollyears)
    setattr(self, "cleaning", cleaning)
    setattr(self, "apply_cost_weight", apply_cost_weight)
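# Worked example of the period_target_SR line above, assuming the library's
# WEEKS_IN_YEAR constant is 52.25: an annual target Sharpe of 0.5 shrinks to
# a per-week target of about 0.07.
WEEKS_IN_YEAR = 52.25
ann_target_SR = 0.5
period_target_SR = ann_target_SR / (WEEKS_IN_YEAR ** 0.5)
print(round(period_target_SR, 4))  # 0.0692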
def forecast_scalar(xcross, window=250000, min_periods=500, backfill=True):
    """
    Work out the scaling factor for xcross such that T*x has an abs value
    of 10

    :param xcross: forecasts, cross sectionally
    :type xcross: pd.DataFrame TxN

    :param window: rolling window for the time series average
    :type window: int

    :param min_periods: minimum periods before we estimate a scalar
    :type min_periods: int

    :returns: pd.Series
    """
    backfill = str2Bool(backfill)  # in yaml will come in as text

    # We don't allow this to be changed in config
    target_abs_forecast = system_defaults['average_absolute_forecast']

    # Take CS average first
    # we do this before we get the final TS average otherwise get jumps in
    # scalar
    if xcross.shape[1] == 1:
        x = xcross.abs().iloc[:, 0]
    else:
        x = xcross.ffill().abs().median(axis=1)

    # now the TS
    avg_abs_value = x.rolling(window=window, min_periods=min_periods).mean()
    scaling_factor = target_abs_forecast / avg_abs_value

    if backfill:
        scaling_factor = scaling_factor.fillna(method="bfill")

    return scaling_factor
def get_SR_transaction_cost_for_instrument_forecast(
        self, instrument_code: str, rule_variation_name: str) -> float:
    """
    Get the SR cost for a forecast/rule combination

    :param instrument_code: instrument to get values for
    :type instrument_code: str

    :param rule_variation_name: rule to get values for
    :type rule_variation_name: str

    :returns: float

    KEY OUTPUT
    """
    use_pooled_costs = str2Bool(
        self.config.forecast_cost_estimates["use_pooled_costs"])

    if use_pooled_costs:
        SR_cost = self._get_SR_cost_for_rule_with_pooled_costs(
            instrument_code, rule_variation_name)
    else:
        SR_cost = self._get_SR_cost_of_rule_for_individual_instrument(
            instrument_code, rule_variation_name)

    return SR_cost
def __init__(self, data, frequency="W", date_method="expanding",
             rollyears=20, dict_group=dict(), boring_offdiag=0.99,
             cleaning=True, **kwargs):

    cleaning = str2Bool(cleaning)

    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    data = data.resample(frequency, how="last")

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    corr_list = []

    print(__file__ + ":" +
          str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
          "Correlation estimate")

    for fit_period in fit_dates:
        print(__file__ + ":" +
              str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
              "Estimating from %s to %s" % (fit_period.period_start,
                                            fit_period.period_end))

        if fit_period.no_data:
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            must_haves = must_have_item(current_period_data)
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def forecast_scalar(xcross, window=250000, min_periods=500, backfill=True):
    """
    Work out the scaling factor for xcross such that T*x has an abs value
    of 10

    :param xcross: forecasts, cross sectionally
    :type xcross: pd.DataFrame TxN

    :param window: rolling window for the time series average
    :type window: int

    :param min_periods: minimum periods before we estimate a scalar
    :type min_periods: int

    :returns: pd.Series
    """
    backfill = str2Bool(backfill)  ## in yaml will come in as text

    ## We don't allow this to be changed in config
    target_abs_forecast = system_defaults['average_absolute_forecast']

    ## Take CS average first
    ## we do this before we get the final TS average otherwise get jumps in scalar
    if xcross.shape[1] == 1:
        x = xcross.abs().iloc[:, 0]
    else:
        x = xcross.ffill().abs().median(axis=1)

    ## now the TS
    avg_abs_value = pd.rolling_mean(x, window=window, min_periods=min_periods)
    scaling_factor = target_abs_forecast / avg_abs_value

    if backfill:
        scaling_factor = scaling_factor.fillna(method="bfill")

    return scaling_factor
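# pd.rolling_mean above is the pre-0.18 pandas API, since removed; the
# modern method form used by the other variants in this section is:
import numpy as np
import pandas as pd

x = pd.Series(np.abs(np.random.normal(size=1000)))
avg_abs_value = x.rolling(window=500, min_periods=100).mean()
print(avg_abs_value.dropna().tail())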
def correlation_single_period(data_for_estimate, using_exponent=True,
                              min_periods=20, ew_lookback=250,
                              floor_at_zero=True):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling

    It's important that forward filling, or index / ffill / diff has been
    done before we begin

    also that we're on the right time frame, eg weekly if that's what we're
    doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array
    """
    # These may come from config as str
    using_exponent = str2Bool(using_exponent)

    if using_exponent:
        # If we stack there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))

        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(
            data_for_estimate,
            span=int(ew_lookback * dlenadj),
            min_periods=min_periods)

        # only want the final one
        corrmat = corrmat.values[-1]
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def calculation_of_raw_forecast_weights(self, instrument_code):
    """
    returns the forecast weights for a given instrument code

    Checks to see if there are pooled forecasts
    """
    ## Get some useful stuff from the config

    ## do we pool our estimation?
    pooling_returns = str2Bool(
        self.parent.config.forecast_weight_estimate["pool_gross_returns"])
    pooling_costs = str2Bool(
        self.parent.config.forecast_cost_estimates["use_pooled_costs"])

    if pooling_returns and pooling_costs:
        return self.calculation_of_pooled_raw_forecast_weights(
            instrument_code)
    else:
        ## could still be using pooled returns
        return self.calculation_of_raw_forecast_weights_for_instrument(
            instrument_code)
def run_on_completion_only(self, process_name, method_name):
    this_method_dict = self.get_method_configuration_for_process_name(
        process_name, method_name)
    run_on_completion_only = this_method_dict.get(
        "run_on_completion_only", False)
    run_on_completion_only = str2Bool(run_on_completion_only)

    return run_on_completion_only
def correlation_single_period(data_for_estimate, using_exponent=True,
                              min_periods=20, ew_lookback=250,
                              floor_at_zero=True):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling

    It's important that forward filling, or index / ffill / diff has been
    done before we begin

    also that we're on the right time frame, eg weekly if that's what we're
    doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array
    """
    # These may come from config as str
    using_exponent = str2Bool(using_exponent)

    if using_exponent:
        # If we stack there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))

        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = data_for_estimate.ewm(
            span=int(ew_lookback * dlenadj),
            min_periods=min_periods).corr(pairwise=True)

        # only want the final one
        corrmat = corrmat.values[-1]
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
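# Illustration of the span adjustment above: if three instruments' forecasts
# are stacked on a shared daily index, every date appears three times, so the
# effective span must be tripled to keep the same decay per calendar day.
import pandas as pd

idx = pd.date_range("2020-01-01", periods=100)
stacked_index = idx.append(idx).append(idx)
dlenadj = float(len(stacked_index)) / len(set(list(stacked_index)))
print(dlenadj)  # 3.0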
def visible_on_lan() -> bool:
    config = get_control_config()
    visible = config.get_element_or_missing_data("dashboard_visible_on_lan")
    if visible is missing_data:
        return False

    visible = str2Bool(visible)

    return visible
def equalise_estimates_from_lists(
    mean_list: list,
    stdev_list: list,
    equalise_SR: bool = True,
    ann_target_SR: float = 0.5,
    equalise_vols: bool = True,
) -> tuple:

    equalise_vols = str2Bool(equalise_vols)
    equalise_SR = str2Bool(equalise_SR)

    if equalise_vols:
        mean_list, stdev_list = vol_equaliser(
            mean_list=mean_list, stdev_list=stdev_list
        )

    if equalise_SR:
        mean_list = SR_equaliser(stdev_list, target_SR=ann_target_SR)

    return mean_list, stdev_list
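# vol_equaliser and SR_equaliser are imported, not shown here. The numbers
# below only illustrate what "equalising" plausibly means in this context
# (they are not the library's implementation): every stdev is replaced by
# the cross-sectional average, and every mean by the one implied by a shared
# target Sharpe ratio.
import numpy as np

mean_list = [0.05, 0.02, 0.09]
stdev_list = [0.10, 0.20, 0.30]

avg_stdev = np.nanmean(stdev_list)
equalised_stdevs = [avg_stdev] * len(stdev_list)             # vol equalising
target_SR = 0.5
equalised_means = [target_SR * s for s in equalised_stdevs]  # SR equalising
print(equalised_stdevs, equalised_means)  # [0.2, 0.2, 0.2] [0.1, 0.1, 0.1]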
def calculation_of_raw_forecast_weights(self, instrument_code):
    """
    returns the forecast weights for a given instrument code

    Checks to see if there are pooled forecasts
    """
    # Get some useful stuff from the config

    # do we pool our estimation?
    pooling_returns = str2Bool(
        self.parent.config.forecast_weight_estimate["pool_gross_returns"])
    pooling_costs = str2Bool(
        self.parent.config.forecast_cost_estimates["use_pooled_costs"])

    if pooling_returns and pooling_costs:
        return self.calculation_of_pooled_raw_forecast_weights(
            instrument_code)
    else:
        # could still be using pooled returns
        return self.calculation_of_raw_forecast_weights_for_instrument(
            instrument_code)
def _get_instrument_code_depending_on_pooling_status(
        instrument_code: str, forecast_scalar_config: dict) -> str:
    # this determines whether we pool or not
    pool_instruments = str2Bool(forecast_scalar_config.pop("pool_instruments"))

    if pool_instruments:
        # pooled, same for all instruments
        instrument_code_to_pass = ALL_KEYNAME
    else:
        instrument_code_to_pass = copy(instrument_code)

    return instrument_code_to_pass
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):

    cleaning = str2Bool(cleaning)

    ## grouping dictionary, convert to faster, algo friendly, form
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    data = data.resample(frequency, how="last")

    ### Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    ## create a list of correlation matrices
    corr_list = []

    print(__file__ + ":" +
          str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
          "Correlation estimate")

    ## Now for each time period, estimate correlation
    for fit_period in fit_dates:
        print(__file__ + ":" +
              str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
              "Estimating from %s to %s" % (fit_period.period_start,
                                            fit_period.period_end))

        if fit_period.no_data:
            ## no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def forecast_turnover(self, instrument_code: str,
                      rule_variation_name: str) -> float:
    use_pooled_turnover = str2Bool(
        self.parent.config.forecast_cost_estimates["use_pooled_turnover"]
    )

    if use_pooled_turnover:
        instrument_code_list = self.has_same_rules_as_code(instrument_code)
    else:
        instrument_code_list = [instrument_code]

    turnover_for_SR = self.forecast_turnover_for_list(
        instrument_code_list, rule_variation_name)

    return turnover_for_SR
def forecast_turnover(self, instrument_code: str,
                      rule_variation_name: str) -> float:
    use_pooled_turnover = str2Bool(
        self.config.forecast_cost_estimates["use_pooled_turnover"])

    if use_pooled_turnover:
        turnover = self._forecast_turnover_pooled(instrument_code,
                                                  rule_variation_name)
    else:
        turnover = self._forecast_turnover_for_individual_instrument(
            instrument_code, rule_variation_name)

    return turnover
def __init__(self, data_for_correlation, ew_lookback: int = 250,
             min_periods: int = 20, cleaning: bool = True,
             floor_at_zero: bool = True, length_adjustment: int = 1,
             **_ignored_kwargs):

    cleaning = str2Bool(cleaning)
    floor_at_zero = str2Bool(floor_at_zero)

    self._cleaning = cleaning
    self._floor_at_zero = floor_at_zero

    correlation_calculations = exponentialCorrelationResults(
        data_for_correlation,
        ew_lookback=ew_lookback,
        min_periods=min_periods,
        length_adjustment=length_adjustment)

    self._correlation_calculations = correlation_calculations
    self._data_for_correlation = data_for_correlation
def __init__(self, data_as_df, length_of_data=1, ew_lookback=250,
             boring_offdiag=0.99, cleaning=True, floor_at_zero=True,
             **kwargs):
    """
    Create an object to calculate correlations

    We set up one of these with a set of data and parameters, and then call
    repeatedly

    :param data_as_df: The dataframe of correlations
    :param boring_offdiag: The off diagonal element to put into the matrix
        if data is absent
    :param cleaning: Should we include fake values in the matrix so we
        don't need a warm up period?
    :param floor_at_zero: Should we remove negative correlations?
    :param ew_lookback: Lookback to use if exponential calculation used
    :param length_of_data: Original length of data passed in (to correct
        for stacking of dataframe)

    :return: np.array of correlation matrix
    """
    self.cleaning = str2Bool(cleaning)
    self.floor_at_zero = str2Bool(floor_at_zero)

    # correct the lookback if we're jamming stuff together
    self.ew_lookback_corrected = length_of_data * ew_lookback

    size = data_as_df.shape[1]
    self.corr_with_no_data = boring_corr_matrix(size, offdiag=np.nan)
    self.corr_for_cleaning = boring_corr_matrix(size, offdiag=boring_offdiag)

    self.kwargs = kwargs
    self.data_as_df = data_as_df
def __init__(self, data_as_df, length_of_data=1, ew_lookback=250,
             boring_offdiag=0.99, cleaning=True, floor_at_zero=True,
             **kwargs):
    """
    Create an object to calculate correlations

    We set up one of these with a set of data and parameters, and then call
    repeatedly

    :param data_as_df: The dataframe of correlations
    :param boring_offdiag: The off diagonal element to put into the matrix
        if data is absent
    :param cleaning: Should we include fake values in the matrix so we
        don't need a warm up period?
    :param floor_at_zero: Should we remove negative correlations?
    :param ew_lookback: Lookback to use if exponential calculation used
    :param length_of_data: Original length of data passed in (to correct
        for stacking of dataframe)

    :return: np.array of correlation matrix
    """
    self.cleaning = str2Bool(cleaning)
    self.floor_at_zero = str2Bool(floor_at_zero)

    ## correct the lookback if we're jamming stuff together
    self.ew_lookback_corrected = length_of_data * ew_lookback

    size = data_as_df.shape[1]
    self.corr_with_no_data = boring_corr_matrix(size, offdiag=np.nan)
    self.corr_for_cleaning = boring_corr_matrix(
        size, offdiag=boring_offdiag)

    self.kwargs = kwargs
    self.data_as_df = data_as_df
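# boring_corr_matrix is another imported helper. Below is a plausible sketch,
# judging only by how it is called in these snippets (a constant off-diagonal
# with a fixed diagonal); the real implementation may differ.
import numpy as np

def boring_corr_matrix_sketch(size, offdiag=0.99, diag=1.0):
    corrmat = np.full((size, size), offdiag)
    np.fill_diagonal(corrmat, diag)
    return corrmat

print(boring_corr_matrix_sketch(3, offdiag=0.5))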
def get_costs(self, instrument_code):
    """
    Get the relevant kinds of cost for an instrument

    :param instrument_code: instrument to value for
    :type instrument_code: str

    :returns: 2 tuple
    """
    use_SR_costs = str2Bool(self.parent.config.use_SR_costs)

    if use_SR_costs:
        return (self.get_SR_cost(instrument_code), None)
    else:
        return (None, self.get_cash_costs(instrument_code))
def correlation_single_period(data_for_estimate, using_exponent=True,
                              min_periods=20, ew_lookback=250,
                              floor_at_zero=True):

    using_exponent = str2Bool(using_exponent)

    if using_exponent:
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))

        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)
        corrmat = corrmat.values[-1]
    else:
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def get_forecast_correlation_matrices_from_code_list(self, codes_to_use):
    """
    Returns a correlationList object which contains a history of
    correlation matrices

    :param codes_to_use:
    :type codes_to_use: list of str

    :returns: correlation_list object

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping_estimate
    >>> from systems.basesystem import System
    >>> (accounts, fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping_estimate()
    >>> system=System([rawdata, rules, fcs, accounts, ForecastCombineEstimated()], data, config)
    >>> ans=system.combForecast.get_forecast_correlation_matrices("EDOLLAR")
    >>> ans.corr_list[-1]
    array([[ 1.        ,  0.1168699 ,  0.08038547],
           [ 0.1168699 ,  1.        ,  0.86907623],
           [ 0.08038547,  0.86907623,  1.        ]])
    >>> print(ans.columns)
    ['carry', 'ewmac16', 'ewmac8']
    """
    # Get some useful stuff from the config
    corr_params = copy(self.parent.config.forecast_correlation_estimate)

    # do we pool our estimation?
    pooling = str2Bool(corr_params.pop("pool_instruments"))

    # which function to use for calculation
    corr_func = resolve_function(corr_params.pop("func"))

    self.log.terse("Calculating forecast correlations over %s" %
                   ", ".join(codes_to_use))

    forecast_data = [
        self.get_all_forecasts(
            instr_code, self.check_for_cheap_enough_rules(instr_code))
        for instr_code in codes_to_use]

    # if we're not pooling passes a list of one
    forecast_data = [forecast_ts.ffill() for forecast_ts in forecast_data]

    return corr_func(forecast_data, log=self.log.setup(call="correlation"),
                     **corr_params)
def forecast_scalar(cs_forecasts, window=250000, min_periods=500,
                    backfill=True):
    """
    Work out the scaling factor for cs_forecasts such that T*x has an abs
    value of 10 (or whatever the average absolute forecast is)

    :param cs_forecasts: forecasts, cross sectionally
    :type cs_forecasts: pd.DataFrame TxN

    :param window: rolling window for the time series average
    :type window: int

    :param min_periods: minimum periods before we estimate a scalar
    :type min_periods: int

    :returns: pd.Series
    """
    backfill = str2Bool(backfill)  # in yaml will come in as text

    # We don't allow this to be changed in config
    target_abs_forecast = get_default_config_key_value(
        "average_absolute_forecast")
    if target_abs_forecast is missing_data:
        raise Exception(
            "average_absolute_forecast not defined in system defaults file")

    # Remove zeros/nans
    copy_cs_forecasts = copy(cs_forecasts)
    copy_cs_forecasts[copy_cs_forecasts == 0.0] = np.nan

    # Take CS average first
    # we do this before we get the final TS average otherwise get jumps in
    # scalar when new markets introduced
    if copy_cs_forecasts.shape[1] == 1:
        x = copy_cs_forecasts.abs().iloc[:, 0]
    else:
        x = copy_cs_forecasts.ffill().abs().median(axis=1)

    # now the TS
    avg_abs_value = x.rolling(window=window, min_periods=min_periods).mean()
    scaling_factor = target_abs_forecast / avg_abs_value

    if backfill:
        scaling_factor = scaling_factor.fillna(method="bfill")

    return scaling_factor
def correlation_calculator(data_for_estimate, using_exponent=True,
                           min_periods=20, ew_lookback=250):
    """
    We generate a correlation from a pd.DataFrame, which could have been
    stacked up

    :param data_for_estimate: simData to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting? If not
        every item is weighted equally
    :type using_exponent: bool

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :returns: 2-dim square np.array
    """
    # These may come from config as str
    using_exponent = str2Bool(using_exponent)

    if using_exponent:
        # If we have stacked there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data

        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = data_for_estimate.ewm(
            span=ew_lookback, min_periods=min_periods).corr(pairwise=True)

        number_of_items=data_for_estimate.shape[1]

        # only want the final one
        corrmat = corrmat.iloc[-number_of_items:,].values
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    return corrmat
def correlation_calculator(data_for_estimate, using_exponent=True,
                           min_periods=20, ew_lookback=250):
    """
    We generate a correlation from a pd.DataFrame, which could have been
    stacked up

    :param data_for_estimate: simData to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting? If not
        every item is weighted equally
    :type using_exponent: bool

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :returns: 2-dim square np.array
    """
    # These may come from config as str
    using_exponent = str2Bool(using_exponent)

    if using_exponent:
        # If we have stacked there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data

        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = data_for_estimate.ewm(
            span=ew_lookback, min_periods=min_periods).corr(pairwise=True)

        number_of_items = data_for_estimate.shape[1]

        # only want the final one
        corrmat = corrmat.iloc[-number_of_items:, ].values
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    return corrmat
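# Why the iloc slice above works: .ewm(...).corr(pairwise=True) returns the
# N x N correlation matrices stacked by date in a multi-indexed frame, so
# the final N rows hold the most recent matrix.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.normal(size=(500, 3)), columns=list("abc"))
corrmat = df.ewm(span=250, min_periods=20).corr(pairwise=True)
number_of_items = df.shape[1]
final_corr = corrmat.iloc[-number_of_items:, ].values
print(final_corr.shape)  # (3, 3), with ones on the diagonal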
def get_forecast_correlation_matrices(
        self, instrument_code: str) -> CorrelationList:
    """
    Returns a correlationList object which contains a history of
    correlation matrices

    :param instrument_code:
    :type instrument_code: str

    :returns: correlation_list object

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping_estimate
    >>> from systems.basesystem import System
    >>> (accounts, fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping_estimate()
    >>> system=System([rawdata, rules, fcs, accounts, ForecastCombineEstimated()], data, config)
    >>> ans=system.combForecast.get_forecast_correlation_matrices("EDOLLAR")
    >>> ans.corr_list[-1]
    array([[ 1.        ,  0.1168699 ,  0.08038547],
           [ 0.1168699 ,  1.        ,  0.86907623],
           [ 0.08038547,  0.86907623,  1.        ]])
    >>> print(ans.columns)
    ['carry', 'ewmac16', 'ewmac8']
    """
    # Get some useful stuff from the config
    corr_params = copy(self.config.forecast_correlation_estimate)

    # do we pool our estimation?
    pooling = str2Bool(corr_params.pop("pool_instruments"))

    if pooling:
        # find set of instruments with same trading rules as I have
        if self._use_estimated_weights():
            codes_to_use = self.has_same_cheap_rules_as_code(
                instrument_code)
        else:
            codes_to_use = self.has_same_rules_as_code(instrument_code)
    else:
        codes_to_use = [instrument_code]

    correlation_list = (
        self.get_forecast_correlation_matrices_from_instrument_code_list(
            codes_to_use))

    return correlation_list
def forecast_scalar(
    cs_forecasts: pd.DataFrame,
    target_abs_forecast: float = 10.0,
    window: int = 250000,  ## JUST A VERY LARGE NUMBER TO USE ALL DATA
    min_periods=500,  # MINIMUM PERIODS BEFORE WE ESTIMATE A SCALAR
    backfill=True  ## BACKFILL OUR FIRST ESTIMATE, SLIGHTLY CHEATING, BUT...
) -> pd.Series:
    """
    Work out the scaling factor for cs_forecasts such that T*x has an abs
    value of 10 (or whatever the average absolute forecast is)

    :param cs_forecasts: forecasts, cross sectionally
    :type cs_forecasts: pd.DataFrame TxN

    :param target_abs_forecast: average absolute forecast we aim for
    :type target_abs_forecast: float

    :param window: rolling window for the time series average
    :type window: int

    :param min_periods: minimum periods before we estimate a scalar
    :type min_periods: int

    :returns: pd.Series
    """
    backfill = str2Bool(backfill)  # in yaml will come in as text

    # Remove zeros/nans
    copy_cs_forecasts = copy(cs_forecasts)
    copy_cs_forecasts[copy_cs_forecasts == 0.0] = np.nan

    # Take CS average first
    # we do this before we get the final TS average otherwise get jumps in
    # scalar when new markets introduced
    if copy_cs_forecasts.shape[1] == 1:
        x = copy_cs_forecasts.abs().iloc[:, 0]
    else:
        x = copy_cs_forecasts.ffill().abs().median(axis=1)

    # now the TS
    avg_abs_value = x.rolling(window=window, min_periods=min_periods).mean()
    scaling_factor = target_abs_forecast / avg_abs_value

    if backfill:
        scaling_factor = scaling_factor.fillna(method="bfill")

    return scaling_factor
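# Small check of the zero-removal step above, with invented numbers: zeros
# would otherwise drag the cross-sectional median towards zero and inflate
# the scalar, so they are treated as missing instead.
from copy import copy

import numpy as np
import pandas as pd

cs = pd.DataFrame({"rule_a": [0.0, 5.0, -5.0], "rule_b": [4.0, 0.0, 6.0]})
clean = copy(cs)
clean[clean == 0.0] = np.nan
print(clean.ffill().abs().median(axis=1))  # 4.0, 4.5, 5.5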
def get_forecast_correlation_matrices_from_instrument_code_list(
        self, codes_to_use: list) -> CorrelationList:
    """
    Returns a correlationList object which contains a history of
    correlation matrices

    :param codes_to_use:
    :type codes_to_use: list of str

    :returns: correlation_list object

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping_estimate
    >>> from systems.basesystem import System
    >>> (accounts, fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping_estimate()
    >>> system=System([rawdata, rules, fcs, accounts, ForecastCombineEstimated()], data, config)
    >>> ans=system.combForecast.get_forecast_correlation_matrices("EDOLLAR")
    >>> ans.corr_list[-1]
    array([[ 1.        ,  0.1168699 ,  0.08038547],
           [ 0.1168699 ,  1.        ,  0.86907623],
           [ 0.08038547,  0.86907623,  1.        ]])
    >>> print(ans.columns)
    ['carry', 'ewmac16', 'ewmac8']
    """
    # Get some useful stuff from the config
    corr_params = copy(self.config.forecast_correlation_estimate)

    # do we pool our estimation?
    # not used here since we've looked at this already
    _pooling_already_decided = str2Bool(
        corr_params.pop("pool_instruments"))

    # which function to use for calculation
    corr_func = resolve_function(corr_params.pop("func"))

    self.log.terse("Calculating forecast correlations over %s" %
                   ", ".join(codes_to_use))

    forecast_data = self.get_all_forecasts_for_a_list_of_instruments(
        codes_to_use)

    correlation_list = corr_func(forecast_data, **corr_params)

    return correlation_list
def _system_init(self, system):
    """
    When we add this stage object to a system, this code will be run

    It will determine if we use an estimate or a fixed class of object
    """
    if str2Bool(system.config.use_instrument_weight_estimates):
        fixed_flavour = False
    else:
        fixed_flavour = True

    if fixed_flavour:
        self.__class__ = PortfoliosFixed
        self.__init__()
        setattr(self, "parent", system)
    else:
        self.__class__ = PortfoliosEstimated
        self.__init__()
        setattr(self, "parent", system)
def _system_init(self, system):
    """
    When we add this stage object to a system, this code will be run

    It will determine if we use an estimate or a fixed class of object
    """
    if str2Bool(system.config.use_forecast_scale_estimates):
        fixed_flavour = False
    else:
        fixed_flavour = True

    if fixed_flavour:
        self.__class__ = ForecastScaleCapFixed
        self.__init__()
        setattr(self, "parent", system)
    else:
        self.__class__ = ForecastScaleCapEstimated
        self.__init__()
        setattr(self, "parent", system)
def _system_init(self, system):
    """
    When we add this stage object to a system, this code will be run

    It will determine if we use an estimate or a fixed class of object
    """
    if str2Bool(system.config.use_forecast_scale_estimates):
        fixed_flavour = False
    else:
        fixed_flavour = True

    if fixed_flavour:
        self.__class__ = ForecastScaleCapFixed
        self.__init__()
        setattr(self, "parent", system)
    else:
        self.__class__ = ForecastScaleCapEstimated
        self.__init__()
        setattr(self, "parent", system)
def _system_init(self, system):
    """
    When we add this stage object to a system, this code will be run

    It will determine if we use an estimate or a fixed class of object
    """
    if str2Bool(system.config.use_instrument_weight_estimates):
        fixed_flavour = False
    else:
        fixed_flavour = True

    if fixed_flavour:
        self.__class__ = PortfoliosFixed
        self.__init__()
        setattr(self, "parent", system)
    else:
        self.__class__ = PortfoliosEstimated
        self.__init__()
        setattr(self, "parent", system)
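# Self-contained illustration of the __class__ swap these _system_init
# variants rely on: the stage object is re-pointed at the fixed or estimated
# implementation when it is bound to the system. The class names here are
# invented for the demo.
class Fixed:
    def flavour(self):
        return "fixed"

class Estimated:
    def flavour(self):
        return "estimated"

class Stage(Fixed):
    def _system_init(self, use_estimates):
        self.__class__ = Estimated if use_estimates else Fixed

stage = Stage()
stage._system_init(use_estimates=True)
print(stage.flavour())  # estimated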
def get_optimal_weights_with_fixed_contract_values(
        self,
        relevant_date: datetime.datetime = arg_not_supplied,
        previous_weights: portfolioWeights = arg_not_supplied
) -> portfolioWeights:

    covariance_matrix = self.get_covariance_matrix(
        relevant_date=relevant_date)
    risk_aversion = self.risk_aversion_coefficient()
    expected_returns = self.get_implied_expected_returns(relevant_date)
    per_contract_value = self.get_per_contract_value(relevant_date)
    max_portfolio_weights = self.get_maximum_portfolio_weight_at_date(
        relevant_date)
    original_portfolio_weights = self.original_portfolio_weights_for_relevant_date(
        relevant_date)
    max_risk_as_variance = self.get_max_risk_as_variance()
    costs = self.get_costs_per_contract_as_proportion_of_capital_all_instruments()
    use_process_pool = str2Bool(
        self.config.small_system['use_process_pool'])

    ## split up a bit??
    optimal_weights = optimise_with_fixed_contract_values(
        per_contract_value=per_contract_value,
        expected_returns=expected_returns,
        risk_aversion=risk_aversion,
        covariance_matrix=covariance_matrix,
        max_portfolio_weights=max_portfolio_weights,
        original_portfolio_weights=original_portfolio_weights,
        max_risk_as_variance=max_risk_as_variance,
        costs=costs,
        previous_weights=previous_weights,
        use_process_pool=use_process_pool)

    return optimal_weights
def calculation_of_raw_forecast_weights(self, instrument_code):
    """
    Estimate the forecast weights for this instrument

    We store this intermediate step to expose the calculation object

    :param instrument_code:
    :type instrument_code: str

    :returns: TxK pd.DataFrame containing weights, columns are trading rule
        variation names, T covers all
    """
    def _calculation_of_raw_forecast_weights(system, NotUsed1, NotUsed2,
                                             this_stage, codes_to_use,
                                             weighting_func,
                                             **weighting_params):

        this_stage.log.terse("Calculating raw forecast weights over %s" %
                             ", ".join(codes_to_use))

        if hasattr(system, "accounts"):
            pandl_forecasts = [
                this_stage.pandl_for_instrument_rules_unweighted(code)
                for code in codes_to_use]
        else:
            error_msg = ("You need an accounts stage in the system to "
                         "estimate forecast weights")
            this_stage.log.critical(error_msg)

        output = weighting_func(pandl_forecasts,
                                log=self.log.setup(call="weighting"),
                                **weighting_params)

        return output

    ## Get some useful stuff from the config
    weighting_params = copy(self.parent.config.forecast_weight_estimate)

    ## do we pool our estimation?
    pooling = str2Bool(weighting_params.pop("pool_instruments"))

    ## which function to use for calculation
    weighting_func = resolve_function(weighting_params.pop("func"))

    if pooling:
        ## find set of instruments with same trading rules as I have
        codes_to_use = self._has_same_rules_as_code(instrument_code)
        instrument_code_ref = ALL_KEYNAME

        ## We label with the list of codes used
        label = '_'.join(codes_to_use)
    else:
        codes_to_use = [instrument_code]
        label = instrument_code
        instrument_code_ref = instrument_code

    ##
    ## label: how we identify this thing in the cache
    ## instrument_code_ref: either the instrument code, or 'all markets' if pooling
    ## _get_raw_forecast_weights: function to call if we don't find in cache
    ## self: this_system stage object
    ## codes_to_use: instrument codes to get data for
    ## weighting_func: function to call to calculate weights
    ## **weighting_params: parameters to pass to weighting function
    ##
    raw_forecast_weights_calcs = self.parent.calc_or_cache_nested(
        'calculation_of_raw_forecast_weights',
        instrument_code_ref, label,
        _calculation_of_raw_forecast_weights,
        self, codes_to_use, weighting_func, **weighting_params)

    return raw_forecast_weights_calcs
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling

    It's important that forward filling, or index / ffill / diff has been
    done before we begin

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param dict_group: dictionary of groupings; used to replace missing
        values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when
        no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """
    cleaning = str2Bool(cleaning)

    ## grouping dictionary, convert to faster, algo friendly, form
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    data = data.resample(frequency, how="last")

    ### Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    ## create a list of correlation matrices
    corr_list = []

    log.terse("Correlation estimate")

    ## Now for each time period, estimate correlation
    for fit_period in fit_dates:
        log.msg("Fitting from %s to %s" % (fit_period.period_start,
                                           fit_period.period_end))

        if fit_period.no_data:
            ## no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data,
                                        boring_offdiag)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def calculation_of_raw_forecast_weights_for_instrument(self, instrument_code):
    """
    Does an optimisation for a single instrument

    We do this if we can't do the special case of a pooled optimisation

    Estimate the forecast weights for this instrument

    We store this intermediate step to expose the calculation object

    :param instrument_code:
    :type instrument_code: str

    :returns: TxK pd.DataFrame containing weights, columns are trading rule
        variation names, T covers all
    """
    def _calculation_of_raw_forecast_weights(system, instrument_code,
                                             this_stage, codes_to_use,
                                             weighting_func, pool_costs,
                                             **weighting_params):

        this_stage.log.terse(
            "Calculating raw forecast weights for %s, over %s" %
            (instrument_code, ", ".join(codes_to_use)))

        rule_list = self.apply_cost_weighting(instrument_code)

        weight_func = weighting_func(log=self.log.setup(call="weighting"),
                                     **weighting_params)
        if weight_func.need_data():

            ## returns a list of accountCurveGroups
            pandl_forecasts = [this_stage.get_returns_for_optimisation(code)
                               for code in codes_to_use]

            ## the current curve is special
            pandl_forecasts_this_code = \
                this_stage.get_returns_for_optimisation(instrument_code)

            ## have to decode these
            ## returns two lists of pd.DataFrames
            (pandl_forecasts_gross,
             pandl_forecasts_costs) = decompose_group_pandl(
                 pandl_forecasts, pandl_forecasts_this_code,
                 pool_costs=pool_costs)

            ## The weighting function requires two lists of pd.DataFrames,
            ## one gross, one for costs
            weight_func.set_up_data(data_gross=pandl_forecasts_gross,
                                    data_costs=pandl_forecasts_costs)
        else:
            ## in the case of equal weights, don't need data
            forecasts = this_stage.get_all_forecasts(instrument_code,
                                                     rule_list)
            weight_func.set_up_data(weight_matrix=forecasts)

        SR_cost_list = [
            this_stage.get_SR_cost_for_instrument_forecast(
                instrument_code, rule_variation_name)
            for rule_variation_name in rule_list]

        weight_func.optimise(ann_SR_costs=SR_cost_list)

        return weight_func

    ## Get some useful stuff from the config
    weighting_params = copy(self.parent.config.forecast_weight_estimate)

    ## do we pool our estimation?
    pooling_returns = str2Bool(
        self.parent.config.forecast_weight_estimate["pool_gross_returns"])
    pool_costs = str2Bool(
        self.parent.config.forecast_cost_estimates["use_pooled_costs"])

    ## which function to use for calculation
    weighting_func = resolve_function(weighting_params.pop("func"))

    if pooling_returns:
        ## find set of instruments with same trading rules as I have
        codes_to_use = self.has_same_cheap_rules_as_code(instrument_code)
    else:
        codes_to_use = [instrument_code]

    ##
    ## _get_raw_forecast_weights: function to call if we don't find in cache
    ## self: this_system stage object
    ## codes_to_use: instrument codes to get data for
    ## weighting_func: function to call to calculate weights
    ## **weighting_params: parameters to pass to weighting function
    ##
    raw_forecast_weights_calcs = self.parent.calc_or_cache(
        'calculation_of_raw_forecast_weights',
        instrument_code,
        _calculation_of_raw_forecast_weights,
        self, codes_to_use, weighting_func, pool_costs,
        **weighting_params)

    return raw_forecast_weights_calcs
def get_forecast_correlation_matrices(self, instrument_code):
    """
    Returns a correlationList object which contains a history of
    correlation matrices

    :param instrument_code:
    :type instrument_code: str

    :returns: correlation_list object

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping_estimate
    >>> from systems.basesystem import System
    >>> (accounts, fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping_estimate()
    >>> system=System([rawdata, rules, fcs, accounts, ForecastCombineEstimated()], data, config)
    >>> ans=system.combForecast.get_forecast_correlation_matrices("EDOLLAR")
    >>> ans.corr_list[-1]
    array([[ 1.        ,  0.1168699 ,  0.08038547],
           [ 0.1168699 ,  1.        ,  0.86907623],
           [ 0.08038547,  0.86907623,  1.        ]])
    >>> print(ans.columns)
    ['carry', 'ewmac16', 'ewmac8']
    """
    def _get_forecast_correlation_matrices(system, NotUsed1, NotUsed2,
                                           this_stage, codes_to_use,
                                           corr_func, **corr_params):
        print(__file__ + ":" +
              str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
              "Calculating forecast correlations over %s" %
              ", ".join(codes_to_use))

        forecast_data = [
            this_stage.get_all_forecasts(
                instr_code, this_stage.apply_cost_weighting(instr_code))
            for instr_code in codes_to_use]

        ## if we're not pooling passes a list of one
        forecast_data = [forecast_ts.ffill()
                         for forecast_ts in forecast_data]

        return corr_func(forecast_data,
                         log=self.log.setup(call="correlation"),
                         **corr_params)

    ## Get some useful stuff from the config
    corr_params = copy(self.parent.config.forecast_correlation_estimate)

    ## do we pool our estimation?
    pooling = str2Bool(corr_params.pop("pool_instruments"))

    ## which function to use for calculation
    corr_func = resolve_function(corr_params.pop("func"))

    if pooling:
        ## find set of instruments with same trading rules as I have
        codes_to_use = self.has_same_cheap_rules_as_code(instrument_code)
        instrument_code_ref = ALL_KEYNAME

        ## We label with the list of codes used
        label = '_'.join(codes_to_use)
    else:
        codes_to_use = [instrument_code]
        label = instrument_code
        instrument_code_ref = instrument_code

    ##
    ## label: how we identify this thing in the cache
    ## instrument_code_ref: either the instrument code, or 'all markets' if pooling
    ## _get_forecast_correlation_matrices: function to call if we don't find in cache
    ## self: this_system stage object
    ## codes_to_use: instrument codes
    ## func: function to call to calculate correlations
    ## **corr_params: parameters to pass to correlation function
    ##
    forecast_corr_list = self.parent.calc_or_cache_nested(
        'get_forecast_correlation_matrices',
        instrument_code_ref, label,
        _get_forecast_correlation_matrices,
        self, codes_to_use, corr_func, **corr_params)

    return forecast_corr_list
def use_estimated_instrument_div_mult(self):
    """
    It will determine if we use an estimate or a fixed class of object
    """
    return str2Bool(self.parent.config.use_instrument_div_mult_estimates)
def __init__(self, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, method="bootstrap",
             cleaning=True, cost_multiplier=1.0, apply_cost_weight=True,
             ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
             **passed_params):
    """
    Set up optimiser

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param method: Method used for fitting, one of 'bootstrap', 'shrinkage',
        'one_period'
    :type method: str

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, method, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage
    ## and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimisation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(method, optimise_params,
                                    moments_estimator)

    setattr(self, "optimiser", optimiser)
    setattr(self, "log", log)
    setattr(self, "frequency", frequency)
    setattr(self, "method", method)
    setattr(self, "equalise_gross", equalise_gross)
    setattr(self, "cost_multiplier", cost_multiplier)
    setattr(self, "annualisation", annualisation)
    setattr(self, "period_target_SR", period_target_SR)
    setattr(self, "date_method", date_method)
    setattr(self, "rollyears", rollyears)
    setattr(self, "cleaning", cleaning)
    setattr(self, "apply_cost_weight", apply_cost_weight)
def __init__(self, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, method="bootstrap",
             cleaning=True, cost_multiplier=1.0, apply_cost_weight=True,
             ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
             **passed_params):
    """
    Set up optimiser

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param method: Method used for fitting, one of 'bootstrap', 'shrinkage',
        'one_period'
    :type method: str

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, method, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage
    ## and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimisation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(method, optimise_params,
                                    moments_estimator)

    setattr(self, "optimiser", optimiser)
    setattr(self, "log", log)
    setattr(self, "frequency", frequency)
    setattr(self, "method", method)
    setattr(self, "equalise_gross", equalise_gross)
    setattr(self, "cost_multiplier", cost_multiplier)
    setattr(self, "annualisation", annualisation)
    setattr(self, "period_target_SR", period_target_SR)
    setattr(self, "date_method", date_method)
    setattr(self, "rollyears", rollyears)
    setattr(self, "cleaning", cleaning)
    setattr(self, "apply_cost_weight", apply_cost_weight)
def calculation_of_pooled_raw_forecast_weights(self, instrument_code):
    """
    Estimate the forecast weights for this instrument

    We store this intermediate step to expose the calculation object

    :param instrument_code:
    :type instrument_code: str

    :returns: TxK pd.DataFrame containing weights, columns are trading rule
        variation names, T covers all
    """
    def _calculation_of_pooled_raw_forecast_weights(system,
                                                    instrument_code_ref,
                                                    this_stage,
                                                    codes_to_use,
                                                    weighting_func,
                                                    **weighting_params):
        print(__file__ + ":" +
              str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
              "Calculating pooled raw forecast weights over instruments: %s" %
              instrument_code_ref)

        rule_list = self.apply_cost_weighting(instrument_code)

        weight_func = weighting_func(log=self.log.setup(call="weighting"),
                                     **weighting_params)

        if weight_func.need_data():

            ## returns a list of accountCurveGroups
            ## cost pooling will already have been applied
            pandl_forecasts = [this_stage.get_returns_for_optimisation(code)
                               for code in codes_to_use]

            ## have to decode these
            ## returns two lists of pd.DataFrames
            (pandl_forecasts_gross,
             pandl_forecasts_costs) = decompose_group_pandl(
                 pandl_forecasts, pool_costs=True)

            ## The weighting function requires two lists of pd.DataFrames,
            ## one gross, one for costs
            weight_func.set_up_data(data_gross=pandl_forecasts_gross,
                                    data_costs=pandl_forecasts_costs)
        else:
            ## in the case of equal weights, don't need data
            forecasts = this_stage.get_all_forecasts(instrument_code,
                                                     rule_list)
            weight_func.set_up_data(weight_matrix=forecasts)

        SR_cost_list = [
            this_stage.get_SR_cost_for_instrument_forecast(
                instrument_code, rule_variation_name)
            for rule_variation_name in rule_list]

        weight_func.optimise(ann_SR_costs=SR_cost_list)

        return weight_func

    ## Get some useful stuff from the config
    weighting_params = copy(self.parent.config.forecast_weight_estimate)

    ## do we pool our estimation?
    pooling_returns = str2Bool(weighting_params.pop("pool_gross_returns"))
    pooling_costs = self.parent.config.forecast_cost_estimates['use_pooled_costs']

    assert pooling_returns and pooling_costs

    ## which function to use for calculation
    weighting_func = resolve_function(weighting_params.pop("func"))

    codes_to_use = self.has_same_cheap_rules_as_code(instrument_code)

    ## ensures we don't repeat optimisation
    instrument_code_ref = "_".join(codes_to_use)

    ##
    ## _get_raw_forecast_weights: function to call if we don't find in cache
    ## self: this_system stage object
    ## codes_to_use: instrument codes to get data for
    ## weighting_func: function to call to calculate weights
    ## **weighting_params: parameters to pass to weighting function
    ##
    raw_forecast_weights_calcs = self.parent.calc_or_cache(
        'calculation_of_raw_forecast_weights',
        instrument_code_ref,
        _calculation_of_pooled_raw_forecast_weights,
        self, codes_to_use, weighting_func, **weighting_params)

    return raw_forecast_weights_calcs
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, fit_method="bootstrap",
             cleaning=True, **passed_params):
    """
    Optimise weights over some returns data

    :param data: Returns data
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param fit_method: Method used for fitting, one of 'bootstrap',
        'shrinkage', 'one_period'
    :type fit_method: str

    :param cleaning: Should we clean correlations so can use incomplete
        data?
    :type cleaning: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## A moments estimator works out the mean, vol, correlation
    moments_estimator = momentsEstimator(optimise_params)

    ## The optimiser instance will do the optimisation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    ## de-pool pooled data
    data = df_from_list(data)

    ## resample, indexing before and differencing after (returns, remember)
    data = data.cumsum().resample(frequency, how="last").diff()

    ## account for change in frequency
    data = data * annualisation

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser
        ## instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)

        opt_results.append(results_this_period)

        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)

        weight_list.append(weight_row)

    ## Stack everything up
    weight_df = pd.concat(weight_list, axis=0)

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
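# Toy version of the two-row weight blocks stacked above: each fitting
# period contributes a start row and an end row, nudged a day inwards so
# consecutive periods never overlap when concatenated.
import datetime

import pandas as pd

period_start = datetime.datetime(2020, 1, 1)
period_end = datetime.datetime(2020, 12, 31)
weights = [0.6, 0.4]
dindex = [period_start + datetime.timedelta(days=1),
          period_end - datetime.timedelta(days=1)]
weight_row = pd.DataFrame([weights] * 2, index=dindex, columns=["a", "b"])
print(weight_row)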
def get_forecast_scalar(self, instrument_code, rule_variation_name):
    """
    Get the scalar to apply to raw forecasts

    If not cached, these are estimated from past forecasts

    If configuration variable pool_forecasts_for_scalar is "True", then we
    do this across instruments.

    :param instrument_code:
    :type instrument_code: str

    :param rule_variation_name: name of the trading rule variation
    :type rule_variation_name: str

    :returns: float

    >>> from systems.tests.testdata import get_test_object_futures_with_rules
    >>> from systems.basesystem import System
    >>> (rules, rawdata, data, config)=get_test_object_futures_with_rules()
    >>> system1=System([rawdata, rules, ForecastScaleCapEstimated()], data, config)
    >>>
    >>> ## From default
    >>> system1.forecastScaleCap.get_forecast_scalar("EDOLLAR", "ewmac8").tail(3)
                scale_factor
    2015-12-09      5.849888
    2015-12-10      5.850474
    2015-12-11      5.851091
    >>> system1.forecastScaleCap.get_capped_forecast("EDOLLAR", "ewmac8").tail(3)
                  ewmac8
    2015-12-09  0.645585
    2015-12-10 -0.210377
    2015-12-11  0.961821
    >>>
    >>> ## From config
    >>> scale_config=dict(pool_instruments=False)
    >>> config.forecast_scalar_estimate=scale_config
    >>> system3=System([rawdata, rules, ForecastScaleCapEstimated()], data, config)
    >>> system3.forecastScaleCap.get_forecast_scalar("EDOLLAR", "ewmac8").tail(3)
                scale_factor
    2015-12-09      5.652174
    2015-12-10      5.652833
    2015-12-11      5.653444
    >>>
    """
    def _get_forecast_scalar(system, Not_Used, rule_variation_name,
                             this_stage, instrument_list, scalar_function,
                             forecast_scalar_config):
        """
        instrument_list contains multiple things, pools everything across
        all instruments
        """
        print(__file__ + ":" +
              str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +
              "Getting forecast scalar for %s over %s" %
              (rule_variation_name, ", ".join(instrument_list)))

        ## Get forecasts for each instrument
        forecast_list = [
            this_stage.get_raw_forecast(instrument_code, rule_variation_name)
            for instrument_code in instrument_list]

        cs_forecasts = pd.concat(forecast_list, axis=1)

        scaling_factor = scalar_function(cs_forecasts,
                                         **forecast_scalar_config)

        return scaling_factor

    ## Get some useful stuff from the config
    forecast_scalar_config = copy(self.parent.config.forecast_scalar_estimate)

    # The config contains 'func' and some other arguments
    # we turn func which could be a string into a function, and then
    # call it with the other args
    scalarfunction = resolve_function(forecast_scalar_config.pop('func'))

    ## this determines whether we pool or not
    pool_instruments = str2Bool(forecast_scalar_config.pop("pool_instruments"))

    if pool_instruments:
        ## pooled, same for all instruments
        instrument_code_key = ALL_KEYNAME
        instrument_list = self.parent.get_instrument_list()
    else:
        ## not pooled
        instrument_code_key = instrument_code
        instrument_list = [instrument_code]

    forecast_scalar = self.parent.calc_or_cache_nested(
        "get_forecast_scalar",
        instrument_code_key, rule_variation_name,
        _get_forecast_scalar,
        self, instrument_list, scalarfunction, forecast_scalar_config)

    return forecast_scalar
def calculation_of_raw_forecast_weights(self, instrument_code):
    """
    Estimate the forecast weights for this instrument

    We store this intermediate step to expose the calculation object

    :param instrument_code:
    :type instrument_code: str

    :returns: TxK pd.DataFrame containing weights, columns are trading rule
        variation names, T covers all
    """
    def _calculation_of_raw_forecast_weights(system, instrument_code,
                                             this_stage, codes_to_use,
                                             weighting_func,
                                             pool_costs=False,
                                             **weighting_params):

        this_stage.log.terse("Calculating raw forecast weights over %s" %
                             ", ".join(codes_to_use))

        if hasattr(system, "accounts"):
            ## returns a list of accountCurveGroups
            pandl_forecasts = [
                this_stage.pandl_for_instrument_rules_unweighted(code)
                for code in codes_to_use]

            ## the current curve is special
            pandl_forecasts_this_code = \
                this_stage.pandl_for_instrument_rules_unweighted(
                    instrument_code)

            ## have to decode these
            ## returns two lists of pd.DataFrames
            (pandl_forecasts_gross,
             pandl_forecasts_costs) = decompose_group_pandl(
                 pandl_forecasts, pandl_forecasts_this_code,
                 pool_costs=pool_costs)

        else:
            error_msg = ("You need an accounts stage in the system to "
                         "estimate forecast weights")
            this_stage.log.critical(error_msg)

        ## The weighting function requires two lists of pd.DataFrames,
        ## one gross, one for costs
        output = weighting_func(pandl_forecasts_gross,
                                pandl_forecasts_costs,
                                log=self.log.setup(call="weighting"),
                                **weighting_params)

        return output

    ## Get some useful stuff from the config
    weighting_params = copy(self.parent.config.forecast_weight_estimate)

    ## do we pool our estimation?
    pooling = str2Bool(weighting_params.pop("pool_instruments"))

    ## which function to use for calculation
    weighting_func = resolve_function(weighting_params.pop("func"))

    if pooling:
        ## find set of instruments with same trading rules as I have
        codes_to_use = self._has_same_rules_as_code(instrument_code)
    else:
        codes_to_use = [instrument_code]

    ##
    ## _get_raw_forecast_weights: function to call if we don't find in cache
    ## self: this_system stage object
    ## codes_to_use: instrument codes to get data for
    ## weighting_func: function to call to calculate weights
    ## **weighting_params: parameters to pass to weighting function
    ##
    raw_forecast_weights_calcs = self.parent.calc_or_cache(
        'calculation_of_raw_forecast_weights',
        instrument_code,
        _calculation_of_raw_forecast_weights,
        self, codes_to_use, weighting_func, **weighting_params)

    return raw_forecast_weights_calcs
def _get_forecast_scalar_estimated(self, instrument_code,
                                   rule_variation_name):
    """
    Get the scalar to apply to raw forecasts

    If not cached, these are estimated from past forecasts

    If the configuration variable forecast_scalar_estimate['pool_instruments']
    is True, then we do this across instruments.

    :param instrument_code:
    :type instrument_code: str

    :param rule_variation_name: name of the trading rule variation
    :type rule_variation_name: str

    :returns: float

    >>> from systems.tests.testdata import get_test_object_futures_with_rules
    >>> from systems.basesystem import System
    >>> (rules, rawdata, data, config)=get_test_object_futures_with_rules()
    >>> system1=System([rawdata, rules, ForecastScaleCapEstimated()], data, config)
    >>>
    >>> ## From default
    >>> system1.forecastScaleCap.get_forecast_scalar("EDOLLAR", "ewmac8").tail(3)
                scale_factor
    2015-12-09      5.849888
    2015-12-10      5.850474
    2015-12-11      5.851091
    >>> system1.forecastScaleCap.get_capped_forecast("EDOLLAR", "ewmac8").tail(3)
                  ewmac8
    2015-12-09  0.645585
    2015-12-10 -0.210377
    2015-12-11  0.961821
    >>>
    >>> ## From config
    >>> scale_config=dict(pool_instruments=False)
    >>> config.forecast_scalar_estimate=scale_config
    >>> system3=System([rawdata, rules, ForecastScaleCapEstimated()], data, config)
    >>> system3.forecastScaleCap.get_forecast_scalar("EDOLLAR", "ewmac8").tail(3)
                scale_factor
    2015-12-09      5.652174
    2015-12-10      5.652833
    2015-12-11      5.653444
    >>>
    """
    # Get some useful stuff from the config
    forecast_scalar_config = copy(
        self.parent.config.forecast_scalar_estimate)

    # this determines whether we pool or not
    pool_instruments = str2Bool(
        forecast_scalar_config.pop("pool_instruments"))

    if pool_instruments:
        # pooled, same for all instruments
        instrument_code_to_pass = ALL_KEYNAME
    else:
        instrument_code_to_pass = copy(instrument_code)

    scaling_factor = self._get_forecast_scalar_estimated_from_instrument_list(
        instrument_code_to_pass, rule_variation_name,
        forecast_scalar_config)

    # align the estimated scalar onto the forecast's own index
    forecast = self.get_raw_forecast(instrument_code, rule_variation_name)
    scaling_factor = scaling_factor.reindex(forecast.index, method="ffill")

    return scaling_factor
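def _sketch_scalar_reindex():
    # A minimal sketch of the final alignment step above: a scaling
    # factor estimated on a sparser index is forward-filled onto the
    # daily index of the raw forecast. The weekly index, daily index and
    # scalar values below are invented for illustration.
    import pandas as pd

    scaling_factor = pd.Series(
        [5.2, 5.3, 5.4],
        index=pd.date_range("2015-01-04", periods=3, freq="W"))

    forecast_index = pd.date_range("2015-01-05", periods=15, freq="B")

    # Each business day picks up the most recent available estimate
    return scaling_factor.reindex(forecast_index, method="ffill")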
def _use_estimated_weights(self):
    return str2Bool(self.parent.config.use_forecast_scale_estimates)
def __init__(self, data, identifier=None, parent=None,
             frequency="W",
             date_method="expanding", rollyears=20,
             method="bootstrap", cleaning=True,
             cost_multiplier=1.0, apply_cost_weight=True,
             ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
             pool_gross_returns=False, use_pooled_costs=False,
             use_pooled_turnover=None,  # not used
             **passed_params):
    """
    Set up optimiser

    :param data: A dict of account curve dataframes; if not pooled, data
        will only have one entry
    :type data: dict

    :param identifier: A dictionary key which refers to the element in data
        which we're optimising for
    :type identifier: str

    :param parent: The parent object, probably a system stage, which we get
        the log attribute from
    :type parent: systemStage

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param method: Method used for fitting, one of 'bootstrap', 'shrinkage',
        'one_period'
    :type method: str

    :param cleaning: Do we clean weights so we don't need a warmup period?
    :type cleaning: bool

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param pool_gross_returns: Should we pool gross returns together?
    :type pool_gross_returns: bool

    :param use_pooled_costs: Should we pool costs together?
    :type use_pooled_costs: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation and optimisation functions

    :returns: pd.DataFrame of weights
    """

    if parent is None:
        log = logtoscreen("optimiser")
    else:
        log = parent.log

    setattr(self, "log", log)

    # Because the interaction of parameters is complex, display warnings
    self.display_warnings(cost_multiplier, equalise_gross,
                          apply_cost_weight, method, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    # annualisation
    ANN_DICT = dict(
        D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ANN_DICT.get(frequency, 1.0)

    # pass the ann_target_SR argument, not the module constant, so that
    # caller overrides take effect
    self.set_up_data(data, frequency=frequency,
                     equalise_gross=equalise_gross,
                     cost_multiplier=cost_multiplier,
                     annualisation=annualisation,
                     ann_target_SR=ann_target_SR,
                     use_pooled_costs=use_pooled_costs,
                     pool_gross_returns=pool_gross_returns,
                     identifier=identifier)

    # A moments estimator works out the mean, vol, correlation
    # Also stores annualisation factor and target SR (used for shrinkage
    # and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    # The optimiser instance will do the optimisation once we have the
    # appropriate data
    optimiser = optimiserWithParams(method, optimise_params,
                                    moments_estimator)

    setattr(self, "optimiser", optimiser)
    setattr(self, "frequency", frequency)
    setattr(self, "method", method)
    setattr(self, "equalise_gross", equalise_gross)
    setattr(self, "cost_multiplier", cost_multiplier)
    setattr(self, "annualisation", annualisation)
    setattr(self, "date_method", date_method)
    setattr(self, "rollyears", rollyears)
    setattr(self, "cleaning", cleaning)
    setattr(self, "apply_cost_weight", apply_cost_weight)
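def _sketch_annualisation():
    # Worked example of the annualisation lookup used by the optimiser
    # constructors here: weekly downsampling gives 52 periods per year,
    # so a per-period Sharpe target is the annual target divided by
    # sqrt(52). The constant values below are assumed for illustration.
    from math import sqrt

    BUSINESS_DAYS_IN_YEAR, WEEKS_IN_YEAR, MONTHS_IN_YEAR = 256.0, 52.0, 12.0
    TARGET_ANN_SR = 0.5  # assumed target annual Sharpe ratio

    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get("W", 1.0)            # 52.0
    period_target_SR = TARGET_ANN_SR / sqrt(annualisation)
    return annualisation, period_target_SR            # (52.0, ~0.069)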
def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"),
             frequency="W", date_method="expanding", rollyears=20,
             fit_method="bootstrap", cleaning=True,
             equalise_gross=False, cost_multiplier=1.0,
             apply_cost_weight=True, ceiling_cost_SR=0.13,
             ann_target_SR=TARGET_ANN_SR, **passed_params):
    """
    Optimise weights over some returns data

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_costs: Returns data for costs
    :type data_costs: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param fit_method: Method used for fitting, one of 'bootstrap',
        'shrinkage', 'one_period'
    :type fit_method: str

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param ceiling_cost_SR: The maximum SR cost beyond which we don't
        allocate to an asset. Set to 999 to avoid using.
    :type ceiling_cost_SR: float

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation and optimisation functions

    :returns: pd.DataFrame of weights
    """

    # Because the interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    # annualisation
    ann_dict = dict(
        D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)

    # A moments estimator works out the mean, vol, correlation
    # Also stores annualisation factor and target SR (used for shrinkage
    # and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    # The optimiser instance will do the optimisation once we have the
    # appropriate data; pass fit_method so it knows which routine to use
    optimiser = optimiserWithParams(fit_method, optimise_params,
                                    moments_estimator)

    # resample, indexing before and differencing after (returns, remember)
    data_gross = [data_item.cumsum().resample(frequency).last().diff()
                  for data_item in data_gross]
    data_costs = [data_item.cumsum().resample(frequency).last().diff()
                  for data_item in data_costs]

    # stack / de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    # net gross and costs
    if equalise_gross:
        log.terse("Setting all gross returns to be identical - "
                  "optimisation driven only by costs")
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" %
                  cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        ceiling_cost_ann_SR=ceiling_cost_SR,
                        period_target_SR=period_target_SR)

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    # Now for each time period, estimate weights
    # create a list of weight vectors
    weight_list = []

    # create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        # Do the optimisation for one period, using a particular
        # optimiser instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)

        opt_results.append(results_this_period)

        weights = results_this_period.weights

        # We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        # create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)

        weight_list.append(weight_row)

    # Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                         data_costs, annualisation)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
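def _sketch_double_row_weights():
    # A minimal sketch of the "double row" trick above: each fit period
    # contributes two identical weight rows, dated one day inside the
    # period's start and end, so stacked periods never overlap and a
    # later forward-fill holds weights constant between fits. Dates,
    # rule names and weights below are invented for illustration.
    import datetime
    import pandas as pd

    periods = [
        (datetime.date(2014, 1, 1), datetime.date(2014, 12, 31), [0.6, 0.4]),
        (datetime.date(2015, 1, 1), datetime.date(2015, 12, 31), [0.5, 0.5]),
    ]

    weight_list = []
    for period_start, period_end, weights in periods:
        dindex = [period_start + datetime.timedelta(days=1),
                  period_end - datetime.timedelta(days=1)]
        weight_list.append(pd.DataFrame([weights] * 2, index=dindex,
                                        columns=["ewmac8", "ewmac32"]))

    # Four rows: start/end pairs for each fit period, ready to ffill
    return pd.concat(weight_list, axis=0)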