def set_up_data(self, data_gross=None, data_costs=None, weight_matrix=None):
    """
    Set up the returns data this optimiser will work over.

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_costs: Returns data for costs
    :type data_costs: pd.DataFrame or list if pooling

    :param weight_matrix: some weight_matrix, used if equal weights and so
        don't need returns data
    :type weight_matrix: pd.DataFrame or list if pooling
    """

    # Fixed-weights short circuit: no returns data needed at all
    if weight_matrix is not None:
        setattr(self, "data", weight_matrix.ffill())
        return None

    log = self.log
    frequency = self.frequency
    equalise_gross = self.equalise_gross
    cost_multiplier = self.cost_multiplier
    annualisation = self.annualisation
    period_target_SR = self.period_target_SR

    # resample, indexing before and differencing after (returns, remember)
    # FIX: resample(frequency, how="last") was removed from pandas;
    # the modern spelling is .resample(frequency).last()
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    # stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    # net gross and costs
    # FIX: use the logger bound above (log was previously assigned but the
    # messages went through raw print()/inspect debug boilerplate)
    if equalise_gross:
        log.terse(
            "Setting all gross returns to be identical - optimisation driven only by costs")
    if cost_multiplier != 1.0:
        log.terse(
            "Using cost multiplier on optimisation of %.2f" % cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        period_target_SR=period_target_SR)

    setattr(self, "data", data)
def set_up_data(self, data_gross=None, data_costs=None, weight_matrix=None):
    """
    Optimise weights over some returns data

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_net: Returns data for costs
    :type data_net: pd.DataFrame or list if pooling

    :param weight_matrix: some weight_matrix, used if equal weights and so
        don't need returns data
    :type weight_matrix: pd.DataFrame or list if pooling
    """

    # Fixed-weights short circuit: no returns data needed at all
    if weight_matrix is not None:
        setattr(self, "data", weight_matrix.ffill())
        return None

    log = self.log
    frequency = self.frequency
    equalise_gross = self.equalise_gross
    cost_multiplier = self.cost_multiplier
    annualisation = self.annualisation
    period_target_SR = self.period_target_SR

    # resample, indexing before and differencing after (returns, remember)
    # FIX: resample(frequency, how="last") was removed from pandas;
    # the modern spelling is .resample(frequency).last()
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    # stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    # net gross and costs
    if equalise_gross:
        log.terse(
            "Setting all gross returns to be identical - optimisation driven only by costs")
    if cost_multiplier != 1.0:
        log.terse(
            "Using cost multiplier on optimisation of %.2f" % cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        period_target_SR=period_target_SR)

    setattr(self, "data", data)
def __init__(self, data, frequency="W", date_method="expanding", rollyears=20,
             dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    Estimate a list of correlation matrices, one per fitting period.

    Data may be a single pd.DataFrame or a list of them if pooling; forward
    filling / index-ffill-diff must already have been done.

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period
    """

    cleaning = str2Bool(cleaning)

    # grouping dictionary, convert to faster, algo friendly, form
    # NOTE(review): group_dict is not referenced again in this method —
    # confirm whether it was meant to be passed onwards
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    # FIX: resample(frequency, how="last") was removed from pandas;
    # the modern spelling is .resample(frequency).last()
    data = data.resample(frequency).last()

    # Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    # fallback matrix used by the cleaning step below
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    # create a list of correlation matrices
    corr_list = []

    print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Correlation estimate")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))

        if fit_period.no_data:
            # no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    Estimate a list of correlation matrices, one per fitting period.

    Data may be a single pd.DataFrame or a list of them if pooling; forward
    filling / index-ffill-diff must already have been done.

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period
    """

    cleaning = str2Bool(cleaning)

    ## grouping dictionary, convert to faster, algo friendly, form
    ## NOTE(review): group_dict is not referenced again in this method —
    ## confirm whether it was meant to be passed onwards
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    ## FIX: resample(frequency, how="last") was removed from pandas;
    ## the modern spelling is .resample(frequency).last()
    data = data.resample(frequency).last()

    ### Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    ## create a list of correlation matrices
    corr_list = []

    ## FIX: route messages through the logger passed in, rather than the
    ## print()/inspect debug boilerplate (matches the sibling variant)
    log.terse("Correlation estimate")

    ## Now for each time period, estimate correlation
    for fit_period in fit_dates:
        log.msg("Estimating from %s to %s" %
                (fit_period.period_start, fit_period.period_end))

        if fit_period.no_data:
            ## no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

    Its important that forward filling, or index / ffill / diff has been done before we begin

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """

    cleaning = str2Bool(cleaning)

    # grouping dictionary, convert to faster, algo friendly, form
    # NOTE(review): group_dict is not referenced again in this method —
    # confirm whether it was meant to be passed onwards
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    # downsample; assumes data has already been ffilled/diffed (see docstring)
    data = data.resample(frequency).last()

    # Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    # matrix with uniform off-diagonals; used as the fallback passed to
    # clean_correlation below when the estimate comes from patchy data
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()
        if fit_period.no_data:
            # no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            current_period_data = data[fit_period.fit_start:fit_period.fit_end]
            # presumably flags columns with usable data in this window —
            # TODO confirm against must_have_item
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, frequency="W", date_method="expanding",
             rollyears=20, **kwargs):
    """
    Build a list of correlation matrices, one per fitting period.

    Accepts either a single pd.DataFrame, or a list of them when pooling.
    Forward filling, or index / ffill / diff, must have been done before
    we begin.

    :param data: simData to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param **kwargs: passed to correlationSinglePeriod

    :returns: CorrelationList
    """

    # Pooled data arrives as a list: downsample each item, then collapse to
    # a single frame (df_from_list keeps timestamps unique via tiny offsets).
    # A lone DataFrame is just downsampled directly.
    if not isinstance(data, list):
        pool_size = 1
        stacked_data = data.resample(frequency).last()
    else:
        pool_size = len(data)
        downsampled = [
            single_item.resample(frequency).last() for single_item in data
        ]
        stacked_data = df_from_list(downsampled)

    asset_names = list(stacked_data.columns)

    # Generate time periods
    fit_dates = generate_fitting_dates(stacked_data,
                                       date_method=date_method,
                                       rollyears=rollyears)

    # One single-period estimator instance, reused across all fit periods
    period_estimator = correlationSinglePeriod(stacked_data,
                                               length_of_data=pool_size,
                                               **kwargs)

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Estimate one correlation matrix per fitting period
    corr_list = []
    for fit_period in fit_dates:
        progress.iterate()
        corr_list.append(period_estimator.calculate(fit_period))

    self.corr_list = corr_list
    self.columns = asset_names
    self.fit_dates = fit_dates
def set_up_data(self, data, frequency="W", equalise_gross=False,
                cost_multiplier=1.0, annualisation=BUSINESS_DAYS_IN_YEAR,
                ann_target_SR=TARGET_ANN_SR, use_pooled_costs=False,
                pool_gross_returns=False, identifier=None):
    """
    Optimise weights over some returns data

    :param data: dict-like of account curves, keyed by identifier
    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :param equalise_gross: Make all gross returns identical (cost-driven fit)
    :param cost_multiplier: Multiply costs by this number
    :param annualisation: Periods per year at daily frequency
    :param ann_target_SR: Annualised target Sharpe Ratio
    :param use_pooled_costs: Pool costs across instruments
    :param pool_gross_returns: Pool gross returns across instruments
    :param identifier: Which element of data we are optimising for
    """
    log = self.log

    # The weighting function requires two lists of pd.DataFrames,
    # one gross, one for costs
    if identifier is None and len(data.keys()) > 1:
        # FIX: "arbitary" typo corrected in the warning message
        log.warning(
            "No identifier passed to optimisation code with pooled data passed - using arbitrary code - results may be weird")
        # FIX: dict views are not subscriptable in Python 3;
        # materialise before indexing
        identifier = list(data.keys())[0]

    (data_gross, data_costs) = decompose_group_pandl(
        data, identifier, pool_costs=use_pooled_costs,
        pool_gross=pool_gross_returns)

    # resample, indexing before and differencing after (returns, remember)
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    # for diagnostic purposes
    # FIXME: HACK TO GET THIS REFACTOR WORKING
    self.unmultiplied_costs = df_from_list(data_costs)

    # net gross and costs
    # first some warnings
    if equalise_gross:
        log.terse(
            "Setting all gross returns to be identical - optimisation driven only by costs"
        )
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" %
                  cost_multiplier)

    # Will be needed if we equalise_gross returns
    period_target_SR = ann_target_SR / (annualisation ** .5)

    # now work out the net
    net_return_data = work_out_net(
        data_gross,
        data_costs,
        equalise_gross=equalise_gross,
        cost_multiplier=cost_multiplier,
        period_target_SR=period_target_SR)

    # FIXME: I STILL HAVE CONCERNS THAT THIS PREMATURE, SO DIVE INTO OPTIMISATION CODE AT NEXT REFACTOR
    net_return_data = df_from_list(net_return_data)

    setattr(self, "data", net_return_data)
    setattr(self, "period_target_SR", period_target_SR)
def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"),
             frequency="W", date_method="expanding", rollyears=20,
             fit_method="bootstrap", cleaning=True, equalise_gross=False,
             cost_multiplier=1.0, apply_cost_weight=True,
             ceiling_cost_SR=0.13, ann_target_SR=TARGET_ANN_SR,
             **passed_params):
    """
    Optimise weights over some returns data

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_net: Returns data for costs
    :type data_net: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param fit_method: Method used for fitting, one of 'bootstrap',
        'shrinkage', 'one_period'
    :type fit_method: str

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param ceiling_cost_SR: What is the maximum SR cost beyond which I don't
        allocate to an asset. Set to 999 to avoid using.
    :type ceiling_cost_SR: float

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """

    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)
    ceiling_cost_SR_period = ceiling_cost_SR / (annualisation ** .5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage
    ## and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## resample, indexing before and differencing after (returns, remember)
    ## FIX: resample(frequency, how="last") was removed from pandas;
    ## the modern spelling is .resample(frequency).last()
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    ## stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    ## net gross and costs
    if equalise_gross:
        log.terse("Setting all gross returns to be identical - optimisation driven only by costs")
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" % cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        ceiling_cost_ann_SR=ceiling_cost_SR,
                        period_target_SR=period_target_SR)

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser
        ## instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)

        opt_results.append(results_this_period)

        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                         data_costs, annualisation)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling

    Its important that forward filling, or index / ffill / diff has been
    done before we begin

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """

    cleaning = str2Bool(cleaning)

    ## grouping dictionary, convert to faster, algo friendly, form
    ## NOTE(review): group_dict is not referenced again in this method —
    ## confirm whether it was meant to be passed onwards
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    ## FIX: resample(frequency, how="last") was removed from pandas;
    ## the modern spelling is .resample(frequency).last()
    data = data.resample(frequency).last()

    ### Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    ## create a list of correlation matrices
    corr_list = []

    log.terse("Correlation estimate")

    ## Now for each time period, estimate correlation
    for fit_period in fit_dates:
        log.msg("Fitting from %s to %s" % (fit_period.period_start,
                                           fit_period.period_end))

        if fit_period.no_data:
            ## no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            # means we can use earlier correlations with sensible values
            # NOTE(review): sibling variants pass a must_haves list as the
            # third argument here; this one passes boring_offdiag — confirm
            # against clean_correlation's signature for this version
            corrmat = clean_correlation(corrmat, corr_with_no_data,
                                        boring_offdiag)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, fit_method="bootstrap",
             cleaning=True, **passed_params):
    """
    Optimise weights over some returns data

    :param data: Returns data
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param fit_method: Method used for fitting, one of 'bootstrap',
        'shrinkage', 'one_period'
    :type fit_method: str

    :param cleaning: Should we clean correlations so can use incomplete data?
    :type cleaning: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## A moments estimator works out the mean, vol, correlation
    moments_estimator = momentsEstimator(optimise_params)

    ## The optimiser instance will do the optimation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    ## de-pool pooled data
    data = df_from_list(data)

    ## resample, indexing before and differencing after (returns, remember)
    ## FIX: resample(frequency, how="last") was removed from pandas;
    ## the modern spelling is .resample(frequency).last()
    data = data.cumsum().resample(frequency).last().diff()

    ## account for change in frequency
    data = data * annualisation

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser
        ## instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)

        opt_results.append(results_this_period)

        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    weight_df = pd.concat(weight_list, axis=0)

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"),
             frequency="W", date_method="expanding", rollyears=20,
             fit_method="bootstrap", cleaning=True, equalise_gross=False,
             cost_multiplier=1.0, apply_cost_weight=True,
             ceiling_cost_SR=0.13, ann_target_SR=TARGET_ANN_SR,
             **passed_params):
    """
    Optimise weights over some returns data

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_net: Returns data for costs
    :type data_net: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param fit_method: Method used for fitting, one of 'bootstrap',
        'shrinkage', 'one_period'
    :type fit_method: str

    :param equalise_gross: Should we equalise expected gross returns so that
        only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect
        costs?
    :type apply_cost_weight: bool

    :param ceiling_cost_SR: What is the maximum SR cost beyond which I don't
        allocate to an asset. Set to 999 to avoid using.
    :type ceiling_cost_SR: float

    :param *_estimate_params: dicts of **kwargs to pass to moments
        estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """

    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation**.5)
    ceiling_cost_SR_period = ceiling_cost_SR / (annualisation**.5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage
    ## and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimation once we have the
    ## appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## resample, indexing before and differencing after (returns, remember)
    ## FIX: resample(frequency, how="last") was removed from pandas;
    ## the modern spelling is .resample(frequency).last()
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    ## stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    ## net gross and costs
    if equalise_gross:
        log.terse(
            "Setting all gross returns to be identical - optimisation driven only by costs"
        )
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" %
                  cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        ceiling_cost_ann_SR=ceiling_cost_SR,
                        period_target_SR=period_target_SR)

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser
        ## instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)

        opt_results.append(results_this_period)

        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [
            fit_period.period_start + datetime.timedelta(days=1),
            fit_period.period_end - datetime.timedelta(days=1)
        ]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)

        weight_list.append(weight_row)

    ## Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                         data_costs, annualisation)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data, frequency="W", date_method="expanding",
             rollyears=20, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling

    Its important that forward filling, or index / ffill / diff has been
    done before we begin

    :param data: simData to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param roll_years: If date_method is "rolling", number of years in window
    :type roll_years: int

    :param **kwargs: passed to correlationSinglePeriod

    :returns: CorrelationList
    """

    # FIX: idiomatic isinstance() instead of type(...) is list (also matches
    # the sibling implementation of this constructor)
    if isinstance(data, list):
        # turn the list of data into a single dataframe. This will have a
        # unique time series, which we manage through adding a small offset
        # of a few microseconds
        length_of_data = len(data)
        data_resampled = [
            data_item.resample(frequency).last() for data_item in data
        ]
        data_as_df = df_from_list(data_resampled)
    else:
        length_of_data = 1
        data_as_df = data.resample(frequency).last()

    column_names = list(data_as_df.columns)

    # Generate time periods
    fit_dates = generate_fitting_dates(
        data_as_df, date_method=date_method, rollyears=rollyears)

    # create a single period correlation estimator
    correlation_estimator_for_one_period = correlationSinglePeriod(
        data_as_df, length_of_data=length_of_data, **kwargs)

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()
        corrmat = correlation_estimator_for_one_period.calculate(fit_period)
        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)