Example #1
    def __init__(self, parent, data, fit_period, optimiser, cleaning):

        if cleaning:
            # Generate 'must have' from the period we need,
            # because if we're bootstrapping the periods could be
            # completely different
            current_period_data = data[fit_period.period_start:
                                       fit_period.period_end]
            must_haves = must_have_item(current_period_data)

        else:
            must_haves = None

        if fit_period.no_data:
            # no data to fit with

            diag = None

            size = current_period_data.shape[1]
            weights_with_nan = [np.nan / size] * size
            weights = weights_with_nan

            if cleaning:
                weights = clean_weights(weights, must_haves)

        else:
            # we have data
            subset_fitting_data = data[fit_period.fit_start:fit_period.fit_end]

            (weights, diag) = optimiser.call(subset_fitting_data, cleaning,
                                             must_haves)

        ##
        setattr(self, "diag", diag)
        setattr(self, "weights", weights)
Example #2
    def __init__(self, parent, data, fit_period, optimiser, cleaning):

        if cleaning:
            ### Generate 'must have' from the period we need
            ###  because if we're bootstrapping the periods could be completely different
            current_period_data = data[fit_period.period_start:fit_period.
                                       period_end]
            must_haves = must_have_item(current_period_data)

        else:
            must_haves = None

        if fit_period.no_data:
            ## no data to fit with

            diag = None

            size = current_period_data.shape[1]
            weights_with_nan = [np.nan / size] * size
            weights = weights_with_nan

            if cleaning:
                weights = clean_weights(weights, must_haves)

        else:
            ## we have data
            subset_fitting_data = data[fit_period.fit_start:fit_period.fit_end]

            (weights, diag) = optimiser.call(subset_fitting_data, cleaning,
                                             must_haves)

        ##
        setattr(self, "diag", diag)
        setattr(self, "weights", weights)
Example #3
    def __init__(self, data, frequency="W",
                 date_method="expanding", rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        cleaning=str2Bool(cleaning)
        group_dict=group_dict_from_natural(dict_group)
        data=df_from_list(data)    
        column_names=list(data.columns)
        data=data.resample(frequency).last()
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)
        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)        
        corr_list=[]        
        print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Correlation estimate")
        for fit_period in fit_dates:
            print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))            
            if fit_period.no_data:
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan                
            else:                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end]  
                corrmat=correlation_single_period(data_for_estimate, **kwargs)

            if cleaning:
                current_period_data=data[fit_period.fit_start:fit_period.fit_end] 
                must_haves=must_have_item(current_period_data)
                corrmat=clean_correlation(corrmat, corr_with_no_data, must_haves) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #4
def get_must_have_dict_from_data(data: pd.DataFrame) -> dict:
    must_have_list = must_have_item(data)
    list_of_asset_names = list(data.columns)
    must_have_dict = dict([
        (asset_name, must_have)
        for asset_name, must_have in zip(list_of_asset_names, must_have_list)
    ])

    return must_have_dict
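A small usage sketch for get_must_have_dict_from_data: with one column that is all NaN over the period, the dict should mark that asset as not a 'must have' (assuming the must_have_item semantics sketched after Example #1):

import numpy as np
import pandas as pd

prices = pd.DataFrame({
    "SP500": [1.0, 1.1, 1.2],
    "BUND": [np.nan, np.nan, np.nan],   # no data at all in this period
})

# Under the assumed semantics this yields {"SP500": True, "BUND": False}
print(get_must_have_dict_from_data(prices))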
Example #5
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", 
                 rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        cleaning=str2Bool(cleaning)
    
        ## grouping dictionary, convert to faster, algo friendly, form
        group_dict=group_dict_from_natural(dict_group)

        data=df_from_list(data)    
        column_names=list(data.columns)

        data=data.resample(frequency).last()
            
        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)
        
        ## create a list of correlation matrices
        corr_list=[]
        
        print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Correlation estimate")
        
        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))
            
            if fit_period.no_data:
                ## no data to fit with
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan
                
            else:
                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end] 
                
                corrmat=correlation_single_period(data_for_estimate, 
                                                     **kwargs)

            if cleaning:
                current_period_data=data[fit_period.fit_start:fit_period.fit_end] 
                must_haves=must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat=clean_correlation(corrmat, corr_with_no_data, must_haves) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #6
    def clean_corr_matrix_given_data(self, fit_period: fitDates,
                                     data_for_correlation: pd.DataFrame):
        if fit_period.no_data:
            return self

        current_period_data = data_for_correlation[
            fit_period.fit_start:fit_period.fit_end]

        # must_haves are items with data in this period, so we need some
        # kind of correlation
        must_haves = must_have_item(current_period_data)

        clean_correlation = self.clean_correlations(must_haves)

        return clean_correlation
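The recurring comment "means we can use earlier correlations with sensible values" (Example #5 and later examples) suggests that clean_correlation patches NaN entries in the estimated matrix from a fallback matrix, at least for the must-have items. One plausible reading, sketched purely for illustration and not taken from the library:

import numpy as np

def clean_correlation_sketch(corrmat: np.ndarray,
                             corr_with_no_data: np.ndarray,
                             must_haves: list) -> np.ndarray:
    # Hypothetical: wherever the estimate is NaN and both items are
    # "must haves", fall back to the corresponding prior / boring value.
    cleaned = corrmat.copy()
    size = cleaned.shape[0]
    for i in range(size):
        for j in range(size):
            if np.isnan(cleaned[i, j]) and must_haves[i] and must_haves[j]:
                cleaned[i, j] = corr_with_no_data[i, j]
    return cleaned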
Example #7
    def calculate(self, fit_period):
        """
        Work out the correlation for a single period

        :param fit_period: Specification of the period we're calculating the correlation for

        :return: np.array of correlation matrix
        """

        cleaning = self.cleaning

        corr_with_no_data = self.corr_with_no_data
        corr_for_cleaning = self.corr_for_cleaning

        data_as_df = self.data_as_df
        kwargs = self.kwargs
        ew_lookback_corrected = self.ew_lookback_corrected
        floor_at_zero = self.floor_at_zero

        if fit_period.no_data:
            # no data to fit with
            corrmat = corr_with_no_data
        else:

            data_for_estimate = data_as_df[fit_period.fit_start:fit_period.
                                           fit_end]

            corrmat = correlation_calculator(data_for_estimate,
                                             ew_lookback=ew_lookback_corrected,
                                             **kwargs)

        if cleaning:
            current_period_data = data_as_df[fit_period.fit_start:fit_period.
                                             fit_end]

            # must_haves are items with data in this period, so we need some
            # kind of correlation
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_for_cleaning, must_haves)

            # can't do this earlier as might have nans
            if floor_at_zero:
                corrmat[corrmat < 0] = 0.0

        return corrmat
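The final flooring step in Example #7 (floor_at_zero) is plain NumPy boolean indexing; a standalone illustration:

import numpy as np

corrmat = np.array([[ 1.0, -0.2,  0.5],
                    [-0.2,  1.0,  0.1],
                    [ 0.5,  0.1,  1.0]])

# floor_at_zero: clip negative correlations to zero, in place
corrmat[corrmat < 0] = 0.0
print(corrmat)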
Example #8
    def calculate(self, fit_period):
        """
        Work out the correlation for a single period

        :param fit_period: Specification of the period we're calculating the correlation for

        :return: np.array of correlation matrix
        """

        cleaning = self.cleaning

        corr_with_no_data = self.corr_with_no_data
        corr_for_cleaning = self.corr_for_cleaning

        data_as_df = self.data_as_df
        kwargs = self.kwargs
        ew_lookback_corrected = self.ew_lookback_corrected
        floor_at_zero = self.floor_at_zero

        if fit_period.no_data:
            # no data to fit with
            corrmat = corr_with_no_data
        else:

            data_for_estimate = data_as_df[fit_period.fit_start:
                                           fit_period.fit_end]

            corrmat = correlation_calculator(
                data_for_estimate, ew_lookback=ew_lookback_corrected, **kwargs)

        if cleaning:
            current_period_data = data_as_df[fit_period.fit_start:
                                             fit_period.fit_end]

            # must_haves are items with data in this period, so we need some kind of correlation
            must_haves = must_have_item(current_period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_for_cleaning, must_haves)

            # can't do this earlier as might have nans
            if floor_at_zero:
                corrmat[corrmat < 0] = 0.0

        return corrmat
Example #9
    def __init__(self, parent, data, fit_period, optimiser, cleaning):
        if cleaning:
            current_period_data=data[fit_period.period_start:fit_period.period_end] 
            must_haves=must_have_item(current_period_data)        
        else:
            must_haves=None
        
        if fit_period.no_data:
            diag=None
            size=current_period_data.shape[1]
            weights_with_nan=[np.nan/size]*size
            weights=weights_with_nan
            if cleaning:
                weights=clean_weights(weights, must_haves)
        else:
            subset_fitting_data=data[fit_period.fit_start:fit_period.fit_end]    
            (weights, diag)=optimiser.call(subset_fitting_data, cleaning, must_haves)

        setattr(self, "diag", diag)
        setattr(self, "weights", weights)
Example #10
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 dict_group=dict(),
                 boring_offdiag=0.99,
                 cleaning=True,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param **kwargs: passed to correlation_single_period

        :returns: CorrelationList
        """

        cleaning = str2Bool(cleaning)

        # grouping dictionary, convert to faster, algo friendly, form
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        data = data.resample(frequency).last()

        # Generate time periods
        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)

        size = len(column_names)
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:
            progress.iterate()
            if fit_period.no_data:
                # no data to fit with
                corr_with_nan = boring_corr_matrix(size,
                                                   offdiag=np.nan,
                                                   diag=np.nan)
                corrmat = corr_with_nan

            else:

                data_for_estimate = data[fit_period.fit_start:fit_period.
                                         fit_end]

                corrmat = correlation_single_period(data_for_estimate,
                                                    **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.
                                           fit_end]
                must_haves = must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat = clean_correlation(corrmat, corr_with_no_data,
                                            must_haves)

            corr_list.append(corrmat)

        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #11
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding",
                 rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """
    
        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin
                
        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict
    
        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float 
    
        :param **kwargs: passed to correlation_single_period
        
        :returns: CorrelationList
        """

        cleaning=str2Bool(cleaning)
    
        ## grouping dictionary, convert to faster, algo friendly, form
        group_dict=group_dict_from_natural(dict_group)

        data=df_from_list(data)    
        column_names=list(data.columns)

        data=data.resample(frequency).last()
            
        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)
        
        ## create a list of correlation matrices
        corr_list=[]
        
        log.terse("Correlation estimate")
        
        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))
            
            if fit_period.no_data:
                ## no data to fit with
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan
                
            else:
                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end] 
                
                corrmat=correlation_single_period(data_for_estimate, 
                                                     **kwargs)

            if cleaning:
                current_period_data=data[fit_period.fit_start:fit_period.fit_end] 
                must_haves=must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat=clean_correlation(corrmat, corr_with_no_data, must_haves) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)