Example #1
    def __init__(self,  log=logtoscreen("optimiser"), frequency="W", date_method="expanding", 
                         rollyears=20, method="bootstrap", cleaning=True, 
                         cost_multiplier=1.0, apply_cost_weight=True, 
                         ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
                         **passed_params):
                
        cleaning=str2Bool(cleaning)
        optimise_params=copy(passed_params)
        ann_dict=dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
        annualisation=ann_dict.get(frequency, 1.0)
        period_target_SR=ann_target_SR/(annualisation**.5)        
        moments_estimator=momentsEstimator(optimise_params, annualisation,  ann_target_SR)
        optimiser=optimiserWithParams(method, optimise_params, moments_estimator)

        setattr(self, "optimiser", optimiser)
        setattr(self, "log", log)
        setattr(self, "frequency", frequency)
        setattr(self, "method", method)
        setattr(self, "equalise_gross", equalise_gross)
        setattr(self, "cost_multiplier", cost_multiplier)
        setattr(self, "annualisation", annualisation)
        setattr(self, "period_target_SR", period_target_SR)
        setattr(self, "date_method", date_method)
        setattr(self, "rollyears", rollyears)
        setattr(self, "cleaning", cleaning)
        setattr(self, "apply_cost_weight", apply_cost_weight)
Example #2
    def __init__(self, config_object=dict()):
        """
        Config objects control the behaviour of systems

        :param config_object: Either:
                        a string (which points to a YAML filename)
                        or a dict (which may nest many things)
                        or a list of strings or dicts (build config from multiple elements, latter elements will overwrite earlier ones)

        :type config_object: str or dict

        :returns: new Config object

        >>> Config(dict(parameters=dict(p1=3, p2=4.6), another_thing=[]))
        Config with elements: another_thing, parameters

        >>> Config("sysdata.tests.exampleconfig.yaml")
        Config with elements: parameters, trading_rules

        >>> Config(["sysdata.tests.exampleconfig.yaml", dict(parameters=dict(p1=3, p2=4.6), another_thing=[])])
        Config with elements: another_thing, parameters, trading_rules

        """
        setattr(self, "_elements", []) ## will be populated later
        
        if isinstance(config_object, list):
            # multiple configs
            for config_item in config_object:
                self._create_config_from_item(config_item)
        else:
            self._create_config_from_item(config_object)

        ## this will normally be overridden by the base system
        setattr(self, "log", logtoscreen(stage="config"))
Example #3
    def __init__(self):
        """
        simData socket base class

        >>> data = baseData()
        >>> data
        simData object
        """
        # this will normally be overridden by the base system
        setattr(self, "log", logtoscreen(stage="data"))
Example #4
    def __init__(self):
        '''

        '''

        # We set these to empty lists but in the inherited object they'll be
        # overridden
        setattr(self, "_protected", [])
        setattr(self, "name", self._name())
        setattr(self, "description", self._description())

        # this will normally be overridden by the base system when we call _system_init
        setattr(self, "log", logtoscreen(stage="config"))
Example #5
    def __init__(self,
                 ibconnection: connectionIB,
                 log: logger = logtoscreen("ibFxPricesData")):
        super().__init__(log=log)
        self._ibconnection = ibconnection
 def __init__(self, log=logtoscreen("brokerStaticData")):
     super().__init__(log=log)
Example #7
    def __init__(self, log=logtoscreen("ibClient")):

        self.log = log
        ## means our first call won't be throttled for pacing
        self.last_historic_price_calltime = (
            datetime.datetime.now() -
            datetime.timedelta(seconds=_PACING_PERIOD_SECONDS))
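A sketch of how the timestamp set above could be used: before each historic price request, wait out the remainder of the pacing window. The helper name and the sleep-based approach are assumptions, not part of the snippet; note how initialising the timestamp one pacing period in the past means the first call is never throttled:

import datetime
import time

_PACING_PERIOD_SECONDS = 10  # illustrative value
last_historic_price_calltime = (datetime.datetime.now() -
                                datetime.timedelta(seconds=_PACING_PERIOD_SECONDS))

def wait_for_pacing():
    # Hypothetical helper: block until a full pacing period has elapsed
    # since the last historic price call, then stamp the new call time.
    global last_historic_price_calltime
    elapsed = (datetime.datetime.now() - last_historic_price_calltime).total_seconds()
    if elapsed < _PACING_PERIOD_SECONDS:
        time.sleep(_PACING_PERIOD_SECONDS - elapsed)
    last_historic_price_calltime = datetime.datetime.now()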
Example #8
    def __init__(self, ibconnection, log=logtoscreen("ibFuturesContractPriceData")):
        setattr(self, "ibconnection", ibconnection)
        setattr(self, "log", log)
Example #9
    def __init__(self, mongo_db=None, log=logtoscreen("arcticFxPricesData")):

        super().__init__(log=log)
        self._arctic = articData(SPOTFX_COLLECTION, mongo_db=mongo_db)
Example #10
def mark_contracts_as_stopped_sampling(instrument_code, contracts_not_sampling, data, log=logtoscreen("")):
    """

    :param instrument_code: str
    :param contracts_not_sampling: list of contractDate objects
    :param data: dataBlob
    :return: None
    """
    for contract_date_object in contracts_not_sampling:
        contract_date = contract_date_object.date

        # Mark it as stopped sampling in the database
        contract = data.mongo_futures_contract.get_contract_data(instrument_code, contract_date)
        if contract.currently_sampling:
            contract.sampling_off()
            data.mongo_futures_contract.add_contract_data(contract, ignore_duplication=True)

            log.msg("Contract %s has now stopped sampling" % str(contract), contract_date=contract.date)
        else:
            # nothing to do
            pass

    return None
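A minimal call sketch for the function above; the instrument code, the contractDate list, and the dataBlob are all assumed to come from the surrounding production code:

# Hypothetical call: stop sampling contracts that have dropped out of the chain
mark_contracts_as_stopped_sampling("EDOLLAR",          # assumed instrument code
                                   expired_contracts,  # list of contractDate objects
                                   data)               # dataBlob with mongo_futures_contract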
Example #11
    def __init__(self,
                 stage_list,
                 data,
                 config=None,
                 log=logtoscreen("base_system")):
        """
        Create a system object for doing simulations or live trading

        :param stage_list: A list of stages
        :type stage_list: list of systems.stage.SystemStage (or anything that inherits from it)

        :param data: data for doing simulations
        :type data: sysdata.data.Data (or anything that inherits from that)

        :param config: Optional configuration
        :type config: sysdata.configdata.Config

        :returns: new system object

        >>> from systems.stage import SystemStage
        >>> stage=SystemStage()
        >>> from sysdata.csvdata import csvFuturesData
        >>> data=csvFuturesData()
        >>> System([stage], data)
        System with stages: unnamed

        """

        if config is None:
            # Default - for very dull systems this is sufficient
            config = Config()

        config.fill_with_defaults()

        setattr(self, "data", data)
        setattr(self, "config", config)
        setattr(self, "log", log)

        setattr(data, "log", log.setup(stage="data"))
        setattr(config, "log", log.setup(stage="config"))

        protected = []
        nopickle = []
        stage_names = []

        assert isinstance(stage_list, list)

        for stage in stage_list:
            """
            This is where we put the methods to store various stages of the process

            """

            # Stages have names, which are also how we find them in the system
            # attributes
            sub_name = stage.name

            # Each stage has a link back to the parent system
            stage._system_init(self)

            # and a log
            log = log.setup(stage=sub_name)
            setattr(stage, "log", log)

            if sub_name in stage_names:
                raise Exception(
                    "You have duplicate subsystems with the name %s. Remove "
                    "one of them, or change a name." % sub_name)

            setattr(self, sub_name, stage)

            stage_names.append(sub_name)

            # list of attributes / methods of the stage which are protected
            stage_protected = getattr(stage, "_protected", [])
            stage_protected = [(sub_name, protected_item, "*")
                               for protected_item in stage_protected]
            protected += stage_protected

            stage_nopickle = getattr(stage, "_nopickle", [])
            stage_nopickle = [(sub_name, protected_item, "*")
                              for protected_item in stage_nopickle]
            nopickle += stage_nopickle

        setattr(self, "_stage_names", stage_names)
        """
        The cache hides all intermediate results

        We call optimal_positions and then that propagates back finding all the
        data we need

        The results are then cached in the object. Should we call
            delete_instrument_data (in base class system) then everything
            related to a particular instrument is removed from these 'nodes'
            except for protected items

        This is very useful in live trading when we don't want to update e.g.
            cross-sectional data every sample
        """

        setattr(self, "_cache", dict())
        setattr(self, "_protected", protected)
        setattr(self, "_nopickle", nopickle)
Example #12
    def __init__(self, ibconnection, log=logtoscreen("ibFxPricesData")):
        setattr(self, "ibconnection", ibconnection)
        setattr(self, "log", log)
Example #13
    def __init__(self, mongo_db=None, log=logtoscreen("mongoTradeLimitData")):
        super().__init__(log=log)
        self._mongo_data = mongoDataWithMultipleKeys(LIMIT_STATUS_COLLECTION,
                                                     mongo_db=mongo_db)
Example #14
 def __init__(self, log=logtoscreen("Overrides")):
     super().__init__(log=log)
Example #15
 def __init__(self, log=logtoscreen("controlProcessData")):
     self.log = log
     self._control_store = dict()
    def __init__(self, log=logtoscreen("listOfEntriesData")):

        super().__init__(log=log)
        self.name = self._name()
Example #17
    def __init__(self, ibconnection: connectionIB,
                 log=logtoscreen("ibFuturesContractPriceData")):
        self._ibconnection = ibconnection
        super().__init__(log=log)
Example #18
 def __init__(self, log=logtoscreen("futuresInstrumentData")):
     self._log = log
Example #19
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 dict_group=dict(),
                 boring_offdiag=0.99,
                 cleaning=True,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff has been done before we begin

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param **kwargs: passed to correlation_single_period

        :returns: CorrelationList
        """

        cleaning = str2Bool(cleaning)

        # grouping dictionary, convert to faster, algo friendly, form
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        data = data.resample(frequency).last()

        # Generate time periods
        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)

        size = len(column_names)
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        # create a list of correlation matrices
        corr_list = []

        log.terse("Correlation estimate")

        # Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Estimating from %s to %s" %
                    (fit_period.period_start, fit_period.period_end))

            if fit_period.no_data:
                # no data to fit with
                corr_with_nan = boring_corr_matrix(size,
                                                   offdiag=np.nan,
                                                   diag=np.nan)
                corrmat = corr_with_nan

            else:

                data_for_estimate = data[fit_period.fit_start:fit_period.
                                         fit_end]

                corrmat = correlation_single_period(data_for_estimate,
                                                    **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.
                                           fit_end]
                must_haves = must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat = clean_correlation(corrmat, corr_with_no_data,
                                            must_haves)

            corr_list.append(corrmat)

        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #20
    def __init__(self, log: logger = logtoscreen("brokerCapitalData")):
        super().__init__(log=log)
Example #21
    def __init__(self, ibconnection, log=logtoscreen("ibFuturesContractData")):
        super().__init__(log=log)
        self._ibconnection = ibconnection
Example #22
 def __init__(self, log=logtoscreen("futuresInstrumentData")):
     super().__init__(log=log)
Example #23
def update_contract_database_with_contract_chain(instrument_code, required_contract_chain, data, log=logtoscreen("")):
    """

    :param required_contract_chain: list of contract dates 'yyyymm'
    :param instrument_code: str
    :param data: dataBlob
    :return: None
    """

    # Get list of contracts in the database
    all_contracts_in_db = data.mongo_futures_contract.get_all_contract_objects_for_instrument_code(instrument_code)
    current_contract_chain = all_contracts_in_db.currently_sampling()

    # Is something in required_contract_chain, but not in the database?
    missing_from_db = required_contract_chain.difference(current_contract_chain)

    # They have probably been added as the result of a recent roll
    # Let's add them
    add_missing_contracts_to_database(instrument_code, missing_from_db, data, log=log)

    # Is something in the database, but not in required_contract_chain?
    # Then it's either expired or weirdly very far in the future (maybe we changed the roll parameters)
    # Either way, we stop sampling it (if it hasn't expired, it will be added back in the future)
    contracts_not_sampling = current_contract_chain.difference(required_contract_chain)
    mark_contracts_as_stopped_sampling(instrument_code, contracts_not_sampling, data, log=log)

    return None
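The reconciliation above is plain set arithmetic over contract dates; a self-contained illustration with 'yyyymm' strings (values invented for the example):

required = {"202003", "202006", "202009"}  # from the roll calendar
sampling = {"201912", "202003", "202006"}  # currently sampling in the database

missing_from_db = required - sampling  # {"202009"}: add and start sampling
stopped = sampling - required          # {"201912"}: mark as stopped sampling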
 def __init__(self, log=logtoscreen("ibServer")):
     self._contract_register = dict()
     super().__init__(log=log)
Example #25
    def __init__(self, wrapper, reqIDoffset, log=logtoscreen()):
        ## Set up with a wrapper inside
        EClient.__init__(self, wrapper)
        self.ib_init_request_id_factory(reqIDoffset)
        self.log = log
Example #26
 def __init__(self, log=logtoscreen("order-stack")):
     self.log = log
Example #27
    def __init__(self, ibconnection, log=logtoscreen("ibFuturesContractPriceData")):
        setattr(self, "ibconnection", ibconnection)
        setattr(self, "log", log)

        self._traded_object_store = dict()
Example #28
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", rollyears=20,
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """
    
        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff has been done before we begin
                
        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict
    
        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float 
    
        :param **kwargs: passed to correlation_single_period
        
        :returns: CorrelationList
        """

        cleaning=str2Bool(cleaning)
    
        ## grouping dictionary, convert to faster, algo friendly, form
        group_dict=group_dict_from_natural(dict_group)

        data=df_from_list(data)    
        column_names=list(data.columns)

        data=data.resample(frequency).last()
            
        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)
        
        ## create a list of correlation matrices
        corr_list=[]
        
        log.terse("Correlation estimate")
        
        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Fitting from %s to %s" % (fit_period.period_start, fit_period.period_end))
            
            if fit_period.no_data:
                ## no data to fit with
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan
                
            else:
                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end] 
                
                corrmat=correlation_single_period(data_for_estimate, 
                                                     **kwargs)

            if cleaning:
                # means we can use earlier correlations with sensible values
                corrmat=clean_correlation(corrmat, corr_with_no_data, boring_offdiag) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #29
    def __init__(self, log=logtoscreen("optimiser"), frequency="W", date_method="expanding",
                 rollyears=20, method="bootstrap", cleaning=True,
                 cost_multiplier=1.0, apply_cost_weight=True,
                 ann_target_SR=TARGET_ANN_SR, equalise_gross=False,
                 **passed_params):
        """

        Set up optimiser

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type method: str

        :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
        :type equalise_gross: bool

        :param cost_multiplier: Multiply costs by this number
        :type cost_multiplier: float

        :param apply_cost_weight: Should we adjust our weightings to reflect costs?
        :type apply_cost_weight: bool

        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions

        :returns: pd.DataFrame of weights
        """

        # Because interaction of parameters is complex, display warnings
        display_warnings(
            log,
            cost_multiplier,
            equalise_gross,
            apply_cost_weight,
            method,
            **passed_params)

        cleaning = str2Bool(cleaning)
        optimise_params = copy(passed_params)

        # annualisation
        ann_dict = dict(
            D=BUSINESS_DAYS_IN_YEAR,
            W=WEEKS_IN_YEAR,
            M=MONTHS_IN_YEAR,
            Y=1.0)
        annualisation = ann_dict.get(frequency, 1.0)

        period_target_SR = ann_target_SR / (annualisation**.5)

        # A moments estimator works out the mean, vol, correlation
        # Also stores annualisation factor and target SR (used for shrinkage
        # and equalising)
        moments_estimator = momentsEstimator(
            optimise_params, annualisation, ann_target_SR)

        # The optimiser instance will do the optimisation once we have the
        # appropriate data
        optimiser = optimiserWithParams(
            method, optimise_params, moments_estimator)

        setattr(self, "optimiser", optimiser)
        setattr(self, "log", log)
        setattr(self, "frequency", frequency)
        setattr(self, "method", method)
        setattr(self, "equalise_gross", equalise_gross)
        setattr(self, "cost_multiplier", cost_multiplier)
        setattr(self, "annualisation", annualisation)
        setattr(self, "period_target_SR", period_target_SR)
        setattr(self, "date_method", date_method)
        setattr(self, "rollyears", rollyears)
        setattr(self, "cleaning", cleaning)
        setattr(self, "apply_cost_weight", apply_cost_weight)
Example #30
    def __init__(self,
                 stage_list,
                 data,
                 config=None,
                 log=logtoscreen("base_system")):
        """
        Create a system object for doing simulations or live trading

        :param stage_list: A list of stages
        :type stage_list: list of systems.stage.SystemStage (or anything that inherits from it)

        :param data: data for doing simulations
        :type data: sysdata.data.simData (or anything that inherits from that)

        :param config: Optional configuration
        :type config: sysdata.configdata.Config

        :returns: new system object

        >>> from systems.stage import SystemStage
        >>> stage=SystemStage()
        >>> from sysdata.csv.csv_sim_futures_data import csvFuturesSimData
        >>> data=csvFuturesSimData()
        >>> System([stage], data)
        System base_system with .config, .data, and .stages: unnamed

        """

        if config is None:
            # Default - for very dull systems this is sufficient
            config = Config()

        setattr(self, "data", data)
        setattr(self, "config", config)
        setattr(self, "log", log)

        self.config._system_init(self)
        self.data._system_init(self)

        stage_names = []

        try:
            iter(stage_list)
        except TypeError:
            raise Exception(
                "You didn't pass a list into this System instance; even just one stage should be System([stage_instance])"
            )

        for stage in stage_list:
            """
            This is where we put the methods to store various stages of the process

            """

            # Stages have names, which are also how we find them in the system
            # attributes
            sub_name = stage.name

            # Each stage has a link back to the parent system
            # This init sets this, and also passes the system logging object
            stage._system_init(self)

            if sub_name in stage_names:
                raise Exception(
                    "You have duplicate subsystems with the name %s. Remove "
                    "one of them, or change a name." % sub_name)

            setattr(self, sub_name, stage)

            stage_names.append(sub_name)

        setattr(self, "_stage_names", stage_names)
        """
        The cache hides all intermediate results

        We call optimal_positions and then that propagates back finding all the
        data we need

        The results are then cached in the object. Should we call
            delete_instrument_data (in base class system) then everything
            related to a particular instrument is removed from these 'nodes'
            except for protected items

        This is very useful in live trading when we don't want to update e.g.
            cross-sectional data every sample
        """

        setattr(self, "cache", systemCache(self))
        self.name = "base_system"  # makes caching work and for general consistency
 def __init__(self, log=logtoscreen("Overrides")):
     self.log = log
     self._limits = {}
Example #32
    def __init__(self,
                 data,
                 identifier=None,
                 parent = None,
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 method="bootstrap",
                 cleaning=True,
                 cost_multiplier=1.0,
                 apply_cost_weight=True,
                 ann_target_SR=TARGET_ANN_SR,
                 equalise_gross=False,
                 pool_gross_returns=False,
                 use_pooled_costs=False,
                 use_pooled_turnover=None, # not used
                 **passed_params):
        """

        Set up optimiser
        :param data: A dict of account curve dataframes, if not pooled data will only have one entry
        :type data: dict

        :param identifier: A dictionary key which refers to the element in data which we're optimising for
        :type identifier: str

        :param parent: The parent object, probably a system stage, which we get the log attribute from
        :type parent: systemStage

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type method: str

        :param cleaning: Do we clean weights so we don't need a warmup period?
        :type cleaning: bool

        :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
        :type equalise_gross: bool

        :param pool_gross_returns: Should we pool gross returns together?
        :type pool_gross_returns: bool

        :param use_pooled_costs: Should we pool costs together?
        :type use_pooled_costs: bool

        :param cost_multiplier: Multiply costs by this number
        :type cost_multiplier: float

        :param apply_cost_weight: Should we adjust our weightings to reflect costs?
        :type apply_cost_weight: bool

        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions

        :returns: pd.DataFrame of weights
        """

        if parent is None:
            log = logtoscreen("optimiser")
        else:
            log = parent.log

        setattr(self, "log", log)

        # Because interaction of parameters is complex, display warnings
        self.display_warnings(cost_multiplier, equalise_gross,
                              apply_cost_weight, method, **passed_params)

        cleaning = str2Bool(cleaning)
        optimise_params = copy(passed_params)

        # annualisation
        ANN_DICT = dict(
            D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
        annualisation = ANN_DICT.get(frequency, 1.0)

        self.set_up_data(data, frequency=frequency, equalise_gross=equalise_gross,
                         cost_multiplier=cost_multiplier, annualisation=annualisation,
                         ann_target_SR=ann_target_SR,
                         use_pooled_costs=use_pooled_costs,
                         pool_gross_returns=pool_gross_returns,
                         identifier=identifier)

        # A moments estimator works out the mean, vol, correlation
        # Also stores annualisation factor and target SR (used for shrinkage
        # and equalising)
        moments_estimator = momentsEstimator(optimise_params, annualisation,
                                             ann_target_SR)

        # The optimiser instance will do the optimisation once we have the
        # appropriate data
        optimiser = optimiserWithParams(method, optimise_params,
                                        moments_estimator)

        setattr(self, "optimiser", optimiser)
        setattr(self, "frequency", frequency)
        setattr(self, "method", method)
        setattr(self, "equalise_gross", equalise_gross)
        setattr(self, "cost_multiplier", cost_multiplier)
        setattr(self, "annualisation", annualisation)
        setattr(self, "date_method", date_method)
        setattr(self, "rollyears", rollyears)
        setattr(self, "cleaning", cleaning)
        setattr(self, "apply_cost_weight", apply_cost_weight)
Example #33
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 fit_method="bootstrap",
                 cleaning=True,
                 **passed_params):
        """
    
        Optimise weights over some returns data
        
        :param data: Returns data
        :type data: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str
    
        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type fit_method: str
    
        :param cleaning: Should we clean correlations so can use incomplete data?
        :type cleaning: bool
    
        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions
        
        :returns: pd.DataFrame of weights
        """
        cleaning = str2Bool(cleaning)
        optimise_params = copy(passed_params)

        ## A moments estimator works out the mean, vol, correlation
        moments_estimator = momentsEstimator(optimise_params)

        ## The optimiser instance will do the optimisation once we have the appropriate data
        optimiser = optimiserWithParams(optimise_params, moments_estimator)

        ## annualisation
        ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR,
                        W=WEEKS_IN_YEAR,
                        M=MONTHS_IN_YEAR,
                        Y=1.0)
        annualisation = ann_dict.get(frequency, 1.0)

        ## de-pool pooled data
        data = df_from_list(data)

        ## resample, indexing before and differencing after (returns, remember)
        data = data.cumsum().resample(frequency).last().diff()

        ## account for change in frequency
        data = data * annualisation

        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)

        ## Now for each time period, estimate weights
        ## create a list of weight vectors
        weight_list = []

        ## create a class object for each period
        opt_results = []

        log.terse("Optimising...")

        for fit_period in fit_dates:
            log.msg("Optimising for data from %s to %s" %
                    (str(fit_period.period_start), str(fit_period.period_end)))
            ## Do the optimisation for one period, using a particular optimiser instance
            results_this_period = optSinglePeriod(self, data, fit_period,
                                                  optimiser, cleaning)

            opt_results.append(results_this_period)

            weights = results_this_period.weights

            ## We adjust dates slightly to ensure no overlaps
            dindex = [
                fit_period.period_start + datetime.timedelta(days=1),
                fit_period.period_end - datetime.timedelta(days=1)
            ]

            ## create a double row to delineate start and end of test period
            weight_row = pd.DataFrame([weights] * 2,
                                      index=dindex,
                                      columns=data.columns)
            weight_list.append(weight_row)

        ## Stack everything up
        weight_df = pd.concat(weight_list, axis=0)

        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
Example #34
    def __init__(self):
        """
        Data socket base class
        """
        ## this will normally be overridden by the base system
        setattr(self, "log", logtoscreen(stage="data"))
Example #35
    def __init__(self, stage_list, data, config=None, log=logtoscreen("base_system")):
        """
        Create a system object for doing simulations or live trading

        :param stage_list: A list of stages
        :type stage_list: list of systems.stage.SystemStage (or anything that inherits from it)

        :param data: data for doing simulations
        :type data: sysdata.data.Data (or anything that inherits from that)

        :param config: Optional configuration
        :type config: sysdata.configdata.Config

        :returns: new system object

        >>> from systems.stage import SystemStage
        >>> stage=SystemStage()
        >>> from sysdata.csvdata import csvFuturesData
        >>> data=csvFuturesData()
        >>> System([stage], data)
        System with stages: unnamed

        """

        if config is None:
            # Default - for very dull systems this is sufficient
            config = Config()

        config.fill_with_defaults()
        
        setattr(self, "data", data)
        setattr(self, "config", config)
        setattr(self, "log", log)
        
        setattr(data, "log", log.setup(stage="data"))
        setattr(config, "log", log.setup(stage="config"))

        protected = []
        nopickle=[]
        stage_names = []

        assert isinstance(stage_list, list)

        for stage in stage_list:

            """
            This is where we put the methods to store various stages of the process

            """

            # Stages have names, which are also how we find them in the system
            # attributes
            sub_name = stage.name

            # Each stage has a link back to the parent system
            stage._system_init(self)
            
            # and a log
            log=log.setup(stage=sub_name)
            setattr(stage, "log", log)

            if sub_name in stage_names:
                raise Exception(
                    "You have duplicate subsystems with the name %s. Remove "
                    "one of them, or change a name." % sub_name)

            setattr(self, sub_name, stage)

            stage_names.append(sub_name)

            # list of attributes / methods of the stage which are protected
            stage_protected = getattr(stage, "_protected", [])
            stage_protected = [(sub_name, protected_item, "*") for protected_item in stage_protected]
            protected += stage_protected
            
            stage_nopickle=getattr(stage, "_nopickle", [])
            stage_nopickle = [(sub_name, protected_item, "*") for protected_item in stage_nopickle]
            nopickle += stage_nopickle
            

        setattr(self, "_stage_names", stage_names)

        """
        The cache hides all intermediate results

        We call optimal_positions and then that propagates back finding all the
        data we need

        The results are then cached in the object. Should we call
            delete_instrument_data (in base class system) then everything
            related to a particular instrument is removed from these 'nodes'
            except for protected items

        This is very useful in live trading when we don't want to update e.g.
            cross-sectional data every sample
        """

        setattr(self, "_cache", dict())
        setattr(self, "_protected", protected)
        setattr(self, "_nopickle", nopickle)
Example #36
    def __init__(self,
                 stage_list,
                 data,
                 config=None,
                 log=logtoscreen("base_system")):
        """
        Create a system object for doing simulations or live trading

        :param stage_list: A list of stages
        :type stage_list: list of systems.stage.SystemStage (or anything that inherits from it)

        :param data: data for doing simulations
        :type data: sysdata.data.simData (or anything that inherits from that)

        :param config: Optional configuration
        :type config: sysdata.configdata.Config

        :returns: new system object

        >>> from systems.stage import SystemStage
        >>> stage=SystemStage()
        >>> from sysdata.csv.csv_sim_futures_data import csvFuturesSimData
        >>> data=csvFuturesSimData()
        >>> System([stage], data)
        System base_system with .config, .data, and .stages: unnamed

        """

        if config is None:
            # Default - for very dull systems this is sufficient
            config = Config()

        setattr(self, "data", data)
        setattr(self, "config", config)
        self._log = log

        self.config._system_init(self)
        self.data._system_init(self)

        stage_names = []

        try:
            iter(stage_list)
        except TypeError:
            raise Exception(
                "You didn't pass a list into this System instance; even just one stage should be System([stage_instance])"
            )

        for stage in stage_list:
            """
            This is where we put the methods to store various stages of the process

            """

            # Stages have names, which are also how we find them in the system
            # attributes
            sub_name = stage.name

            # Each stage has a link back to the parent system
            # This init sets this, and also passes the system logging object
            stage._system_init(self)

            if sub_name in stage_names:
                raise Exception(
                    "You have duplicate subsystems with the name %s. Remove "
                    "one of them, or change a name." % sub_name)

            setattr(self, sub_name, stage)

            stage_names.append(sub_name)

        setattr(self, "_stage_names", stage_names)
        """
        The cache hides all intermediate results

        We call optimal_positions and then that propagates back finding all the
        data we need

        The results are then cached in the object. Should we call
            delete_instrument_data (in base class system) then everything
            related to a particular instrument is removed from these 'nodes'
            except for protected items

        This is very useful in live trading when we don't want to update e.g.
            cross-sectional data every sample
        """

        setattr(self, "cache", systemCache(self))
        self.name = "base_system"  # makes caching work and for general consistency
Example #37
    def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"), frequency="W", date_method="expanding",
                 rollyears=20, fit_method="bootstrap", cleaning=True, equalise_gross=False,
                 cost_multiplier=1.0, apply_cost_weight=True, ceiling_cost_SR=0.13,
                 ann_target_SR=TARGET_ANN_SR,
                 **passed_params):
        
        """
    
        Optimise weights over some returns data
        
        :param data_gross: Returns data for gross returns
        :type data_gross: pd.DataFrame or list if pooling

        :param data_costs: Returns data for costs
        :type data_costs: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str
    
        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type fit_method: str
    
        :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
        :type equalise_gross: bool

        :param cost_multiplier: Multiply costs by this number
        :type cost_multiplier: float

        :param apply_cost_weight: Should we adjust our weightings to reflect costs?
        :type apply_cost_weight: bool

        :param ceiling_cost_SR: What is the maximum SR cost beyond which I don't allocate to an asset. Set to 999 to avoid using.
        :type ceiling_cost_SR: float
    
        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions
        
        :returns: pd.DataFrame of weights
        """

        ## Because interaction of parameters is complex, display warnings         
        display_warnings(log, cost_multiplier, equalise_gross, apply_cost_weight, **passed_params)
        
        cleaning=str2Bool(cleaning)
        optimise_params=copy(passed_params)

        ## annualisation
        ann_dict=dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
        annualisation=ann_dict.get(frequency, 1.0)

        period_target_SR=ann_target_SR/(annualisation**.5)
        ceiling_cost_SR_period=ceiling_cost_SR/(annualisation**.5)
        
        ## A moments estimator works out the mean, vol, correlation
        ## Also stores annualisation factor and target SR (used for shrinkage and equalising)
        moments_estimator=momentsEstimator(optimise_params, annualisation,  ann_target_SR)

        ## The optimiser instance will do the optimation once we have the appropriate data
        optimiser=optimiserWithParams(optimise_params, moments_estimator)
    
    
        ## resample, indexing before and differencing after (returns, remember)
        data_gross = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_gross]

        data_costs = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_costs]

        ## stack de-pool pooled data    
        data_gross=df_from_list(data_gross)    
        data_costs=df_from_list(data_costs)    
        
        ## net gross and costs
        if equalise_gross:
            log.terse("Setting all gross returns to be identical - optimisation driven only by costs")
        if cost_multiplier!=1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" % cost_multiplier)
        
        
        data = work_out_net(data_gross, data_costs, annualisation=annualisation,
                            equalise_gross=equalise_gross, cost_multiplier=cost_multiplier,
                            ceiling_cost_ann_SR=ceiling_cost_SR, 
                            period_target_SR=period_target_SR)
            
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)
    
        ## Now for each time period, estimate weights
        ## create a list of weight vectors
        weight_list=[]
        
        ## create a class object for each period
        opt_results=[]
        
        log.terse("Optimising...")
        
        for fit_period in fit_dates:
            log.msg("Optimising for data from %s to %s" % (str(fit_period.period_start), str(fit_period.period_end)))
            ## Do the optimisation for one period, using a particular optimiser instance
            results_this_period=optSinglePeriod(self, data, fit_period, optimiser, cleaning)

            opt_results.append(results_this_period)

            weights=results_this_period.weights
            
            ## We adjust dates slightly to ensure no overlaps
            dindex=[fit_period.period_start+datetime.timedelta(days=1), 
                    fit_period.period_end-datetime.timedelta(days=1)]
            
            ## create a double row to delineate start and end of test period
            weight_row=pd.DataFrame([weights]*2, index=dindex, columns=data.columns)
            weight_list.append(weight_row)

        ## Stack everything up    
        raw_weight_df=pd.concat(weight_list, axis=0)

        if apply_cost_weight:
            log.terse("Applying cost weighting to optimisation results")
            weight_df = apply_cost_weighting(raw_weight_df, data_gross, data_costs, annualisation)
        else:
            weight_df = raw_weight_df
        
        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
        setattr(self, "raw_weights", raw_weight_df)
 def __init__(self, log=logtoscreen("futuresContractPriceData")):
     setattr(self, "_log", log)
Example #39
    def __init__(self):
        """
        Data socket base class
        """
        # this will normally be overridden by the base system
        setattr(self, "log", logtoscreen(stage="data"))
Example #40
    def __init__(self, ibconnection: connectionIB,
                 log=logtoscreen("ibFXHandlingData")):
        self._ibconnection = ibconnection
        super().__init__(log=log)