Ejemplo n.º 1
0
def get_rolling_estimate(returns, func_name, value_index=0, monte_length=1000):
    """
    Bootstrap the 10%/90% confidence interval of a statistic, re-estimated
    on an expanding window of returns every 12 months.

    :param returns: pd.DataFrame of returns with a DatetimeIndex
    :param func_name: statistic function passed through to distribution_of_statistic
    :param value_index: which column of the bootstrap distribution to read
    :param monte_length: number of bootstrap draws per window
    :returns: pd.DataFrame with 'lower' and 'upper' columns, indexed by window end date
    """
    slice_end = pd.date_range(returns.index[1], returns.index[-1], freq="12M")
    thing = progressBar(len(slice_end))

    lower_points = []
    upper_points = []
    for end_point in slice_end:
        # expanding window: all returns up to this end point
        subset_returns = returns[:end_point]
        subset_distribution = distribution_of_statistic(
            subset_returns, func_name, monte_length=monte_length)
        lower_points.append(subset_distribution.quantile(0.1)[value_index])
        upper_points.append(subset_distribution.quantile(0.9)[value_index])
        thing.iterate()
    # close the progress bar (was previously left open)
    thing.finished()

    output = pd.DataFrame(dict(upper=upper_points, lower=lower_points),
                          index=slice_end)

    return output
Ejemplo n.º 2
0
    def _get_portfolio_risk_given_weights(self, portfolio_weights: pd.DataFrame) -> pd.Series:
        """Compute portfolio risk on each date of the common index from the
        supplied weights and that date's covariance matrix."""
        dates = self.common_index()
        progress = progressBar(len(dates), show_timings=True, show_each_time=False)

        risk_values = []
        for a_date in dates:
            progress.iterate()
            weights_today = portfolioWeights(
                get_row_of_df_aligned_to_weights_as_dict(portfolio_weights, a_date))
            covariance_today = self.get_covariance_matrix(a_date)
            risk_values.append(
                calculate_risk(weights=weights_today, covariance=covariance_today))

        progress.finished()

        return pd.Series(risk_values, dates)
Ejemplo n.º 3
0
def calc_historic_confidence(perc, function_to_use, rollperiods=250):
    """Apply function_to_use over rolling fitting windows, returning a
    DataFrame of (lower, upper) bounds indexed by each window's fit end."""
    fitting_dates = generate_fitting_dates(perc,
                                           "rolling",
                                           rollperiods=rollperiods)
    # the first fitting period has no history behind it; skip it
    usable_dates = fitting_dates[1:]

    progress = progressBar(len(usable_dates))
    confidence_rows = []
    for one_date in usable_dates:
        confidence_rows.append(function_to_use(perc, one_date))
        progress.iterate()
    progress.finished()

    result = pd.DataFrame(
        confidence_rows,
        index=[one_date.fit_end for one_date in usable_dates])
    result.columns = ["lower", "upper"]

    return result
Ejemplo n.º 4
0
def get_liquidity_data_df(data: dataBlob):
    """
    Build a DataFrame of liquidity statistics, one row per instrument
    that has contract prices.

    :param data: dataBlob giving access to stored prices
    :returns: pd.DataFrame indexed by instrument code
    """
    diag_prices = diagPrices(data)

    instrument_list = diag_prices.get_list_of_instruments_with_contract_prices(
    )

    print("Getting data... patience")
    p = progressBar(len(instrument_list))
    all_liquidity = []
    for instrument_code in instrument_list:
        p.iterate()
        liquidity_this_instrument = get_liquidity_dict_for_instrument_code(
            data, instrument_code)
        all_liquidity.append(liquidity_this_instrument)
    # close the progress bar so later console output starts on a clean line
    p.finished()

    all_liquidity_df = pd.DataFrame(all_liquidity)
    all_liquidity_df.index = instrument_list

    return all_liquidity_df
Ejemplo n.º 5
0
    def weights(self) -> pd.DataFrame:
        """Estimate weights for every fitting period and stack them into a
        DataFrame indexed by each period's start date."""
        periods = self.fit_dates
        optimiser = self.optimiser

        progress = progressBar(len(periods), "Optimising weights")

        all_weights = []
        # one optimisation per fitting period
        for period in periods:
            progress.iterate()
            all_weights.append(
                optimiser.calculate_weights_for_period(period))

        return pd.DataFrame(all_weights,
                            index=periods.list_of_starting_periods())
Ejemplo n.º 6
0
def get_instrument_risk_table(data, only_held_instruments=True):
    """Return instrument risk metrics (daily %, annual %, return space daily and
    annual, base currency per contract daily and annual, positions) as a
    DataFrame with one column per instrument, cleaned and sorted on
    annual risk as a percentage of capital."""
    # choose the universe: currently-held instruments, or everything known
    if only_held_instruments:
        instruments = get_instruments_with_positions_all_strategies(data)
    else:
        instruments = get_list_of_instruments()

    progress = progressBar(len(instruments))
    rows = []
    for code in instruments:
        rows.append(get_risk_data_for_instrument(data, code))
        progress.iterate()

    progress.finished()

    risk_df = pd.DataFrame(rows, index=instruments).transpose()

    return sorted_clean_df(risk_df, "annual_risk_perc_capital")
Ejemplo n.º 7
0
    def get_optimised_weights_df(self) -> pd.DataFrame:
        """Optimise weights date by date, seeding each optimisation with the
        previous date's result, and return them as a DataFrame."""
        self.log.msg(
            "Optimising positions for small capital: may take a while!")
        dates = list(self.common_index())
        progress = progressBar(len(dates),
                               show_timings=True,
                               show_each_time=True)
        # start from an all-zero portfolio
        prior_weights = portfolioWeights.allzeros(self.instrument_list())
        optimal_by_date = []
        for a_date in dates:
            weights_today = self.get_optimal_weights_with_fixed_contract_values(
                a_date, previous_weights=prior_weights)
            optimal_by_date.append(weights_today)
            # copy so later mutation of the optimiser result can't leak back
            prior_weights = copy(weights_today)
            progress.iterate()
        progress.finished()

        return pd.DataFrame(optimal_by_date, index=dates)
def get_trading_hours_for_all_instruments(data=arg_not_supplied):
    """Return a dict of instrument code -> trading hours for every instrument
    with contract prices; instruments with no data are reported and skipped."""
    if data is arg_not_supplied:
        data = dataBlob()

    diag_prices = diagPrices(data)
    instruments = diag_prices.get_list_of_instruments_with_contract_prices()

    progress = progressBar(len(instruments))
    hours_by_instrument = {}
    for code in instruments:
        progress.iterate()
        hours = get_trading_hours_for_instrument(data, code)
        if hours is missing_contract:
            print("*** NO EXPIRY FOR %s ***" % code)
            continue

        ## will have several days use first one
        check_trading_hours(hours, code)
        hours_by_instrument[code] = hours

    progress.finished()

    return hours_by_instrument
Ejemplo n.º 9
0
def distribution_of_statistic(returns,
                              stat_function,
                              monte_length=1000,
                              horizon=None,
                              colnames=None):
    """
    Bootstrap the sampling distribution of a statistic over some returns.

    :param returns: pd.DataFrame of returns
    :param stat_function: statistic applied to each bootstrap sample
    :param monte_length: number of bootstrap draws
    :param horizon: sample length per draw; defaults to len(returns)
    :param colnames: output column names; defaults to the return columns
    :returns: pd.DataFrame with one row per bootstrap draw
    """
    # identity check, not '== None': equality on array-like arguments would
    # broadcast element-wise instead of yielding a single bool
    if colnames is None:
        colnames = list(returns.columns)

    if horizon is None:
        horizon = len(returns.index)

    list_of_bs_stats = []
    thing = progressBar(monte_length)
    for notUsed in range(monte_length):
        list_of_bs_stats.append(
            statistic_from_bootstrap(returns, stat_function, horizon=horizon))
        thing.iterate()
    thing.finished()

    # single DataFrame construction (was previously wrapped twice: a no-op copy)
    ans = pd.DataFrame(np.array(list_of_bs_stats))
    ans.columns = colnames

    return ans
Ejemplo n.º 10
0
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 dict_group=None,
                 boring_offdiag=0.99,
                 cleaning=True,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        Its important that forward filling, or index / ffill / diff has been done before we begin

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param **kwargs: passed to correlation_single_period

        :returns: CorrelationList
        """
        # avoid a mutable default argument; None stands in for an empty grouping
        if dict_group is None:
            dict_group = dict()

        cleaning = str2Bool(cleaning)

        # grouping dictionary, convert to faster, algo friendly, form
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        data = data.resample(frequency).last()

        # Generate time periods
        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)

        size = len(column_names)
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:
            progress.iterate()
            if fit_period.no_data:
                # no data to fit with: fill with an all-NaN matrix
                corrmat = boring_corr_matrix(size,
                                             offdiag=np.nan,
                                             diag=np.nan)
            else:
                data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]

                corrmat = correlation_single_period(data_for_estimate,
                                                    **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.fit_end]
                must_haves = must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat = clean_correlation(corrmat, corr_with_no_data,
                                            must_haves)

            corr_list.append(corrmat)

        # plain attribute assignment instead of setattr with a literal name
        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Ejemplo n.º 11
0
    def __init__(self,
                 data,
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        Its important that forward filling, or index / ffill / diff has been done before we begin

        :param data: simData to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param **kwargs: passed to correlationSinglePeriod

        :returns: CorrelationList
        """

        if isinstance(data, list):

            # turn the list of data into a single dataframe. This will have a unique time series, which we manage
            #   through adding a small offset of a few microseconds

            length_of_data = len(data)
            data_resampled = [
                data_item.resample(frequency).last() for data_item in data
            ]
            data_as_df = df_from_list(data_resampled)

        else:
            length_of_data = 1
            data_as_df = data.resample(frequency).last()

        column_names = list(data_as_df.columns)

        # Generate time periods
        fit_dates = generate_fitting_dates(data_as_df,
                                           date_method=date_method,
                                           rollyears=rollyears)

        # create a single period correlation estimator
        correlation_estimator_for_one_period = correlationSinglePeriod(
            data_as_df, length_of_data=length_of_data, **kwargs)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:

            progress.iterate()
            corrmat = correlation_estimator_for_one_period.calculate(
                fit_period)
            corr_list.append(corrmat)

        # plain attribute assignment instead of setattr with a literal name
        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Ejemplo n.º 12
0
    def optimise(self):
        """
        Optimise weights over some returns data.

        Fits one optimisation per fitting period, stacks the per-period
        weights into a DataFrame (two rows per period marking its start and
        end), optionally applies cost weighting, and stores the outcome on
        self as .results, .weights and .raw_weights.

        Requires .set_up_data() to have been run first so self.data exists.
        """

        log = self.log
        date_method = self.date_method
        rollyears = self.rollyears
        optimiser = self.optimiser
        cleaning = self.cleaning
        apply_cost_weight = self.apply_cost_weight

        data = getattr(self, "data", None)


        if data is None:
            # NOTE(review): presumably log.critical raises or aborts, since
            # execution would otherwise continue with data=None -- confirm
            log.critical("You need to run .set_up_data() before .optimise()")

        fit_dates = generate_fitting_dates(
            data, date_method=date_method, rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)

        # Now for each time period, estimate weights
        # create a list of weight vectors
        weight_list = []

        # create a class object for each period
        opt_results = []

        progress = progressBar(len(fit_dates), "Optimising")

        for fit_period in fit_dates:
            # Do the optimisation for one period, using a particular optimiser
            # instance
            results_this_period = optSinglePeriod(self, data, fit_period,
                                                  optimiser, cleaning)

            opt_results.append(results_this_period)

            weights = results_this_period.weights

            # We adjust dates slightly to ensure no overlaps
            dindex = [
                fit_period.period_start + datetime.timedelta(days=1),
                fit_period.period_end - datetime.timedelta(days=1)
            ]

            # create a double row to delineate start and end of test period
            weight_row = pd.DataFrame(
                [weights] * 2, index=dindex, columns=data.columns)
            weight_list.append(weight_row)
            progress.iterate()

        # Stack everything up
        raw_weight_df = pd.concat(weight_list, axis=0)

        if apply_cost_weight:
            log.terse("Applying cost weighting to optimisation results")
            # ann_SR_costs must be calculated before a cost multiplier is applied
            ann_SR_costs = self.calculate_ann_SR_costs()

            weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
        else:
            weight_df = raw_weight_df

        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
        setattr(self, "raw_weights", raw_weight_df)
Ejemplo n.º 13
0
    def optimise(self):
        """
        Optimise weights over some returns data.

        Fits one optimisation per fitting period, stacks the per-period
        weights into a DataFrame (two rows per period marking its start and
        end), optionally applies cost weighting, and stores the outcome on
        self as .results, .weights and .raw_weights.

        Requires .set_up_data() to have been run first so self.data exists.
        """

        log = self.log
        date_method = self.date_method
        rollyears = self.rollyears
        optimiser = self.optimiser
        cleaning = self.cleaning
        apply_cost_weight = self.apply_cost_weight

        data = getattr(self, "data", None)


        if data is None:
            # NOTE(review): presumably log.critical raises or aborts, since
            # execution would otherwise continue with data=None -- confirm
            log.critical("You need to run .set_up_data() before .optimise()")

        fit_dates = generate_fitting_dates(
            data, date_method=date_method, rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)

        # Now for each time period, estimate weights
        # create a list of weight vectors
        weight_list = []

        # create a class object for each period
        opt_results = []

        progress = progressBar(len(fit_dates), "Optimising")

        for fit_period in fit_dates:
            # Do the optimisation for one period, using a particular optimiser
            # instance
            results_this_period = optSinglePeriod(self, data, fit_period,
                                                  optimiser, cleaning)

            opt_results.append(results_this_period)

            weights = results_this_period.weights

            # We adjust dates slightly to ensure no overlaps
            dindex = [
                fit_period.period_start + datetime.timedelta(days=1),
                fit_period.period_end - datetime.timedelta(days=1)
            ]

            # create a double row to delineate start and end of test period
            weight_row = pd.DataFrame(
                [weights] * 2, index=dindex, columns=data.columns)
            weight_list.append(weight_row)
            progress.iterate()

        # Stack everything up
        raw_weight_df = pd.concat(weight_list, axis=0)

        if apply_cost_weight:
            log.terse("Applying cost weighting to optimisation results")
            # ann_SR_costs must be calculated before a cost multiplier is applied
            ann_SR_costs = self.calculate_ann_SR_costs()

            weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
        else:
            weight_df = raw_weight_df

        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
        setattr(self, "raw_weights", raw_weight_df)
Ejemplo n.º 14
0
    def __init__(self,
                 data,
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        Its important that forward filling, or index / ffill / diff has been done before we begin

        :param data: simData to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param **kwargs: passed to correlationSinglePeriod

        :returns: CorrelationList
        """

        # isinstance() rather than 'type(data) is list': idiomatic, and also
        # accepts list subclasses
        if isinstance(data, list):

            # turn the list of data into a single dataframe. This will have a unique time series, which we manage
            #   through adding a small offset of a few microseconds

            length_of_data = len(data)
            data_resampled = [
                data_item.resample(frequency).last() for data_item in data
            ]
            data_as_df = df_from_list(data_resampled)

        else:
            length_of_data = 1
            data_as_df = data.resample(frequency).last()

        column_names = list(data_as_df.columns)

        # Generate time periods
        fit_dates = generate_fitting_dates(
            data_as_df, date_method=date_method, rollyears=rollyears)

        # create a single period correlation estimator
        correlation_estimator_for_one_period = correlationSinglePeriod(
            data_as_df, length_of_data=length_of_data, **kwargs)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:

            progress.iterate()
            corrmat = correlation_estimator_for_one_period.calculate(
                fit_period)
            corr_list.append(corrmat)

        # plain attribute assignment instead of setattr with a literal name
        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Ejemplo n.º 15
0
    corrmatrix.iloc[0][0] = 1.0
    corrmatrix.iloc[0][1] = corrvec[1]
    corrmatrix.iloc[0][2] = corrvec[0]
    corrmatrix.iloc[1][0] = corrvec[1]
    corrmatrix.iloc[1][1] = 1.0
    corrmatrix.iloc[1][2] = corrvec[2]
    corrmatrix.iloc[2][0] = corrvec[0]
    corrmatrix.iloc[2][1] = corrvec[2]
    corrmatrix.iloc[2][2] = 1.0

    return corrmatrix


# Monte-carlo loop: for each draw, sample a correlation vector, a Sharpe-ratio
# row and a stdev row from their bootstrap distributions, then record the
# optimal weights for that sampled world.
monte_length_inside = 5000
weights = []
thing = progressBar(monte_length_inside)

for notUsed in range(monte_length_inside):

    # NOTE(review): all three draws index with len(corr_dist) -- this assumes
    # SR_distr and stdev_distr have at least as many rows as corr_dist;
    # confirm, or use each frame's own length
    corrvec = corr_dist.iloc[int(random.uniform(0, len(corr_dist)))]
    srlist = SR_distr.iloc[int(random.uniform(0, len(corr_dist)))]
    stdev_list = stdev_distr.iloc[int(random.uniform(0, len(corr_dist)))]
    corrmatrix = from_cor_vec_to_matrix(corrvec)
    # mean return = Sharpe ratio * standard deviation
    meanlist = srlist * stdev_list

    weights.append(
        optimisation_with_data(corrmatrix.values, list(meanlist),
                               list(stdev_list)))
    thing.iterate()

weights = pd.DataFrame(weights, columns=['SP500', 'US10', 'US5'])
Ejemplo n.º 16
0
# Build one trading-rule config per (fast, slow) EWMAC speed pair from the
# base system's config. (Loop body continues beyond this excerpt.)
base_config = base_system.config
## run all possible combinations of TF to get base performance

instruments = base_system.get_instrument_list()

results = dict()
# candidate EWMAC lookback speeds to sweep over
wlist = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80,
    90, 100, 125, 150, 175, 200, 250
]
#wlist = [1,250]

instrument_list = base_system.get_instrument_list()

from syscore.genutils import progressBar
# one progress tick per (Aspeed, Bspeed, instrument) combination
thing = progressBar(len(wlist) * len(wlist) * len(instrument_list))

for Aspeed in wlist:
    for Bspeed in wlist:

        # equal fast/slow speeds give a degenerate rule; skip them
        if Aspeed == Bspeed:
            continue

        config = copy(base_config)
        trading_rules = dict(rule=dict(
            function='systems.provided.futures_chapter15.rules.ewmac',
            data=[
                'rawdata.get_daily_prices', 'rawdata.daily_returns_volatility'
            ],
            other_args=dict(Lfast=Aspeed, Lslow=Bspeed)))