Example #1
0
    def create_model(realdata=True, force_implementation=None, filter_p=None):
        """Build the ``LinearRegression`` model used by the tests.

        Parameters
        ----------
        realdata : bool, optional (default: True)
            If True, the series of point 325278 is read, cut to the period
            1998-01-01 .. 2007-01-01 and the candidate is rescaled with the
            slope / intercept fitted on the rows that contain no NaN.
            If False, the artificial 'asc2' data set is used as it is.
        force_implementation : optional (default: None)
            Passed through to ``LinearRegression``.
        filter_p : optional (default: None)
            Passed through to ``LinearRegression``.

        Returns
        -------
        regress : LinearRegression
            Model built from candidate and reference between start and end.
        """
        if realdata:
            start, end = datetime(1998, 1, 1), datetime(2007, 1, 1)
            # rename() without inplace returns a new frame, so the column
            # assignment below never writes into a slice of the raw data.
            ts = read_test_data(325278)[start:end].rename(columns={
                'CCI_41_COMBINED': 'candidate',
                'merra2': 'reference'
            })

            # bias correction: fit on complete rows only, apply to all rows
            complete = ts.dropna()
            slope, inter = linreg_params(complete['candidate'],
                                         complete['reference'])
            ts['candidate'] = linreg_stored_params(ts['candidate'], slope,
                                                   inter)  # scale
        else:
            # the break time of the artificial data is not needed here
            ts, _, [start, end] = create_artificial_test_data('asc2')

        regress = LinearRegression(ts['candidate'].loc[start:end],
                                   ts['reference'].loc[start:end],
                                   filter_p=filter_p,
                                   fit_intercept=True,
                                   force_implementation=force_implementation)
        return regress
Example #2
0
    def setUpClass(cls):
        """Run a HOM adjustment over two artificial breaks for all tests.

        The series of point 707393 (from 2007-01-01 on) is renamed to
        'candidate' / 'reference' and the unmodified candidate is kept in
        'candidate_original'. Two offsets are then added to the candidate:
        +0.1 from 2012-07-01 onwards and -0.1 up to 2010-01-15. A
        ``TsRelMultiBreak`` object is created from the biased data,
        ``adjust_all`` is called and its result is stored in
        ``cls.candidate_adjusted``.
        """
        ts = read_test_data(707393)
        ts_full = ts.rename(columns={'CCI_44_COMBINED': 'candidate',
                                     'MERRA2': 'reference'}).loc['2007-01-01':].copy(True)
        ts_full['candidate_original'] = ts_full['candidate']  # keep original

        # introduce some breaks to correct
        cls.breaktimes = np.array([datetime(2012, 7, 1), datetime(2010, 1, 15)])
        can_biased = ts_full.loc[:, 'candidate'].copy(True)
        # both offsets use label based .loc slicing on the time index
        can_biased.loc[cls.breaktimes[0]:] += 0.1  # first break
        can_biased.loc[:cls.breaktimes[1]] -= 0.1  # second break
        ts_full.loc[:, 'candidate'] = can_biased
        ts_full['flags'] = 0.  # all are good in the example

        test_kwargs = {'test_resample': ('M', 0.3),
                       'mean_test': 'wilkoxon',
                       'var_test': 'scipy_fligner_killeen',
                       'alpha': 0.01,
                       'test_check_min_data': 5,
                       'test_check_spearR_sig': [0., 1.]}

        adjmodel_kwargs = {'regress_resample': ('M', 0.3),
                           'filter': None,
                           'poly_orders': [1, 2],
                           'select_by': 'R',
                           'cdf_types': None}

        adjfct_kwargs = {'alpha': 0.4,
                         'use_separate_cdf': False,
                         'from_bins': False}

        adjcheck_kwargs = {'adjust_check_fix_temp_coverage': False,
                           'adjust_check_min_group_range': 365,
                           'adjust_check_pearsR_sig': (0., 1.)}

        cls.ts_full = ts_full.copy(True)

        cls.src = TsRelMultiBreak(candidate=cls.ts_full['candidate'],
                                  reference=cls.ts_full['reference'],
                                  breaktimes=cls.breaktimes,
                                  adjustment_method='HOM',
                                  candidate_flags=(cls.ts_full['flags'], [0]),
                                  full_period_bias_corr_method='cdf_match',
                                  sub_period_bias_corr_method='linreg',
                                  base_breaktime=None,
                                  HSP_init_breaktest=True,
                                  models_from_hsp=True,
                                  adjust_within='breaks',
                                  input_resolution='D',
                                  test_kwargs=test_kwargs,
                                  adjmodel_kwargs=adjmodel_kwargs,
                                  adjcheck_kwargs=adjcheck_kwargs,
                                  create_model_plots=True,
                                  frame_ts_figure=True,
                                  frame_tsstats_plots=True)

        # the input must be recognised as daily data
        (res, freq) = dt_freq(cls.src.df_original.index)
        assert (res, freq) == (1., 'D')
        cls.candidate_adjusted = cls.src.adjust_all(**adjfct_kwargs)
        assert cls.src.candidate_has_changed()
Example #3
0
    def setUpClass(cls):
        """Create the QuantileCatMatch object shared by the tests.

        The series of point 654079 is read and its 'CCI' / 'REF' columns are
        renamed to 'can' / 'ref'. The complete frame is stored in
        ``cls.ts_full``, while the model only receives the data from
        2010-01-15 onwards, with the break time set to 2012-07-01.
        """
        ts = read_test_data(654079)
        ts.rename(columns=dict(CCI='can', REF='ref'), inplace=True)
        # the full series is kept, so tests can adjust values that differ
        # from the ones the model was created with
        cls.ts_full = ts

        model_start = datetime(2010, 1, 15)
        model_data = ts.loc[model_start:, :]

        cls.qcm = QuantileCatMatch(model_data['can'],
                                   model_data['ref'],
                                   datetime(2012, 7, 1),
                                   bias_corr_method='linreg',
                                   adjust_group=0,
                                   categories=4,
                                   first_last='formula',
                                   fit='mean')
Example #4
0
    def setUpClass(cls):
        """Run only the break tests over two artificial breaks.

        The series of point 707393 (from 2007-01-01 on) is renamed to
        'candidate' / 'reference' and the unmodified candidate is kept in
        'candidate_original'. Two offsets are then added to the candidate:
        +0.1 from 2012-07-01 onwards and -0.1 up to 2010-01-15. The
        ``TsRelMultiBreak`` object is created with ``adjustment_method=None``
        and only ``test_all`` is called, after which the candidate is
        asserted to be unchanged.
        """
        ts = read_test_data(707393)
        ts_full = ts.rename(columns={'CCI_44_COMBINED': 'candidate',
                                     'MERRA2': 'reference'}).loc['2007-01-01':].copy(True)
        ts_full['candidate_original'] = ts_full['candidate']  # keep original

        # introduce some breaks to correct
        cls.breaktimes = np.array([datetime(2012, 7, 1), datetime(2010, 1, 15)])
        can_biased = ts_full.loc[:, 'candidate'].copy(True)
        # both offsets use label based .loc slicing on the time index
        can_biased.loc[cls.breaktimes[0]:] += 0.1  # first break
        can_biased.loc[:cls.breaktimes[1]] -= 0.1  # second break
        ts_full.loc[:, 'candidate'] = can_biased
        ts_full['flags'] = 0.  # all are good in the example

        test_kwargs = {'test_resample': ('M', 0.3),
                       'mean_test': 'wilkoxon',
                       'var_test': 'scipy_fligner_killeen',
                       'alpha': 0.01,
                       'test_check_min_data': 5,
                       'test_check_spearR_sig': [0., 1.]}

        cls.ts_full = ts_full.copy(True)

        cls.src = TsRelMultiBreak(candidate=cls.ts_full['candidate'],
                                  reference=cls.ts_full['reference'],
                                  breaktimes=cls.breaktimes,
                                  adjustment_method=None,
                                  candidate_flags=(cls.ts_full['flags'], [0]),
                                  full_period_bias_corr_method='cdf_match',
                                  sub_period_bias_corr_method='linreg',
                                  base_breaktime=None,
                                  HSP_init_breaktest=False,
                                  models_from_hsp=False,
                                  adjust_within='frames',
                                  input_resolution='D',
                                  test_kwargs=test_kwargs,
                                  adjmodel_kwargs={},
                                  adjcheck_kwargs={},
                                  create_model_plots=False,
                                  frame_ts_figure=False,
                                  frame_tsstats_plots=False)

        # the input must be recognised as daily data
        (res, freq) = dt_freq(cls.src.df_original.index)
        assert (res, freq) == (1., 'D')
        cls.src.test_all()
        # testing alone must leave the candidate untouched
        assert not cls.src.candidate_has_changed()
Example #5
0
    def setUpClass(cls):
        """Create the TsRelBreakAdjust object (HOM) shared by the tests.

        The series of point 707393 (from 2007-01-01 on) is renamed to
        'candidate' / 'reference' and stored in ``cls.ts_full``; an offset of
        0.1 is added to the candidate from 2012-07-01 onwards.
        ``cls.ts_frame`` holds the part between 2010-01-15 and 2018-06-30,
        and ``cls.src`` is created from the full series with that time frame.
        """
        ts = read_test_data(707393)
        # copy the selection, so that the break below is written into an
        # independent frame and not into a slice of the renamed data
        cls.ts_full = ts.rename(columns={
            'CCI_44_COMBINED': 'candidate',
            'MERRA2': 'reference'
        }).loc['2007-01-01':].copy(True)
        breaktime = datetime(2012, 7, 1)
        timeframe = np.array([datetime(2010, 1, 15), datetime(2018, 6, 30)])
        # introduce a mean break of 0.1
        cls.ts_full.loc[breaktime:, 'candidate'] += 0.1

        cls.ts_frame = cls.ts_full.loc[timeframe[0]:timeframe[1]]

        test_kwargs = dict(test_resample=('M', 0.3),
                           test_check_min_data=3,
                           test_check_spearR_sig=(0, 0.05),
                           alpha=0.01)

        adjmodel_kwargs = dict(regress_resample=None,
                               filter=None,
                               poly_orders=[1, 2],
                               select_by='R',
                               cdf_types=None)

        kwargs = dict(candidate_flags=None,
                      timeframe=timeframe,
                      bias_corr_method='cdf_match',
                      adjust_tf_only=False,
                      adjust_group=0,
                      input_resolution='D',
                      adjust_check_pearsR_sig=(0.5, 0.01),
                      adjust_check_fix_temp_coverage=False,
                      adjust_check_min_group_range=365,
                      adjust_check_coverdiff_max=None,
                      adjust_check_ppcheck=(True, False),
                      create_model_plots=True,
                      test_kwargs=test_kwargs,
                      adjmodel_kwargs=adjmodel_kwargs)

        cls.src = TsRelBreakAdjust(cls.ts_full['candidate'],
                                   cls.ts_full['reference'],
                                   breaktime,
                                   adjustment_method='HOM',
                                   **kwargs)
Example #6
0
    def setUpClass(cls):
        """Create the RegressPairFit object shared by the tests.

        The series of point 654079 is read and its 'CCI' / 'REF' columns are
        renamed to 'can' / 'ref'. The complete frame is stored in
        ``cls.ts_full``, while the model only receives the data from
        2010-01-15 onwards, with the break time set to 2012-07-01.
        """
        ts = read_test_data(654079)
        ts.rename(columns=dict(CCI='can', REF='ref'), inplace=True)
        # the full series is kept, so tests can adjust values that differ
        # from the ones the model was created with
        cls.ts_full = ts

        model_start = datetime(2010, 1, 15)
        model_data = ts.loc[model_start:, :]

        cls.lmp = RegressPairFit(model_data['can'],
                                 model_data['ref'],
                                 datetime(2012, 7, 1),
                                 candidate_freq='D',
                                 bias_corr_method='linreg',
                                 filter=('both', 5),
                                 adjust_group=0,
                                 model_intercept=True,
                                 regress_resample=('M', 0.3))
Example #7
0
    def create_model(realdata=True, poly_order=2, filter_p=None):
        """Build the ``HigherOrderRegression`` model used by the tests.

        Parameters
        ----------
        realdata : bool, optional (default: True)
            If True, the series of point 325278 is read, renamed to
            'can' / 'ref' and the candidate is rescaled with the slope and
            intercept fitted on the rows that contain no NaN.
            If False, the artificial 'asc2' data set is used as it is.
        poly_order : int, optional (default: 2)
            Passed through to ``HigherOrderRegression``.
        filter_p : optional (default: None)
            Passed through to ``HigherOrderRegression``.

        Returns
        -------
        regress : HigherOrderRegression
            Model built from 'can' and 'ref' between start and end.
        """
        if not realdata:
            # NOTE(review): the columns 'can' and 'ref' are read below, this
            # assumes the artificial data provides them - TODO confirm
            ts, _, [start, end] = create_artificial_test_data('asc2')
        else:
            ts = read_test_data(325278)
            start = datetime(1998, 1, 1)
            end = datetime(2007, 1, 1)
            new_names = {'CCI_41_COMBINED': 'can', 'merra2': 'ref'}
            ts.rename(columns=new_names, inplace=True)
            complete = ts.dropna()
            slope, inter = linreg_params(complete['can'], complete['ref'])
            ts['can'] = linreg_stored_params(ts['can'], slope, inter)  # scale

        candidate = ts['can'].loc[start:end]
        reference = ts['ref'].loc[start:end]
        return HigherOrderRegression(candidate,
                                     reference,
                                     poly_order=poly_order,
                                     filter_p=filter_p)
Example #8
0
def test_meta_dict():
    """Check the error code descriptions of the test meta info dict."""
    df = read_test_data(431790)

    settings = dict(breaktime=datetime(2000, 7, 1),
                    test_resample=('M', 0.3),
                    test_check_min_data=3,
                    test_check_spearR_sig=(0, 0.1),
                    bias_corr_method='linreg',
                    alpha=0.01)
    test = TsRelBreakTest(candidate=df['CCI_41_COMBINED'],
                          reference=df['merra2'],
                          **settings)

    meta = test.get_test_meta()

    # error code -> description, checked in the same order as listed
    expected = (('0', 'No error occurred'),
                ('1', 'No data for selected time frame'),
                ('2', 'Spearman correlation failed'),
                ('3', 'Min. observations N not reached'),
                ('9', 'Unknown Error'))
    for code, description in expected:
        assert meta[code] == description
Example #9
0
def test_merge_results():
    """Check the nesting depth of the flat and the nested results dict."""
    df = read_test_data(431790)

    settings = dict(breaktime=datetime(2000, 7, 1),
                    test_resample=('M', 0.3),
                    test_check_min_data=3,
                    test_check_spearR_sig=(0, 0.1),
                    bias_corr_method='linreg',
                    alpha=0.01)
    test = TsRelBreakTest(candidate=df['CCI_41_COMBINED'],
                          reference=df['merra2'],
                          **settings)

    test.run_tests()

    flat = test.get_flat_results()
    # get_results() is unpacked into three parts, only the first is checked
    nested, _error_dict, _checkstats = test.get_results()

    assert dict_depth(flat) == 1
    assert dict_depth(nested) == 3
Example #10
0
    def setUpClass(cls):
        """Create the HigherOrderMoments object shared by the tests.

        The series of point 654079 is read and its 'CCI' / 'REF' columns are
        renamed to 'can' / 'ref'. The complete frame is stored in
        ``cls.ts_full``, while the model only receives the data from
        2010-01-15 onwards, with the break time set to 2012-07-01.
        """
        ts = read_test_data(654079)
        ts.rename(columns=dict(CCI='can', REF='ref'), inplace=True)
        # the full series is kept, so tests can adjust values that differ
        # from the ones the model was created with
        cls.ts_full = ts

        model_start = datetime(2010, 1, 15)
        model_data = ts.loc[model_start:, :]

        cls.hom = HigherOrderMoments(model_data['can'],
                                     model_data['ref'],
                                     datetime(2012, 7, 1),
                                     bias_corr_method='linreg',
                                     filter=('both', 5),
                                     adjust_group=0,
                                     poly_orders=[1, 2],
                                     select_by='R',
                                     cdf_types=None,
                                     regress_resample=('M', 0.3))
Example #11
0
def usecase():
    """Demonstrate break testing and adjustment with ``RegressPairFit``.

    Steps, in the order in which they are executed:

    1. Read the series of point 654079 and keep the 'CCI' / 'REF' columns
       plus a copy of the unmodified candidate in 'original'.
    2. Test the frame 2002-06-19 .. 2018-06-30 for a break at 2012-07-01
       and print the result.
    3. Create a ``RegressPairFit`` object for the frame, plot its models,
       adjust all candidate values up to the break time and plot the
       adjustments.
    4. Concatenate the adjusted values with the data of the other group,
       create a second ``RegressPairFit`` object from the result, plot its
       models, then test the new candidate again and print the result.
    """
    from tests.helper_functions import read_test_data
    from pybreaks.break_test import TsRelBreakTest
    gpi = 654079  # bad: 395790,402962

    # the second returned element is not used, the break time is set below
    ts_full, _ = read_test_data(gpi)
    canname, refname = 'CCI', 'REF'
    # copy the column selection, so that the new column is added to an
    # independent frame and not to a subset of the data that was read
    ts_full = ts_full[[canname, refname]].copy()
    ts_full['original'] = ts_full[canname].copy(True)

    adjust_group = 0

    breaktime = datetime(2012, 7, 1)
    timeframe = np.array([datetime(2010, 7, 1), datetime(2018, 6, 30)])

    # only the end of the time frame is used to cut the data
    ts_frame = ts_full[datetime(2002, 6, 19):timeframe[1]].copy(True)

    testds = TsRelBreakTest(ts_frame[canname],
                            ts_frame[refname],
                            breaktime=breaktime,
                            bias_corr_method='linreg')

    isbreak, breaktype, _, _ = testds.run_tests()
    print(isbreak, breaktype)

    obj = RegressPairFit(
        ts_frame[canname],
        ts_frame[refname],
        breaktime,
        candidate_freq='D',
        regress_resample=None,  # ('M', 0.3),
        bias_corr_method='linreg',
        filter=('both', 5),
        adjust_group=adjust_group,
        model_intercept=True)

    obj.plot_models()

    # values are taken from the full series, not only from the frame
    values_to_adjust = ts_full[canname].loc[:breaktime]
    can_adjusted = obj.adjust(values_to_adjust,
                              corrections_from_core=True,
                              resample_corrections=True,
                              interpolation_method='linear',
                              values_to_adjust_freq='D')
    obj.plot_adjustments()

    can_unchanged = obj.get_group_data(obj.other_group, obj.df_original,
                                       obj.candidate_col_name)
    can_new = pd.concat([can_adjusted, can_unchanged], axis=0)

    ref_new = obj.get_group_data(None, obj.df_original,
                                 obj.reference_col_name)

    nobj = RegressPairFit(can_new,
                          ref_new,
                          breaktime,
                          regress_resample=None,
                          bias_corr_method=None,
                          filter=(None, 5),
                          adjust_group=adjust_group,
                          model_intercept=True)
    nobj.plot_models()
    testds = TsRelBreakTest(can_new,
                            ts_frame[refname],
                            breaktime=breaktime,
                            bias_corr_method='linreg')

    isbreak, breaktype, _, _ = testds.run_tests()

    print(isbreak, breaktype)
Example #12
0
def test_conditions():
    """Check the error code that is set for each violated test condition."""
    df = read_test_data(431790)
    breaktime = datetime(2000, 7, 1)

    # candidate without any values, used for the 'no data' case
    ccinan = pd.DataFrame(index=df.index, columns=['CCI_41_COMBINED'])

    # (empty candidate?, test_check_min_data, test_check_spearR_sig, code)
    scenarios = (
        (False, 5, (0, 0.1), 0),    # will not raise error
        (True, 5, (0, 0.1), 1),     # will raise no data error
        (False, 5, (1, 1), 2),      # will raise correlation error (R)
        (False, 5, (0, 0), 2),      # will raise correlation error (p)
        (False, 10000, (0, 1), 3),  # will raise test_min_data error
    )

    for empty_candidate, min_data, spear_sig, expected_code in scenarios:
        if empty_candidate:
            candidate = ccinan
        else:
            candidate = df['CCI_41_COMBINED']

        test = TsRelBreakTest(candidate=candidate,
                              reference=df['merra2'],
                              alpha=0.01,
                              breaktime=breaktime,
                              test_resample=('M', 0.3),
                              test_check_min_data=min_data,
                              test_check_spearR_sig=spear_sig,
                              bias_corr_method='linreg')

        assert test.error_code_test == expected_code
Example #13
0
 def setUpClass(cls):
     """Store the test period, the data within it and the break time."""
     period_start, period_end = datetime(1998, 1, 1), datetime(2007, 1, 1)
     cls.start = period_start
     cls.end = period_end
     # series of point 431790, cut to the test period
     cls.original_data = read_test_data(431790).loc[period_start:period_end]
     cls.breaktime = datetime(2002, 7, 1)