Example #1
0
    def test_prediction(self):
        """Check prediction intervals at two confidence levels and the zero floor.

        The same row is predicted twice: once with the model's initial
        ``confint`` and once after setting ``confint`` to 0.98.  The point
        prediction must be unchanged while the interval must widen on both
        sides.  Finally, with negative predictions disallowed,
        ``add_prediction`` must not produce a negative ``predicted`` value.
        """
        data_frame = datasets.get('gas_2016_hour')
        data_frame_month = data_frame.resample('MS').sum().loc['2016', :]
        # A list indexer keeps the last row as a one-row frame, not a Series.
        data_frame_pred = data_frame_month.iloc[[-1], :]
        # NOTE(review): the model is fitted on all rows of data_frame_month,
        # which includes the row predicted below - confirm this is intended.
        mvlr = regression.MultiVarLinReg(data_frame=data_frame_month,
                                         dependent_var='313b',
                                         options={'p_max': 0.04})
        mvlr.do_analysis()
        # The "_95" name suggests the initial confint is 0.95; that default is
        # not visible from this test.
        data_frame_pred_95 = mvlr._predict(
            mvlr.fit, data_frame=data_frame_pred)
        mvlr.confint = 0.98
        data_frame_pred_98 = mvlr._predict(
            mvlr.fit, data_frame=data_frame_pred)

        # The point prediction does not depend on the confidence level.
        self.assertAlmostEqual(
            data_frame_pred_95.loc['2016-12-01', 'predicted'],
            data_frame_pred_98.loc['2016-12-01', 'predicted'])
        # The interval at 0.98 must be strictly wider on both sides.
        self.assertGreater(data_frame_pred_98.loc['2016-12-01', 'interval_u'],
                           data_frame_pred_95.loc['2016-12-01', 'interval_u'])
        self.assertLess(data_frame_pred_98.loc['2016-12-01', 'interval_l'],
                        data_frame_pred_95.loc['2016-12-01', 'interval_l'])

        # check limitation to zero
        mvlr.allow_negative_predictions = False
        mvlr.add_prediction()
        self.assertGreaterEqual(mvlr.data_frame['predicted'].min(), 0)
Example #2
0
    def test_standby(self):
        """Check the timezone of the daily standby result and the empty-input error."""
        df = datasets.get('elec_power_min_1sensor')
        res = analysis.standby(df, 'D')
        self.assertEqual(res.index.tz.zone, 'Europe/Brussels')

        # Pass an empty DataFrame *instance*.  Passing the class itself only
        # works by accident: ``pd.DataFrame.empty`` is then a property object
        # (always truthy) rather than the boolean of a real frame.
        self.assertRaises(exceptions.EmptyDataFrame, analysis.standby,
                          pd.DataFrame())
Example #3
0
def writeInputFileLists(sample_name, njobs, datadir, outdir):
    """Write per-job text files listing the input files of a sample.

    The sample's path fragment is looked up in ``datasets``.  Four
    directories are formed by appending ``tt.root``, ``tt_truth.root``,
    ``tt_PL.root`` and ``sumWeights.root`` to ``join(datadir, fragment)``;
    each directory's entries are listed in sorted order.  For every job
    index ``j`` in ``range(njobs)`` one list file per category is written to
    ``<outdir>/input_lists``, one path per line.

    The slice boundaries are derived from the number of *reco* files and
    applied unchanged to all four categories.  Each job receives
    ``nfiles // njobs`` entries, except the last job, which takes everything
    that remains.

    Args:
        sample_name: Key of the sample in ``datasets``; also embedded in the
            output file names.
        njobs: Number of jobs to split the file lists across.
        datadir: Directory that the sample's path fragment is joined to.
        outdir: Directory under which ``input_lists`` is created if missing.

    Returns:
        Tuple ``(reco, truth, PL, sumw)`` of file-name templates, each
        containing a ``{}`` placeholder for the job index.

    Raises:
        ValueError: If ``sample_name`` is not registered in ``datasets``.
        ZeroDivisionError: If ``njobs`` is zero.
        OSError: If one of the input directories cannot be listed or a list
            file cannot be written.
    """
    dspath = datasets.get(sample_name)
    if dspath is None:
        print("Unknown sample:", sample_name)
        print("Registered samples are:", list(datasets.keys()))
        # Stop here: continuing would only fail later inside os.path.join
        # with a TypeError that says nothing about the actual problem.
        raise ValueError("Unknown sample: {}".format(sample_name))

    sample_prefix = os.path.join(datadir, dspath)
    # (tag used in the list file name, suffix appended to the sample prefix)
    categories = (
        ('reco', 'tt.root'),
        ('truth', 'tt_truth.root'),
        ('PL', 'tt_PL.root'),
        ('sumw', 'sumWeights.root'),
    )

    # Sorted full paths of every entry in each category's directory.
    files = {}
    for tag, suffix in categories:
        directory = sample_prefix + suffix
        files[tag] = [
            os.path.join(directory, fp) for fp in sorted(os.listdir(directory))
        ]

    lists_dir = os.path.join(outdir, 'input_lists')
    if not os.path.isdir(lists_dir):
        print("Create directory", lists_dir)
        os.makedirs(lists_dir)

    # File-name templates; the '{}' is filled with the job index.
    templates = {}
    for tag, _ in categories:
        templates[tag] = os.path.join(
            lists_dir, 'input_' + sample_name + '_' + tag + '_{}.txt')

    # Chunk size is based on the reco file count for all categories.
    nfiles_per_job = len(files['reco']) // njobs

    for j in range(njobs):
        istart = j * nfiles_per_job
        # The last job has an open-ended slice so no file is dropped when the
        # count is not a multiple of njobs.
        iend = istart + nfiles_per_job if j < njobs - 1 else None

        for tag, _ in categories:
            # 'with' closes the handle even if the write fails.
            with open(templates[tag].format(j), 'w') as f_list:
                f_list.write('\n'.join(files[tag][istart:iend]))

    return tuple(templates[tag] for tag, _ in categories)
Example #4
0
 def test_init(self):
     """After ``do_analysis`` the model must expose a ``list_of_fits`` attribute."""
     monthly = datasets.get('gas_2016_hour').resample('MS').sum()
     model = regression.MultiVarLinReg(
         data_frame=monthly,
         dependent_var='313b',
         options={'p_max': 0.04},
     )
     model.do_analysis()
     self.assertTrue(hasattr(model, 'list_of_fits'))
Example #5
0
 def test_strange_names(self):
     """The analysis must still run when a column has a non-identifier name."""
     monthly = datasets.get('gas_2016_hour').resample('MS').sum()
     # Column name with a leading digit, an operator, non-ASCII characters,
     # a space and punctuation.
     awkward_names = {'d5a7': '3*tempĂȘte !'}
     monthly.rename(columns=awkward_names, inplace=True)
     model = regression.MultiVarLinReg(
         data_frame=monthly,
         dependent_var='313b',
         options={'p_max': 0.04},
     )
     model.do_analysis()
     self.assertTrue(hasattr(model, 'list_of_fits'))
Example #6
0
    def test_plot(self):
        """Call ``plot`` on an analysed model while ``subplots`` is patched."""
        monthly = datasets.get('gas_2016_hour').resample('MS').sum()
        model = regression.MultiVarLinReg(
            data_frame=monthly,
            dependent_var='313b',
            options={'p_max': 0.04},
        )
        model.do_analysis()

        # While patched, plt_mocked.subplots returns (fig_mock, ax_mock).
        subplots_patch = mock.patch.object(
            plt_mocked, 'subplots', return_value=(fig_mock, ax_mock))
        with subplots_patch:
            model.plot()
Example #7
0
    def test_predict(self):
        """``add_prediction`` must append exactly three columns to the model's frame."""
        monthly = datasets.get('gas_2016_hour').resample('MS').sum()
        monthly.rename(columns={'d5a7': '3*tempĂȘte !'}, inplace=True)
        model = regression.MultiVarLinReg(
            data_frame=monthly,
            dependent_var='313b',
            options={'p_max': 0.04},
        )
        model.do_analysis()
        model.add_prediction()

        actual_columns = model.data_frame.columns.tolist()
        # Input columns in their original order, followed by the new ones.
        expected_columns = monthly.columns.tolist() + [
            'predicted', 'interval_l', 'interval_u']
        self.assertListEqual(actual_columns, expected_columns)
Example #8
0
 def test_raises(self):
     """Using the model before ``do_analysis`` must raise ``UnboundLocalError``.

     Both calling ``add_prediction`` and reading ``list_of_fits`` are
     checked on a freshly constructed, not yet analysed model.
     """
     data_frame = datasets.get('gas_2016_hour')
     data_frame_month = data_frame.resample('MS').sum()
     mvlr = regression.MultiVarLinReg(data_frame=data_frame_month,
                                      dependent_var='313b',
                                      options={'p_max': 0.04})
     self.assertRaises(UnboundLocalError, mvlr.add_prediction)
     # The attribute access itself is what is expected to raise; the value
     # is deliberately discarded.
     with self.assertRaises(UnboundLocalError):
         _ = mvlr.list_of_fits
Example #9
0
 def test_alternative_metrics(self):
     """The best fit by R-squared must equal the best fit by Akaike and by BIC."""
     monthly = datasets.get('gas_2016_hour').resample('MS').sum()
     model = regression.MultiVarLinReg(
         data_frame=monthly,
         dependent_var='313b',
         options={'p_max': 0.04},
     )
     model.do_analysis()

     # Evaluate all three selectors first, then compare against R-squared.
     selectors = (model.find_best_rsquared,
                  model.find_best_akaike,
                  model.find_best_bic)
     by_rsquared, by_akaike, by_bic = [
         select(model.list_of_fits) for select in selectors]
     self.assertEqual(by_rsquared, by_akaike)
     self.assertEqual(by_rsquared, by_bic)
Example #10
0
    def test_load_factor(self):
        """Check ``calculate_load_factor`` with default and explicit arguments.

        With defaults, multiplying the result by the series maximum must give
        back the first input value.  With ``resolution='3h'`` and
        ``norm=800`` the first value, rescaled by 800, is compared with a
        recorded reference number.
        """
        ts = datasets.get('electricity_2016_hour')
        ts = ts['e1de'].truncate(after=pd.Timestamp('20160107'))
        # Fixture sanity check: a single column was selected.
        self.assertIsInstance(ts, pd.Series)

        lf1 = analysis.calculate_load_factor(time_series=ts)
        # Check the *result* type, not the input.
        self.assertIsInstance(lf1, pd.Series)
        self.assertAlmostEqual(ts.iloc[0], (lf1 * ts.max()).iloc[0])

        lf2 = analysis.calculate_load_factor(time_series=ts,
                                             resolution='3h',
                                             norm=800)
        self.assertIsInstance(lf2, pd.Series)
        self.assertAlmostEqual(175.0345212009457, (lf2 * 800).iloc[0])
Example #11
0
    def test_standby_with_time_window(self):
        """Compare daily standby for two time windows with recorded JSON output.

        The second window ('22:00' to '06:00') has a start later than its end.
        """
        df = datasets.get('elec_power_min_1sensor')

        # (time_window, exact JSON serialisation expected for the result)
        cases = [
            (
                ('01:00', '06:00'),
                '{"1507327200000":61.739999936,"1507413600000":214.9799999222,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":109.5599999931,"1508536800000":144.3600001093,"1508623200000":52.7999997279}',
            ),
            (
                ('22:00', '06:00'),
                '{"1507327200000":61.739999936,"1507413600000":119.2800000636,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":96.3000000408,"1508536800000":133.9200000744,"1508623200000":52.7999997279}',
            ),
        ]

        for window, expected_json in cases:
            res = analysis.standby(df, 'D', time_window=window)
            self.assertEqual(res.index.tz.zone, 'Europe/Brussels')
            self.assertEqual(res.squeeze().to_json(), expected_json)
Example #12
0
    def test_pickle_round_trip(self):
        """Pickle, unpickle and check results

        An analysed model is serialised and restored with ``pickle``.  The
        restored object must still have ``list_of_fits`` and must predict the
        same value for the same row as the original model.
        """
        data_frame = datasets.get('gas_2016_hour')
        data_frame_month = data_frame.resample('MS').sum().loc['2016', :]
        # A list indexer keeps the last row as a one-row frame, not a Series.
        data_frame_pred = data_frame_month.iloc[[-1], :]
        mvlr = regression.MultiVarLinReg(data_frame=data_frame_month,
                                         dependent_var='313b',
                                         options={'p_max': 0.04})
        mvlr.do_analysis()
        data_frame_pred_95_orig = mvlr._predict(
            mvlr.fit, data_frame=data_frame_pred)

        # The bytes are produced right here, so loading them is safe.
        s = pickle.dumps(mvlr)
        m = pickle.loads(s)
        self.assertTrue(hasattr(m, 'list_of_fits'))
        data_frame_pred_95_roundtrip = m._predict(
            m.fit, data_frame=data_frame_pred)
        self.assertAlmostEqual(
            data_frame_pred_95_orig.loc['2016-12-01', 'predicted'],
            data_frame_pred_95_roundtrip.loc['2016-12-01', 'predicted'])
Example #13
0
    def test_prune(self):
        """Create overfitted model and prune it

        A model built without a ``p_max`` option is pruned at two
        thresholds, and the result is compared with a model constructed
        directly with the stricter threshold.
        """
        data_frame = datasets.get('gas_2016_hour')
        data_frame_month = data_frame.resample('MS').sum()
        mvlr = regression.MultiVarLinReg(data_frame=data_frame_month,
                                         dependent_var='313b')
        mvlr.do_analysis()
        self.assertIn("ba14", mvlr.fit.model.exog_names)
        pruned = mvlr._prune(mvlr.fit, 0.05)
        self.assertIn("ba14", pruned.model.exog_names)
        # with this value, both x will be removed, which is a bit
        # counter-intuitive because initially only ba14 has a pvalue > p_max.
        pruned = mvlr._prune(mvlr.fit, 0.00009)
        self.assertNotIn("ba14", pruned.model.exog_names)
        self.assertNotIn("d5a7", pruned.model.exog_names)

        # Passing the same threshold as an option must exclude the same
        # variables from the final fit.
        mvlr = regression.MultiVarLinReg(data_frame=data_frame_month,
                                         dependent_var='313b',
                                         options={'p_max': 0.00009})
        mvlr.do_analysis()
        self.assertNotIn("ba14", mvlr.fit.model.exog_names)
        self.assertNotIn("d5a7", mvlr.fit.model.exog_names)
Example #14
0
 def test_count_peaks(self):
     """``count_peaks`` must report 13 for the first 100 rows of column '313b'."""
     first_hundred = datasets.get('gas_dec2016_min')['313b'].head(100)
     peak_count = analysis.count_peaks(first_hundred)
     self.assertEqual(peak_count, 13)