def test_merge_asserts_timevar_links(self):
    """merge_levels must raise AssertionError for a links frame with a misnamed time level."""
    bad_links = self.df_links.copy()
    # Rename the index levels so the first one is "wrong_timevar" while the
    # second keeps the low-level group variable name.
    bad_links.index.rename(["wrong_timevar", self.groupvar_l], inplace=True)

    self.assertRaises(
        AssertionError,
        Crosslevel.merge_levels,
        self.df_h,
        self.df_l,
        bad_links,
    )
def test_setup_state_varsets(self):
    """_setup_state should expose the group and time variable names it was given."""
    crosslevel = Crosslevel()
    crosslevel._setup_state(
        self.df_h,
        self.df_l,
        self.df_links,
        self.timevar,
        self.groupvar_h,
        self.groupvar_l,
    )

    # Each attribute on the instance must equal the same-named test fixture.
    for attr in ("groupvar_h", "groupvar_l", "timevar"):
        self.assertEqual(getattr(crosslevel, attr), getattr(self, attr))
def test_compute_product(self):
    """compute_product should match the plain product of the first two merged columns."""
    merged = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)
    col_a = merged.columns[0]
    col_b = merged.columns[1]

    actual = Crosslevel.compute_product(merged, col_a, col_b)
    expected = merged[col_a] * merged[col_b]

    pd.testing.assert_series_equal(actual, expected)
def test_init_cols(self):
    """Dump run_db to a JSON runfile, load it, and compare the loaded col_h."""
    with tempfile.TemporaryDirectory() as workdir:
        runfile = os.path.join(workdir, "run.json")
        with open(runfile, 'w') as handle:
            json.dump(self.run_db, handle)

        crosslevel = Crosslevel()
        crosslevel.load_runfile(runfile)

        # NOTE(review): the other runfile tests in this file compare
        # `cols_h` against values taken from run_db['jobs']; confirm that a
        # top-level 'col_h' key / `col_h` attribute is really intended here.
        self.assertEqual(crosslevel.col_h, self.run_db['col_h'])
def test_load_run(self):
    """_load_run should set the table, group and time attributes from the run dict."""
    crosslevel = Crosslevel()
    crosslevel._load_run(self.run_db)

    # Every listed key must surface as an identically named attribute.
    for key in ("table_h", "table_l", "groupvar_h", "groupvar_l", "timevar"):
        self.assertEqual(getattr(crosslevel, key), self.run_db[key])
def test_load_local_settings_file_connecstring(self):
    """load_local_settings should read the connectstring from a JSON settings file."""
    with tempfile.TemporaryDirectory() as workdir:
        settings_path = os.path.join(workdir, "local_settings.json")
        with open(settings_path, 'w') as handle:
            json.dump(self.local_settings, handle)

        crosslevel = Crosslevel()
        crosslevel.load_local_settings(settings_path)

        expected = self.local_settings['connectstring']
        self.assertEqual(expected, crosslevel.connectstring)
def test_load_local_settings_file(self):
    """Write the local settings to JSON, load them, and check the connectstring."""
    with tempfile.TemporaryDirectory() as workdir:
        settings_path = os.path.join(workdir, "local_settings.json")
        with open(settings_path, 'w') as handle:
            json.dump(self.local_settings, handle)

        crosslevel = Crosslevel()
        # NOTE(review): other tests in this file call `load_local_settings`
        # for the same purpose; confirm `load_local_settings_file` exists.
        crosslevel.load_local_settings_file(settings_path)

        self.assertEqual(crosslevel.connectstring, self.connectstring)
def test_setup_state_df(self):
    """_setup_state should store the merged frame (and not the raw high-level frame) as df."""
    crosslevel = Crosslevel()
    crosslevel._setup_state(
        self.df_h,
        self.df_l,
        self.df_links,
        self.timevar,
        self.groupvar_h,
        self.groupvar_l,
    )
    self.assertIsInstance(crosslevel.df, pd.DataFrame)

    expected = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)
    pd.testing.assert_frame_equal(crosslevel.df, expected)

    # Negative control: comparing against the unmerged high-level frame
    # must fail, otherwise the equality check above proves nothing.
    with self.assertRaises(AssertionError):
        pd.testing.assert_frame_equal(crosslevel.df, self.df_h)
def test_load_runfile_sets_cols(self):
    """load_runfile should populate columns, jobs, tables and schemas from the runfile."""
    expected_cols_h = ["ds_pgm_sb", "osa_pgm_sb"]
    expected_cols_l = ["ds_cm_sb"]
    expected_locations = (
        ("table_h", "collected_pgm_fcast_test"),
        ("table_l", "collected_cm_fcast_test"),
        ("schema_h", "landed_test"),
        ("schema_l", "landed_test"),
    )

    with tempfile.TemporaryDirectory() as workdir:
        runfile = os.path.join(workdir, "run.json")
        with open(runfile, 'w') as handle:
            json.dump(self.run_db, handle)

        crosslevel = Crosslevel()
        crosslevel.load_runfile(runfile)

        self.assertEqual(crosslevel.cols_h, expected_cols_h)
        self.assertEqual(crosslevel.cols_l, expected_cols_l)
        self.assertEqual(self.run_db['jobs'], crosslevel.jobs)
        for attr, expected in expected_locations:
            self.assertEqual(getattr(crosslevel, attr), expected)
def test_compute_colaresi(self):
    """compute_colaresi should equal p_l * (p_h / sum of p_h within each time/low-group cell)."""
    merged = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)
    col_h = self.df_h.columns[0]
    col_l = self.df_l.columns[0]

    # Reference value computed by hand: total of the high-level column per
    # (timevar, groupvar_l) group, broadcast back onto every row.
    group_keys = [self.timevar, self.groupvar_l]
    group_total_h = merged.groupby(group_keys)[col_h].transform('sum')
    expected = merged[col_l] * (merged[col_h] / group_total_h)

    actual = Crosslevel.compute_colaresi(
        merged, col_h, col_l, self.timevar, self.groupvar_l
    )

    pd.testing.assert_series_equal(expected, actual)
def test_load_runfile_cols(self):
    """Test that the columns needed for the jobs are correctly initialised."""
    jobs = self.run_db['jobs']
    expected_cols_h = [job['col_h'] for job in jobs]
    expected_cols_l = [job['col_l'] for job in jobs]

    with tempfile.TemporaryDirectory() as workdir:
        runfile = os.path.join(workdir, "run.json")
        with open(runfile, 'w') as handle:
            json.dump(self.run_db, handle)

        crosslevel = Crosslevel()
        crosslevel.load_runfile(runfile)

        self.assertEqual(crosslevel.cols_h, expected_cols_h)
        self.assertEqual(crosslevel.cols_l, expected_cols_l)
def test_fetch_data_is_df(self):
    """Load settings and runfile from JSON, then run fetch_data()."""
    with tempfile.TemporaryDirectory() as workdir:
        settings_path = os.path.join(workdir, "local_settings.json")
        runfile_path = os.path.join(workdir, "runfile.json")

        with open(settings_path, 'w') as handle:
            json.dump(self.local_settings, handle)
        with open(runfile_path, 'w') as handle:
            json.dump(self.run_db, handle)

        crosslevel = Crosslevel()
        crosslevel.load_local_settings(settings_path)
        crosslevel.load_runfile(runfile_path)

        # NOTE(review): despite the test name, nothing is asserted about the
        # fetched data; this only checks that fetch_data() does not raise.
        crosslevel.fetch_data()
def test_merge_levels_cols(self):
    """The merged frame's columns must be exactly those of df_h, df_l plus groupvar_l."""
    merged = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)

    actual = sorted(merged.columns)
    expected = sorted(
        [*self.df_h.columns, *self.df_l.columns, self.groupvar_l]
    )

    self.assertEqual(actual, expected)
def test_worker_product(self):
    """The worker chain must give the same series as calling compute_product directly."""
    job = self.job_product

    # Reference result: call the static product computation on the merged frame.
    merged = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)
    direct = Crosslevel.compute_product(
        df=merged, col_a=job['col_h'], col_b=job['col_l']
    )

    # Same job routed through an instance's worker() after state setup.
    crosslevel = Crosslevel()
    crosslevel._setup_state(
        self.df_h,
        self.df_l,
        self.df_links,
        self.timevar,
        self.groupvar_h,
        self.groupvar_l,
    )
    via_worker = crosslevel.worker(self.local_settings, job)

    pd.testing.assert_series_equal(direct, via_worker)
def test_fetch_data(self):
    """Check that df_h is a DataFrame after loading the run and local settings."""
    cl = Crosslevel()
    cl._load_run(self.run_db)
    cl._load_local_settings(self.local_settings)

    # NOTE(review): fetch_data() is never called here even though the test
    # is named after it; confirm df_h is populated by the load steps above.

    # assertIsInstance needs the class itself. Passing an instance
    # (pd.DataFrame()) makes isinstance() raise TypeError, so the test
    # errored instead of checking anything.
    self.assertIsInstance(cl.df_h, pd.DataFrame)
def test_merge_levels_len(self):
    """Merging must keep one row per row of the high-resolution frame."""
    merged = Crosslevel.merge_levels(self.df_h, self.df_l, self.df_links)

    rows_merged = len(merged)
    rows_high = len(self.df_h)

    self.assertEqual(rows_merged, rows_high)