def test_csv_io(self):
    """Dump the session to a CSV directory and check the keys read back."""
    csv_dir = abspath('test_session_csv')
    self.session.to_csv(csv_dir)

    loaded = Session()
    loaded.load(csv_dir, engine='pandas_csv')

    # CSV cannot keep ordering (so the keys are expected in sorted order)
    loaded_keys = list(loaded.keys())
    self.assertEqual(loaded_keys, ['e', 'f', 'g'])
def test_csv_io(tmpdir, session, meta):
    """Check CSV session I/O, including glob patterns and invalid files.

    After the generic round-trip performed by ``_test_io``, the session is
    reloaded through a ``*.csv`` pattern, then an unparsable file is added to
    the directory to check that loading fails by default and succeeds with
    ``ignore_exceptions=True``.
    """
    # Bind fpath before entering the try block: if _test_io itself fails, the
    # cleanup below must not raise an UnboundLocalError hiding the real error.
    fpath = None
    try:
        fpath = _test_io(tmpdir, session, meta, engine='pandas_csv', ext='csv')

        # only Array objects are expected to be found in the CSV directory
        names = Session({k: v for k, v in session.items() if isinstance(v, Array)}).names

        # test loading with a pattern
        pattern = os.path.join(fpath, '*.csv')
        s = Session(pattern)
        assert s.names == names
        assert s.meta == meta

        # create an invalid .csv file
        invalid_fpath = os.path.join(fpath, 'invalid.csv')
        with open(invalid_fpath, 'w') as invalid_file:
            invalid_file.write(',",')

        # try loading the directory with the invalid file
        with pytest.raises(pd.errors.ParserError):
            Session(pattern)

        # test loading a pattern, ignoring invalid/unsupported files
        s = Session()
        s.load(pattern, ignore_exceptions=True)
        assert s.names == names
        assert s.meta == meta
    finally:
        if fpath is not None:
            shutil.rmtree(fpath)
def test_global_arrays(self):
    """Check global_arrays() with and without the private arrays."""
    # default call: private global arrays are excluded
    public_only = global_arrays()
    expected_public = Session([('global_arr1', global_arr1)])
    assert public_only.equals(expected_public)

    # include_private=True: all global arrays
    with_private = global_arrays(include_private=True)
    expected_all = Session([
        ('global_arr1', global_arr1),
        ('_global_arr2', _global_arr2),
    ])
    assert with_private.equals(expected_all)
def test_global_arrays():
    """Check global_arrays() with and without the private arrays."""
    # default call: private global arrays are excluded
    public_only = global_arrays()
    expected_public = Session([('e', e), ('f', f), ('g', g), ('h', h), ('k', k)])
    assert public_only.equals(expected_public)

    # include_private=True: all global arrays
    with_private = global_arrays(include_private=True)
    expected_all = Session([
        ('e', e), ('_e', _e), ('f', f), ('g', g), ('h', h), ('k', k),
    ])
    assert with_private.equals(expected_all)
def test_init(self):
    """Check Session creation from objects and from an HDF file path."""
    # positional objects + keyword objects
    from_objects = Session(self.b, self.a,
                           c=self.c, d=self.d, e=self.e, f=self.f, g=self.g)
    self.assertEqual(from_objects.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g'])

    # a single file path
    h5_path = abspath('test_session.h5')
    from_file = Session(h5_path)
    self.assertEqual(from_file.names, ['e', 'f', 'g'])
def test_local_arrays():
    """Check local_arrays() with and without the private arrays."""
    # NOTE: the names of these two locals are part of the test (they are the
    # keys expected in the resulting sessions), so they must not be renamed.
    h = ndtest(2)
    _h = ndtest(3)

    # default call: private local arrays are excluded
    public_only = local_arrays()
    assert public_only.equals(Session([('h', h)]))

    # include_private=True: all local arrays
    with_private = local_arrays(include_private=True)
    assert with_private.equals(Session([('h', h), ('_h', _h)]))
def test_local_arrays(self):
    """Check local_arrays() with and without the private arrays."""
    # NOTE: the names of these two locals are part of the test (they are the
    # keys expected in the resulting sessions), so they must not be renamed.
    local_arr1 = ndtest(2)
    _local_arr2 = ndtest(3)

    # default call: private local arrays are excluded
    public_only = local_arrays()
    assert public_only.equals(Session([('local_arr1', local_arr1)]))

    # include_private=True: all local arrays
    with_private = local_arrays(include_private=True)
    expected_all = Session([
        ('local_arr1', local_arr1),
        ('_local_arr2', _local_arr2),
    ])
    assert with_private.equals(expected_all)
def test_ne(self):
    """Check the element-wise ``!=`` operator between sessions.

    The result of ``sess != other`` is consumed through ``.values()``: each
    value is negated and reduced with ``.all()``, so ``True`` means "no
    difference found for that array".
    """
    sess = self.session.filter(kind=LArray)

    # identical content: no array may report any difference.
    # The all() call is required: asserting the bare (non-empty) list would
    # always pass, whatever the booleans it contains.
    expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
    assert all([(~array).all() for array in (sess != expected).values()])

    # 'g' is missing from the other session
    other = Session([('e', self.e), ('f', self.f)])
    res = sess != other
    assert [(~arr).all() for arr in res.values()] == [True, False, True]

    # 'e' differs by one cell and 'g' is still missing
    e2 = self.e.copy()
    e2.i[1, 1] = 42
    other = Session([('e', e2), ('f', self.f)])
    res = sess != other
    assert [(~arr).all() for arr in res.values()] == [False, False, True]
def test_arrays():
    """Check arrays() with and without the private arrays."""
    # NOTE: the names of these two locals are part of the test (they are
    # among the keys expected in the resulting sessions): do not rename them.
    i = ndtest(2)
    _i = ndtest(3)

    # default call: private arrays are excluded
    public_only = arrays()
    expected_public = Session([
        ('e', e), ('f', f), ('g', g), ('h', h), ('i', i), ('k', k),
    ])
    assert public_only.equals(expected_public)

    # include_private=True: all arrays
    with_private = arrays(include_private=True)
    expected_all = Session([
        ('_e', _e), ('_i', _i),
        ('e', e), ('f', f), ('g', g), ('h', h), ('i', i), ('k', k),
    ])
    assert with_private.equals(expected_all)
def test_eq(self):
    """Check the element-wise ``==`` operator between sessions."""
    arrays_only = self.session.filter(kind=LArray)

    # same content: every array must be equal everywhere
    same_content = Session([('e', self.e), ('f', self.f), ('g', self.g)])
    comparison = arrays_only == same_content
    assert all([array.all() for array in comparison.values()])

    # 'g' is missing from the right-hand session
    without_g = Session([('e', self.e), ('f', self.f)])
    comparison = arrays_only == without_g
    assert list(comparison.keys()) == ['e', 'g', 'f']
    per_array = [arr.all() for arr in comparison.values()]
    assert per_array == [True, False, True]

    # additionally, one cell of 'e' differs
    modified_e = self.e.copy()
    modified_e.i[1, 1] = 42
    with_modified_e = Session([('e', modified_e), ('f', self.f)])
    comparison = arrays_only == with_modified_e
    per_array = [arr.all() for arr in comparison.values()]
    assert per_array == [False, False, True]
def test_sub(self):
    """Check the subtraction of a session from another session."""
    arrays_only = self.session.filter(kind=LArray)
    subtrahend = Session({'e': self.e - 1, 'f': 1})

    result = arrays_only - subtrahend

    expected_e = np.full((2, 3), 1, dtype=np.int32)
    assert_array_nan_equal(result['e'], expected_e)

    expected_f = np.arange(-1, 5).reshape(3, 2)
    assert_array_nan_equal(result['f'], expected_f)

    # 'g' is absent from the subtracted session: only NaNs are expected
    g_is_nan = isnan(result['g'])
    self.assertTrue(g_is_nan.all())
def test_getitem_larray(session):
    """Check the selection of session items using a boolean array."""
    arrays_only = session.filter(kind=Array)
    reference = Session({'e': e + 1, 'f': f})

    equal_mask = arrays_only.element_equals(reference)
    different_mask = ~(arrays_only.element_equals(reference))

    kept_when_equal = list(arrays_only[equal_mask])
    kept_when_different = list(arrays_only[different_mask])

    assert kept_when_equal == [f]
    assert kept_when_different == [e, g, h]
def test_getitem_larray(self):
    """Check the selection of session items using a session comparison."""
    arrays_only = self.session.filter(kind=LArray)
    reference = Session({'e': self.e + 1, 'f': self.f})

    equal_mask = arrays_only == reference
    different_mask = arrays_only != reference

    kept_when_equal = list(arrays_only[equal_mask])
    kept_when_different = list(arrays_only[different_mask])

    self.assertEqual(kept_when_equal, [self.f])
    self.assertEqual(kept_when_different, [self.e, self.g])
def test_getitem_larray(self):
    """Check the selection of session items using array_equals()."""
    arrays_only = self.session.filter(kind=LArray)
    reference = Session({'e': self.e + 1, 'f': self.f})

    equal_mask = arrays_only.array_equals(reference)
    different_mask = ~(arrays_only.array_equals(reference))

    kept_when_equal = list(arrays_only[equal_mask])
    kept_when_different = list(arrays_only[different_mask])

    assert kept_when_equal == [self.f]
    assert kept_when_different == [self.e, self.g]
def test_pickle_io(self):
    """Save the session to a pickle file, reload it, then update one array."""
    pickle_path = abspath('test_session.pkl')
    expected_keys = ['e', 'g', 'f']

    self.session.save(pickle_path)
    loaded = Session()
    loaded.load(pickle_path, engine='pickle')
    self.assertEqual(list(loaded.keys()), expected_keys)

    # update an array (overwrite=False)
    replacement = Session(e=self.e2)
    replacement.save(pickle_path, overwrite=False)
    loaded.load(pickle_path, engine='pickle')
    self.assertEqual(list(loaded.keys()), expected_keys)
    assert_array_nan_equal(loaded['e'], self.e2)
def test_sub(session):
    """Check session subtraction with a session, a scalar, a dict and an array."""

    def check_non_arrays_untouched(result):
        # the axes, the group and the string must be passed through as is
        assert result.a is a
        assert result.a01 is a01
        assert result.c is c

    # session - session
    rhs_session = Session({'e': e, 'f': f})
    rhs_session['e'] = e - 1
    rhs_session['f'] = ones_like(f)
    result = session - rhs_session
    assert_array_nan_equal(result['e'], np.full((2, 3), 1, dtype=np.int32))
    assert_array_nan_equal(result['f'], f - ones_like(f))
    assert isnan(result['g']).all()
    check_non_arrays_untouched(result)

    # session - scalar
    result = session - 2
    assert_array_nan_equal(result['e'], e - 2)
    assert_array_nan_equal(result['f'], f - 2)
    assert_array_nan_equal(result['g'], g - 2)
    check_non_arrays_untouched(result)

    # session - dict(Array and scalar)
    rhs_dict = {'e': ones_like(e), 'f': 1}
    result = session - rhs_dict
    assert_array_nan_equal(result['e'], e - ones_like(e))
    assert_array_nan_equal(result['f'], f - 1)
    assert isnan(result['g']).all()
    check_non_arrays_untouched(result)

    # session - array
    axes = [a, b]
    lhs_session = Session([
        ('a', a), ('a01', a01), ('c', c),
        ('e', ndtest((a, b))),
        ('f', full((a, b), fill_value=3)),
        ('g', ndtest('c=c0..c2')),
    ])
    result = lhs_session - ones(axes)
    for array_name in ('e', 'f', 'g'):
        assert_array_nan_equal(result[array_name], lhs_session[array_name] - ones(axes))
    check_non_arrays_untouched(result)
def setUp(self):
    """Create the objects and the session used by the tests."""
    # non-array objects
    self.a = Axis([], 'a')
    self.b = Axis([], 'b')
    self.c = 'c'
    self.d = {}

    # arrays
    self.e = ndrange([(2, 'a0'), (3, 'a1')])
    self.e2 = ndrange(('a=a0..a2', 'b=b0..b2'))
    self.f = ndrange([(3, 'a0'), (2, 'a1')])
    self.g = ndrange([(2, 'a0'), (4, 'a1')])

    # the items are deliberately not given in alphabetical order
    session_items = [
        ('b', self.b),
        ('a', self.a),
        ('c', self.c),
        ('d', self.d),
        ('e', self.e),
        ('g', self.g),
        ('f', self.f),
    ]
    self.session = Session(session_items)
def test_ne(self):
    """Check the ``!=`` operator between sessions (one boolean per name)."""
    arrays_only = self.session.filter(kind=LArray)
    expected_labels = ['e', 'g', 'f']

    # same content: nothing differs
    same_content = Session([('e', self.e), ('f', self.f), ('g', self.g)])
    self.assertFalse(any(arrays_only != same_content))

    # 'g' is missing from the right-hand session
    without_g = Session({'e': self.e, 'f': self.f})
    differs = arrays_only != without_g
    self.assertEqual(differs.axes.names, ['name'])
    self.assertTrue(np.array_equal(differs.axes.labels[0], expected_labels))
    self.assertEqual(list(differs), [False, True, False])

    # additionally, one cell of 'e' differs
    modified_e = self.e.copy()
    modified_e.i[1, 1] = 42
    with_modified_e = Session({'e': modified_e, 'f': self.f})
    differs = arrays_only != with_modified_e
    self.assertEqual(differs.axes.names, ['name'])
    self.assertTrue(np.array_equal(differs.axes.labels[0], expected_labels))
    self.assertEqual(list(differs), [True, True, False])
def test_array_equals(self):
    """Check Session.array_equals (one boolean per name)."""
    arrays_only = self.session.filter(kind=LArray)
    expected_labels = ['e', 'g', 'f']

    # same content: everything is equal
    same_content = Session([('e', self.e), ('f', self.f), ('g', self.g)])
    assert all(arrays_only.array_equals(same_content))

    # 'g' is missing from the other session
    without_g = Session({'e': self.e, 'f': self.f})
    is_equal = arrays_only.array_equals(without_g)
    assert is_equal.ndim == 1
    assert is_equal.axes.names == ['name']
    assert np.array_equal(is_equal.axes.labels[0], expected_labels)
    assert list(is_equal) == [True, False, True]

    # additionally, one cell of 'e' differs
    modified_e = self.e.copy()
    modified_e.i[1, 1] = 42
    with_modified_e = Session({'e': modified_e, 'f': self.f})
    is_equal = arrays_only.array_equals(with_modified_e)
    assert is_equal.axes.names == ['name']
    assert np.array_equal(is_equal.axes.labels[0], expected_labels)
    assert list(is_equal) == [False, False, True]
def test_init_session(meta):
    """Check Session creation from objects, with and without metadata."""
    # positional objects + keyword objects: the given order must be kept
    without_meta = Session(b, b024, a, a01,
                           a2=a2, anonymous=anonymous, ano01=ano01,
                           c=c, d=d, e=e, g=g, f=f, h=h)
    expected_keys = ['b', 'b024', 'a', 'a01', 'a2', 'anonymous', 'ano01',
                     'c', 'd', 'e', 'g', 'f', 'h']
    assert list(without_meta.keys()) == expected_keys

    # TODO: format auto-detection does not work in this case
    # s = Session('test_session_csv')
    # assert list(s.keys()) == ['e', 'f', 'g']

    # metadata
    with_meta = Session(b, b024, a, a01,
                        a2=a2, anonymous=anonymous, ano01=ano01,
                        c=c, d=d, e=e, f=f, g=g, h=h,
                        meta=meta)
    assert with_meta.meta == meta
def eurostat_get(indicators, drop_markers=True):
    """Fetch one or several Eurostat indicators.

    Parameters
    ----------
    indicators : str or list/tuple of str
        Name(s) of the Eurostat indicator(s) to fetch.
        A single name yields an Array; a list or a tuple of names yields a Session.
    drop_markers : bool, optional
        Whether or not to drop special markers. Defaults to True.

    Returns
    -------
    Array or Session

    Examples
    --------
    >>> data = eurostat_get('avia_ec_enterp')
    >>> data.info
    2 x 16 x 13
     enterpr [2]: 'AIRP' 'AVIA'
     geo [16]: 'CY' 'CZ' 'EE' ... 'BG' 'FI' 'SE'
     time [13]: 2013 2012 2011 ... 2003 2002 2001
    dtype: float64
    memory used: 3.25 Kb
    >>> indicators = eurostat_get(['avia_ec_enterp', 'apro_mt_lsequi'])
    >>> indicators.names
    ['apro_mt_lsequi', 'avia_ec_enterp']
    >>> indicators.avia_ec_enterp.info
    2 x 16 x 13
     enterpr [2]: 'AIRP' 'AVIA'
     geo [16]: 'CY' 'CZ' 'EE' ... 'BG' 'FI' 'SE'
     time [13]: 2013 2012 2011 ... 2003 2002 2001
    dtype: float64
    memory used: 3.25 Kb
    >>> indicators.apro_mt_lsequi.info
    3 x 1 x 28 x 38
     animals [3]: 'A1000' 'A1100' 'A1200'
     unit [1]: 'THS_HD'
     geo [28]: 'AL' 'BE' 'BG' ... 'SI' 'SK' 'UK'
     time [38]: 1997 1996 1995 ... 1962 1961 1960
    dtype: float64
    memory used: 24.94 Kb
    """
    # single indicator: return the array itself
    if not isinstance(indicators, (tuple, list)):
        return _get_one(indicators, drop_markers=drop_markers)

    # several indicators: gather them in a session, keyed by indicator name
    named_arrays = [(name, _get_one(name, drop_markers=drop_markers))
                    for name in indicators]
    return Session(named_arrays)
def test_div(self):
    """Check the division of a session by another one (with a zero divisor)."""
    arrays_only = self.session.filter(kind=LArray)
    divisor = Session({'e': self.e - 1, 'f': self.f + 1})

    # the division must stay in this function: the warning is checked to be
    # reported against this very file
    with pytest.warns(RuntimeWarning) as caught_warnings:
        quotient = arrays_only / divisor
    assert len(caught_warnings) == 1
    warning = caught_warnings[0]
    assert warning.message.args[0] == "divide by zero encountered during operation"
    assert warning.filename == __file__

    # compute the expected values with numpy, silencing its own warnings
    with np.errstate(divide='ignore', invalid='ignore'):
        expected_e_flat = np.arange(6) / np.arange(-1, 5)
    assert_array_nan_equal(quotient['e'], expected_e_flat.reshape(2, 3))

    expected_f_flat = np.arange(6) / np.arange(1, 7)
    assert_array_nan_equal(quotient['f'], expected_f_flat.reshape(3, 2))

    # 'g' is absent from the divisor session: only NaNs are expected
    self.assertTrue(isnan(quotient['g']).all())
def test_sub(self):
    """Check session subtraction with a session, a scalar and a dict."""
    arrays_only = self.session.filter(kind=LArray)

    # session - session
    rhs_session = Session({'e': self.e - 1, 'f': ones_like(self.f)})
    result = arrays_only - rhs_session
    assert_array_nan_equal(result['e'], np.full((2, 3), 1, dtype=np.int32))
    assert_array_nan_equal(result['f'], self.f - ones_like(self.f))
    assert isnan(result['g']).all()

    # session - scalar
    result = arrays_only - 2
    for array_name, original in (('e', self.e), ('f', self.f), ('g', self.g)):
        assert_array_nan_equal(result[array_name], original - 2)

    # session - dict(LArray and scalar)
    rhs_dict = {'e': ones_like(self.e), 'f': 1}
    result = arrays_only - rhs_dict
    assert_array_nan_equal(result['e'], self.e - ones_like(self.e))
    assert_array_nan_equal(result['f'], self.f - 1)
    assert isnan(result['g']).all()
def test_h5_io(tmpdir, session, meta):
    """Check saving to, updating and partially loading from an HDF file."""
    h5_path = tmp_path(tmpdir, 'test_session.h5')
    # HDF does *not* keep ordering (ie, keys are always sorted +
    # read Axis objects, then Groups objects and finally LArray objects)
    all_keys = ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']

    session.meta = meta
    session.save(h5_path)
    loaded = Session()
    loaded.load(h5_path)
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    Session(a=new_axis, a01=new_group, e=new_array).save(h5_path, overwrite=False)
    loaded = Session()
    loaded.load(h5_path)
    assert list(loaded.keys()) == all_keys
    assert loaded['a'].equals(new_axis)
    assert all(loaded['a01'] == new_group)
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    wanted = ['a', 'a01', 'e', 'f']
    loaded = Session()
    loaded.load(h5_path, names=wanted)
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
def _test_io(tmpdir, session, meta, engine, ext):
    """Run the generic save/load checks for one I/O engine.

    Three scenarios are exercised against the same path: a plain round-trip,
    an update of an existing file (``overwrite=False``) and a partial load
    (``names=...``).

    Parameters
    ----------
    tmpdir
        Temporary directory, passed through to ``tmp_path``.
    session : Session
        Session to save. It is first filtered down to the kinds of objects
        the engine is expected to handle.
    meta
        Metadata assigned to the session before saving.
    engine : str
        Name of the engine passed to ``Session.save`` and ``Session.load``.
    ext : str
        File extension (without the dot).

    Returns
    -------
    The path the session was saved to, so that callers can run extra checks
    on it and/or clean it up.
    """
    # for CSV engines there is no dot before the extension
    # (presumably because the target is a directory -- TODO confirm)
    filename = f"test_{engine}.{ext}" if 'csv' not in engine else f"test_{engine}{ext}"
    fpath = tmp_path(tmpdir, filename)

    is_excel_or_csv = 'excel' in engine or 'csv' in engine

    # Excel and CSV engines are only tested with Array objects
    kind = Array if is_excel_or_csv else (Axis, Group, Array) + _supported_scalars_types
    session = session.filter(kind=kind)

    session.meta = meta

    # save and load
    session.save(fpath, engine=engine)
    s = Session()
    s.load(fpath, engine=engine)
    # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering
    assert s.names == session.names
    assert s.equals(session)
    if not is_excel_or_csv:
        for key in s.filter(kind=Axis).keys():
            assert s[key].dtype == session[key].dtype
    if engine != 'pandas_excel':
        assert s.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    a4 = Axis('a=0..3')
    # the group must be taken on the axis created just above (it was
    # previously built from an unrelated module-level axis)
    a4_01 = a4['0,1'] >> 'a01'
    e2 = ndtest((a4, 'b=b0..b2'))
    h2 = full_like(h, fill_value=10)
    Session(a=a4, a01=a4_01, e=e2, h=h2).save(fpath, overwrite=False, engine=engine)
    s = Session()
    s.load(fpath, engine=engine)
    if engine == 'pandas_excel':
        # Session.save() via engine='pandas_excel' always overwrite the output Excel files
        assert s.names == ['e', 'h']
    elif is_excel_or_csv:
        assert s.names == ['e', 'f', 'g', 'h']
    else:
        assert s.names == session.names
        assert s['a'].equals(a4)
        assert s['a01'].equals(a4_01)
    assert_array_nan_equal(s['e'], e2)
    if engine != 'pandas_excel':
        assert s.meta == meta

    # load only some objects
    session.save(fpath, engine=engine)
    s = Session()
    names_to_load = ['e', 'f'] if is_excel_or_csv else [
        'a', 'a01', 'a2', 'anonymous', 'e', 'f', 's_bool', 's_int'
    ]
    s.load(fpath, names=names_to_load, engine=engine)
    assert s.names == names_to_load
    if engine != 'pandas_excel':
        assert s.meta == meta

    return fpath
def session():
    """Return a new Session built from the module-level test objects."""
    # the items are deliberately not given in alphabetical order
    items = [
        ('b', b),
        ('b024', b024),
        ('a', a),
        ('a2', a2),
        ('anonymous', anonymous),
        ('a01', a01),
        ('ano01', ano01),
        ('c', c),
        ('d', d),
        ('e', e),
        ('g', g),
        ('f', f),
        ('h', h),
    ]
    return Session(items)
def test_xlsx_pandas_io(tmpdir, session, meta):
    """Check saving to and loading from an Excel file via the pandas engine."""
    engine = 'pandas_excel'
    xlsx_path = tmp_path(tmpdir, 'test_session.xlsx')

    session.meta = meta
    session.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    assert list(loaded.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']
    assert loaded.meta == meta

    # update a Group + an Axis + an array
    # XXX: overwrite is not taken into account by the pandas_excel engine
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    replacement = Session(a=new_axis, a01=new_group, e=new_array, meta=meta)
    replacement.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    assert list(loaded.keys()) == ['a', 'a01', 'e']
    assert loaded['a'].equals(new_axis)
    assert all(loaded['a01'] == new_group)
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    session.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, names=['a', 'a01', 'e', 'f'], engine=engine)
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
def test_csv_io(tmpdir, session, meta):
    """Check CSV session I/O: directory, glob pattern, invalid file, partial load.

    The directory written by the test is removed at the end, whatever the
    outcome of the assertions.
    """
    # Bind fpath before entering the try block, so that the cleanup in the
    # finally clause can never fail on an unbound name.
    fpath = tmp_path(tmpdir, 'test_session_csv')
    try:
        session.meta = meta
        session.to_csv(fpath)

        # test loading a directory
        s = Session()
        s.load(fpath, engine='pandas_csv')
        # CSV cannot keep ordering (so we always sort keys)
        # Also, Axis objects are read first, then Groups objects and finally LArray objects
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # test loading with a pattern
        pattern = os.path.join(fpath, '*.csv')
        s = Session(pattern)
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # create an invalid .csv file
        invalid_fpath = os.path.join(fpath, 'invalid.csv')
        with open(invalid_fpath, 'w') as invalid_file:
            invalid_file.write(',",')

        # try loading the directory with the invalid file
        with pytest.raises(pd.errors.ParserError):
            Session(pattern)

        # test loading a pattern, ignoring invalid/unsupported files
        s = Session()
        s.load(pattern, ignore_exceptions=True)
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # load only some objects
        s = Session()
        s.load(fpath, names=['a', 'a01', 'e', 'f'])
        assert list(s.keys()) == ['a', 'a01', 'e', 'f']
        assert s.meta == meta
    finally:
        # ignore_errors=True: if the directory was never created, the cleanup
        # must not raise from the finally clause and hide the original failure
        shutil.rmtree(fpath, ignore_errors=True)
class TestSession(TestCase):
    """Tests for Session: item access, filtering, I/O engines and operators.

    Every test starts from the session built in ``setUp``, which mixes two
    axes, a string, a dict and three arrays, deliberately inserted in
    non-alphabetical order.
    """

    def setUp(self):
        # non-array objects
        self.a = Axis([], 'a')
        self.b = Axis([], 'b')
        self.c = 'c'
        self.d = {}
        # arrays (e2 is only used as a replacement for e in the I/O tests)
        self.e = ndrange([(2, 'a0'), (3, 'a1')])
        self.e2 = ndrange(('a=a0..a2', 'b=b0..b2'))
        self.f = ndrange([(3, 'a0'), (2, 'a1')])
        self.g = ndrange([(2, 'a0'), (4, 'a1')])
        self.session = Session([
            ('b', self.b), ('a', self.a),
            ('c', self.c), ('d', self.d),
            ('e', self.e), ('g', self.g), ('f', self.f),
        ])

    def assertObjListEqual(self, got, expected):
        """Assert that two sequences have the same length and pairwise equal items.

        Items are compared using the ``equal`` helper instead of ``==``.
        """
        self.assertEqual(len(got), len(expected))
        for e1, e2 in zip(got, expected):
            self.assertTrue(equal(e1, e2), "{} != {}".format(e1, e2))

    def test_init(self):
        # from objects: names are expected in sorted order
        s = Session(self.b, self.a, c=self.c, d=self.d, e=self.e, f=self.f, g=self.g)
        self.assertEqual(s.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g'])

        # from a file path
        s = Session(abspath('test_session.h5'))
        self.assertEqual(s.names, ['e', 'f', 'g'])

        # this needs xlwings installed
        # s = Session('test_session_ef.xlsx')
        # self.assertEqual(s.names, ['e', 'f'])

        # TODO: format autodetection does not work in this case
        # s = Session('test_session_csv')
        # self.assertEqual(s.names, ['e', 'f', 'g'])

    def test_getitem(self):
        s = self.session
        self.assertIs(s['a'], self.a)
        self.assertIs(s['b'], self.b)
        self.assertEqual(s['c'], 'c')
        self.assertEqual(s['d'], {})

    def test_getitem_list(self):
        # selecting with a list of names must follow the order of that list
        s = self.session
        self.assertEqual(list(s[[]]), [])
        self.assertEqual(list(s[['b', 'a']]), [self.b, self.a])
        self.assertEqual(list(s[['a', 'b']]), [self.a, self.b])
        self.assertEqual(list(s[['a', 'e', 'g']]), [self.a, self.e, self.g])
        self.assertEqual(list(s[['g', 'a', 'e']]), [self.g, self.a, self.e])

    def test_getitem_larray(self):
        # selecting with the boolean result of a session comparison
        s1 = self.session.filter(kind=LArray)
        s2 = Session({'e': self.e + 1, 'f': self.f})
        res_eq = s1[s1 == s2]
        res_neq = s1[s1 != s2]
        self.assertEqual(list(res_eq), [self.f])
        self.assertEqual(list(res_neq), [self.e, self.g])

    def test_setitem(self):
        s = self.session
        s['g'] = 'g'
        self.assertEqual(s['g'], 'g')

    def test_getattr(self):
        s = self.session
        self.assertIs(s.a, self.a)
        self.assertIs(s.b, self.b)
        self.assertEqual(s.c, 'c')
        self.assertEqual(s.d, {})

    def test_setattr(self):
        s = self.session
        s.h = 'h'
        self.assertEqual(s.h, 'h')

    def test_add(self):
        # add() accepts both positional objects and keyword objects
        s = self.session
        h = Axis([], 'h')
        s.add(h, i='i')
        self.assertTrue(h.equals(s.h))
        self.assertEqual(s.i, 'i')

    def test_iter(self):
        # iteration must follow the insertion order used in setUp
        expected = [self.b, self.a, self.c, self.d, self.e, self.g, self.f]
        self.assertObjListEqual(self.session, expected)

    def test_filter(self):
        s = self.session
        s.ax = 'ax'
        # no argument: everything is kept
        self.assertObjListEqual(s.filter(), [self.b, self.a, 'c', {},
                                             self.e, self.g, self.f, 'ax'])
        # filter on a name pattern and/or on a kind of object
        self.assertEqual(list(s.filter('a')), [self.a, 'ax'])
        self.assertEqual(list(s.filter('a', dict)), [])
        self.assertEqual(list(s.filter('a', str)), ['ax'])
        self.assertEqual(list(s.filter('a', Axis)), [self.a])
        self.assertEqual(list(s.filter(kind=Axis)), [self.b, self.a])
        self.assertObjListEqual(s.filter(kind=LArray), [self.e, self.g, self.f])
        self.assertEqual(list(s.filter(kind=dict)), [{}])

    def test_names(self):
        s = self.session
        self.assertEqual(s.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g'])
        # add them in the "wrong" order
        s.add(i='i')
        s.add(h='h')
        self.assertEqual(s.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'])

    def test_h5_io(self):
        fpath = abspath('test_session.h5')
        self.session.save(fpath)

        s = Session()
        s.load(fpath)
        # HDF does *not* keep ordering (ie, keys are always sorted)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, overwrite=False)
        s.load(fpath)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])
        assert_array_nan_equal(s['e'], self.e2)

        # load only some arrays
        s = Session()
        s.load(fpath, ['e', 'f'])
        self.assertEqual(list(s.keys()), ['e', 'f'])

    def test_xlsx_pandas_io(self):
        fpath = abspath('test_session.xlsx')
        self.session.save(fpath, engine='pandas_excel')

        s = Session()
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, engine='pandas_excel', overwrite=False)
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

        # save only some arrays
        fpath = abspath('test_session_ef.xlsx')
        self.session.save(fpath, ['e', 'f'], engine='pandas_excel')
        s = Session()
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'f'])

    @pytest.mark.skipif(xw is None, reason="xlwings is not available")
    def test_xlsx_xlwings_io(self):
        fpath = abspath('test_session_xw.xlsx')
        # test save when Excel file does not exist
        self.session.save(fpath, engine='xlwings_excel')

        s = Session()
        s.load(fpath, engine='xlwings_excel')
        # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, engine='xlwings_excel', overwrite=False)
        s.load(fpath, engine='xlwings_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

        # save only some arrays
        fpath = abspath('test_session_ef_xw.xlsx')
        self.session.save(fpath, ['e', 'f'], engine='xlwings_excel')
        s = Session()
        s.load(fpath, engine='xlwings_excel')
        self.assertEqual(list(s.keys()), ['e', 'f'])

    def test_csv_io(self):
        fpath = abspath('test_session_csv')
        self.session.to_csv(fpath)

        s = Session()
        s.load(fpath, engine='pandas_csv')
        # CSV cannot keep ordering (so we always sort keys)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])

    def test_pickle_io(self):
        fpath = abspath('test_session.pkl')
        self.session.save(fpath)

        s = Session()
        s.load(fpath, engine='pickle')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, overwrite=False)
        s.load(fpath, engine='pickle')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

    def test_to_globals(self):
        # NOTE: the bare names a, b, ..., g used below are not defined in this
        # method: they are expected to be created by to_globals() as globals
        # of this module.
        with pytest.warns(RuntimeWarning) as caught_warnings:
            self.session.to_globals()
        assert len(caught_warnings) == 1
        assert caught_warnings[0].message.args[0] == "Session.to_globals should usually only be used in interactive " \
                                                     "consoles and not in scripts. Use warn=False to deactivate this " \
                                                     "warning."
        # the warning must be reported against this file (ie the caller)
        assert caught_warnings[0].filename == __file__

        self.assertIs(a, self.a)
        self.assertIs(b, self.b)
        self.assertIs(c, self.c)
        self.assertIs(d, self.d)
        self.assertIs(e, self.e)
        self.assertIs(f, self.f)
        self.assertIs(g, self.g)

        # test inplace
        backup_dest = e
        backup_value = self.session.e.copy()
        self.session.e = zeros_like(e)
        self.session.to_globals(inplace=True, warn=False)
        # check the variable is correct (the same as before)
        self.assertIs(e, backup_dest)
        self.assertIsNot(e, self.session.e)
        # check the content has changed
        assert_array_nan_equal(e, self.session.e)
        self.assertFalse(larray_equal(e, backup_value))

    def test_eq(self):
        sess = self.session.filter(kind=LArray)
        # same content
        expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        self.assertTrue(all(sess == expected))

        # 'g' is missing from the other session
        other = Session({'e': self.e, 'f': self.f})
        res = sess == other
        self.assertEqual(res.ndim, 1)
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [True, False, True])

        # additionally, one cell of 'e' differs
        e2 = self.e.copy()
        e2.i[1, 1] = 42
        other = Session({'e': e2, 'f': self.f})
        res = sess == other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [False, False, True])

    def test_ne(self):
        sess = self.session.filter(kind=LArray)
        # same content
        expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        self.assertFalse(any(sess != expected))

        # 'g' is missing from the other session
        other = Session({'e': self.e, 'f': self.f})
        res = sess != other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [False, True, False])

        # additionally, one cell of 'e' differs
        e2 = self.e.copy()
        e2.i[1, 1] = 42
        other = Session({'e': e2, 'f': self.f})
        res = sess != other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [True, True, False])

    def test_sub(self):
        sess = self.session.filter(kind=LArray)
        other = Session({'e': self.e - 1, 'f': 1})
        diff = sess - other
        assert_array_nan_equal(diff['e'], np.full((2, 3), 1, dtype=np.int32))
        assert_array_nan_equal(diff['f'], np.arange(-1, 5).reshape(3, 2))
        # 'g' is missing from the other session: only NaNs are expected
        self.assertTrue(isnan(diff['g']).all())

    def test_div(self):
        sess = self.session.filter(kind=LArray)
        other = Session({'e': self.e - 1, 'f': self.f + 1})

        with pytest.warns(RuntimeWarning) as caught_warnings:
            res = sess / other
        assert len(caught_warnings) == 1
        assert caught_warnings[0].message.args[0] == "divide by zero encountered during operation"
        # the warning must be reported against this file (ie the caller)
        assert caught_warnings[0].filename == __file__

        # compute the expected values with numpy, silencing its own warnings
        with np.errstate(divide='ignore', invalid='ignore'):
            flat_e = np.arange(6) / np.arange(-1, 5)
        assert_array_nan_equal(res['e'], flat_e.reshape(2, 3))

        flat_f = np.arange(6) / np.arange(1, 7)
        assert_array_nan_equal(res['f'], flat_f.reshape(3, 2))
        # 'g' is missing from the other session: only NaNs are expected
        self.assertTrue(isnan(res['g']).all())

    def test_summary(self):
        sess = self.session.filter(kind=LArray)
        self.assertEqual(sess.summary(),
                         "e: a0*, a1*\n \n\n"
                         "g: a0*, a1*\n \n\n"
                         "f: a0*, a1*\n \n")

    def test_pickle_roundtrip(self):
        # a session must survive pickle.dumps followed by pickle.loads
        original = self.session
        s = pickle.dumps(original)
        res = pickle.loads(s)
        self.assertTrue(all(res == original))
def test_xlsx_pandas_io(self):
    """Check saving to and loading from Excel files via the pandas engine."""
    engine = 'pandas_excel'
    insertion_order = ['e', 'g', 'f']

    xlsx_path = abspath('test_session.xlsx')
    self.session.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    self.assertEqual(list(loaded.keys()), insertion_order)

    # update an array (overwrite=False)
    replacement = Session(e=self.e2)
    replacement.save(xlsx_path, engine=engine, overwrite=False)
    loaded.load(xlsx_path, engine=engine)
    self.assertEqual(list(loaded.keys()), insertion_order)
    assert_array_nan_equal(loaded['e'], self.e2)

    # save only some arrays, to a second file
    partial_path = abspath('test_session_ef.xlsx')
    self.session.save(partial_path, ['e', 'f'], engine=engine)
    partially_loaded = Session()
    partially_loaded.load(partial_path, engine=engine)
    self.assertEqual(list(partially_loaded.keys()), ['e', 'f'])
def test_xlsx_xlwings_io(self):
    """Check saving to and loading from Excel files via the xlwings engine."""
    engine = 'xlwings_excel'
    insertion_order = ['e', 'g', 'f']

    # test save when Excel file does not exist
    xlsx_path = abspath('test_session_xw.xlsx')
    self.session.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
    self.assertEqual(list(loaded.keys()), insertion_order)

    # update an array (overwrite=False)
    replacement = Session(e=self.e2)
    replacement.save(xlsx_path, engine=engine, overwrite=False)
    loaded.load(xlsx_path, engine=engine)
    self.assertEqual(list(loaded.keys()), insertion_order)
    assert_array_nan_equal(loaded['e'], self.e2)

    # save only some arrays, to a second file
    partial_path = abspath('test_session_ef_xw.xlsx')
    self.session.save(partial_path, ['e', 'f'], engine=engine)
    partially_loaded = Session()
    partially_loaded.load(partial_path, engine=engine)
    self.assertEqual(list(partially_loaded.keys()), ['e', 'f'])
def test_xlsx_xlwings_io(tmpdir, session, meta):
    """Check saving to, updating and partially loading an Excel file via xlwings."""
    engine = 'xlwings_excel'
    xlsx_path = tmp_path(tmpdir, 'test_session_xw.xlsx')
    all_keys = ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']

    session.meta = meta
    # test save when Excel file does not exist
    session.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    replacement = Session(a=new_axis, a01=new_group, e=new_array)
    replacement.save(xlsx_path, engine=engine, overwrite=False)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    assert list(loaded.keys()) == all_keys
    assert loaded['a'].equals(new_axis)
    assert all(loaded['a01'] == new_group)
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    loaded = Session()
    loaded.load(xlsx_path, names=['a', 'a01', 'e', 'f'], engine=engine)
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
def generate_example_files(csv=True, excel=True, hdf5=True):
    """Regenerate the example data files from Eurostat tables.

    The data is fetched using ``larray_eurostat.eurostat_get``, reshaped, then
    written below ``DATA_DIR``: first as a whole session (the "demography"
    dataset), then as individual example files restricted to a few years.

    Parameters
    ----------
    csv : bool, optional
        Whether or not to write the CSV files. Defaults to True.
    excel : bool, optional
        Whether or not to write the Excel files. Defaults to True.
    hdf5 : bool, optional
        Whether or not to write the HDF5 files. Defaults to True.
    """
    # imported here so that the package is only required when (re)generating the files
    from larray_eurostat import eurostat_get

    def prepare_eurostat_data(dataset_name, countries):
        """Fetch a Eurostat table and reduce it to the given countries.

        ``countries`` maps country codes to the labels used in the result.
        """
        arr = eurostat_get(dataset_name)[X.unit['NR'], X.age['TOTAL'], X.sex['M,F']]
        # reverse the time axis before selecting the years
        arr = arr[X.time[::-1]][2013:2017]
        arr = arr.rename('sex', 'gender')
        arr = arr.set_labels(gender='Male,Female')
        arr = arr.rename('geo', 'country')
        country_codes = list(countries.keys())
        country_names = list(countries.values())
        if dataset_name == 'migr_imm1ctz':
            # example of an array with ambiguous axes
            arr = arr['COMPLET', X.citizen[country_codes], X.country[country_codes]].astype(int)
            arr = arr.rename('citizen', 'citizenship')
            arr = arr.set_labels('citizenship', country_names)
            arr = arr.set_labels('country', country_names)
            arr = arr.transpose('country', 'citizenship', 'gender', 'time')
        else:
            arr = arr[country_codes].astype(int)
            arr = arr.set_labels('country', country_names)
            arr = arr.transpose('country', 'gender', 'time')
        return arr

    countries = {'BE': 'Belgium', 'FR': 'France', 'DE': 'Germany'}
    benelux = {'BE': 'Belgium', 'LU': 'Luxembourg', 'NL': 'Netherlands'}

    # Arrays
    population = prepare_eurostat_data('demo_pjan', countries)
    population.meta.title = 'Population on 1 January by age and sex'
    population.meta.source = 'table demo_pjan from Eurostat'
    # ----
    population_benelux = prepare_eurostat_data('demo_pjan', benelux)
    population_benelux.meta.title = 'Population on 1 January by age and sex (Benelux)'
    population_benelux.meta.source = 'table demo_pjan from Eurostat'
    # ----
    population_5_countries = population.extend('country', population_benelux[['Luxembourg', 'Netherlands']])
    population_5_countries.meta.title = 'Population on 1 January by age and sex (Benelux + France + Germany)'
    population_5_countries.meta.source = 'table demo_pjan from Eurostat'
    # ----
    births = prepare_eurostat_data('demo_fasec', countries)
    births.meta.title = "Live births by mother's age and newborn's sex"
    births.meta.source = 'table demo_fasec from Eurostat'
    # ----
    deaths = prepare_eurostat_data('demo_magec', countries)
    deaths.meta.title = 'Deaths by age and sex'
    deaths.meta.source = 'table demo_magec from Eurostat'
    # ----
    immigration = prepare_eurostat_data('migr_imm1ctz', benelux)
    immigration.meta.title = 'Immigration by age group, sex and citizenship'
    immigration.meta.source = 'table migr_imm1ctz from Eurostat'

    # Groups
    even_years = population.time[2014::2] >> 'even_years'
    odd_years = population.time[2013::2] >> 'odd_years'

    # Session
    ses = Session({'country': population.country, 'country_benelux': immigration.country,
                   'citizenship': immigration.citizenship,
                   'gender': population.gender, 'time': population.time,
                   'even_years': even_years, 'odd_years': odd_years,
                   'population': population, 'population_benelux': population_benelux,
                   'population_5_countries': population_5_countries,
                   'births': births, 'deaths': deaths, 'immigration': immigration})
    ses.meta.title = 'Demographic datasets for a small selection of countries in Europe'
    # the population table is named demo_pjan (it was misspelled 'demo_jpan' here)
    ses.meta.source = 'demo_pjan, demo_fasec, demo_magec and migr_imm1ctz tables from Eurostat'

    # EUROSTAT DATASET

    if csv:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat'))
    if excel:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat.xlsx'))
    if hdf5:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat.h5'))

    # EXAMPLE FILES

    # from here on, the arrays are restricted to a subset of the years
    years = population.time[2013:2015]
    population = population[years]
    population_narrow = population['Belgium,France'].sum('gender')
    births = births[years]
    deaths = deaths[years]
    immigration = immigration[years]

    # Dataframes (for testing missing axis/values)
    df_missing_axis_name = population.to_frame(fold_last_axis_name=False)
    df_missing_values = population.to_frame(fold_last_axis_name=True)
    df_missing_values.drop([('France', 'Male'), ('Germany', 'Female')], inplace=True)

    if csv:
        examples_dir = os.path.join(DATA_DIR, 'examples')
        population.to_csv(os.path.join(examples_dir, 'population.csv'))
        births.to_csv(os.path.join(examples_dir, 'births.csv'))
        deaths.to_csv(os.path.join(examples_dir, 'deaths.csv'))
        immigration.to_csv(os.path.join(examples_dir, 'immigration.csv'))
        df_missing_axis_name.to_csv(os.path.join(examples_dir, 'population_missing_axis_name.csv'), sep=',', na_rep='')
        df_missing_values.to_csv(os.path.join(examples_dir, 'population_missing_values.csv'), sep=',', na_rep='')
        population_narrow.to_csv(os.path.join(examples_dir, 'population_narrow_format.csv'), wide=False)

    if excel:
        with open_excel(os.path.join(DATA_DIR, 'examples.xlsx'), overwrite_file=True) as wb:
            wb['population'] = population.dump()
            wb['births'] = births.dump()
            wb['deaths'] = deaths.dump()
            wb['immigration'] = immigration.dump()
            # one sheet holding three arrays, one below the other
            wb['population_births_deaths'] = population.dump()
            wb['population_births_deaths']['A9'] = births.dump()
            wb['population_births_deaths']['A17'] = deaths.dump()
            wb['population_missing_axis_name'] = ''
            wb['population_missing_axis_name']['A1'].options().value = df_missing_axis_name
            wb['population_missing_values'] = ''
            wb['population_missing_values']['A1'].options().value = df_missing_values
            # wb['population_narrow_format'] = population_narrow.dump(wide=False)
            wb.save()
        # the narrow format sheet is added to the same file once the workbook is closed
        population_narrow.to_excel(os.path.join(DATA_DIR, 'examples.xlsx'), 'population_narrow_format', wide=False)
        Session({'country': population.country, 'gender': population.gender, 'time': population.time,
                 'population': population}).save(os.path.join(DATA_DIR, 'population_only.xlsx'))
        Session({'births': births, 'deaths': deaths}).save(os.path.join(DATA_DIR, 'births_and_deaths.xlsx'))

    if hdf5:
        examples_h5_file = os.path.join(DATA_DIR, 'examples.h5')
        population.to_hdf(examples_h5_file, 'population')
        births.to_hdf(examples_h5_file, 'births')
        deaths.to_hdf(examples_h5_file, 'deaths')
        immigration.to_hdf(examples_h5_file, 'immigration')
def test_pickle_io(tmpdir, session, meta):
    """Check saving to, updating and partially loading from a pickle file."""
    pickle_path = tmp_path(tmpdir, 'test_session.pkl')
    all_keys = ['b', 'a', 'b12', 'a01', 'e', 'g', 'f']

    session.meta = meta
    session.save(pickle_path)
    loaded = Session()
    loaded.load(pickle_path, engine='pickle')
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    replacement = Session(a=new_axis, a01=new_group, e=new_array)
    replacement.save(pickle_path, overwrite=False)
    loaded = Session()
    loaded.load(pickle_path, engine='pickle')
    assert list(loaded.keys()) == all_keys
    assert loaded['a'].equals(new_axis)
    assert isinstance(new_group, Group)
    assert isinstance(loaded['a01'], Group)
    assert loaded['a01'].eval() == new_group.eval()
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    loaded = Session()
    loaded.load(pickle_path, names=['a', 'a01', 'e', 'f'], engine='pickle')
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
def test_h5_io(self):
    """Check saving to, updating and partially loading from an HDF file."""
    h5_path = abspath('test_session.h5')
    # HDF does *not* keep ordering (ie, keys are always sorted)
    sorted_keys = ['e', 'f', 'g']

    self.session.save(h5_path)
    loaded = Session()
    loaded.load(h5_path)
    self.assertEqual(list(loaded.keys()), sorted_keys)

    # update an array (overwrite=False)
    replacement = Session(e=self.e2)
    replacement.save(h5_path, overwrite=False)
    loaded.load(h5_path)
    self.assertEqual(list(loaded.keys()), sorted_keys)
    assert_array_nan_equal(loaded['e'], self.e2)

    # load only some arrays
    partially_loaded = Session()
    partially_loaded.load(h5_path, ['e', 'f'])
    self.assertEqual(list(partially_loaded.keys()), ['e', 'f'])