Example #1
0
    def test_csv_io(self):
        """Write the session with to_csv() and check the keys read back."""
        csv_dir = abspath('test_session_csv')
        self.session.to_csv(csv_dir)

        loaded = Session()
        loaded.load(csv_dir, engine='pandas_csv')
        # CSV cannot keep ordering, so the keys are expected in sorted order
        expected_keys = ['e', 'f', 'g']
        self.assertEqual(list(loaded.keys()), expected_keys)
Example #2
0
def test_csv_io(tmpdir, session, meta):
    """Exercise the CSV engine: shared I/O scenario, then glob-pattern loading.

    After the shared ``_test_io`` scenario, the written directory is loaded
    through a ``*.csv`` pattern, an unparsable CSV file is added to check that
    loading raises ``pd.errors.ParserError``, and the pattern is loaded once
    more with ``ignore_exceptions=True``.
    """
    # _test_io() may fail before returning a path: start from a sentinel so
    # the cleanup below cannot raise UnboundLocalError and hide the real error
    fpath = None
    try:
        fpath = _test_io(tmpdir, session, meta, engine='pandas_csv', ext='csv')

        # only the Array objects of the session are compared by name
        names = Session(
            {k: v
             for k, v in session.items() if isinstance(v, Array)}).names

        # test loading with a pattern
        pattern = os.path.join(fpath, '*.csv')
        s = Session(pattern)
        assert s.names == names
        assert s.meta == meta

        # create an invalid .csv file
        invalid_fpath = os.path.join(fpath, 'invalid.csv')
        with open(invalid_fpath, 'w') as invalid_file:
            invalid_file.write(',",')

        # try loading the directory with the invalid file
        with pytest.raises(pd.errors.ParserError):
            Session(pattern)

        # test loading a pattern, ignoring invalid/unsupported files
        s = Session()
        s.load(pattern, ignore_exceptions=True)
        assert s.names == names
        assert s.meta == meta
    finally:
        if fpath is not None:
            shutil.rmtree(fpath)
Example #3
0
    def test_global_arrays(self):
        """Check global_arrays() with and without the private arrays."""
        # default call: private global arrays are left out
        public_only = Session([('global_arr1', global_arr1)])
        assert global_arrays().equals(public_only)

        # include_private=True: all global arrays are returned
        everything = Session([
            ('global_arr1', global_arr1),
            ('_global_arr2', _global_arr2),
        ])
        assert global_arrays(include_private=True).equals(everything)
Example #4
0
def test_global_arrays():
    """Check global_arrays() with and without the private arrays."""
    # default call: private global arrays are left out
    public_items = [('e', e), ('f', f), ('g', g), ('h', h), ('k', k)]
    assert global_arrays().equals(Session(public_items))

    # include_private=True: all global arrays are returned
    all_items = [('e', e), ('_e', _e), ('f', f), ('g', g), ('h', h), ('k', k)]
    assert global_arrays(include_private=True).equals(Session(all_items))
Example #5
0
    def test_init(self):
        """Build a Session from objects and keywords, then from a file path."""
        from_objects = Session(self.b, self.a,
                               c=self.c, d=self.d, e=self.e, f=self.f, g=self.g)
        self.assertEqual(from_objects.names,
                         ['a', 'b', 'c', 'd', 'e', 'f', 'g'])

        h5_path = abspath('test_session.h5')
        from_file = Session(h5_path)
        self.assertEqual(from_file.names, ['e', 'f', 'g'])
Example #6
0
def test_local_arrays():
    """Check local_arrays() against the arrays created in this function.

    NOTE(review): local_arrays() presumably looks at the local variables of
    its caller, so the names ``h`` and ``_h`` are part of what is tested --
    confirm before renaming or adding local arrays here.
    """
    h = ndtest(2)
    _h = ndtest(3)

    # exclude private local arrays
    s = local_arrays()
    s_expected = Session([('h', h)])
    assert s.equals(s_expected)

    # all local arrays
    s = local_arrays(include_private=True)
    s_expected = Session([('h', h), ('_h', _h)])
    assert s.equals(s_expected)
Example #7
0
    def test_local_arrays(self):
        """Check local_arrays() against the arrays created in this method.

        NOTE(review): local_arrays() presumably looks at the local variables
        of its caller, so the names ``local_arr1`` and ``_local_arr2`` are
        part of what is tested -- confirm before renaming them.
        """
        local_arr1 = ndtest(2)
        _local_arr2 = ndtest(3)

        # exclude private local arrays
        s = local_arrays()
        s_expected = Session([('local_arr1', local_arr1)])
        assert s.equals(s_expected)

        # all local arrays
        s = local_arrays(include_private=True)
        s_expected = Session([('local_arr1', local_arr1),
                              ('_local_arr2', _local_arr2)])
        assert s.equals(s_expected)
Example #8
0
    def test_ne(self):
        """Compare sessions with ``!=`` and check each resulting array.

        Three cases are covered: identical sessions, a session missing 'g',
        and a session missing 'g' whose 'e' array has one modified cell.
        """
        sess = self.session.filter(kind=LArray)
        expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        # all() is required here: asserting the bare list would always pass
        # because a non-empty list is truthy whatever it contains
        assert all([(~array).all() for array in (sess != expected).values()])

        other = Session([('e', self.e), ('f', self.f)])
        res = sess != other
        assert [(~arr).all() for arr in res.values()] == [True, False, True]

        e2 = self.e.copy()
        e2.i[1, 1] = 42
        other = Session([('e', e2), ('f', self.f)])
        res = sess != other
        assert [(~arr).all() for arr in res.values()] == [False, False, True]
Example #9
0
def test_arrays():
    """Check arrays() against both the local and the module-level arrays.

    NOTE(review): arrays() presumably collects arrays from the caller's local
    and global variables, so the local names ``i`` and ``_i`` are part of what
    is tested -- confirm before renaming them.
    """
    i = ndtest(2)
    _i = ndtest(3)

    # exclude private arrays
    s = arrays()
    s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h), ('i', i),
                          ('k', k)])
    assert s.equals(s_expected)

    # all arrays
    s = arrays(include_private=True)
    s_expected = Session([('_e', _e), ('_i', _i), ('e', e), ('f', f), ('g', g),
                          ('h', h), ('i', i), ('k', k)])
    assert s.equals(s_expected)
Example #10
0
    def test_eq(self):
        """Compare sessions with ``==`` and check each resulting array."""
        array_sess = self.session.filter(kind=LArray)

        # same content on both sides
        same = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        assert all([array.all() for array in (array_sess == same).values()])

        # the other session has no 'g'
        partial = Session([('e', self.e), ('f', self.f)])
        comparison = array_sess == partial
        assert list(comparison.keys()) == ['e', 'g', 'f']
        per_array = [arr.all() for arr in comparison.values()]
        assert per_array == [True, False, True]

        # same as above, with one cell of 'e' modified
        changed_e = self.e.copy()
        changed_e.i[1, 1] = 42
        partial = Session([('e', changed_e), ('f', self.f)])
        comparison = array_sess == partial
        per_array = [arr.all() for arr in comparison.values()]
        assert per_array == [False, False, True]
Example #11
0
 def test_sub(self):
     """Subtract a session holding an array and a scalar from the array session."""
     array_sess = self.session.filter(kind=LArray)
     subtrahend = Session({'e': self.e - 1, 'f': 1})
     result = array_sess - subtrahend

     expected_e = np.full((2, 3), 1, dtype=np.int32)
     expected_f = np.arange(-1, 5).reshape(3, 2)
     assert_array_nan_equal(result['e'], expected_e)
     assert_array_nan_equal(result['f'], expected_f)
     # 'g' is absent from the subtracted session
     self.assertTrue(isnan(result['g']).all())
Example #12
0
def test_getitem_larray(session):
    """Index a session with the result of element_equals() and its negation."""
    array_sess = session.filter(kind=Array)
    shifted = Session({'e': e + 1, 'f': f})

    same_mask = array_sess.element_equals(shifted)
    different_mask = ~(array_sess.element_equals(shifted))
    equal_part = array_sess[same_mask]
    different_part = array_sess[different_mask]

    assert list(equal_part) == [f]
    assert list(different_part) == [e, g, h]
Example #13
0
 def test_getitem_larray(self):
     """Index a session with the results of ``==`` and ``!=``."""
     array_sess = self.session.filter(kind=LArray)
     shifted = Session({'e': self.e + 1, 'f': self.f})

     equal_part = array_sess[array_sess == shifted]
     different_part = array_sess[array_sess != shifted]

     self.assertEqual(list(equal_part), [self.f])
     self.assertEqual(list(different_part), [self.e, self.g])
Example #14
0
 def test_getitem_larray(self):
     """Index a session with the result of array_equals() and its negation."""
     array_sess = self.session.filter(kind=LArray)
     shifted = Session({'e': self.e + 1, 'f': self.f})

     same_mask = array_sess.array_equals(shifted)
     different_mask = ~(array_sess.array_equals(shifted))
     equal_part = array_sess[same_mask]
     different_part = array_sess[different_mask]

     assert list(equal_part) == [self.f]
     assert list(different_part) == [self.e, self.g]
Example #15
0
    def test_pickle_io(self):
        """Save the session to a .pkl file, load it back, then update one array."""
        pkl_path = abspath('test_session.pkl')
        key_order = ['e', 'g', 'f']
        self.session.save(pkl_path)

        loaded = Session()
        loaded.load(pkl_path, engine='pickle')
        self.assertEqual(list(loaded.keys()), key_order)

        # update an array (overwrite=False)
        replacement = Session(e=self.e2)
        replacement.save(pkl_path, overwrite=False)
        loaded.load(pkl_path, engine='pickle')
        self.assertEqual(list(loaded.keys()), key_order)
        assert_array_nan_equal(loaded['e'], self.e2)
Example #16
0
def test_sub(session):
    """Subtract a session, a scalar, a dict and an array from a session.

    After each subtraction the ``a``, ``a01`` and ``c`` entries of the result
    are checked to be the very same objects as the module-level ones.
    """
    def check_other_objects_kept(result):
        assert result.a is a
        assert result.a01 is a01
        assert result.c is c

    left = session

    # session - session
    right = Session({'e': e, 'f': f})
    right['e'] = e - 1
    right['f'] = ones_like(f)
    result = left - right
    assert_array_nan_equal(result['e'], np.full((2, 3), 1, dtype=np.int32))
    assert_array_nan_equal(result['f'], f - ones_like(f))
    assert isnan(result['g']).all()
    check_other_objects_kept(result)

    # session - scalar
    result = left - 2
    for name, array in (('e', e), ('f', f), ('g', g)):
        assert_array_nan_equal(result[name], array - 2)
    check_other_objects_kept(result)

    # session - dict(Array and scalar)
    right = {'e': ones_like(e), 'f': 1}
    result = left - right
    assert_array_nan_equal(result['e'], e - ones_like(e))
    assert_array_nan_equal(result['f'], f - 1)
    assert isnan(result['g']).all()
    check_other_objects_kept(result)

    # session - array
    axes = [a, b]
    left = Session([
        ('a', a),
        ('a01', a01),
        ('c', c),
        ('e', ndtest((a, b))),
        ('f', full((a, b), fill_value=3)),
        ('g', ndtest('c=c0..c2')),
    ])
    result = left - ones(axes)
    for name in ('e', 'f', 'g'):
        assert_array_nan_equal(result[name], left[name] - ones(axes))
    check_other_objects_kept(result)
Example #17
0
 def setUp(self):
     """Create the objects shared by the tests and gather them in a session."""
     # Axis objects
     self.a = Axis([], 'a')
     self.b = Axis([], 'b')
     # plain Python objects
     self.c = 'c'
     self.d = {}
     # objects created with ndrange (e2 is not stored in the session)
     self.e = ndrange([(2, 'a0'), (3, 'a1')])
     self.e2 = ndrange(('a=a0..a2', 'b=b0..b2'))
     self.f = ndrange([(3, 'a0'), (2, 'a1')])
     self.g = ndrange([(2, 'a0'), (4, 'a1')])

     # the insertion order below is deliberate: it is not alphabetical
     insertion_order = ['b', 'a', 'c', 'd', 'e', 'g', 'f']
     self.session = Session([(name, getattr(self, name))
                             for name in insertion_order])
Example #18
0
    def test_ne(self):
        """Compare sessions with ``!=`` and check the result along the 'name' axis."""
        def check_name_axis(result):
            self.assertEqual(result.axes.names, ['name'])
            self.assertTrue(
                np.array_equal(result.axes.labels[0], ['e', 'g', 'f']))

        array_sess = self.session.filter(kind=LArray)

        # same content on both sides
        same = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        self.assertFalse(any(array_sess != same))

        # the other session has no 'g'
        partial = Session({'e': self.e, 'f': self.f})
        comparison = array_sess != partial
        check_name_axis(comparison)
        self.assertEqual(list(comparison), [False, True, False])

        # same as above, with one cell of 'e' modified
        changed_e = self.e.copy()
        changed_e.i[1, 1] = 42
        partial = Session({'e': changed_e, 'f': self.f})
        comparison = array_sess != partial
        check_name_axis(comparison)
        self.assertEqual(list(comparison), [True, True, False])
Example #19
0
    def test_array_equals(self):
        """Check array_equals() and the 'name' axis of the result it returns."""
        def check_name_axis(result):
            assert result.axes.names == ['name']
            assert np.array_equal(result.axes.labels[0], ['e', 'g', 'f'])

        array_sess = self.session.filter(kind=LArray)

        # same content on both sides
        same = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        assert all(array_sess.array_equals(same))

        # the other session has no 'g'
        partial = Session({'e': self.e, 'f': self.f})
        comparison = array_sess.array_equals(partial)
        assert comparison.ndim == 1
        check_name_axis(comparison)
        assert list(comparison) == [True, False, True]

        # same as above, with one cell of 'e' modified
        changed_e = self.e.copy()
        changed_e.i[1, 1] = 42
        partial = Session({'e': changed_e, 'f': self.f})
        comparison = array_sess.array_equals(partial)
        check_name_axis(comparison)
        assert list(comparison) == [False, False, True]
Example #20
0
def test_init_session(meta):
    """Build sessions from positional and keyword objects, then with metadata."""
    unnamed = (b, b024, a, a01)

    # keyword order is deliberate here ('g' before 'f'): the keys are checked
    # in the order the objects were given
    named = dict(a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e,
                 g=g, f=f, h=h)
    s = Session(*unnamed, **named)
    expected_keys = ['b', 'b024', 'a', 'a01', 'a2', 'anonymous', 'ano01',
                     'c', 'd', 'e', 'g', 'f', 'h']
    assert list(s.keys()) == expected_keys

    # TODO: format auto-detection does not work in this case
    # s = Session('test_session_csv')
    # assert list(s.keys()) == ['e', 'f', 'g']

    # metadata
    named = dict(a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e,
                 f=f, g=g, h=h)
    s = Session(*unnamed, meta=meta, **named)
    assert s.meta == meta
Example #21
0
def eurostat_get(indicators, drop_markers=True):
    """Fetch one or more Eurostat indicators.

    Parameters
    ----------
    indicators : str or list/tuple of str
        Name of a single indicator, or a list/tuple of indicator names.
    drop_markers : bool, optional
        Forwarded as is to the function fetching each indicator. Defaults to True.

    Returns
    -------
    Array or Session
        A Session holding one entry per requested name when `indicators` is a list or a tuple, otherwise the result
        for the single indicator.

    Examples
    --------
    >>> data = eurostat_get('avia_ec_enterp')
    >>> data.info
    2 x 16 x 13
     enterpr [2]: 'AIRP' 'AVIA'
     geo [16]: 'CY' 'CZ' 'EE' ... 'BG' 'FI' 'SE'
     time [13]: 2013 2012 2011 ... 2003 2002 2001
    dtype: float64
    memory used: 3.25 Kb
    >>> indicators = eurostat_get(['avia_ec_enterp', 'apro_mt_lsequi'])
    >>> indicators.names
    ['apro_mt_lsequi', 'avia_ec_enterp']
    >>> indicators.avia_ec_enterp.info
    2 x 16 x 13
     enterpr [2]: 'AIRP' 'AVIA'
     geo [16]: 'CY' 'CZ' 'EE' ... 'BG' 'FI' 'SE'
     time [13]: 2013 2012 2011 ... 2003 2002 2001
    dtype: float64
    memory used: 3.25 Kb
    >>> indicators.apro_mt_lsequi.info
    3 x 1 x 28 x 38
     animals [3]: 'A1000' 'A1100' 'A1200'
     unit [1]: 'THS_HD'
     geo [28]: 'AL' 'BE' 'BG' ... 'SI' 'SK' 'UK'
     time [38]: 1997 1996 1995 ... 1962 1961 1960
    dtype: float64
    memory used: 24.94 Kb
    """
    # single indicator: hand back its result directly
    if not isinstance(indicators, (tuple, list)):
        return _get_one(indicators, drop_markers=drop_markers)

    # several indicators: fetch them in the requested order
    fetched = []
    for name in indicators:
        fetched.append((name, _get_one(name, drop_markers=drop_markers)))
    return Session(fetched)
Example #22
0
    def test_div(self):
        """Divide two sessions and check the single divide-by-zero warning."""
        array_sess = self.session.filter(kind=LArray)
        divisor = Session({'e': self.e - 1, 'f': self.f + 1})

        with pytest.warns(RuntimeWarning) as caught_warnings:
            quotient = array_sess / divisor
        assert len(caught_warnings) == 1
        warning = caught_warnings[0]
        assert warning.message.args[0] == "divide by zero encountered during operation"
        # the warning must be reported against this file
        assert warning.filename == __file__

        # numpy would warn about the division by zero too: silence it here
        with np.errstate(divide='ignore', invalid='ignore'):
            expected_e = np.arange(6) / np.arange(-1, 5)
        assert_array_nan_equal(quotient['e'], expected_e.reshape(2, 3))

        expected_f = np.arange(6) / np.arange(1, 7)
        assert_array_nan_equal(quotient['f'], expected_f.reshape(3, 2))
        # 'g' is absent from the divisor session
        self.assertTrue(isnan(quotient['g']).all())
Example #23
0
    def test_sub(self):
        """Subtract a session, a scalar and a dict from the array session."""
        left = self.session.filter(kind=LArray)

        # session - session
        right = Session({'e': self.e - 1, 'f': ones_like(self.f)})
        result = left - right
        assert_array_nan_equal(result['e'], np.full((2, 3), 1, dtype=np.int32))
        assert_array_nan_equal(result['f'], self.f - ones_like(self.f))
        assert isnan(result['g']).all()

        # session - scalar
        result = left - 2
        for name, array in (('e', self.e), ('f', self.f), ('g', self.g)):
            assert_array_nan_equal(result[name], array - 2)

        # session - dict(LArray and scalar)
        right = {'e': ones_like(self.e), 'f': 1}
        result = left - right
        assert_array_nan_equal(result['e'], self.e - ones_like(self.e))
        assert_array_nan_equal(result['f'], self.f - 1)
        assert isnan(result['g']).all()
Example #24
0
def test_h5_io(tmpdir, session, meta):
    """Save a session to an .h5 file, reload it, update it and load it partially."""
    h5_path = tmp_path(tmpdir, 'test_session.h5')
    session.meta = meta
    session.save(h5_path)

    # HDF does *not* keep ordering (ie, keys are always sorted +
    # read Axis objects, then Groups objects and finally LArray objects)
    all_keys = ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']

    loaded = Session()
    loaded.load(h5_path)
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    update = Session(a=new_axis, a01=new_group, e=new_array)
    update.save(h5_path, overwrite=False)
    loaded = Session()
    loaded.load(h5_path)
    assert list(loaded.keys()) == all_keys
    assert loaded['a'].equals(new_axis)
    assert all(loaded['a01'] == new_group)
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    subset = ['a', 'a01', 'e', 'f']
    loaded = Session()
    loaded.load(h5_path, names=subset)
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
Example #25
0
def _test_io(tmpdir, session, meta, engine, ext):
    """Run the save/load scenario shared by the engine-specific I/O tests.

    The session is first filtered down to the kinds of objects checked for
    the given engine, then saved and reloaded, partially updated with
    ``overwrite=False`` and finally reloaded with an explicit list of names.

    Returns the path the session was written to.
    """
    csv_engine = 'csv' in engine
    is_excel_or_csv = csv_engine or 'excel' in engine
    # metadata is not checked for the 'pandas_excel' engine
    check_meta = engine != 'pandas_excel'

    if csv_engine:
        filename = f"test_{engine}{ext}"
    else:
        filename = f"test_{engine}.{ext}"
    fpath = tmp_path(tmpdir, filename)

    if is_excel_or_csv:
        kind = Array
    else:
        kind = (Axis, Group, Array) + _supported_scalars_types
    session = session.filter(kind=kind)
    session.meta = meta

    # save and load
    session.save(fpath, engine=engine)
    loaded = Session()
    loaded.load(fpath, engine=engine)
    # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering
    assert loaded.names == session.names
    assert loaded.equals(session)
    if not is_excel_or_csv:
        for key in loaded.filter(kind=Axis).keys():
            assert loaded[key].dtype == session[key].dtype
    if check_meta:
        assert loaded.meta == meta

    # update a Group + an Axis + an array (overwrite=False)
    new_axis = Axis('a=0..3')
    # NOTE(review): the group is taken from `a3`, a name defined outside this
    # function, not from the axis created just above -- confirm it is intended
    new_group = a3['0,1'] >> 'a01'
    new_e = ndtest((new_axis, 'b=b0..b2'))
    new_h = full_like(h, fill_value=10)
    update = Session(a=new_axis, a01=new_group, e=new_e, h=new_h)
    update.save(fpath, overwrite=False, engine=engine)
    loaded = Session()
    loaded.load(fpath, engine=engine)
    if engine == 'pandas_excel':
        # Session.save() via engine='pandas_excel' always overwrite the output Excel files
        assert loaded.names == ['e', 'h']
    elif is_excel_or_csv:
        assert loaded.names == ['e', 'f', 'g', 'h']
    else:
        assert loaded.names == session.names
        assert loaded['a'].equals(new_axis)
        assert loaded['a01'].equals(new_group)
    assert_array_nan_equal(loaded['e'], new_e)
    if check_meta:
        assert loaded.meta == meta

    # load only some objects
    session.save(fpath, engine=engine)
    loaded = Session()
    if is_excel_or_csv:
        names_to_load = ['e', 'f']
    else:
        names_to_load = ['a', 'a01', 'a2', 'anonymous', 'e', 'f', 's_bool', 's_int']
    loaded.load(fpath, names=names_to_load, engine=engine)
    assert loaded.names == names_to_load
    if check_meta:
        assert loaded.meta == meta

    return fpath
Example #26
0
def session():
    """Return a Session holding the module-level test objects in a fixed order."""
    # the order is deliberately not alphabetical (e.g. 'g' comes before 'f')
    items = [
        ('b', b),
        ('b024', b024),
        ('a', a),
        ('a2', a2),
        ('anonymous', anonymous),
        ('a01', a01),
        ('ano01', ano01),
        ('c', c),
        ('d', d),
        ('e', e),
        ('g', g),
        ('f', f),
        ('h', h),
    ]
    return Session(items)
Example #27
0
def test_xlsx_pandas_io(tmpdir, session, meta):
    """Save, reload, rewrite and partially load an .xlsx file with 'pandas_excel'."""
    engine = 'pandas_excel'
    xlsx_path = tmp_path(tmpdir, 'test_session.xlsx')
    session.meta = meta
    session.save(xlsx_path, engine=engine)

    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    assert list(loaded.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']
    assert loaded.meta == meta

    # update a Group + an Axis + an array
    # XXX: overwrite is not taken into account by the pandas_excel engine
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    update = Session(a=new_axis, a01=new_group, e=new_array, meta=meta)
    update.save(xlsx_path, engine=engine)
    loaded = Session()
    loaded.load(xlsx_path, engine=engine)
    assert list(loaded.keys()) == ['a', 'a01', 'e']
    assert loaded['a'].equals(new_axis)
    assert all(loaded['a01'] == new_group)
    assert_array_nan_equal(loaded['e'], new_array)
    assert loaded.meta == meta

    # load only some objects
    session.save(xlsx_path, engine=engine)
    subset = ['a', 'a01', 'e', 'f']
    loaded = Session()
    loaded.load(xlsx_path, names=subset, engine=engine)
    assert list(loaded.keys()) == ['a', 'a01', 'e', 'f']
    assert loaded.meta == meta
Example #28
0
def test_csv_io(tmpdir, session, meta):
    """Save a session as CSV files and load it back in several ways.

    The directory is loaded directly and through a ``*.csv`` pattern, an
    unparsable CSV file is added to check that loading raises
    ``pd.errors.ParserError``, the pattern is loaded again with
    ``ignore_exceptions=True`` and finally only some names are loaded.
    The output directory is removed at the end, whatever the outcome.
    """
    # computed outside the try block so that the cleanup in `finally` never
    # hits an unbound name (which would hide the original error)
    fpath = tmp_path(tmpdir, 'test_session_csv')
    try:
        session.meta = meta
        session.to_csv(fpath)

        # test loading a directory
        s = Session()
        s.load(fpath, engine='pandas_csv')
        # CSV cannot keep ordering (so we always sort keys)
        # Also, Axis objects are read first, then Groups objects and finally LArray objects
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # test loading with a pattern
        pattern = os.path.join(fpath, '*.csv')
        s = Session(pattern)
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # create an invalid .csv file
        invalid_fpath = os.path.join(fpath, 'invalid.csv')
        with open(invalid_fpath, 'w') as invalid_file:
            invalid_file.write(',",')

        # try loading the directory with the invalid file
        with pytest.raises(pd.errors.ParserError):
            Session(pattern)

        # test loading a pattern, ignoring invalid/unsupported files
        s = Session()
        s.load(pattern, ignore_exceptions=True)
        assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
        assert s.meta == meta

        # load only some objects
        s = Session()
        s.load(fpath, names=['a', 'a01', 'e', 'f'])
        assert list(s.keys()) == ['a', 'a01', 'e', 'f']
        assert s.meta == meta
    finally:
        shutil.rmtree(fpath)
Example #29
0
class TestSession(TestCase):
    """Tests for Session: item access, filtering, file I/O and operators.

    setUp() builds a session whose insertion order ('b', 'a', 'c', 'd', 'e',
    'g', 'f') is deliberately not alphabetical, so that tests can tell apart
    insertion order (keys()) from sorted order (names).
    """

    def setUp(self):
        """Create the objects shared by the tests and gather them in a session."""
        self.a = Axis([], 'a')
        self.b = Axis([], 'b')
        self.c = 'c'
        self.d = {}
        self.e = ndrange([(2, 'a0'), (3, 'a1')])
        # e2 is not stored in the session: it is used to update 'e' in I/O tests
        self.e2 = ndrange(('a=a0..a2', 'b=b0..b2'))
        self.f = ndrange([(3, 'a0'), (2, 'a1')])
        self.g = ndrange([(2, 'a0'), (4, 'a1')])
        self.session = Session([
            ('b', self.b),
            ('a', self.a),
            ('c', self.c),
            ('d', self.d),
            ('e', self.e),
            ('g', self.g),
            ('f', self.f),
        ])

    def assertObjListEqual(self, got, expected):
        """Assert both iterables have the same length and pairwise equal() items."""
        self.assertEqual(len(got), len(expected))
        for e1, e2 in zip(got, expected):
            self.assertTrue(equal(e1, e2), "{} != {}".format(e1, e2))

    def test_init(self):
        """Build a Session from objects and keywords, then from a file path."""
        s = Session(self.b,
                    self.a,
                    c=self.c,
                    d=self.d,
                    e=self.e,
                    f=self.f,
                    g=self.g)
        self.assertEqual(s.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g'])

        # NOTE(review): this file is written by test_h5_io -- presumably this
        # relies on it already being present on disk; confirm
        s = Session(abspath('test_session.h5'))
        self.assertEqual(s.names, ['e', 'f', 'g'])

        # this needs xlwings installed
        # s = Session('test_session_ef.xlsx')
        # self.assertEqual(s.names, ['e', 'f'])

        # TODO: format autodetection does not work in this case
        # s = Session('test_session_csv')
        # self.assertEqual(s.names, ['e', 'f', 'g'])

    def test_getitem(self):
        """Access single objects with s[name]."""
        s = self.session
        self.assertIs(s['a'], self.a)
        self.assertIs(s['b'], self.b)
        self.assertEqual(s['c'], 'c')
        self.assertEqual(s['d'], {})

    def test_getitem_list(self):
        """Access several objects with s[list of names], in the requested order."""
        s = self.session
        self.assertEqual(list(s[[]]), [])
        self.assertEqual(list(s[['b', 'a']]), [self.b, self.a])
        self.assertEqual(list(s[['a', 'b']]), [self.a, self.b])
        self.assertEqual(list(s[['a', 'e', 'g']]), [self.a, self.e, self.g])
        self.assertEqual(list(s[['g', 'a', 'e']]), [self.g, self.a, self.e])

    def test_getitem_larray(self):
        """Index a session with the results of ``==`` and ``!=``."""
        s1 = self.session.filter(kind=LArray)
        s2 = Session({'e': self.e + 1, 'f': self.f})
        res_eq = s1[s1 == s2]
        res_neq = s1[s1 != s2]
        self.assertEqual(list(res_eq), [self.f])
        self.assertEqual(list(res_neq), [self.e, self.g])

    def test_setitem(self):
        """Replace an object with s[name] = value."""
        s = self.session
        s['g'] = 'g'
        self.assertEqual(s['g'], 'g')

    def test_getattr(self):
        """Access single objects as attributes."""
        s = self.session
        self.assertIs(s.a, self.a)
        self.assertIs(s.b, self.b)
        self.assertEqual(s.c, 'c')
        self.assertEqual(s.d, {})

    def test_setattr(self):
        """Add an object by assigning an attribute."""
        s = self.session
        s.h = 'h'
        self.assertEqual(s.h, 'h')

    def test_add(self):
        """Add objects with add(), both positionally and by keyword."""
        s = self.session
        h = Axis([], 'h')
        s.add(h, i='i')
        self.assertTrue(h.equals(s.h))
        self.assertEqual(s.i, 'i')

    def test_iter(self):
        """Iterating a session yields its objects in insertion order."""
        expected = [self.b, self.a, self.c, self.d, self.e, self.g, self.f]
        self.assertObjListEqual(self.session, expected)

    def test_filter(self):
        """Filter the session by name pattern and/or by kind of object."""
        s = self.session
        s.ax = 'ax'
        self.assertObjListEqual(
            s.filter(),
            [self.b, self.a, 'c', {}, self.e, self.g, self.f, 'ax'])
        self.assertEqual(list(s.filter('a')), [self.a, 'ax'])
        self.assertEqual(list(s.filter('a', dict)), [])
        self.assertEqual(list(s.filter('a', str)), ['ax'])
        self.assertEqual(list(s.filter('a', Axis)), [self.a])
        self.assertEqual(list(s.filter(kind=Axis)), [self.b, self.a])
        self.assertObjListEqual(s.filter(kind=LArray),
                                [self.e, self.g, self.f])
        self.assertEqual(list(s.filter(kind=dict)), [{}])

    def test_names(self):
        """names is sorted, whatever the order in which objects were added."""
        s = self.session
        self.assertEqual(s.names, ['a', 'b', 'c', 'd', 'e', 'f', 'g'])
        # add them in the "wrong" order
        s.add(i='i')
        s.add(h='h')
        self.assertEqual(s.names,
                         ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'])

    def test_h5_io(self):
        """Save to an .h5 file, reload, update one array and load a subset."""
        fpath = abspath('test_session.h5')
        self.session.save(fpath)

        s = Session()
        s.load(fpath)
        # HDF does *not* keep ordering (ie, keys are always sorted)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, overwrite=False)
        s.load(fpath)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])
        assert_array_nan_equal(s['e'], self.e2)

        s = Session()
        s.load(fpath, ['e', 'f'])
        self.assertEqual(list(s.keys()), ['e', 'f'])

    def test_xlsx_pandas_io(self):
        """Save to and load from .xlsx files with the 'pandas_excel' engine."""
        fpath = abspath('test_session.xlsx')
        self.session.save(fpath, engine='pandas_excel')

        s = Session()
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, engine='pandas_excel', overwrite=False)
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

        # save only some of the objects to a second file
        fpath = abspath('test_session_ef.xlsx')
        self.session.save(fpath, ['e', 'f'], engine='pandas_excel')
        s = Session()
        s.load(fpath, engine='pandas_excel')
        self.assertEqual(list(s.keys()), ['e', 'f'])

    @pytest.mark.skipif(xw is None, reason="xlwings is not available")
    def test_xlsx_xlwings_io(self):
        """Save to and load from .xlsx files with the 'xlwings_excel' engine."""
        fpath = abspath('test_session_xw.xlsx')
        # test save when Excel file does not exist
        self.session.save(fpath, engine='xlwings_excel')

        s = Session()
        s.load(fpath, engine='xlwings_excel')
        # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, engine='xlwings_excel', overwrite=False)
        s.load(fpath, engine='xlwings_excel')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

        # save only some of the objects to a second file
        fpath = abspath('test_session_ef_xw.xlsx')
        self.session.save(fpath, ['e', 'f'], engine='xlwings_excel')
        s = Session()
        s.load(fpath, engine='xlwings_excel')
        self.assertEqual(list(s.keys()), ['e', 'f'])

    def test_csv_io(self):
        """Write the session with to_csv() and check the keys read back."""
        fpath = abspath('test_session_csv')
        self.session.to_csv(fpath)

        s = Session()
        s.load(fpath, engine='pandas_csv')
        # CSV cannot keep ordering (so we always sort keys)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])

    def test_pickle_io(self):
        """Save the session to a .pkl file, load it back, then update one array."""
        fpath = abspath('test_session.pkl')
        self.session.save(fpath)

        s = Session()
        s.load(fpath, engine='pickle')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])

        # update an array (overwrite=False)
        Session(e=self.e2).save(fpath, overwrite=False)
        s.load(fpath, engine='pickle')
        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
        assert_array_nan_equal(s['e'], self.e2)

    def test_to_globals(self):
        """Check to_globals(): its warning, the created names and inplace mode."""
        with pytest.warns(RuntimeWarning) as caught_warnings:
            self.session.to_globals()
        assert len(caught_warnings) == 1
        assert caught_warnings[0].message.args[0] == "Session.to_globals should usually only be used in interactive " \
                                                     "consoles and not in scripts. Use warn=False to deactivate this " \
                                                     "warning."
        assert caught_warnings[0].filename == __file__

        # NOTE(review): a..g are not defined in this method -- presumably
        # to_globals() created them as module globals; confirm
        self.assertIs(a, self.a)
        self.assertIs(b, self.b)
        self.assertIs(c, self.c)
        self.assertIs(d, self.d)
        self.assertIs(e, self.e)
        self.assertIs(f, self.f)
        self.assertIs(g, self.g)

        # test inplace
        backup_dest = e
        backup_value = self.session.e.copy()
        self.session.e = zeros_like(e)
        self.session.to_globals(inplace=True, warn=False)
        # check the variable is correct (the same as before)
        self.assertIs(e, backup_dest)
        self.assertIsNot(e, self.session.e)
        # check the content has changed
        assert_array_nan_equal(e, self.session.e)
        self.assertFalse(larray_equal(e, backup_value))

    def test_eq(self):
        """Compare sessions with ``==`` and check the result along the 'name' axis."""
        sess = self.session.filter(kind=LArray)
        expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        self.assertTrue(all(sess == expected))

        # the other session has no 'g'
        other = Session({'e': self.e, 'f': self.f})
        res = sess == other
        self.assertEqual(res.ndim, 1)
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [True, False, True])

        # same as above, with one cell of 'e' modified
        e2 = self.e.copy()
        e2.i[1, 1] = 42
        other = Session({'e': e2, 'f': self.f})
        res = sess == other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [False, False, True])

    def test_ne(self):
        """Compare sessions with ``!=`` and check the result along the 'name' axis."""
        sess = self.session.filter(kind=LArray)
        expected = Session([('e', self.e), ('f', self.f), ('g', self.g)])
        self.assertFalse(any(sess != expected))

        # the other session has no 'g'
        other = Session({'e': self.e, 'f': self.f})
        res = sess != other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [False, True, False])

        # same as above, with one cell of 'e' modified
        e2 = self.e.copy()
        e2.i[1, 1] = 42
        other = Session({'e': e2, 'f': self.f})
        res = sess != other
        self.assertEqual(res.axes.names, ['name'])
        self.assertTrue(np.array_equal(res.axes.labels[0], ['e', 'g', 'f']))
        self.assertEqual(list(res), [True, True, False])

    def test_sub(self):
        """Subtract a session holding an array and a scalar from the array session."""
        sess = self.session.filter(kind=LArray)
        other = Session({'e': self.e - 1, 'f': 1})
        diff = sess - other
        assert_array_nan_equal(diff['e'], np.full((2, 3), 1, dtype=np.int32))
        assert_array_nan_equal(diff['f'], np.arange(-1, 5).reshape(3, 2))
        # 'g' is absent from the subtracted session
        self.assertTrue(isnan(diff['g']).all())

    def test_div(self):
        """Divide two sessions and check the single divide-by-zero warning."""
        sess = self.session.filter(kind=LArray)
        other = Session({'e': self.e - 1, 'f': self.f + 1})

        with pytest.warns(RuntimeWarning) as caught_warnings:
            res = sess / other
        assert len(caught_warnings) == 1
        assert caught_warnings[0].message.args[
            0] == "divide by zero encountered during operation"
        assert caught_warnings[0].filename == __file__

        # numpy would warn about the division by zero too: silence it here
        with np.errstate(divide='ignore', invalid='ignore'):
            flat_e = np.arange(6) / np.arange(-1, 5)
        assert_array_nan_equal(res['e'], flat_e.reshape(2, 3))

        flat_f = np.arange(6) / np.arange(1, 7)
        assert_array_nan_equal(res['f'], flat_f.reshape(3, 2))
        # 'g' is absent from the divisor session
        self.assertTrue(isnan(res['g']).all())

    def test_summary(self):
        """Check the text returned by summary() for the array session."""
        sess = self.session.filter(kind=LArray)
        self.assertEqual(
            sess.summary(), "e: a0*, a1*\n    \n\n"
            "g: a0*, a1*\n    \n\n"
            "f: a0*, a1*\n    \n")

    def test_pickle_roundtrip(self):
        """A session dumped and reloaded with pickle compares equal to the original."""
        original = self.session
        s = pickle.dumps(original)
        res = pickle.loads(s)
        self.assertTrue(all(res == original))
Example #30
0
    def test_xlsx_pandas_io(self):
        """Save to and load from .xlsx files with the 'pandas_excel' engine."""
        engine = 'pandas_excel'
        key_order = ['e', 'g', 'f']

        full_path = abspath('test_session.xlsx')
        self.session.save(full_path, engine=engine)

        loaded = Session()
        loaded.load(full_path, engine=engine)
        self.assertEqual(list(loaded.keys()), key_order)

        # update an array (overwrite=False)
        replacement = Session(e=self.e2)
        replacement.save(full_path, engine=engine, overwrite=False)
        loaded.load(full_path, engine=engine)
        self.assertEqual(list(loaded.keys()), key_order)
        assert_array_nan_equal(loaded['e'], self.e2)

        # save only some of the objects to a second file
        partial_path = abspath('test_session_ef.xlsx')
        self.session.save(partial_path, ['e', 'f'], engine=engine)
        loaded = Session()
        loaded.load(partial_path, engine=engine)
        self.assertEqual(list(loaded.keys()), ['e', 'f'])
Example #31
0
    def test_xlsx_xlwings_io(self):
        """Save to and load from .xlsx files with the 'xlwings_excel' engine."""
        engine = 'xlwings_excel'
        key_order = ['e', 'g', 'f']

        full_path = abspath('test_session_xw.xlsx')
        # test save when Excel file does not exist
        self.session.save(full_path, engine=engine)

        loaded = Session()
        loaded.load(full_path, engine=engine)
        # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
        self.assertEqual(list(loaded.keys()), key_order)

        # update an array (overwrite=False)
        replacement = Session(e=self.e2)
        replacement.save(full_path, engine=engine, overwrite=False)
        loaded.load(full_path, engine=engine)
        self.assertEqual(list(loaded.keys()), key_order)
        assert_array_nan_equal(loaded['e'], self.e2)

        # save only some of the objects to a second file
        partial_path = abspath('test_session_ef_xw.xlsx')
        self.session.save(partial_path, ['e', 'f'], engine=engine)
        loaded = Session()
        loaded.load(partial_path, engine=engine)
        self.assertEqual(list(loaded.keys()), ['e', 'f'])
Example #32
0
def test_xlsx_xlwings_io(tmpdir, session, meta):
    """Save a session (with metadata) to .xlsx using the xlwings_excel engine and check what is loaded back."""
    engine = 'xlwings_excel'
    all_keys = ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']

    workbook_path = tmp_path(tmpdir, 'test_session_xw.xlsx')
    session.meta = meta
    # intended to exercise saving when the Excel file does not exist yet
    session.save(workbook_path, engine=engine)

    loaded = Session()
    loaded.load(workbook_path, engine=engine)
    # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # replace a Group, an Axis and an array in the existing file (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    replacement = Session(a=new_axis, a01=new_group, e=new_array)
    replacement.save(workbook_path, engine=engine, overwrite=False)
    reloaded = Session()
    reloaded.load(workbook_path, engine=engine)
    assert list(reloaded.keys()) == all_keys
    assert reloaded['a'].equals(new_axis)
    assert all(reloaded['a01'] == new_group)
    assert_array_nan_equal(reloaded['e'], new_array)
    assert reloaded.meta == meta

    # restrict loading to an explicit list of names
    wanted = ['a', 'a01', 'e', 'f']
    partial = Session()
    partial.load(workbook_path, names=wanted, engine=engine)
    assert list(partial.keys()) == ['a', 'a01', 'e', 'f']
    assert partial.meta == meta
Example #33
0
def generate_example_files(csv=True, excel=True, hdf5=True):
    """Build the demography example datasets and write them under ``DATA_DIR``.

    Arrays are built from the Eurostat tables ``demo_pjan``, ``demo_fasec``,
    ``demo_magec`` and ``migr_imm1ctz`` (obtained with
    ``larray_eurostat.eurostat_get``). Two families of files are produced:

    * the full ``demography_eurostat`` session (axes, groups and arrays);
    * smaller "examples" files restricted to the years 2013..2015, including
      two dataframes used to test missing axis names / missing values.

    Parameters
    ----------
    csv : bool, optional
        Write the CSV outputs (``demography_eurostat`` and the files of the
        ``examples`` sub-directory). Defaults to True.
    excel : bool, optional
        Write the Excel outputs (``demography_eurostat.xlsx``,
        ``examples.xlsx``, ``population_only.xlsx`` and
        ``births_and_deaths.xlsx``). Defaults to True.
    hdf5 : bool, optional
        Write the HDF5 outputs (``demography_eurostat.h5`` and
        ``examples.h5``). Defaults to True.
    """
    from larray_eurostat import eurostat_get

    def prepare_eurostat_data(dataset_name, countries):
        """Fetch one Eurostat table and reshape it for the requested countries.

        ``countries`` maps country codes (used to select the data) to the
        country names used as labels in the returned array.
        """
        arr = eurostat_get(dataset_name)[X.unit['NR'], X.age['TOTAL'],
                                         X.sex['M,F']]
        # reverse the time axis before selecting the 2013..2017 slice
        arr = arr[X.time[::-1]][2013:2017]
        arr = arr.rename('sex', 'gender')
        arr = arr.set_labels(gender='Male,Female')
        arr = arr.rename('geo', 'country')
        country_codes = list(countries.keys())
        country_names = list(countries.values())
        if dataset_name == 'migr_imm1ctz':
            # example of an array with ambiguous axes
            arr = arr['COMPLET', X.citizen[country_codes],
                      X.country[country_codes]].astype(int)
            arr = arr.rename('citizen', 'citizenship')
            arr = arr.set_labels('citizenship', country_names)
            arr = arr.set_labels('country', country_names)
            arr = arr.transpose('country', 'citizenship', 'gender', 'time')
        else:
            arr = arr[country_codes].astype(int)
            arr = arr.set_labels('country', country_names)
            arr = arr.transpose('country', 'gender', 'time')
        return arr

    countries = {'BE': 'Belgium', 'FR': 'France', 'DE': 'Germany'}
    benelux = {'BE': 'Belgium', 'LU': 'Luxembourg', 'NL': 'Netherlands'}

    # Arrays
    population = prepare_eurostat_data('demo_pjan', countries)
    population.meta.title = 'Population on 1 January by age and sex'
    population.meta.source = 'table demo_pjan from Eurostat'
    # ----
    population_benelux = prepare_eurostat_data('demo_pjan', benelux)
    population_benelux.meta.title = 'Population on 1 January by age and sex (Benelux)'
    population_benelux.meta.source = 'table demo_pjan from Eurostat'
    # ----
    # Belgium is already part of `population`, so only the two other Benelux
    # countries are appended along the country axis
    population_5_countries = population.extend(
        'country', population_benelux[['Luxembourg', 'Netherlands']])
    population_5_countries.meta.title = 'Population on 1 January by age and sex (Benelux + France + Germany)'
    population_5_countries.meta.source = 'table demo_pjan from Eurostat'
    # ----
    births = prepare_eurostat_data('demo_fasec', countries)
    births.meta.title = "Live births by mother's age and newborn's sex"
    births.meta.source = 'table demo_fasec from Eurostat'
    # ----
    deaths = prepare_eurostat_data('demo_magec', countries)
    deaths.meta.title = 'Deaths by age and sex'
    deaths.meta.source = 'table demo_magec from Eurostat'
    # ----
    immigration = prepare_eurostat_data('migr_imm1ctz', benelux)
    immigration.meta.title = 'Immigration by age group, sex and citizenship'
    immigration.meta.source = 'table migr_imm1ctz from Eurostat'

    # Groups
    even_years = population.time[2014::2] >> 'even_years'
    odd_years = population.time[2013::2] >> 'odd_years'

    # Session
    ses = Session({
        'country': population.country,
        'country_benelux': immigration.country,
        'citizenship': immigration.citizenship,
        'gender': population.gender,
        'time': population.time,
        'even_years': even_years,
        'odd_years': odd_years,
        'population': population,
        'population_benelux': population_benelux,
        'population_5_countries': population_5_countries,
        'births': births,
        'deaths': deaths,
        'immigration': immigration,
    })
    ses.meta.title = 'Demographic datasets for a small selection of countries in Europe'
    # the population table is 'demo_pjan' (as used above), not 'demo_jpan'
    ses.meta.source = 'demo_pjan, demo_fasec, demo_magec and migr_imm1ctz tables from Eurostat'

    # EUROSTAT DATASET

    if csv:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat'))
    if excel:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat.xlsx'))
    if hdf5:
        ses.save(os.path.join(DATA_DIR, 'demography_eurostat.h5'))

    # EXAMPLE FILES

    # from here on, the arrays are restricted to 2013..2015
    years = population.time[2013:2015]
    population = population[years]
    population_narrow = population['Belgium,France'].sum('gender')
    births = births[years]
    deaths = deaths[years]
    immigration = immigration[years]

    # Dataframes (for testing missing axis/values)
    df_missing_axis_name = population.to_frame(fold_last_axis_name=False)
    df_missing_values = population.to_frame(fold_last_axis_name=True)
    df_missing_values.drop([('France', 'Male'), ('Germany', 'Female')],
                           inplace=True)

    if csv:
        examples_dir = os.path.join(DATA_DIR, 'examples')
        population.to_csv(os.path.join(examples_dir, 'population.csv'))
        births.to_csv(os.path.join(examples_dir, 'births.csv'))
        deaths.to_csv(os.path.join(examples_dir, 'deaths.csv'))
        immigration.to_csv(os.path.join(examples_dir, 'immigration.csv'))
        df_missing_axis_name.to_csv(
            os.path.join(examples_dir, 'population_missing_axis_name.csv'),
            sep=',', na_rep='')
        df_missing_values.to_csv(
            os.path.join(examples_dir, 'population_missing_values.csv'),
            sep=',', na_rep='')
        population_narrow.to_csv(
            os.path.join(examples_dir, 'population_narrow_format.csv'),
            wide=False)

    if excel:
        examples_xlsx = os.path.join(DATA_DIR, 'examples.xlsx')
        with open_excel(examples_xlsx, overwrite_file=True) as wb:
            wb['population'] = population.dump()
            wb['births'] = births.dump()
            wb['deaths'] = deaths.dump()
            wb['immigration'] = immigration.dump()
            # one sheet holding three arrays, anchored at A1, A9 and A17
            wb['population_births_deaths'] = population.dump()
            wb['population_births_deaths']['A9'] = births.dump()
            wb['population_births_deaths']['A17'] = deaths.dump()
            # the sheets are created first (assigned an empty string), then
            # the dataframes are written starting at cell A1
            wb['population_missing_axis_name'] = ''
            wb['population_missing_axis_name']['A1'].options().value = df_missing_axis_name
            wb['population_missing_values'] = ''
            wb['population_missing_values']['A1'].options().value = df_missing_values
            wb.save()
        # the narrow-format sheet is not written through the workbook above;
        # it is added afterwards to the same file with to_excel(wide=False)
        population_narrow.to_excel(examples_xlsx,
                                   'population_narrow_format',
                                   wide=False)
        Session({
            'country': population.country,
            'gender': population.gender,
            'time': population.time,
            'population': population,
        }).save(os.path.join(DATA_DIR, 'population_only.xlsx'))
        Session({
            'births': births,
            'deaths': deaths,
        }).save(os.path.join(DATA_DIR, 'births_and_deaths.xlsx'))

    if hdf5:
        examples_h5_file = os.path.join(DATA_DIR, 'examples.h5')
        population.to_hdf(examples_h5_file, 'population')
        births.to_hdf(examples_h5_file, 'births')
        deaths.to_hdf(examples_h5_file, 'deaths')
        immigration.to_hdf(examples_h5_file, 'immigration')
Example #34
0
def test_pickle_io(tmpdir, session, meta):
    """Save a session (with metadata) to a .pkl file and check what is loaded back with the pickle engine."""
    all_keys = ['b', 'a', 'b12', 'a01', 'e', 'g', 'f']

    pickle_path = tmp_path(tmpdir, 'test_session.pkl')
    session.meta = meta
    # no engine is given when saving; only the loads below name 'pickle'
    session.save(pickle_path)

    loaded = Session()
    loaded.load(pickle_path, engine='pickle')
    assert list(loaded.keys()) == all_keys
    assert loaded.meta == meta

    # replace a Group, an Axis and an array in the existing file (overwrite=False)
    new_axis = Axis('a=0..2')
    new_group = new_axis['0,1'] >> 'a01'
    new_array = ndtest((new_axis, 'b=b0..b2'))
    replacement = Session(a=new_axis, a01=new_group, e=new_array)
    replacement.save(pickle_path, overwrite=False)
    reloaded = Session()
    reloaded.load(pickle_path, engine='pickle')
    assert list(reloaded.keys()) == all_keys
    assert reloaded['a'].equals(new_axis)
    # both the in-memory group and the reloaded one must be Group instances
    assert isinstance(new_group, Group)
    assert isinstance(reloaded['a01'], Group)
    assert reloaded['a01'].eval() == new_group.eval()
    assert_array_nan_equal(reloaded['e'], new_array)
    assert reloaded.meta == meta

    # restrict loading to an explicit list of names
    wanted = ['a', 'a01', 'e', 'f']
    partial = Session()
    partial.load(pickle_path, names=wanted, engine='pickle')
    assert list(partial.keys()) == ['a', 'a01', 'e', 'f']
    assert partial.meta == meta
Example #35
0
    def test_h5_io(self):
        """Save the session to a .h5 file and check what is loaded back."""
        fpath = abspath('test_session.h5')
        # no engine is passed to save/load anywhere in this test
        self.session.save(fpath)

        s = Session()
        s.load(fpath)
        # HDF does *not* keep ordering (ie, keys are always sorted)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])

        # update an array (overwrite=False): a session holding only the new 'e'
        # is saved on top of the existing file, then reloaded into `s`
        Session(e=self.e2).save(fpath, overwrite=False)
        s.load(fpath)
        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])
        assert_array_nan_equal(s['e'], self.e2)

        # load only 'e' and 'f' (names passed positionally) into a fresh session
        s = Session()
        s.load(fpath, ['e', 'f'])
        self.assertEqual(list(s.keys()), ['e', 'f'])