Ejemplo n.º 1
0
def test_genfromdta_datetime():
    """Check the first two rows of the time-series example file.

    The file is read twice, once with the default return type and once with
    ``pandas=True``. Both reads are expected to emit a ``FutureWarning``.
    """
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")
    first_row = (
        datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
        datetime(2010, 1, 20), datetime(2010, 1, 8), datetime(2010, 1, 1),
        datetime(1974, 7, 1), datetime(2010, 1, 1), datetime(2010, 1, 1),
    )
    second_row = (
        datetime(1959, 12, 31, 20, 3, 20), -1479590, datetime(1953, 10, 2),
        datetime(1948, 6, 10), datetime(1955, 1, 1), datetime(1955, 7, 1),
        datetime(1955, 1, 1), datetime(2, 1, 1),
    )
    results = [first_row, second_row]

    with pytest.warns(FutureWarning):
        dta = genfromdta(dta_path)

    for idx, expected in enumerate(results):
        assert_array_equal(dta[idx].tolist(), expected)

    with warnings.catch_warnings(record=True):
        with pytest.warns(FutureWarning):
            dta = genfromdta(dta_path, pandas=True)

    # The DataFrame rows are compared against Timestamp values. Datetimes
    # with year <= 2 are kept as plain ``datetime`` objects (presumably
    # because they fall outside the Timestamp range -- TODO confirm).
    expected_rows = [
        [Timestamp(val) if isinstance(val, datetime) and val.year > 2 else val
         for val in row]
        for row in results
    ]

    assert dta.iloc[0].tolist() == expected_rows[0]
    assert dta.iloc[1].tolist() == expected_rows[1]
Ejemplo n.º 2
0
def test_datetime_roundtrip():
    """Write data with a datetime column and read it back unchanged.

    The round trip is done for a structured array and for the DataFrame
    built from it. Constructing the writer and reading the file are each
    expected to emit a ``FutureWarning``.
    """
    def roundtrip(data, **read_kwargs):
        # Write ``data`` to an in-memory file, rewind, and read it back.
        stream = BytesIO()
        with pytest.warns(FutureWarning):
            writer = StataWriter(stream, data, {"var2" : "tm"})
        writer.write_file()
        stream.seek(0)
        with pytest.warns(FutureWarning):
            return genfromdta(stream, **read_kwargs)

    records = [(1, datetime(2010, 1, 1), 2),
               (2, datetime(2010, 2, 1), 3),
               (4, datetime(2010, 3, 1), 5)]
    fields = [('var1', float), ('var2', object), ('var3', float)]
    dta = np.array(records, dtype=fields)

    assert_equal(dta, roundtrip(dta))

    frame = DataFrame.from_records(dta)
    frame_back = roundtrip(frame, pandas=True)

    # The frame read back has an extra 'index' column; drop it to compare.
    ptesting.assert_frame_equal(frame, frame_back.drop('index', axis=1))
Ejemplo n.º 3
0
def test_genfromdta_datetime():
    """Check the first two rows of the time-series example file.

    The default read must record at least one warning. The ``pandas=True``
    read is compared against the same expected rows.
    """
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")
    results = [
        (datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
         datetime(2010, 1, 20), datetime(2010, 1, 8), datetime(2010, 1, 1),
         datetime(1974, 7, 1), datetime(2010, 1, 1), datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20), -1479590, datetime(1953, 10, 2),
         datetime(1948, 6, 10), datetime(1955, 1, 1), datetime(1955, 7, 1),
         datetime(1955, 1, 1), datetime(2, 1, 1)),
    ]

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        dta = genfromdta(dta_path)
        # should get a warning for that format.
        assert_(len(caught) > 0)

    for idx, expected in enumerate(results):
        assert_array_equal(dta[idx].tolist(), expected)

    # Warnings from the pandas read are captured and ignored.
    with warnings.catch_warnings(record=True):
        dta = genfromdta(dta_path, pandas=True)

    for idx, expected in enumerate(results):
        assert_array_equal(dta.iloc[idx].tolist(), expected)
Ejemplo n.º 4
0
def test_datetime_roundtrip():
    """Write data with a datetime column and read it back unchanged.

    Runs once with a structured array and once with the DataFrame built from
    it. Each writer construction and each read is expected to emit a
    ``FutureWarning``.
    """
    fields = [('var1', float), ('var2', object), ('var3', float)]
    rows = [
        (1, datetime(2010, 1, 1), 2),
        (2, datetime(2010, 2, 1), 3),
        (4, datetime(2010, 3, 1), 5),
    ]
    array_in = np.array(rows, dtype=fields)

    # --- structured array ---
    array_buf = BytesIO()
    with pytest.warns(FutureWarning):
        array_writer = StataWriter(array_buf, array_in, {"var2": "tm"})
    array_writer.write_file()
    array_buf.seek(0)
    with pytest.warns(FutureWarning):
        array_out = genfromdta(array_buf)
    assert_equal(array_in, array_out)

    # --- DataFrame ---
    frame_in = DataFrame.from_records(array_in)
    frame_buf = BytesIO()
    with pytest.warns(FutureWarning):
        frame_writer = StataWriter(frame_buf, frame_in, {"var2": "tm"})
    frame_writer.write_file()
    frame_buf.seek(0)
    with pytest.warns(FutureWarning):
        frame_out = genfromdta(frame_buf, pandas=True)

    # The frame read back has an extra 'index' column; drop it to compare.
    assert_frame_equal(frame_in, frame_out.drop('index', axis=1))
Ejemplo n.º 5
0
def test_genfromdta_datetime():
    """Check the first two rows of the time-series example file.

    The file is read once with the default return type and once with
    ``pandas=True``. Both results are compared against the same expected
    rows. The path is relative, so it is resolved against the current
    working directory.
    """
    results = [(datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
            datetime(2010, 1, 20), datetime(2010, 1, 8), datetime(2010, 1, 1),
            datetime(1974, 7, 1), datetime(2010, 1, 1), datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20), -1479590, datetime(1953, 10, 2),
            datetime(1948, 6, 10), datetime(1955, 1, 1), datetime(1955, 7, 1),
            datetime(1955, 1, 1), datetime(2, 1, 1))]
    dta = genfromdta("results/time_series_examples.dta")
    assert_array_equal(dta[0].tolist(), results[0])
    assert_array_equal(dta[1].tolist(), results[1])

    dta = genfromdta("results/time_series_examples.dta", pandas=True)
    # ``DataFrame.irow`` has been removed from pandas; ``.iloc`` is the
    # positional row accessor.
    assert_array_equal(dta.iloc[0].tolist(), results[0])
    assert_array_equal(dta.iloc[1].tolist(), results[1])
Ejemplo n.º 6
0
def test_missing_roundtrip():
    """Round-trip missing values and check the ``missing_flt`` replacement.

    A one-row array holding NaN, inf and an empty string is written and read
    back with ``missing_flt=np.nan``. Then ``./data_missing.dta`` is read
    with ``missing_flt=-999`` and its first five fields are checked.
    """
    fields = [("double_miss", float), ("float_miss", np.float32),
              ("string_miss", "a1")]
    original = np.array([(np.nan, np.inf, "")], dtype=fields)

    buf = StringIO()
    writer = StataWriter(buf, original)
    writer.write_file()
    buf.seek(0)

    read_back = genfromdta(buf, missing_flt=np.nan)
    # Both float fields must come back as null; the string stays empty.
    for pos in (0, 1):
        assert_(isnull(read_back[0][pos]))
    assert_(read_back[0][2] == "")

    from_file = genfromdta("./data_missing.dta", missing_flt=-999)
    replaced = [from_file[0][pos] == -999 for pos in range(5)]
    assert_(np.all(replaced))
Ejemplo n.º 7
0
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter and genfromdta.

    The frame is written with int64 'year'/'quarter' columns. Depending on
    the integer width that comes back, the result is compared against the
    int64 frame (including its index) or against an int32 copy without the
    index column.
    """
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)
    # dta is int64 'i8'  given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)

    # No warning may be recorded while the file is written.
    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    # Compare dtypes by value, not identity, and select the second column's
    # dtype positionally with ``.iloc``: integer keys on a label-indexed
    # Series are deprecated as positional lookups.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        assert_frame_equal(dta.reset_index(), dta5)
    else:
        # do not check index because it has different size, int32 versus int64
        assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Ejemplo n.º 8
0
def test_missing_roundtrip():
    """Round-trip missing values and check the ``missing_flt`` replacement.

    A one-row array holding NaN, inf and an empty string is written to an
    in-memory file and read back with ``missing_flt=np.nan``. Then
    ``results/data_missing.dta`` is read with ``missing_flt=-999`` and its
    first five fields are checked.
    """
    fields = [("double_miss", float), ("float_miss", np.float32),
              ("string_miss", "a1")]
    original = np.array([(np.nan, np.inf, "")], dtype=fields)

    buf = BytesIO()
    writer = StataWriter(buf, original)
    writer.write_file()
    buf.seek(0)

    read_back = genfromdta(buf, missing_flt=np.nan)
    # Both float fields must come back as null; the string comes back as
    # empty bytes.
    assert_(isnull(read_back[0][0]))
    assert_(isnull(read_back[0][1]))
    assert_(read_back[0][2] == asbytes(""))

    missing_path = os.path.join(curdir, "results/data_missing.dta")
    from_file = genfromdta(missing_path, missing_flt=-999)
    replaced = [from_file[0][pos] == -999 for pos in range(5)]
    assert_(np.all(replaced))
Ejemplo n.º 9
0
def test_missing_roundtrip():
    """Check missing-value handling on write/read and on a stored file.

    First a single record (NaN, inf, empty string) goes through StataWriter
    and back via ``genfromdta(..., missing_flt=np.nan)``. Second, the stored
    ``results/data_missing.dta`` is read with ``missing_flt=-999``.
    """
    record_dtype = [("double_miss", float), ("float_miss", np.float32),
                    ("string_miss", "a1")]
    written = np.array([(np.nan, np.inf, "")], dtype=record_dtype)

    stream = BytesIO()
    writer = StataWriter(stream, written)
    writer.write_file()
    stream.seek(0)

    dta = genfromdta(stream, missing_flt=np.nan)
    # The two float fields are expected to read back as null.
    for pos in (0, 1):
        assert_(isnull(dta[0][pos]))
    assert_(dta[0][2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
                     missing_flt=-999)
    # The first five fields of the first row carry the replacement value.
    flags = [dta[0][pos] == -999 for pos in range(5)]
    assert_(np.all(flags))
Ejemplo n.º 10
0
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter and genfromdta.

    The frame is written with int64 'year'/'quarter' columns. Depending on
    the integer width that comes back, the result is compared against the
    int64 frame (including its index) or against an int32 copy without the
    index column.
    """
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)
    # dta is int64 'i8'  given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)

    # No warning may be recorded while the file is written.
    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    # Compare dtypes by value, not identity, and select the second column's
    # dtype positionally with ``.iloc``: integer keys on a label-indexed
    # Series are deprecated as positional lookups.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Ejemplo n.º 11
0
def test_genfromdta():
    # Compare genfromdta output for macrodata.dta with the stored
    # ``macrodata_result`` reference (originally created with genfromtxt).
    # NOTE: Stata handles data very oddly.  Round tripping from csv to dta
    #    to ndarray 2710.349 (csv) -> 2510.2491 (stata) -> 2710.34912109375
    #    (dta/ndarray)
    from .results.macrodata import macrodata_result as expected
    dta_path = curdir+'/../../datasets/macrodata/macrodata.dta'
    loaded = genfromdta(dta_path)
    matches = loaded == expected
    assert_array_equal(matches, True)
Ejemplo n.º 12
0
def test_genfromdta():
    # Read macrodata.dta with genfromdta and require it to equal the stored
    # ``macrodata_result`` reference (originally created with genfromtxt).
    # NOTE: Stata handles data very oddly.  Round tripping from csv to dta
    #    to ndarray 2710.349 (csv) -> 2510.2491 (stata) -> 2710.34912109375
    #    (dta/ndarray)
    from .results.macrodata import macrodata_result as reference
    source_file = curdir + '/../../datasets/macrodata/macrodata.dta'
    actual = genfromdta(source_file)
    comparison = actual == reference
    assert_array_equal(comparison, True)
Ejemplo n.º 13
0
def test_genfromdta_pandas():
    """Compare ``genfromdta(..., pandas=True)`` with the packaged macrodata.

    Both frames are cast to float before comparison. The read is expected to
    emit a ``FutureWarning``.
    """
    curdir = os.path.dirname(os.path.abspath(__file__))
    dta_path = curdir + '/../../datasets/macrodata/macrodata.dta'
    expected = macrodata.load_pandas().data

    with pytest.warns(FutureWarning):
        loaded = genfromdta(dta_path, pandas=True)

    assert_frame_equal(loaded.astype(float), expected.astype(float))
Ejemplo n.º 14
0
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    The first two fields are re-declared as ('year', int) and
    ('quarter', int) before writing; the remaining fields keep their dtype.
    """
    source = macrodata.load().data
    fields = [('year', int), ('quarter', int)]
    fields = fields + source.dtype.descr[2:]
    expected = source.astype(np.dtype(fields))

    stream = BytesIO()
    writer = StataWriter(stream, expected)
    writer.write_file()
    stream.seek(0)

    read_back = genfromdta(stream)
    assert_array_equal(expected, read_back)
Ejemplo n.º 15
0
def test_genfromdta_pandas():
    """Compare ``genfromdta(..., pandas=True)`` with the packaged macrodata.

    Both frames are cast to float before comparison. The read is expected to
    emit a ``FutureWarning``.
    """
    # ``pandas.util.testing`` was deprecated and later removed;
    # ``pandas.testing`` is the public home of ``assert_frame_equal``.
    from pandas.testing import assert_frame_equal
    dta = macrodata.load_pandas().data
    curdir = os.path.dirname(os.path.abspath(__file__))

    with pytest.warns(FutureWarning):
        res1 = genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta',
                          pandas=True)

    res1 = res1.astype(float)
    assert_frame_equal(res1, dta.astype(float))
Ejemplo n.º 16
0
def test_stata_writer_array():
    """Round-trip a plain ndarray (``DataFrame.values``) through StataWriter.

    The columns are renamed v1..v14 and the data read back is compared with
    the frame's records without the index. (Presumably v1..v14 are the names
    the writer gives unnamed array columns -- TODO confirm.)
    """
    frame = DataFrame.from_records(macrodata.load().data)
    frame.columns = ["v%d" % num for num in range(1, 15)]

    stream = BytesIO()
    writer = StataWriter(stream, frame.values)
    writer.write_file()
    stream.seek(0)

    read_back = genfromdta(stream)
    expected = frame.to_records(index=False)
    assert_array_equal(expected, read_back)
Ejemplo n.º 17
0
def test_genfromdta_datetime():
    """Check the first two rows of the time-series example file.

    The default read must record at least one warning. The ``pandas=True``
    read is compared against the same expected rows.
    """
    results = [(datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
            datetime(2010, 1, 20), datetime(2010, 1, 8), datetime(2010, 1, 1),
            datetime(1974, 7, 1), datetime(2010, 1, 1), datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20), -1479590, datetime(1953, 10, 2),
            datetime(1948, 6, 10), datetime(1955, 1, 1), datetime(1955, 7, 1),
            datetime(1955, 1, 1), datetime(2, 1, 1))]
    with warnings.catch_warnings(record=True) as w:
        # Without the "always" filter, a warning that already fired earlier
        # in the session can be suppressed, so the count would depend on
        # test order. With it, unrelated warnings may also be recorded, so
        # the check is "at least one" rather than "exactly one".
        warnings.simplefilter('always')
        dta = genfromdta(os.path.join(curdir, "results/time_series_examples.dta"))
        assert_(len(w) > 0)  # should get a warning for that format.

    assert_array_equal(dta[0].tolist(), results[0])
    assert_array_equal(dta[1].tolist(), results[1])

    # Warnings from the pandas read are captured and ignored.
    with warnings.catch_warnings(record=True):
        dta = genfromdta(os.path.join(curdir, "results/time_series_examples.dta"),
                         pandas=True)

    assert_array_equal(dta.iloc[0].tolist(), results[0])
    assert_array_equal(dta.iloc[1].tolist(), results[1])
Ejemplo n.º 18
0
def test_stata_writer_structured():
    """Write the macrodata structured array and read it back unchanged.

    Before writing, the first two fields are re-declared as ('year', int)
    and ('quarter', int); the other fields keep their original description.
    """
    raw = macrodata.load().data
    leading = [('year', int), ('quarter', int)]
    new_dtype = np.dtype(leading + raw.dtype.descr[2:])
    to_write = raw.astype(new_dtype)

    out = BytesIO()
    writer = StataWriter(out, to_write)
    writer.write_file()
    out.seek(0)

    from_file = genfromdta(out)
    assert_array_equal(to_write, from_file)
Ejemplo n.º 19
0
def test_stata_writer_array():
    """Write a plain ndarray and compare what is read back.

    The macrodata records are turned into a DataFrame with columns v1..v14.
    Its ``.values`` array is written, and the data read back must equal the
    frame's records without the index.
    """
    table = DataFrame.from_records(macrodata.load().data)
    table.columns = ["v%d" % (pos + 1) for pos in range(14)]

    out = BytesIO()
    writer = StataWriter(out, table.values)
    writer.write_file()
    out.seek(0)

    from_file = genfromdta(out)
    assert_array_equal(table.to_records(index=False), from_file)
Ejemplo n.º 20
0
def test_genfromdta():
    # Test genfromdta against the stored ``macrodata_result`` reference
    # (originally created with genfromtxt).
    # NOTE: Stata handles data very oddly.  Round tripping from csv to dta
    #    to ndarray 2710.349 (csv) -> 2510.2491 (stata) -> 2710.34912109375
    #    (dta/ndarray)
    curdir = os.path.dirname(os.path.abspath(__file__))
    # Explicit relative import: the implicit ``from results...`` form only
    # resolves on Python 2.
    from .results.macrodata import macrodata_result as res2
    res1 = genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta')
    assert_array_equal(res1 == res2, True)
Ejemplo n.º 21
0
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter and genfromdta.

    'year' and 'quarter' are cast to 'i8' before the frame is built. The
    data read back is compared against the frame with its index reset.
    """
    raw = macrodata.load().data
    #as of 0.9.0 pandas only supports i8 and f8
    wide_dtype = np.dtype([('year', 'i8'), ('quarter', 'i8')]
                          + raw.dtype.descr[2:])
    frame = DataFrame.from_records(raw.astype(wide_dtype))

    out = BytesIO()
    writer = StataWriter(out, frame)
    writer.write_file()
    out.seek(0)

    from_file = DataFrame.from_records(genfromdta(out))
    ptesting.assert_frame_equal(frame.reset_index(), from_file)
Ejemplo n.º 22
0
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    The first two fields are re-declared as ('year', int) and
    ('quarter', int). When ``PY3`` is false, field names are encoded to
    ASCII before the dtype is built.
    """
    source = macrodata.load(as_pandas=False).data
    fields = [('year', int), ('quarter', int)]
    fields = fields + source.dtype.descr[2:]
    if not PY3:  # Remove unicode
        fields = [(name.encode('ascii'), typ) for name, typ in fields]
    expected = source.astype(np.dtype(fields))

    stream = BytesIO()
    writer = StataWriter(stream, expected)
    writer.write_file()
    stream.seek(0)

    read_back = genfromdta(stream)
    assert_array_equal(expected, read_back)
Ejemplo n.º 23
0
def test_stata_writer_structured():
    """Write the macrodata structured array and read it back unchanged.

    'year' and 'quarter' are re-declared with ``int``; the other fields keep
    their description. When ``PY3`` is false, field names are encoded to
    ASCII first.
    """
    raw = macrodata.load(as_pandas=False).data
    descr = [('year', int), ('quarter', int)] + raw.dtype.descr[2:]
    if not PY3:  # Remove unicode
        descr = [(name.encode('ascii'), typ) for name, typ in descr]
    to_write = raw.astype(np.dtype(descr))

    out = BytesIO()
    writer = StataWriter(out, to_write)
    writer.write_file()
    out.seek(0)

    from_file = genfromdta(out)
    assert_array_equal(to_write, from_file)
Ejemplo n.º 24
0
def test_genfromdta_datetime():
    """Check the first two rows of the time-series example file.

    The file is read with the default return type and with ``pandas=True``.
    Both reads are expected to emit a ``FutureWarning``, and both results
    are compared against the same expected rows.
    """
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")
    first_row = (
        datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
        datetime(2010, 1, 20), datetime(2010, 1, 8),
        datetime(2010, 1, 1), datetime(1974, 7, 1),
        datetime(2010, 1, 1), datetime(2010, 1, 1),
    )
    second_row = (
        datetime(1959, 12, 31, 20, 3, 20), -1479590,
        datetime(1953, 10, 2), datetime(1948, 6, 10),
        datetime(1955, 1, 1), datetime(1955, 7, 1),
        datetime(1955, 1, 1), datetime(2, 1, 1),
    )
    results = [first_row, second_row]

    with pytest.warns(FutureWarning):
        dta = genfromdta(dta_path)

    for idx, expected in enumerate(results):
        assert_array_equal(dta[idx].tolist(), expected)

    with warnings.catch_warnings(record=True):
        with pytest.warns(FutureWarning):
            dta = genfromdta(dta_path, pandas=True)

    for idx, expected in enumerate(results):
        assert_array_equal(dta.iloc[idx].tolist(), expected)
Ejemplo n.º 25
0
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    'year' and 'quarter' are re-declared with ``int`` before writing.
    Constructing the writer and reading the file back are each expected to
    emit a ``FutureWarning``.
    """
    source = macrodata.load(as_pandas=False).data
    fields = [('year', int), ('quarter', int)] + source.dtype.descr[2:]
    expected = source.astype(np.dtype(fields))

    stream = BytesIO()
    with pytest.warns(FutureWarning):
        writer = StataWriter(stream, expected)
    writer.write_file()
    stream.seek(0)

    with pytest.warns(FutureWarning):
        read_back = genfromdta(stream)

    assert_array_equal(expected, read_back)
Ejemplo n.º 26
0
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter and genfromdta.

    The frame is written with 'i8' 'year'/'quarter' columns. Depending on
    the integer width that comes back, the result is compared against the
    'i8' frame (including its index) or against an 'i4' copy without the
    index column.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                               ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    # Compare dtypes by value rather than identity. The second column is
    # selected through ``columns[1]`` so the lookup is positional without
    # relying on integer keys into the label-indexed ``dtypes`` Series.
    if dta5[dta5.columns[1]].dtype == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Ejemplo n.º 27
0
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter and genfromdta.

    The frame is written with 'i8' 'year'/'quarter' columns. Depending on
    the integer width that comes back, the result is compared against the
    'i8' frame (including its index) or against an 'i4' copy without the
    index column.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(
        np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(
        np.dtype([('year', 'i4'), ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    # Compare dtypes by value rather than identity. The second column is
    # selected through ``columns[1]`` so the lookup is positional without
    # relying on integer keys into the label-indexed ``dtypes`` Series.
    if dta5[dta5.columns[1]].dtype == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Ejemplo n.º 28
0
Created on Fri Dec 16 12:52:13 2011
Author: Josef Perktold
"""

import numpy as np
from numpy.testing import assert_almost_equal

import statsmodels.api as sm
import statsmodels.stats.sandwich_covariance as sw

#http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta
import statsmodels.iolib.foreign as dta

# Load the survey data: read srs.dta from the working directory if that
# works; otherwise download it first and then read it.
try:
    srs = dta.genfromdta("srs.dta")
    print('using local file')
except IOError:
    # ``urlretrieve`` lives in ``urllib.request`` on Python 3 and in
    # ``urllib`` on Python 2; support both so the script runs on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
    print('downloading file')
    srs = dta.genfromdta("srs.dta")
#    from statsmodels.tools.tools import webuse
#    srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')
#    #does currently not cache file

# 'api00' column, bound to y.
y = srs['api00']
#older numpy don't reorder
#x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
#force sequence
# Stack the three columns side by side, in exactly this order.
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
Ejemplo n.º 29
0
Author: Josef Perktold
"""
from urllib.request import urlretrieve

import numpy as np
from numpy.testing import assert_almost_equal

import statsmodels.api as sm
import statsmodels.stats.sandwich_covariance as sw

#http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta

import statsmodels.iolib.foreign as dta

# Load the survey data: read srs.dta from the working directory if that
# works; otherwise download it first and then read it.
try:
    srs = dta.genfromdta("srs.dta")
    print('using local file')
except IOError:
    # Reading failed with IOError: fetch the file into the working directory.
    urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
    print('downloading file')
    srs = dta.genfromdta("srs.dta")
#    from statsmodels.datasets import webuse
#    srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')
#    #does currently not cache file

# 'api00' column, bound to y.
y = srs['api00']
#older numpy do not reorder
#x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
#force sequence
# Stack the three columns side by side, in exactly this order.
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
# 'dnum' column, bound to group.
group = srs['dnum']