def test_genfromdta_datetime():
    """Read the time-series example file as records and as a DataFrame.

    Both reads are required to emit a ``FutureWarning``.  The first two
    rows are compared with the expected values listed below.  For the
    pandas comparison every expected ``datetime`` whose year is greater
    than 2 is wrapped in ``Timestamp``; all other values are compared
    unchanged.
    """
    expected = [
        (datetime(2006, 11, 19, 23, 13, 20),
         1479596223000,
         datetime(2010, 1, 20),
         datetime(2010, 1, 8),
         datetime(2010, 1, 1),
         datetime(1974, 7, 1),
         datetime(2010, 1, 1),
         datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20),
         -1479590,
         datetime(1953, 10, 2),
         datetime(1948, 6, 10),
         datetime(1955, 1, 1),
         datetime(1955, 7, 1),
         datetime(1955, 1, 1),
         datetime(2, 1, 1)),
    ]
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")

    # Record-array read.
    with pytest.warns(FutureWarning):
        records = genfromdta(dta_path)
    assert_array_equal(records[0].tolist(), expected[0])
    assert_array_equal(records[1].tolist(), expected[1])

    # DataFrame read.
    with warnings.catch_warnings(record=True):
        with pytest.warns(FutureWarning):
            frame = genfromdta(dta_path, pandas=True)

    # Build the pandas-side expectations: Timestamp for ordinary dates,
    # the raw value for everything else (including the year-2 datetime).
    expected = [
        [Timestamp(val) if isinstance(val, datetime) and val.year > 2 else val
         for val in row]
        for row in expected
    ]
    assert frame.iloc[0].tolist() == expected[0]
    assert frame.iloc[1].tolist() == expected[1]
def test_datetime_roundtrip():
    """Write a datetime column with StataWriter and read it back.

    The round trip is done twice: once from a structured array and once
    from the DataFrame built from that array.  ``{"var2": "tm"}`` is
    passed as the writer's third argument in both cases.  Constructing
    the writer and calling ``genfromdta`` must each emit a
    ``FutureWarning``.
    """
    rows = [(1, datetime(2010, 1, 1), 2),
            (2, datetime(2010, 2, 1), 3),
            (4, datetime(2010, 3, 1), 5)]
    fields = [('var1', float), ('var2', object), ('var3', float)]
    records = np.array(rows, dtype=fields)

    # Structured-array round trip.
    stream = BytesIO()
    with pytest.warns(FutureWarning):
        array_writer = StataWriter(stream, records, {"var2" : "tm"})
    array_writer.write_file()
    stream.seek(0)
    with pytest.warns(FutureWarning):
        records_back = genfromdta(stream)
    assert_equal(records, records_back)

    # DataFrame round trip; the reader adds an 'index' column that is
    # dropped before the comparison.
    frame = DataFrame.from_records(records)
    stream = BytesIO()
    with pytest.warns(FutureWarning):
        frame_writer = StataWriter(stream, frame, {"var2" : "tm"})
    frame_writer.write_file()
    stream.seek(0)
    with pytest.warns(FutureWarning):
        frame_back = genfromdta(stream, pandas=True)
    ptesting.assert_frame_equal(frame, frame_back.drop('index', axis=1))
def test_genfromdta_datetime():
    """Read the time-series example file as records and as a DataFrame.

    The record-array read is done with every warning recorded
    (``simplefilter('always')``) and must produce at least one warning.
    The first two rows of each result are compared with the expected
    values below.
    """
    expected = [
        (datetime(2006, 11, 19, 23, 13, 20),
         1479596223000,
         datetime(2010, 1, 20),
         datetime(2010, 1, 8),
         datetime(2010, 1, 1),
         datetime(1974, 7, 1),
         datetime(2010, 1, 1),
         datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20),
         -1479590,
         datetime(1953, 10, 2),
         datetime(1948, 6, 10),
         datetime(1955, 1, 1),
         datetime(1955, 7, 1),
         datetime(1955, 1, 1),
         datetime(2, 1, 1)),
    ]
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        records = genfromdta(dta_path)
        # The reader should warn for this file format.
        assert_(len(caught) > 0)

    for row_no in (0, 1):
        assert_array_equal(records[row_no].tolist(), expected[row_no])

    # Warnings from the pandas read are recorded and discarded.
    with warnings.catch_warnings(record=True):
        frame = genfromdta(dta_path, pandas=True)

    for row_no in (0, 1):
        assert_array_equal(frame.iloc[row_no].tolist(), expected[row_no])
def test_datetime_roundtrip():
    """Round-trip a datetime column through StataWriter and genfromdta.

    First a structured array, then the DataFrame created from it, is
    written to an in-memory buffer with ``{"var2": "tm"}`` as the
    writer's third argument and read back.  Each writer construction
    and each read must emit a ``FutureWarning``.
    """
    source = np.array(
        [(1, datetime(2010, 1, 1), 2),
         (2, datetime(2010, 2, 1), 3),
         (4, datetime(2010, 3, 1), 5)],
        dtype=[('var1', float), ('var2', object), ('var3', float)],
    )

    # --- structured array ---
    out = BytesIO()
    with pytest.warns(FutureWarning):
        writer = StataWriter(out, source, {"var2": "tm"})
    writer.write_file()
    out.seek(0)
    with pytest.warns(FutureWarning):
        reread = genfromdta(out)
    assert_equal(source, reread)

    # --- DataFrame ---
    source_frame = DataFrame.from_records(source)
    out = BytesIO()
    with pytest.warns(FutureWarning):
        writer = StataWriter(out, source_frame, {"var2": "tm"})
    writer.write_file()
    out.seek(0)
    with pytest.warns(FutureWarning):
        reread_frame = genfromdta(out, pandas=True)
    # The frame read back carries an extra 'index' column.
    without_index = reread_frame.drop('index', axis=1)
    assert_frame_equal(source_frame, without_index)
def test_genfromdta_datetime():
    """Check the dates read from ``results/time_series_examples.dta``.

    The file is read once with the default options and once with
    ``pandas=True``; in both cases the first two rows must equal the
    expected tuples below.
    """
    results = [
        (datetime(2006, 11, 19, 23, 13, 20),
         1479596223000,
         datetime(2010, 1, 20),
         datetime(2010, 1, 8),
         datetime(2010, 1, 1),
         datetime(1974, 7, 1),
         datetime(2010, 1, 1),
         datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20),
         -1479590,
         datetime(1953, 10, 2),
         datetime(1948, 6, 10),
         datetime(1955, 1, 1),
         datetime(1955, 7, 1),
         datetime(1955, 1, 1),
         datetime(2, 1, 1)),
    ]
    # NOTE(review): the path is relative to the current working
    # directory, so the test presumably has to run from the tests
    # folder - confirm.
    dta = genfromdta("results/time_series_examples.dta")
    assert_array_equal(dta[0].tolist(), results[0])
    assert_array_equal(dta[1].tolist(), results[1])

    dta = genfromdta("results/time_series_examples.dta", pandas=True)
    # Positional row access goes through .iloc; DataFrame.irow is not
    # available in current pandas releases.
    assert_array_equal(dta.iloc[0].tolist(), results[0])
    assert_array_equal(dta.iloc[1].tolist(), results[1])
def test_missing_roundtrip():
    """Check how missing values are read back by genfromdta.

    A single record holding ``nan``, ``inf`` (in a float32 field) and
    an empty string is written and read back with
    ``missing_flt=np.nan``: both float fields must be null and the
    string must still be empty.  ``./data_missing.dta`` is then read
    with ``missing_flt=-999`` and the first five fields of its first
    row must all equal -999.
    """
    fields = [("double_miss", float),
              ("float_miss", np.float32),
              ("string_miss", "a1")]
    written = np.array([(np.nan, np.inf, "")], dtype=fields)

    buf = StringIO()
    StataWriter(buf, written).write_file()
    buf.seek(0)

    first_row = genfromdta(buf, missing_flt=np.nan)[0]
    assert_(isnull(first_row[0]))
    assert_(isnull(first_row[1]))
    assert_(first_row[2] == "")

    from_file = genfromdta("./data_missing.dta", missing_flt=-999)
    assert_(np.all([from_file[0][i] == -999 for i in range(5)]))
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter.

    ``year`` and ``quarter`` are cast to int64 in the frame handed to
    the writer and to int32 in a reference copy.  Constructing the
    writer and reading the buffer must each emit a ``FutureWarning``,
    while ``write_file`` must not record any warning.  The comparison
    target is chosen from the integer width found in the data read
    back.
    """
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)

    # dta is int64 'i8' given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)
    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)

    # dta2 is int32 'i4' returned from Stata reader.
    # Select the second column's dtype by position with .iloc (dtypes is
    # labelled by column name, so an integer key relies on a deprecated
    # fallback) and compare by equality rather than object identity.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        assert_frame_equal(dta.reset_index(), dta5)
    else:
        # do not check index because it has different size, int32 versus int64
        assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_missing_roundtrip():
    """Check how missing values are read back by genfromdta.

    One record with ``nan``, ``inf`` (float32 field) and an empty
    string is written to a bytes buffer and read with
    ``missing_flt=np.nan``; both floats must come back null and the
    string as empty bytes.  ``results/data_missing.dta`` is then read
    with ``missing_flt=-999`` and the first five fields of its first
    row must equal -999.
    """
    record_dtype = [("double_miss", float),
                    ("float_miss", np.float32),
                    ("string_miss", "a1")]
    original = np.array([(np.nan, np.inf, "")], dtype=record_dtype)

    stream = BytesIO()
    StataWriter(stream, original).write_file()
    stream.seek(0)

    row = genfromdta(stream, missing_flt=np.nan)[0]
    for position in (0, 1):
        assert_(isnull(row[position]))
    assert_(row[2] == asbytes(""))

    missing_path = os.path.join(curdir, "results/data_missing.dta")
    filled = genfromdta(missing_path, missing_flt=-999)
    assert_(np.all([filled[0][i] == -999 for i in range(5)]))
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter.

    The frame passed to the writer has int64 ``year``/``quarter``
    columns; a copy with int32 columns serves as the reference for the
    case where the reader returns 32-bit integers.  Writer construction
    and reading must each emit a ``FutureWarning``; ``write_file`` must
    record no warning.
    """
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)

    # dta is int64 'i8' given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)
    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)

    # dta2 is int32 'i4' returned from Stata reader.
    # Use positional .iloc on the name-labelled dtypes Series and an
    # equality test; dtype objects should not be compared by identity.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_genfromdta():
    """Compare genfromdta output with the stored macrodata result.

    The reference is ``macrodata_result`` from ``.results.macrodata``;
    every element of the array read from ``macrodata.dta`` must equal
    it.
    """
    # NOTE: Stata handles data very oddly.  A value written as 2710.349
    # in the csv comes back from the dta file as 2710.34912109375.
    from .results.macrodata import macrodata_result as expected

    dta_file = curdir+'/../../datasets/macrodata/macrodata.dta'
    loaded = genfromdta(dta_file)
    elementwise_match = loaded == expected
    assert_array_equal(elementwise_match, True)
def test_genfromdta():
    """Check genfromdta against the reference macrodata array.

    ``macrodata.dta`` is read with the default options and compared
    element by element with ``macrodata_result`` imported from
    ``.results.macrodata``.
    """
    # NOTE: Stata handles data very oddly.  Round tripping 2710.349
    # from csv through dta yields 2710.34912109375 in the ndarray.
    from .results.macrodata import macrodata_result as reference

    from_dta = genfromdta(curdir + '/../../datasets/macrodata/macrodata.dta')
    same = from_dta == reference
    assert_array_equal(same, True)
def test_genfromdta_pandas():
    """Compare ``genfromdta(..., pandas=True)`` with the macrodata frame.

    Reading must emit a ``FutureWarning``.  Both frames are cast to
    float before they are compared.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    dta_file = here + '/../../datasets/macrodata/macrodata.dta'
    expected = macrodata.load_pandas().data

    with pytest.warns(FutureWarning):
        loaded = genfromdta(dta_file, pandas=True)

    assert_frame_equal(loaded.astype(float), expected.astype(float))
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    The first two fields are retyped as ``('year', int)`` and
    ``('quarter', int)``; the remaining field descriptions are kept.
    The array read back must equal the array that was written.
    """
    raw = macrodata.load().data
    int_fields = [('year', int), ('quarter', int)]
    other_fields = raw.dtype.descr[2:]
    expected = raw.astype(np.dtype(int_fields + other_fields))

    stream = BytesIO()
    StataWriter(stream, expected).write_file()
    stream.seek(0)

    restored = genfromdta(stream)
    assert_array_equal(expected, restored)
def test_genfromdta_pandas():
    """Compare ``genfromdta(..., pandas=True)`` with the macrodata frame.

    Reading must emit a ``FutureWarning``.  Both frames are cast to
    float before the comparison.
    """
    # pandas.testing is the public location of assert_frame_equal;
    # pandas.util.testing was removed in pandas 2.0 and is kept here
    # only as a fallback for old installations.
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal

    dta = macrodata.load_pandas().data
    curdir = os.path.dirname(os.path.abspath(__file__))
    with pytest.warns(FutureWarning):
        res1 = genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta',
                          pandas=True)
    res1 = res1.astype(float)
    assert_frame_equal(res1, dta.astype(float))
def test_stata_writer_array():
    """Round-trip a plain ndarray through StataWriter.

    The macrodata records are loaded into a DataFrame whose columns are
    renamed ``v1`` .. ``v14``.  Only ``.values`` is handed to the
    writer, and the result read back is compared with the frame
    converted to records without its index.
    """
    frame = DataFrame.from_records(macrodata.load().data)
    frame.columns = ["v%d" % number for number in range(1,15)]

    stream = BytesIO()
    StataWriter(stream, frame.values).write_file()
    stream.seek(0)
    restored = genfromdta(stream)

    expected = frame.to_records(index=False)
    assert_array_equal(expected, restored)
def test_genfromdta_datetime():
    """Read the time-series example file as records and as a DataFrame.

    The record-array read must produce at least one warning; the first
    two rows of both results are compared with the expected values
    below.
    """
    results = [
        (datetime(2006, 11, 19, 23, 13, 20),
         1479596223000,
         datetime(2010, 1, 20),
         datetime(2010, 1, 8),
         datetime(2010, 1, 1),
         datetime(1974, 7, 1),
         datetime(2010, 1, 1),
         datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20),
         -1479590,
         datetime(1953, 10, 2),
         datetime(1948, 6, 10),
         datetime(1955, 1, 1),
         datetime(1955, 7, 1),
         datetime(1955, 1, 1),
         datetime(2, 1, 1)),
    ]
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")

    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of the filters active when
        # the test starts; otherwise an "ignore"/"once" filter could
        # leave ``w`` empty.  With "always" more than one warning may
        # be recorded, so only require that something was raised.
        warnings.simplefilter('always')
        dta = genfromdta(dta_path)
        assert_(len(w) > 0)  # should get a warning for that format.

    assert_array_equal(dta[0].tolist(), results[0])
    assert_array_equal(dta[1].tolist(), results[1])

    # Warnings from the pandas read are recorded and discarded.
    with warnings.catch_warnings(record=True):
        dta = genfromdta(dta_path, pandas=True)

    assert_array_equal(dta.iloc[0].tolist(), results[0])
    assert_array_equal(dta.iloc[1].tolist(), results[1])
def test_stata_writer_structured():
    """Write the macrodata structured array and read it back.

    ``year`` and ``quarter`` are cast to ``int`` before writing; the
    other fields keep their original descriptions.  The array returned
    by ``genfromdta`` must equal the written array.
    """
    data = macrodata.load().data
    new_descr = [('year', int), ('quarter', int)]
    new_descr = new_descr + data.dtype.descr[2:]
    data = data.astype(np.dtype(new_descr))

    out = BytesIO()
    writer = StataWriter(out, data)
    writer.write_file()
    out.seek(0)

    assert_array_equal(data, genfromdta(out))
def test_stata_writer_array():
    """Write the macrodata values as a plain ndarray and read them back.

    The columns are renamed ``v1`` .. ``v14`` before ``.values`` is
    passed to the writer.  The result of ``genfromdta`` is compared
    with the frame's records (index excluded).
    """
    records = macrodata.load().data
    table = DataFrame.from_records(records)
    table.columns = ["v%d" % idx for idx in range(1, 15)]

    out = BytesIO()
    writer = StataWriter(out, table.values)
    writer.write_file()
    out.seek(0)
    read_back = genfromdta(out)

    assert_array_equal(table.to_records(index=False), read_back)
def test_genfromdta():
    """Compare genfromdta output with the stored macrodata result.

    ``macrodata.dta`` is located relative to this file and read with
    the default options; every element must equal ``macrodata_result``
    from the sibling ``results.macrodata`` module.
    """
    # NOTE: Stata handles data very oddly.  Round tripping 2710.349
    # from csv through dta yields 2710.34912109375 in the ndarray.

    # Explicit relative import: the implicit form
    # ``from results.macrodata import ...`` only resolves on Python 2.
    from .results.macrodata import macrodata_result as res2

    curdir = os.path.dirname(os.path.abspath(__file__))
    res1 = genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta')
    assert_array_equal(res1 == res2, True)
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter.

    ``year`` and ``quarter`` are cast to 'i8' before the records are
    turned into a DataFrame.  The data read back is converted to a
    DataFrame and compared with the written frame after
    ``reset_index()``.
    """
    records = macrodata.load().data
    # as of 0.9.0 pandas only supports i8 and f8
    wide_ints = [('year', 'i8'), ('quarter', 'i8')]
    records = records.astype(np.dtype(wide_ints + records.dtype.descr[2:]))
    frame = DataFrame.from_records(records)

    stream = BytesIO()
    StataWriter(stream, frame).write_file()
    stream.seek(0)
    read_back = DataFrame.from_records(genfromdta(stream))

    ptesting.assert_frame_equal(frame.reset_index(), read_back)
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    The data is loaded with ``as_pandas=False``.  ``year`` and
    ``quarter`` become ``int`` fields; when ``PY3`` is false the field
    names are encoded to ASCII before the dtype is built.
    """
    raw = macrodata.load(as_pandas=False).data
    descr = [('year', int), ('quarter', int)] + raw.dtype.descr[2:]
    if not PY3:
        # Remove unicode field names.
        descr = [(field.encode('ascii'), kind) for field, kind in descr]
    expected = raw.astype(np.dtype(descr))

    stream = BytesIO()
    StataWriter(stream, expected).write_file()
    stream.seek(0)

    restored = genfromdta(stream)
    assert_array_equal(expected, restored)
def test_genfromdta_datetime():
    """Read the time-series example file as records and as a DataFrame.

    Each read must emit a ``FutureWarning``.  The first two rows of
    both results are compared with the expected values below.
    """
    expected = [
        (datetime(2006, 11, 19, 23, 13, 20),
         1479596223000,
         datetime(2010, 1, 20),
         datetime(2010, 1, 8),
         datetime(2010, 1, 1),
         datetime(1974, 7, 1),
         datetime(2010, 1, 1),
         datetime(2010, 1, 1)),
        (datetime(1959, 12, 31, 20, 3, 20),
         -1479590,
         datetime(1953, 10, 2),
         datetime(1948, 6, 10),
         datetime(1955, 1, 1),
         datetime(1955, 7, 1),
         datetime(1955, 1, 1),
         datetime(2, 1, 1)),
    ]
    dta_path = os.path.join(curdir, "results/time_series_examples.dta")

    with pytest.warns(FutureWarning):
        records = genfromdta(dta_path)
    for row_no, expected_row in enumerate(expected):
        assert_array_equal(records[row_no].tolist(), expected_row)

    with warnings.catch_warnings(record=True):
        with pytest.warns(FutureWarning):
            frame = genfromdta(dta_path, pandas=True)
    for row_no, expected_row in enumerate(expected):
        assert_array_equal(frame.iloc[row_no].tolist(), expected_row)
def test_stata_writer_structured():
    """Round-trip the macrodata structured array through StataWriter.

    The data is loaded with ``as_pandas=False`` and ``year``/``quarter``
    are cast to ``int``.  Constructing the writer and reading the buffer
    must each emit a ``FutureWarning``.
    """
    raw = macrodata.load(as_pandas=False).data
    int_fields = [('year', int), ('quarter', int)]
    expected = raw.astype(np.dtype(int_fields + raw.dtype.descr[2:]))

    stream = BytesIO()
    with pytest.warns(FutureWarning):
        writer = StataWriter(stream, expected)
    writer.write_file()
    stream.seek(0)

    with pytest.warns(FutureWarning):
        restored = genfromdta(stream)
    assert_array_equal(expected, restored)
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter.

    Two frames are built from the records: one with 'i8'
    ``year``/``quarter`` columns, which is written, and one with 'i4'
    columns, used as the reference when the reader returns 32-bit
    integers.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                                ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)

    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)

    # dta2 is int32 'i4' returned from Stata reader.
    # Index the name-labelled dtypes Series by position with .iloc and
    # compare dtypes by equality instead of object identity.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_stata_writer_pandas():
    """Round-trip the macrodata DataFrame through StataWriter.

    The written frame has 'i8' ``year``/``quarter`` columns; a second
    frame with 'i4' columns is the reference for the case where the
    reader hands back 32-bit integers.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(
        np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(
        np.dtype([('year', 'i4'), ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)

    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)

    # dta2 is int32 'i4' returned from Stata reader.
    # Positional lookup goes through .iloc (dtypes is labelled by column
    # name), and the dtype is tested with == rather than ``is``.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Created on Fri Dec 16 12:52:13 2011 Author: Josef Perktold """ import numpy as np from numpy.testing import assert_almost_equal import statsmodels.api as sm import statsmodels.stats.sandwich_covariance as sw #http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta import statsmodels.iolib.foreign as dta try: srs = dta.genfromdta("srs.dta") print 'using local file' except IOError: import urllib urllib.urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta') print 'downloading file' srs = dta.genfromdta("srs.dta") # from statsmodels.tools.tools import webuse # srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/') # #does currently not cache file y = srs['api00'] #older numpy don't reorder #x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1) #force sequence x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
Author: Josef Perktold
"""

from urllib.request import urlretrieve

import numpy as np
from numpy.testing import assert_almost_equal

import statsmodels.api as sm
import statsmodels.stats.sandwich_covariance as sw
# Data file used below:
# http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta
import statsmodels.iolib.foreign as dta

# Read srs.dta from the working directory; on IOError download it to
# 'srs.dta' first and then read that copy.
try:
    srs = dta.genfromdta("srs.dta")
    print('using local file')
except IOError:
    urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
    print('downloading file')
    srs = dta.genfromdta("srs.dta")
# Alternative loader, kept for reference (does currently not cache file):
# from statsmodels.datasets import webuse
# srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')

# 'api00' is selected as y; 'growth', 'emer' and 'yr_rnd' are stacked as
# the columns of x, in that order.
y = srs['api00']
# older numpy do not reorder fields when selecting several at once:
# x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
# so force the column sequence explicitly
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
group = srs['dnum']