def test_pickle():
    """Round-trip a list through ``save_pickle`` / ``load_pickle``.

    Two targets are exercised: a file path inside a temporary directory and
    an in-memory binary file handle.
    """
    import os
    import tempfile
    from numpy.testing import assert_equal

    tmpdir = tempfile.mkdtemp(prefix='pickle')
    a = lrange(10)
    save_pickle(a, tmpdir+'/res.pkl')
    b = load_pickle(tmpdir+'/res.pkl')
    assert_equal(a, b)

    #cleanup, tested on Windows
    try:
        os.remove(tmpdir+'/res.pkl')
        os.rmdir(tmpdir)
    except (OSError, IOError):
        # best effort; the assertion below reports a failed cleanup
        pass
    assert not os.path.exists(tmpdir)

    #test with file handle
    fh = BytesIO()
    save_pickle(a, fh)
    fh.seek(0,0)
    c = load_pickle(fh)
    fh.close()
    # compare the object read back from the handle (``c``); checking ``b``
    # again would leave the file-handle path untested
    assert_equal(a, c)
def test_pickle_wrapper(self):
    """Save/load round trip for the unwrapped and the wrapped results.

    After reloading the wrapped results, the attribute names of the wrapper,
    of the underlying results, of the model, and the cache keys must all be
    the same as before saving.
    """
    def check_same_keys(original, restored):
        # compare the sorted key names of two mappings
        before = sorted(iterkeys(original))
        after = sorted(iterkeys(restored))
        assert_(before == after, msg='not equal %r and %r' % (before, after))

    fh = BytesIO()  # use cPickle with binary content

    # unwrapped results: save, rewind, load
    raw = self.results._results
    raw.save(fh)
    fh.seek(0, 0)
    res_unpickled = raw.__class__.load(fh)
    assert_(type(res_unpickled) is type(raw))

    # wrapped results: write again from the start of the buffer, then reload
    fh.seek(0, 0)
    self.results.save(fh)
    fh.seek(0, 0)
    res_unpickled = self.results.__class__.load(fh)
    fh.close()
    assert_(type(res_unpickled) is type(self.results))

    check_same_keys(self.results.__dict__, res_unpickled.__dict__)
    check_same_keys(self.results._results.__dict__,
                    res_unpickled._results.__dict__)
    check_same_keys(self.results.model.__dict__,
                    res_unpickled.model.__dict__)
    check_same_keys(self.results._cache, res_unpickled._cache)
def test_plot_acf_kwargs():
    """Check that ``vlines_kwargs`` changes what ``plot_acf`` renders.

    The same sample is plotted twice, once with defaults and once with
    dash-dot vertical lines; the raw RGBA output must differ.
    """
    ar = np.r_[1., -0.9]
    ma = np.r_[1., 0.9]
    rs = np.random.RandomState(1234)
    sample = tsp.ArmaProcess(ar, ma).generate_sample(
        100, distrvs=rs.standard_normal)

    def render(**plot_kwargs):
        # draw on a fresh figure and hand back the saved image bytes
        fig = plt.figure()
        ax = fig.add_subplot(111)
        out = BytesIO()
        plot_acf(sample, ax=ax, **plot_kwargs)
        fig.savefig(out, format='rgba')
        plt.close(fig)
        return out.getvalue()

    plain = render()
    with_vlines = render(vlines_kwargs={'linestyles': 'dashdot'})
    assert_(with_vlines != plain)
def test_stata_writer_pandas():
    """Write a DataFrame with ``StataWriter`` and read it back.

    ``year`` and ``quarter`` are written as int64. Depending on the integer
    width that comes back from the reader, the result is compared either
    with the int64 frame or with an int32 copy of it.
    """
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)
    # dta is int64 'i8' given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)
    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4' returned from Stata reader
    # Compare dtypes by value (``==``), not identity, and look the column up
    # by position explicitly: ``dtypes`` is indexed by column label.
    if dta5.dtypes.iloc[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_pickle(self):
    """Saving and reloading the wrapped results must give back the same class."""
    wrapped = self.res
    stream = BytesIO()

    # wrapped results: save, rewind, load
    wrapped.save(stream)
    stream.seek(0)
    reloaded = wrapped.__class__.load(stream)

    assert_(type(reloaded) is type(wrapped))
def test_pickle(self):
    """Save ``self.res1`` to a buffer, load it again, and compare the types."""
    from statsmodels.compat.python import BytesIO

    wrapped = self.res1
    stream = BytesIO()

    # wrapped results: save, rewind, load
    wrapped.save(stream)
    stream.seek(0)
    reloaded = wrapped.__class__.load(stream)

    assert_(type(reloaded) is type(wrapped))
def test_pickle(self):
    """Save ``self.res1`` to a buffer, load it again, and compare the types."""
    from statsmodels.compat.python import BytesIO

    source = self.res1
    buffer = BytesIO()

    # wrapped results: save, then rewind so load() starts at the beginning
    source.save(buffer)
    buffer.seek(0)
    loaded = source.__class__.load(buffer)

    assert type(loaded) is type(source)  # noqa: E721
def test_pickle(self):
    """Save/load round trip of ``self.res`` after removing ``orig_endog``."""
    results = self.res
    stream = BytesIO()

    # drop orig_endog from the model data before saving
    del results.model.data.orig_endog

    # wrapped results: save, rewind, load
    results.save(stream)
    stream.seek(0)
    reloaded = results.__class__.load(stream)

    assert type(reloaded) is type(results)  # noqa: E721
def test_pickle(self):
    """Save/load round trip of ``self.res`` after removing ``orig_endog``."""
    wrapped = self.res
    buffer = BytesIO()

    # orig_endog is deleted from the model data before the results are saved
    del wrapped.model.data.orig_endog

    wrapped.save(buffer)
    buffer.seek(0)  # rewind before loading
    loaded = wrapped.__class__.load(buffer)

    assert type(loaded) is type(wrapped)  # noqa: E721
def test_stata_writer_structured():
    """Write a structured array with ``StataWriter`` and read it back unchanged."""
    raw = macrodata.load().data
    # 'year' and 'quarter' are cast to int; remaining fields keep their dtype
    fields = [('year', int), ('quarter', int)] + raw.dtype.descr[2:]
    dta = raw.astype(np.dtype(fields))

    buf = BytesIO()
    StataWriter(buf, dta).write_file()
    buf.seek(0)

    assert_array_equal(dta, genfromdta(buf))
def test_stata_writer_array():
    """Write a plain 2-D array with ``StataWriter`` and read it back.

    The expected result is the source frame converted to records without
    its index.
    """
    frame = DataFrame.from_records(macrodata.load(as_pandas=False).data)
    frame.columns = ["v%d" % i for i in range(1, 15)]

    buf = BytesIO()
    # only the values are passed to the writer, not the frame itself
    StataWriter(buf, frame.values).write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)

    expected = frame.to_records(index=False)
    assert_array_equal(expected, dta2)
def test_stata_writer_array():
    """Write a plain 2-D array with ``StataWriter`` and read it back.

    The expected result is the source frame converted to records without
    its index.
    """
    frame = DataFrame.from_records(macrodata.load().data)
    frame.columns = ["v%d" % i for i in range(1, 15)]

    buf = BytesIO()
    # only the values are passed to the writer, not the frame itself
    StataWriter(buf, frame.values).write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)

    expected = frame.to_records(index=False)
    assert_array_equal(expected, dta2)
def test_stata_writer_structured():
    """Round-trip a structured array through ``StataWriter`` / ``genfromdta``."""
    source = macrodata.load().data
    # replace the first two field specs with int-typed 'year' and 'quarter'
    layout = np.dtype(
        [('year', int), ('quarter', int)] + source.dtype.descr[2:])
    dta = source.astype(layout)

    buf = BytesIO()
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)

    read_back = genfromdta(buf)
    assert_array_equal(dta, read_back)
def test_stata_writer_structured():
    """Round-trip a structured array through ``StataWriter`` / ``genfromdta``."""
    source = macrodata.load(as_pandas=False).data
    # replace the first two field specs with int-typed 'year' and 'quarter'
    dt = [('year', int), ('quarter', int)] + source.dtype.descr[2:]
    if not PY3:
        # Remove unicode: field names are encoded to ASCII bytes
        dt = [(name.encode('ascii'), typ) for name, typ in dt]
    dta = source.astype(np.dtype(dt))

    buf = BytesIO()
    StataWriter(buf, dta).write_file()
    buf.seek(0)

    read_back = genfromdta(buf)
    assert_array_equal(dta, read_back)
def test_datetime_roundtrip():
    """Round-trip a datetime column through ``StataWriter`` / ``genfromdta``.

    The check runs once on a structured array and once on the equivalent
    DataFrame (read back with ``pandas=True``).
    """
    def roundtrip(data, **read_kwargs):
        # write ``data`` with 'var2' mapped to the "tm" format, then read it back
        buf = BytesIO()
        with pytest.warns(FutureWarning):
            writer = StataWriter(buf, data, {"var2" : "tm"})
        writer.write_file()
        buf.seek(0)
        with pytest.warns(FutureWarning):
            result = genfromdta(buf, **read_kwargs)
        return result

    rows = [(1, datetime(2010, 1, 1), 2),
            (2, datetime(2010, 2, 1), 3),
            (4, datetime(2010, 3, 1), 5)]
    layout = [('var1', float), ('var2', object), ('var3', float)]
    dta = np.array(rows, dtype=layout)

    # structured-array input
    assert_equal(dta, roundtrip(dta))

    # DataFrame input; the reader's extra 'index' column is dropped first
    dta = DataFrame.from_records(dta)
    dta2 = roundtrip(dta, pandas=True)
    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
def test_missing_roundtrip():
    """Check how missing values survive a write/read cycle.

    A single record holding NaN, inf and an empty string is written and read
    back with ``missing_flt=np.nan``; both float fields must come back null
    and the string field empty. A stored file is then read with
    ``missing_flt=-999`` and its first five fields must equal -999.
    """
    buf = BytesIO()
    # "S1" is the supported spelling of a 1-byte string dtype; the "a1"
    # alias is deprecated as of NumPy 2.0
    dta = np.array([(np.nan, np.inf, "")],
                   dtype=[("double_miss", float),
                          ("float_miss", np.float32),
                          ("string_miss", "S1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)

    dta = genfromdta(buf, missing_flt=np.nan)
    first = dta[0]
    assert_(isnull(first[0]))
    assert_(isnull(first[1]))
    assert_(first[2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
                     missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
def test_stata_writer_structured():
    """Round-trip a structured array through ``StataWriter`` / ``genfromdta``.

    Both the writer construction and the read are expected to emit a
    ``FutureWarning``.
    """
    source = macrodata.load(as_pandas=False).data
    # replace the first two field specs with int-typed 'year' and 'quarter'
    layout = [('year', int), ('quarter', int)] + source.dtype.descr[2:]
    dta = source.astype(np.dtype(layout))

    buf = BytesIO()
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        read_back = genfromdta(buf)
    assert_array_equal(dta, read_back)
def check_pickle(obj):
    """Pickle ``obj`` into memory and load it straight back.

    Parameters
    ----------
    obj : object
        Anything ``cPickle.dump`` accepts.

    Returns
    -------
    res : object
        The object obtained by unpickling.
    plen : int
        Stream position after dumping, i.e. the size of the pickle written
        to the (initially empty) buffer.
    """
    stream = BytesIO()
    cPickle.dump(obj, stream, protocol=cPickle.HIGHEST_PROTOCOL)
    # the buffer started empty, so the position equals the bytes written
    size = stream.tell()
    stream.seek(0)
    restored = cPickle.load(stream)
    stream.close()
    return restored, size
def test_pickle():
    """Round-trip a list through ``save_pickle`` / ``load_pickle``.

    Two targets are exercised: a file path inside a temporary directory and
    an in-memory binary file handle.
    """
    import os
    import tempfile
    from numpy.testing import assert_equal

    tmpdir = tempfile.mkdtemp(prefix='pickle')
    a = lrange(10)
    save_pickle(a, tmpdir + '/res.pkl')
    b = load_pickle(tmpdir + '/res.pkl')
    assert_equal(a, b)

    #cleanup, tested on Windows
    try:
        os.remove(tmpdir + '/res.pkl')
        os.rmdir(tmpdir)
    except (OSError, IOError):
        # best effort; the assertion below reports a failed cleanup
        pass
    assert not os.path.exists(tmpdir)

    #test with file handle
    fh = BytesIO()
    save_pickle(a, fh)
    fh.seek(0, 0)
    c = load_pickle(fh)
    fh.close()
    # compare the object read back from the handle (``c``); checking ``b``
    # again would leave the file-handle path untested
    assert_equal(a, c)
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download and return an example dataset from Stata.

    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : DataFrame or Record Array
        If ``as_df`` is True, a `pandas.DataFrame` built from the records
        read by ``genfromdta``; otherwise the ``genfromdta`` result itself
        (a record array containing the Stata dataset).

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data + '.dta')
    response = urlopen(url)
    try:
        # read the whole payload into memory to make it truly file-like
        buf = BytesIO(response.read())
    finally:
        # always release the connection, even if the read fails
        response.close()

    records = genfromdta(buf)
    if as_df:
        return DataFrame.from_records(records)
    return records
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download and return an example dataset from Stata.

    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : DataFrame or Record Array
        If ``as_df`` is True, a `pandas.DataFrame` built from the records
        read by ``genfromdta``; otherwise the ``genfromdta`` result itself
        (a record array containing the Stata dataset).

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    response = urlopen(url)
    try:
        # read the whole payload into memory to make it truly file-like
        buf = BytesIO(response.read())
    finally:
        # always release the connection, even if the read fails
        response.close()

    records = genfromdta(buf)
    if as_df:
        return DataFrame.from_records(records)
    return records
def test_stata_writer_pandas():
    """Write a DataFrame with ``StataWriter`` and read it back.

    ``year`` and ``quarter`` are written as int64. Depending on the integer
    width that comes back from the reader, the result is compared either
    with the int64 frame or with an int32 version of it.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                                ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4' returned from Stata reader
    # dtypes are compared by value: two equal dtype objects are not
    # guaranteed to be the same object, so ``is`` could pick the wrong branch
    if dta5.dtypes[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_stata_writer_pandas():
    """Write a DataFrame with ``StataWriter`` and read it back.

    ``year`` and ``quarter`` are written as int64. Depending on the integer
    width that comes back from the reader, the result is compared either
    with the int64 frame or with an int32 version of it.
    """
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(
        np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(
        np.dtype([('year', 'i4'), ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4' returned from Stata reader
    # dtypes are compared by value: two equal dtype objects are not
    # guaranteed to be the same object, so ``is`` could pick the wrong branch
    if dta5.dtypes[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
cyl_labels = np.array(['USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Japan', 'Germany', 'France', 'Germany', 'Sweden', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA']) #accommodate recfromtxt for python 3.2, requires bytes ss = asbytes(ss) ss2 = asbytes(ss2) ss3 = asbytes(ss3) ss5 = asbytes(ss5) dta = np.recfromtxt(BytesIO(ss), names=("Rust","Brand","Replication")) dta2 = np.recfromtxt(BytesIO(ss2), names = ("idx", "Treatment", "StressReduction")) dta3 = np.recfromtxt(BytesIO(ss3), names = ("Brand", "Relief")) dta5 = np.recfromtxt(BytesIO(ss5), names = ('pair', 'mean', 'lower', 'upper', 'sig'), delimiter='\t') sas_ = dta5[[1,3,2]] from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd, MultiComparison) #import statsmodels.sandbox.stats.multicomp as multi #print tukeyhsd(dta['Brand'], dta['Rust']) def get_thsd(mci, alpha=0.05): var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique)) means = mci.groupstats.groupmean nobs = mci.groupstats.groupnobs resi = tukeyhsd(means, nobs, var_, df=None, alpha=alpha,
'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA' ]) #accommodate recfromtxt for python 3.2, requires bytes ss = asbytes(ss) ss2 = asbytes(ss2) ss3 = asbytes(ss3) ss5 = asbytes(ss5) dta = pd.read_csv(BytesIO(ss), sep=r'\s+', header=None, engine='python') dta.columns = "Rust", "Brand", "Replication" dta2 = pd.read_csv(BytesIO(ss2), sep=r'\s+', header=None, engine='python') dta2.columns = "idx", "Treatment", "StressReduction" dta2["Treatment"] = dta2["Treatment"].map(lambda v: v.encode('utf-8')) dta3 = pd.read_csv(BytesIO(ss3), sep=r'\s+', header=None, engine='python') dta3.columns = ["Brand", "Relief"] dta5 = pd.read_csv(BytesIO(ss5), sep=r'\t', header=None, engine='python') dta5.columns = ['pair', 'mean', 'lower', 'upper', 'sig'] for col in ('pair', 'sig'): dta5[col] = dta5[col].map(lambda v: v.encode('utf-8')) sas_ = dta5.iloc[[1, 3, 2]] from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd, MultiComparison)
'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA' ]) #accommodate recfromtxt for python 3.2, requires bytes ss = asbytes(ss) ss2 = asbytes(ss2) ss3 = asbytes(ss3) ss5 = asbytes(ss5) dta = np.recfromtxt(BytesIO(ss), names=("Rust", "Brand", "Replication")) dta2 = np.recfromtxt(BytesIO(ss2), names=("idx", "Treatment", "StressReduction")) dta3 = np.recfromtxt(BytesIO(ss3), names=("Brand", "Relief")) dta5 = np.recfromtxt(BytesIO(ss5), names=('pair', 'mean', 'lower', 'upper', 'sig'), delimiter='\t') dta = pd.DataFrame.from_records(dta) dta2 = pd.DataFrame.from_records(dta2) dta3 = pd.DataFrame.from_records(dta3) dta5 = pd.DataFrame.from_records(dta5) sas_ = dta5.iloc[[1, 3, 2]] from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd, MultiComparison)