예제 #1
0
 def test_pickle(self):
     from statsmodels.compatnp.py3k import BytesIO
     fh = BytesIO()
     #test wrapped results load save pickle
     self.res.save(fh)
     fh.seek(0, 0)
     res_unpickled = self.res.__class__.load(fh)
     assert_(type(res_unpickled) is type(self.res))
예제 #2
0
 def test_pickle(self):
     from statsmodels.compatnp.py3k import BytesIO
     fh = BytesIO()
     #test wrapped results load save pickle
     self.res1.save(fh)
     fh.seek(0,0)
     res_unpickled = self.res1.__class__.load(fh)
     assert_(type(res_unpickled) is type(self.res1))
예제 #3
0
def test_stata_writer_array():
    buf = BytesIO()
    dta = macrodata.load().data
    dta = DataFrame.from_records(dta)
    dta.columns = ["v%d" % i for i in range(1,15)]
    writer = StataWriter(buf, dta.values)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta = dta.to_records(index=False)
    assert_array_equal(dta, dta2)
예제 #4
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    dta = dta.astype(np.dtype([('year', int),
                               ('quarter', int)] + dtype.descr[2:]))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
예제 #5
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    ptesting.assert_frame_equal(dta.reset_index(), DataFrame.from_records(dta2))
예제 #6
0
def test_missing_roundtrip():
    buf = BytesIO()
    dta = np.array([(np.nan, np.inf, "")],
                      dtype=[("double_miss", float), ("float_miss", np.float32),
                              ("string_miss", "a1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
            missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
예제 #7
0
def check_pickle(obj):
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    pickle.dump(obj, fh)
    plen = fh.tell()
    fh.seek(0, 0)
    res = pickle.load(fh)
    fh.close()
    return res, plen
예제 #8
0
    def test_pickle_wrapper(self):

        from statsmodels.iolib.smpickle import save_pickle, load_pickle

        from statsmodels.compatnp.py3k import BytesIO

        fh = BytesIO()  #use cPickle with binary content

        #test unwrapped results load save pickle
        self.results._results.save(fh)
        fh.seek(0, 0)
        res_unpickled = self.results._results.__class__.load(fh)
        assert_(type(res_unpickled) is type(self.results._results))

        #test wrapped results load save
        fh.seek(0, 0)
        #save_pickle(self.results, fh)
        self.results.save(fh)
        fh.seek(0, 0)
        #res_unpickled = load_pickle(fh)
        res_unpickled = self.results.__class__.load(fh)
        fh.close()
        #print type(res_unpickled)
        assert_(type(res_unpickled) is type(self.results))

        before = sorted(self.results.__dict__.keys())
        after = sorted(res_unpickled.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results._results.__dict__.keys())
        after = sorted(res_unpickled._results.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results.model.__dict__.keys())
        after = sorted(res_unpickled.model.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results._cache.keys())
        after = sorted(res_unpickled._cache.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))
예제 #9
0
def test_pickle():
    import tempfile
    from numpy.testing import assert_equal
    tmpdir = tempfile.mkdtemp(prefix='pickle')
    a = range(10)
    save_pickle(a, tmpdir+'/res.pkl')
    b = load_pickle(tmpdir+'/res.pkl')
    assert_equal(a, b)

    #cleanup, tested on Windows
    try:
        import os
        os.remove(tmpdir+'/res.pkl')
        os.rmdir(tmpdir)
    except (OSError, IOError):
        pass
    assert not os.path.exists(tmpdir)

    #test with file handle
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    save_pickle(a, fh)
    fh.seek(0,0)
    c = load_pickle(fh)
    fh.close()
    assert_equal(a,b)
def check_pickle(obj):
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    pickle.dump(obj, fh)
    plen = fh.tell()
    fh.seek(0, 0)
    res = pickle.load(fh)
    fh.close()
    return res, plen
예제 #11
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(
        np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(
        np.dtype([('year', 'i4'), ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
예제 #12
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                               ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
예제 #13
0
    def test_pickle_wrapper(self):

        from statsmodels.iolib.smpickle import save_pickle, load_pickle

        from statsmodels.compatnp.py3k import BytesIO

        fh = BytesIO()  #use cPickle with binary content

        #test unwrapped results load save pickle
        self.results._results.save(fh)
        fh.seek(0,0)
        res_unpickled = self.results._results.__class__.load(fh)
        assert_(type(res_unpickled) is type(self.results._results))

        #test wrapped results load save
        fh.seek(0,0)
        #save_pickle(self.results, fh)
        self.results.save(fh)
        fh.seek(0,0)
        #res_unpickled = load_pickle(fh)
        res_unpickled = self.results.__class__.load(fh)
        fh.close()
        #print type(res_unpickled)
        assert_(type(res_unpickled) is type(self.results))

        before = sorted(self.results.__dict__.keys())
        after = sorted(res_unpickled.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results._results.__dict__.keys())
        after = sorted(res_unpickled._results.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results.model.__dict__.keys())
        after = sorted(res_unpickled.model.__dict__.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))

        before = sorted(self.results._cache.keys())
        after = sorted(res_unpickled._cache.keys())
        assert_(before == after, msg='not equal %r and %r' % (before, after))
예제 #14
0
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2), (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                   dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2": "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2": "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf, pandas=True)
    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
예제 #15
0
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2),
                    (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                    dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2" : "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2" : "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf, pandas=True)
    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
예제 #16
0
ss5 = '''\
2 - 3\t4.340\t0.691\t7.989\t***
2 - 1\t4.600\t0.951\t8.249\t***
3 - 2\t-4.340\t-7.989\t-0.691\t***
3 - 1\t0.260\t-3.389\t3.909\t-
1 - 2\t-4.600\t-8.249\t-0.951\t***
1 - 3\t-0.260\t-3.909\t3.389\t'''

#accommodate recfromtxt for python 3.2, requires bytes
ss = asbytes(ss)
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)

dta = np.recfromtxt(BytesIO(ss), names=("Rust", "Brand", "Replication"))
dta2 = np.recfromtxt(BytesIO(ss2),
                     names=("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(BytesIO(ss3), names=("Brand", "Relief"))
dta5 = np.recfromtxt(BytesIO(ss5),
                     names=('pair', 'mean', 'lower', 'upper', 'sig'),
                     delimiter='\t')
sas_ = dta5[[1, 3, 2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
#import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])


def get_thsd(mci, alpha=0.05):
예제 #17
0
ss5 = '''\
2 - 3	4.340	0.691	7.989	***
2 - 1	4.600	0.951	8.249	***
3 - 2	-4.340	-7.989	-0.691	***
3 - 1	0.260	-3.389	3.909	 -
1 - 2	-4.600	-8.249	-0.951	***
1 - 3	-0.260	-3.909	3.389	'''

#accommodate recfromtxt for python 3.2, requires bytes
ss = asbytes(ss)
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)

dta = np.recfromtxt(BytesIO(ss), names=("Rust","Brand","Replication"))
dta2 = np.recfromtxt(BytesIO(ss2), names = ("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(BytesIO(ss3), names = ("Brand", "Relief"))
dta5 = np.recfromtxt(BytesIO(ss5), names = ('pair', 'mean', 'lower', 'upper', 'sig'), delimiter='\t')
sas_ = dta5[[1,3,2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
#import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])

def get_thsd(mci, alpha=0.05):
    var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
    means = mci.groupstats.groupmean
    nobs = mci.groupstats.groupnobs
    resi = tukeyhsd(means, nobs, var_, df=None, alpha=alpha,