コード例 #1
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    dta = urlopen(url)
    dta = StringIO(dta.read())  # make it truly file-like
    if as_df:  # could make this faster if we don't process dta twice?
        return DataFrame.from_records(genfromdta(dta))
    else:
        return genfromdta(dta)
コード例 #2
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
    def _coef_table(self):
        model = self.model
        k = model.neqs

        Xnames = self.model.exog_names

        data = lzip(model.params.T.ravel(), model.stderr.T.ravel(),
                    model.tvalues.T.ravel(), model.pvalues.T.ravel())

        header = ('coefficient', 'std. error', 't-stat', 'prob')

        buf = StringIO()
        dim = k * model.k_ar + model.k_trend
        for i in range(k):
            section = "Results for equation %s" % model.names[i]
            buf.write(section + '\n')
            #print >> buf, section

            table = SimpleTable(data[dim * i:dim * (i + 1)],
                                header,
                                Xnames,
                                title=None,
                                txt_fmt=self.default_fmt)
            buf.write(str(table) + '\n')

            if i < k - 1:
                buf.write('\n')

        return buf.getvalue()
コード例 #3
0
ファイル: output.py プロジェクト: cong1989/statsmodels
    def _coef_table(self):
        model = self.model
        k = model.neqs

        Xnames = self.model.exog_names

        data = lzip(model.params.T.ravel(),
                   model.stderr.T.ravel(),
                   model.tvalues.T.ravel(),
                   model.pvalues.T.ravel())

        header = ('coefficient','std. error','t-stat','prob')

        buf = StringIO()
        dim = k * model.k_ar + model.k_trend
        for i in range(k):
            section = "Results for equation %s" % model.names[i]
            buf.write(section + '\n')
            #print >> buf, section

            table = SimpleTable(data[dim * i : dim * (i + 1)], header,
                                Xnames, title=None, txt_fmt = self.default_fmt)
            buf.write(str(table) + '\n')

            if i < k - 1:
                buf.write('\n')

        return buf.getvalue()
コード例 #4
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
    def make(self, endog_names=None, exog_names=None):
        """
        Summary of VAR model
        """
        buf = StringIO()

        buf.write(self._header_table() + '\n')
        buf.write(self._stats_table() + '\n')
        buf.write(self._coef_table() + '\n')
        buf.write(self._resid_info() + '\n')

        return buf.getvalue()
コード例 #5
0
def test_formula_labels():
    # make sure labels pass through patsy as expected
    # data(Duncan) from car in R
    dta = StringIO(""""type","income","education","prestige"\n"accountant","prof",62,86,82\n"pilot","prof",72,76,83\n"architect","prof",75,92,90\n"author","prof",55,90,76\n"chemist","prof",64,86,90\n"minister","prof",21,84,87\n"professor","prof",64,93,93\n"dentist","prof",80,100,90\n"reporter","wc",67,87,52\n"engineer","prof",72,86,88\n"undertaker","prof",42,74,57\n"lawyer","prof",76,98,89\n"physician","prof",76,97,97\n"welfare.worker","prof",41,84,59\n"teacher","prof",48,91,73\n"conductor","wc",76,34,38\n"contractor","prof",53,45,76\n"factory.owner","prof",60,56,81\n"store.manager","prof",42,44,45\n"banker","prof",78,82,92\n"bookkeeper","wc",29,72,39\n"mail.carrier","wc",48,55,34\n"insurance.agent","wc",55,71,41\n"store.clerk","wc",29,50,16\n"carpenter","bc",21,23,33\n"electrician","bc",47,39,53\n"RR.engineer","bc",81,28,67\n"machinist","bc",36,32,57\n"auto.repairman","bc",22,22,26\n"plumber","bc",44,25,29\n"gas.stn.attendant","bc",15,29,10\n"coal.miner","bc",7,7,15\n"streetcar.motorman","bc",42,26,19\n"taxi.driver","bc",9,19,10\n"truck.driver","bc",21,15,13\n"machine.operator","bc",21,20,24\n"barber","bc",16,26,20\n"bartender","bc",16,28,7\n"shoe.shiner","bc",9,17,3\n"cook","bc",14,22,16\n"soda.clerk","bc",12,30,6\n"watchman","bc",17,25,11\n"janitor","bc",7,20,8\n"policeman","bc",34,47,41\n"waiter","bc",8,32,10""")
    from pandas import read_csv
    dta = read_csv(dta)
    model = ols("prestige ~ income + education", dta).fit()
    assert_equal(model.fittedvalues.index, dta.index)
コード例 #6
0
def test_formula_labels():
    # make sure labels pass through patsy as expected
    # data(Duncan) from car in R
    dta = StringIO(""""type" "income" "education" "prestige"\n"accountant" "prof" 62 86 82\n"pilot" "prof" 72 76 83\n"architect" "prof" 75 92 90\n"author" "prof" 55 90 76\n"chemist" "prof" 64 86 90\n"minister" "prof" 21 84 87\n"professor" "prof" 64 93 93\n"dentist" "prof" 80 100 90\n"reporter" "wc" 67 87 52\n"engineer" "prof" 72 86 88\n"undertaker" "prof" 42 74 57\n"lawyer" "prof" 76 98 89\n"physician" "prof" 76 97 97\n"welfare.worker" "prof" 41 84 59\n"teacher" "prof" 48 91 73\n"conductor" "wc" 76 34 38\n"contractor" "prof" 53 45 76\n"factory.owner" "prof" 60 56 81\n"store.manager" "prof" 42 44 45\n"banker" "prof" 78 82 92\n"bookkeeper" "wc" 29 72 39\n"mail.carrier" "wc" 48 55 34\n"insurance.agent" "wc" 55 71 41\n"store.clerk" "wc" 29 50 16\n"carpenter" "bc" 21 23 33\n"electrician" "bc" 47 39 53\n"RR.engineer" "bc" 81 28 67\n"machinist" "bc" 36 32 57\n"auto.repairman" "bc" 22 22 26\n"plumber" "bc" 44 25 29\n"gas.stn.attendant" "bc" 15 29 10\n"coal.miner" "bc" 7 7 15\n"streetcar.motorman" "bc" 42 26 19\n"taxi.driver" "bc" 9 19 10\n"truck.driver" "bc" 21 15 13\n"machine.operator" "bc" 21 20 24\n"barber" "bc" 16 26 20\n"bartender" "bc" 16 28 7\n"shoe.shiner" "bc" 9 17 3\n"cook" "bc" 14 22 16\n"soda.clerk" "bc" 12 30 6\n"watchman" "bc" 17 25 11\n"janitor" "bc" 7 20 8\n"policeman" "bc" 34 47 41\n"waiter" "bc" 8 32 10""")
    from pandas import read_table
    dta = read_table(dta, sep=" ")
    model = ols("prestige ~ income + education", dta).fit()
    assert_equal(model.fittedvalues.index, dta.index)
コード例 #7
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
def print_ic_table(ics, selected_orders):
    """
    For VAR order selection

    """
    # Can factor this out into a utility method if so desired

    cols = sorted(ics)

    data = mat([["%#10.4g" % v for v in ics[c]] for c in cols], dtype=object).T

    # start minimums
    for i, col in enumerate(cols):
        idx = int(selected_orders[col]), i
        data[idx] = data[idx] + '*'
        # data[idx] = data[idx][:-1] + '*' # super hack, ugh

    fmt = dict(_default_table_fmt, data_fmts=("%s", ) * len(cols))

    buf = StringIO()
    table = SimpleTable(data,
                        cols,
                        lrange(len(data)),
                        title='VAR Order Selection',
                        txt_fmt=fmt)
    buf.write(str(table) + '\n')
    buf.write('* Minimum' + '\n')

    print(buf.getvalue())
コード例 #8
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
def pprint_matrix(values, rlabels, clabels, col_space=None):
    buf = StringIO()

    T, K = len(rlabels), len(clabels)

    if col_space is None:
        min_space = 10
        col_space = [max(len(str(c)) + 2, min_space) for c in clabels]
    else:
        col_space = (col_space, ) * K

    row_space = max([len(str(x)) for x in rlabels]) + 2

    head = _pfixed('', row_space)

    for j, h in enumerate(clabels):
        head += _pfixed(h, col_space[j])

    buf.write(head + '\n')

    for i, rlab in enumerate(rlabels):
        line = ('%s' % rlab).ljust(row_space)

        for j in range(K):
            line += _pfixed(values[i, j], col_space[j])

        buf.write(line + '\n')

    return buf.getvalue()
コード例 #9
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
    def _resid_info(self):
        buf = StringIO()
        names = self.model.names

        buf.write("Correlation matrix of residuals" + '\n')
        buf.write(pprint_matrix(self.model.resid_corr, names, names) + '\n')

        return buf.getvalue()
コード例 #10
0
ファイル: output.py プロジェクト: cong1989/statsmodels
    def make(self, endog_names=None, exog_names=None):
        """
        Summary of VAR model
        """
        buf = StringIO()

        buf.write(self._header_table() + '\n')
        buf.write(self._stats_table() + '\n')
        buf.write(self._coef_table() + '\n')
        buf.write(self._resid_info() + '\n')

        return buf.getvalue()
コード例 #11
0
def load_basic_data():
    raw_csv = StringIO(
        "res,qual\n2.00,=\n4.20,=\n4.62,=\n5.00,ND\n5.00,ND\n5.50,ND\n"
        "5.57,=\n5.66,=\n5.75,ND\n5.86,=\n6.65,=\n6.78,=\n6.79,=\n7.50,=\n"
        "7.50,=\n7.50,=\n8.63,=\n8.71,=\n8.99,=\n9.50,ND\n9.50,ND\n9.85,=\n"
        "10.82,=\n11.00,ND\n11.25,=\n11.25,=\n12.20,=\n14.92,=\n16.77,=\n"
        "17.81,=\n19.16,=\n19.19,=\n19.64,=\n20.18,=\n22.97,=\n")
    df = (pandas.read_csv(raw_csv).assign(conc=lambda df: df['res']).assign(
        censored=lambda df: df['qual'] == 'ND'))
    return df
コード例 #12
0
ファイル: utils.py プロジェクト: RDKCH/statsmodels
def _get_dataset_meta(dataname, package, cache):
    # get the index, you'll probably want this cached because you have
    # to download info about all the data to get info about any of the data...
    index_url = ("https://raw.githubusercontent.com/vincentarelbundock/"
                 "Rdatasets/master/datasets.csv")
    data, _ = _urlopen_cached(index_url, cache)
    data = data.decode('utf-8', 'strict')
    index = read_csv(StringIO(data))
    idx = np.logical_and(index.Item == dataname, index.Package == package)
    dataset_meta = index.loc[idx]
    return dataset_meta["Title"].item()
コード例 #13
0
def _get_data(base_url, dataname, cache, extension="csv"):
    url = base_url + (dataname + ".%s") % extension
    try:
        data, from_cache = _urlopen_cached(url, cache)
    except HTTPError as err:
        if '404' in str(err):
            raise ValueError("Dataset %s was not found." % dataname)
        else:
            raise err

    data = data.decode('utf-8', 'strict')
    return StringIO(data), from_cache
コード例 #14
0
ファイル: output.py プロジェクト: cong1989/statsmodels
def pprint_matrix(values, rlabels, clabels, col_space=None):
    buf = StringIO()

    T, K = len(rlabels), len(clabels)

    if col_space is None:
        min_space = 10
        col_space = [max(len(str(c)) + 2, min_space) for c in clabels]
    else:
        col_space = (col_space,) * K

    row_space = max([len(str(x)) for x in rlabels]) + 2

    head = _pfixed('', row_space)

    for j, h in enumerate(clabels):
        head += _pfixed(h, col_space[j])

    buf.write(head + '\n')

    for i, rlab in enumerate(rlabels):
        line = ('%s' % rlab).ljust(row_space)

        for j in range(K):
            line += _pfixed(values[i,j], col_space[j])

        buf.write(line + '\n')

    return buf.getvalue()
コード例 #15
0
ファイル: output.py プロジェクト: 0ceangypsy/statsmodels
def print_ic_table(ics, selected_orders):
    """
    For VAR order selection

    """
    # Can factor this out into a utility method if so desired

    cols = sorted(ics)

    data = mat([["%#10.4g" % v for v in ics[c]] for c in cols],
               dtype=object).T

    # start minimums
    for i, col in enumerate(cols):
        idx = int(selected_orders[col]), i
        data[idx] = data[idx] + '*'
        # data[idx] = data[idx][:-1] + '*' # super hack, ugh

    fmt = dict(_default_table_fmt,
               data_fmts=("%s",) * len(cols))

    buf = StringIO()
    table = SimpleTable(data, cols, lrange(len(data)),
                        title='VAR Order Selection', txt_fmt=fmt)
    buf.write(str(table) + '\n')
    buf.write('* Minimum' + '\n')

    print(buf.getvalue())
コード例 #16
0
ファイル: var_model.py プロジェクト: littlecooky/statsmodels
    def summary(self):
        buf = StringIO()

        rng = lrange(self.periods)
        for i in range(self.neqs):
            ppm = output.pprint_matrix(self.decomp[i], rng, self.names)

            buf.write('FEVD for %s\n' % self.names[i])
            buf.write(ppm + '\n')

        print(buf.getvalue())
コード例 #17
0
def check_pickle(obj):
    fh =StringIO()
    cPickle.dump(obj, fh)
    plen = fh.pos
    fh.seek(0,0)
    res = cPickle.load(fh)
    fh.close()
    return res, plen
コード例 #18
0
ファイル: output.py プロジェクト: cong1989/statsmodels
    def _resid_info(self):
        buf = StringIO()
        names = self.model.names

        buf.write("Correlation matrix of residuals" + '\n')
        buf.write(pprint_matrix(self.model.resid_corr, names, names) + '\n')

        return buf.getvalue()
コード例 #19
0
ファイル: var_model.py プロジェクト: bert9bert/statsmodels
    def summary(self):
        buf = StringIO()

        rng = lrange(self.periods)
        for i in range(self.neqs):
            ppm = output.pprint_matrix(self.decomp[i], rng, self.names)

            buf.write('FEVD for %s\n' % self.names[i])
            buf.write(ppm + '\n')

        print(buf.getvalue())
コード例 #20
0
def get_ic_table(ics, selected_orders):
    '''
    该方法将滞后阶数结果转换为表格化的分析结果
    :param ics: 滞后阶数结果
    :param selected_orders: 最大滞后阶数
    :return: 返回表格化的滞后阶数分析结果
    '''
    _default_table_fmt = dict(empty_cell='',
                              colsep='  ',
                              row_pre='',
                              row_post='',
                              table_dec_above='=',
                              table_dec_below='=',
                              header_dec_below='-',
                              header_fmt='%s',
                              stub_fmt='%s',
                              title_align='c',
                              header_align='r',
                              data_aligns='r',
                              stubs_align='l',
                              fmt='txt')
    cols = sorted(ics)
    data = np.array([["%#10.4g" % v for v in ics[c]] for c in cols],
                    dtype=object).T
    for i, col in enumerate(cols):
        idx = int(selected_orders[col]), i
        data[idx] = data[idx] + '*'
    fmt = dict(_default_table_fmt, data_fmts=("%s", ) * len(cols))
    buf = StringIO()
    table = SimpleTable(data,
                        cols,
                        lrange(len(data)),
                        title='VAR Order Selection',
                        txt_fmt=fmt)
    buf.write(str(table) + '\n')
    buf.write('* Minimum' + '\n')
    return buf.getvalue()
コード例 #21
0
kidney_table = StringIO("""Days      Duration Weight ID
    0.0      1      1      1
    2.0      1      1      2
    1.0      1      1      3
    3.0      1      1      4
    0.0      1      1      5
    2.0      1      1      6
    0.0      1      1      7
    5.0      1      1      8
    6.0      1      1      9
    8.0      1      1     10
    2.0      1      2      1
    4.0      1      2      2
    7.0      1      2      3
   12.0      1      2      4
   15.0      1      2      5
    4.0      1      2      6
    3.0      1      2      7
    1.0      1      2      8
    5.0      1      2      9
   20.0      1      2     10
   15.0      1      3      1
   10.0      1      3      2
    8.0      1      3      3
    5.0      1      3      4
   25.0      1      3      5
   16.0      1      3      6
    7.0      1      3      7
   30.0      1      3      8
    3.0      1      3      9
   27.0      1      3     10
    0.0      2      1      1
    1.0      2      1      2
    1.0      2      1      3
    0.0      2      1      4
    4.0      2      1      5
    2.0      2      1      6
    7.0      2      1      7
    4.0      2      1      8
    0.0      2      1      9
    3.0      2      1     10
    5.0      2      2      1
    3.0      2      2      2
    2.0      2      2      3
    0.0      2      2      4
    1.0      2      2      5
    1.0      2      2      6
    3.0      2      2      7
    6.0      2      2      8
    7.0      2      2      9
    9.0      2      2     10
   10.0      2      3      1
    8.0      2      3      2
   12.0      2      3      3
    3.0      2      3      4
    7.0      2      3      5
   15.0      2      3      6
    4.0      2      3      7
    9.0      2      3      8
    6.0      2      3      9
    1.0      2      3     10
""")
コード例 #22
0
ファイル: test_ros.py プロジェクト: kasunsp/pinalpha_mvp
class Test_ROS_RNADAdata(CheckROSMixin):
    decimal = 3
    datastring = StringIO(dedent("""\
        res cen
        0.090  True
        0.090  True
        0.090  True
        0.101 False
        0.136 False
        0.340 False
        0.457 False
        0.514 False
        0.629 False
        0.638 False
        0.774 False
        0.788 False
        0.900  True
        0.900  True
        0.900  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000 False
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.100 False
        2.000 False
        2.000 False
        2.404 False
        2.860 False
        3.000 False
        3.000 False
        3.705 False
        4.000 False
        5.000 False
        5.960 False
        6.000 False
        7.214 False
       16.000 False
       17.716 False
       25.000 False
       51.000 False"""
    ))
    rescol = 'res'
    cencol = 'cen'
    df = pandas.read_csv(datastring, sep='\s+')
    expected_final = numpy.array([
        0.01907990,  0.03826254,  0.06080717,  0.10100000,  0.13600000,
        0.34000000,  0.45700000,  0.51400000,  0.62900000,  0.63800000,
        0.77400000,  0.78800000,  0.08745914,  0.25257575,  0.58544205,
        0.01711153,  0.03373885,  0.05287083,  0.07506079,  0.10081573,
        1.00000000,  0.13070334,  0.16539309,  0.20569039,  0.25257575,
        0.30725491,  0.37122555,  0.44636843,  0.53507405,  0.64042242,
        0.76644378,  0.91850581,  1.10390531,  1.10000000,  2.00000000,
        2.00000000,  2.40400000,  2.86000000,  3.00000000,  3.00000000,
        3.70500000,  4.00000000,  5.00000000,  5.96000000,  6.00000000,
        7.21400000, 16.00000000, 17.71600000, 25.00000000, 51.00000000
    ])

    expected_cohn = pandas.DataFrame({
        'nuncen_above': numpy.array([9., 0.0, 18., numpy.nan]),
        'nobs_below': numpy.array([3., 15., 32., numpy.nan]),
        'ncen_equal': numpy.array([3., 3., 17., numpy.nan]),
        'prob_exceedance': numpy.array([0.84, 0.36, 0.36, 0]),
    })
コード例 #23
0
ファイル: output.py プロジェクト: cong1989/statsmodels
def hypothesis_test_table(results, title, null_hyp):
    fmt = dict(_default_table_fmt,
               data_fmts=["%#15.6F","%#15.6F","%#15.3F", "%s"])

    buf = StringIO()
    table = SimpleTable([[results['statistic'],
                          results['crit_value'],
                          results['pvalue'],
                          str(results['df'])]],
                        ['Test statistic', 'Critical Value', 'p-value',
                         'df'], [''], title=None, txt_fmt=fmt)

    buf.write(title + '\n')
    buf.write(str(table) + '\n')

    buf.write(null_hyp + '\n')

    buf.write("Conclusion: %s H_0" % results['conclusion'])
    buf.write(" at %.2f%% significance level" % (results['signif'] * 100))

    return buf.getvalue()
コード例 #24
0
ファイル: compatibility.py プロジェクト: TPLink32/spnk1
 def open(self, filename):
     fullfilename = [f for f in self.namelist() if filename in f][0]
     return StringIO(self.read(fullfilename))
コード例 #25
0
ファイル: test_anova.py プロジェクト: ChadFulton/statsmodels
kidney_table = StringIO("""Days      Duration Weight ID
    0.0      1      1      1
    2.0      1      1      2
    1.0      1      1      3
    3.0      1      1      4
    0.0      1      1      5
    2.0      1      1      6
    0.0      1      1      7
    5.0      1      1      8
    6.0      1      1      9
    8.0      1      1     10
    2.0      1      2      1
    4.0      1      2      2
    7.0      1      2      3
   12.0      1      2      4
   15.0      1      2      5
    4.0      1      2      6
    3.0      1      2      7
    1.0      1      2      8
    5.0      1      2      9
   20.0      1      2     10
   15.0      1      3      1
   10.0      1      3      2
    8.0      1      3      3
    5.0      1      3      4
   25.0      1      3      5
   16.0      1      3      6
    7.0      1      3      7
   30.0      1      3      8
    3.0      1      3      9
   27.0      1      3     10
    0.0      2      1      1
    1.0      2      1      2
    1.0      2      1      3
    0.0      2      1      4
    4.0      2      1      5
    2.0      2      1      6
    7.0      2      1      7
    4.0      2      1      8
    0.0      2      1      9
    3.0      2      1     10
    5.0      2      2      1
    3.0      2      2      2
    2.0      2      2      3
    0.0      2      2      4
    1.0      2      2      5
    1.0      2      2      6
    3.0      2      2      7
    6.0      2      2      8
    7.0      2      2      9
    9.0      2      2     10
   10.0      2      3      1
    8.0      2      3      2
   12.0      2      3      3
    3.0      2      3      4
    7.0      2      3      5
   15.0      2      3      6
    4.0      2      3      7
    9.0      2      3      8
    6.0      2      3      9
    1.0      2      3     10
""")
コード例 #26
0
ファイル: output.py プロジェクト: locolucco209/MongoScraper
def hypothesis_test_table(results, title, null_hyp):
    fmt = dict(_default_table_fmt,
               data_fmts=["%#15.6F", "%#15.6F", "%#15.3F", "%s"])

    buf = StringIO()
    table = SimpleTable([[
        results['statistic'], results['crit_value'], results['pvalue'],
        str(results['df'])
    ]], ['Test statistic', 'Critical Value', 'p-value', 'df'], [''],
                        title=None,
                        txt_fmt=fmt)

    buf.write(title + '\n')
    buf.write(str(table) + '\n')

    buf.write(null_hyp + '\n')

    buf.write("Conclusion: %s H_0" % results['conclusion'])
    buf.write(" at %.2f%% significance level" % (results['signif'] * 100))

    return buf.getvalue()
コード例 #27
0
])
cyl_labels = np.array([
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'Japan', 'USA', 'USA', 'USA', 'Japan', 'Germany', 'France', 'Germany',
    'Sweden', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'USA',
    'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany',
    'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany',
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan',
    'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan',
    'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA'
])

dta = np.recfromtxt(StringIO(ss), names=("Rust", "Brand", "Replication"))
dta2 = np.recfromtxt(StringIO(ss2),
                     names=("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(StringIO(ss3), names=("Brand", "Relief"))

from statsmodels.sandbox.stats.multicomp import tukeyhsd
import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])


def get_thsd(mci):
    var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
    means = mci.groupstats.groupmean
    nobs = mci.groupstats.groupnobs
    resi = tukeyhsd(means,
                    nobs,
コード例 #28
0
DEBUG = False

ss = '''\
agecat	smokes	deaths	pyears
1	1	32	52407
2	1	104	43248
3	1	206	28612
4	1	186	12663
5	1	102	5317
1	0	2	18790
2	0	12	10673
3	0	28	5710
4	0	28	2585
5	0	31	1462'''

data = pd.read_csv(StringIO(ss), delimiter='\t')
data = data.astype(int)
data['logpyears'] = np.log(data['pyears'])


class CheckPoissonConstrainedMixin(object):
    def test_basic(self):
        res1 = self.res1
        res2 = self.res2
        assert_allclose(res1[0], res2.params[self.idx], rtol=1e-6)
        # see below Stata has nan, we have zero
        bse1 = np.sqrt(np.diag(res1[1]))
        mask = (bse1 == 0) & np.isnan(res2.bse[self.idx])
        assert_allclose(bse1[~mask], res2.bse[self.idx][~mask], rtol=1e-6)

    def test_basic_method(self):
コード例 #29
0
    Means	Simultaneous 95% Confidence Limits	 Sign.
    2 - 3	4.340	0.691	7.989	***
    2 - 1	4.600	0.951	8.249	***
    3 - 2	-4.340	-7.989	-0.691	***
    3 - 1	0.260	-3.389	3.909	 -
    1 - 2	-4.600	-8.249	-0.951	***
    1 - 3	-0.260	-3.909	3.389	'''

    ss5 = '''\
    2 - 3	 4.340	 0.691 	 7.989	***
    2 - 1	 4.600	 0.951	 8.249	***
    3 - 2	-4.340	-7.989	-0.691	***
    3 - 1	 0.260	-3.389	 3.909	 -
    1 - 2	-4.600	-8.249	-0.951	***
    1 - 3	-0.260	-3.909	 3.389	'''

    dta5 = np.recfromtxt(StringIO(ss5),
                         names=('pair', 'mean', 'lower', 'upper', 'sig'),
                         delimiter='\t')

    sas_ = dta5[[1, 3, 2]]
    confint1 = res3[1][4]
    confint2 = sas_[['lower', 'upper']].view(float).reshape((3, 2))
    assert_almost_equal(confint1, confint2, decimal=2)
    reject1 = res3[1][1]
    reject2 = sas_['sig'] == '***'
    assert_equal(reject1, reject2)
    meandiff1 = res3[1][2]
    meandiff2 = sas_['mean']
    assert_almost_equal(meandiff1, meandiff2, decimal=14)
コード例 #30
0
def setup_module():
    global _orig_stdout
    _orig_stdout = sys.stdout
    sys.stdout = StringIO()