Example #1
0
class ToString(object):
    """Benchmark for rendering a float-valued DataFrame as text."""

    def setup(self):
        """Build the 100x10 frame of random normal values to be rendered."""
        values = np.random.randn(100, 10)
        self.df = DataFrame(values)

    def time_to_string_floats(self):
        """Render the whole frame to a string; the text is discarded."""
        frame = self.df
        frame.to_string()
Example #2
0
    def test_eng_float_formatter(self):
        """Check ``to_string`` output under each engineering float format.

        Exercises the default engineering notation, ``use_eng_prefix=True``
        and ``accuracy=0`` in turn.
        """
        df = DataFrame({'A' : [1.41, 141., 14100, 1410000.]})

        # The format set below affects later ``to_string`` calls without
        # being passed to them, so undo it in ``finally``: a failing
        # assertion must not leave it active for whatever runs next.
        try:
            fmt.set_eng_float_format()
            result = df.to_string()
            expected = ('             A\n'
                        '0    1.410E+00\n'
                        '1  141.000E+00\n'
                        '2   14.100E+03\n'
                        '3    1.410E+06')
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(use_eng_prefix=True)
            result = df.to_string()
            expected = ('         A\n'
                        '0    1.410\n'
                        '1  141.000\n'
                        '2  14.100k\n'
                        '3   1.410M')
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(accuracy=0)
            result = df.to_string()
            expected = ('         A\n'
                        '0    1E+00\n'
                        '1  141E+00\n'
                        '2   14E+03\n'
                        '3    1E+06')
            self.assertEqual(result, expected)
        finally:
            fmt.reset_printoptions()
Example #3
0
    def test_to_string_repr_unicode(self):
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode': unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        idx = Index(['abc', u'\u03c3a', 'aegdvg'])
        ser = Series(np.random.randn(len(idx)), idx)
        rs = repr(ser).split('\n')
        line_len = len(rs[0])
        for line in rs[1:]:
            try:
                line = line.decode(get_option("display.encoding"))
            except:
                pass
            if not line.startswith('Dtype:'):
                self.assert_(len(line) == line_len)

        # it works even if sys.stdin in None
        _stdin= sys.stdin
        try:
            sys.stdin = None
            repr(df)
        finally:
            sys.stdin = _stdin
Example #4
0
    def test_to_string_repr_unicode(self):
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode' : unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        idx = Index(['abc', u'\u03c3a', 'aegdvg'])
        ser = Series(np.random.randn(len(idx)), idx)
        rs = repr(ser).split('\n')
        line_len = len(rs[0])
        for line in rs[1:]:
            try:
                line = line.decode('utf-8')
            except:
                pass
            self.assert_(len(line) == line_len)

        # it works even if sys.stdin in None
        sys.stdin = None
        repr(df)
        sys.stdin = sys.__stdin__
Example #5
0
    def test_to_string_format_na(self):
        """Check how NaN is rendered next to float and string columns.

        Two frames share the same string column 'B'; column 'A' holds either
        floats with a fractional part or whole-valued floats.
        """
        fmt.reset_printoptions()
        labels = [np.nan, 'foo', 'foooo', 'fooooo', 'bar']

        # (values of column 'A', text expected from to_string)
        cases = [
            ([np.nan, -1, -2.1234, 3, 4],
             ('        A       B\n'
              '0     NaN     NaN\n'
              '1 -1.0000     foo\n'
              '2 -2.1234   foooo\n'
              '3  3.0000  fooooo\n'
              '4  4.0000     bar')),
            ([np.nan, -1., -2., 3., 4.],
             ('    A       B\n'
              '0 NaN     NaN\n'
              '1  -1     foo\n'
              '2  -2   foooo\n'
              '3   3  fooooo\n'
              '4   4     bar')),
        ]
        for column_a, wanted in cases:
            frame = DataFrame({'A' : column_a, 'B' : labels})
            rendered = frame.to_string()
            self.assertEqual(rendered, wanted)
Example #6
0
    def test_repr_embedded_ndarray(self):
        arr = np.empty(10, dtype=[('err', object)])
        for i in range(len(arr)):
            arr['err'][i] = np.random.randn(i)

        df = DataFrame(arr)
        repr(df['err'])
        repr(df)
        df.to_string()
Example #7
0
 def update_with_predictions(self, context, x, actuals, predictions):
     """Print the confusion matrix of ``predictions`` against ``actuals``.

     ``context`` is stored on ``self.config.target.context`` before the
     target's prep data is read.  When prep data is available, the first
     element of each entry labels the rows and columns of the matrix;
     otherwise the raw matrix is printed.  ``x`` is not used here.
     """
     cm = metrics.confusion_matrix(actuals, predictions)
     self.config.target.context = context
     factors = self.config.target.get_prep_data()
     # Parenthesised single-argument print behaves the same as the Python 2
     # statement and is also valid Python 3.
     if factors:
         names = [f[0] for f in factors]
         df = DataFrame(cm, columns=names, index=names)
         print(df.to_string())
     else:
         print(cm)
Example #8
0
    def test_to_string_unicode_columns(self):
        """Smoke-test ``to_string``/``info`` with a non-ASCII column label.

        Also checks that ``self.frame.to_string()`` returns a ``unicode``
        object.
        """
        df = DataFrame({u'\u03c3' : np.arange(10.)})

        buf = StringIO()
        df.to_string(buf=buf)
        buf.getvalue()

        buf = StringIO()
        df.info(buf=buf)
        buf.getvalue()

        result = self.frame.to_string()
        # assertTrue instead of the deprecated ``assert_`` alias, which
        # Python 3.12 removed from unittest.TestCase.
        self.assertTrue(isinstance(result, unicode))
def test_cho_rst(year = 2014, verbose = False):
    """Check the imposable-to-brut inversion of replacement incomes.

    For 'cho' and 'rst', run a simulation over a range of brut amounts, feed
    the simulated imposable amounts to the matching inversion function
    (``_chobrut_from_choi`` / ``_rstbrut_from_rsti``) and require both tables
    to agree to within 1 on every row.

    year -- year used for the simulation period and the parent's birth date.
    verbose -- when true, print both tables even if they agree.
    """
    period = periods.period(year)
    remplacement = {'cho': 'chobrut', 'rst': 'rstbrut'}

    # .items() yields the same pairs as the Python 2-only .iteritems().
    for var, varbrut in remplacement.items():
        maxrev = 24000

        simulation = base.tax_benefit_system.new_scenario().init_single_entity(
            axes = [dict(name = varbrut, max = maxrev, min = 0, count = 11)],
            period = period,
            parent1 = dict(
                birth = datetime.date(year - 40, 1, 1),
                ),
            ).new_simulation(debug = True)

        # Brut -> imposable, as computed by the simulation.
        df_b2i = DataFrame({
            var: simulation.calculate(var),
            varbrut: simulation.calculate(varbrut),
            })

        vari = df_b2i[var].get_values()
        csg_rempl = vari * 0 + 3  # shaped like vari; 3 wherever vari is finite

        defaultP = simulation.get_reference_compact_legislation(period.start)
        if var == "cho":
            _vari_to_brut = inversion_revenus._chobrut_from_choi
        elif var == "rst":
            _vari_to_brut = inversion_revenus._rstbrut_from_rsti
        else:
            assert False, u'Unsupported value for var: {!r}'.format(var)

        # Imposable -> brut, through the inversion function under test.
        df_i2b = DataFrame({var: vari, varbrut: _vari_to_brut(vari, csg_rempl, defaultP)})

        # Parenthesised single-argument print works on Python 2 and 3 alike.
        if verbose:
            print(df_i2b.to_string())
            print(df_b2i.to_string())

        for variable in [var, varbrut]:
            passed = ((df_b2i[variable] - df_i2b[variable]).abs() < 1).all()

            if (not passed) or verbose:
                print("Brut to imposable")
                print((df_b2i[[varbrut, var]] / 12).to_string())
                print("Imposable to brut")
                print((df_i2b / 12).to_string())

            # Name the column that actually differs, not only the income kind.
            assert passed, "difference in %s for %s" % (variable, var)
def test_case_study(year = 2013, verbose = False):
    '''
    Check that _salbrut, which computes "salaire brut" from "imposable", yields an amount
    compatible with the one obtained from running openfisca starting with a "salaire brut".

    year -- year passed to the simulation configuration.
    verbose -- when true, print both result tables for every category.
    '''
    # Imported once here instead of on every loop iteration.
    from openfisca_france.model.inversion_revenus import _salbrut
    from pandas import DataFrame

    for type_sal_category in ['prive_non_cadre', 'prive_cadre']:  # , 'public_titulaire_etat']:
        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 11, maxrev = maxrev, x_axis = 'salbrut')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev

        simulation.set_param()

        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)

        # Brut -> imposable, as computed by the simulation.
        df_b2i = df.transpose()
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        if verbose:
            print(df_b2i.to_string())

        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # The former ``primes`` local re-read the 'hsup' column and was never
        # used, so it has been dropped.

        defaultP = simulation.P_default
        # Imposable -> brut, through the inversion function under test.
        df_i2b = DataFrame({'sal': sali, 'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })

        if verbose:
            print(df_i2b.to_string())

        for var in ['sal', 'salbrut']:
            passed = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()

            if (not passed) or type_sal_category in ['public_titulaire_etat']:
                print((df_b2i / 12).to_string())
                print((df_i2b / 12).to_string())

            assert passed, "difference in %s for %s" % (var, type_sal_category)
Example #11
0
    def test_to_string_small_float_values(self):
        df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]})

        result = df.to_string()
        # sadness per above
        if "%.4g" % 1.7e8 == "1.7e+008":
            expected = "               a\n" "0  1.500000e+000\n" "1  1.000000e-017\n" "2 -5.500000e-007"
        else:
            expected = "              a\n" "0  1.500000e+00\n" "1  1.000000e-17\n" "2 -5.500000e-07"
        self.assertEqual(result, expected)

        # but not all exactly zero
        df = df * 0
        result = df.to_string()
        expected = "   0\n" "0  0\n" "1  0\n" "2 -0"
Example #12
0
    def test_to_string_repr_unicode(self):
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode' : unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        # it works even if sys.stdin in None
        sys.stdin = None
        repr(df)
        sys.stdin = sys.__stdin__
Example #13
0
    def test_to_string_float_formatting(self):
        """Check float rendering with custom and with default print options.

        The first two frames are rendered with precision=6 and
        column_space=12; after resetting, the default precision is checked
        to be 7 and a third frame is rendered with the defaults.
        """
        fmt.reset_printoptions()
        fmt.set_printoptions(precision=6, column_space=12,
                             notebook_repr_html=False)

        # The options set above are global, so always restore the defaults,
        # even if one of the assertions below fails.
        try:
            df = DataFrame({'x' : [0, 0.25, 3456.000, 12e+45, 1.64e+6,
                                   1.7e+8, 1.253456, np.pi, -1e6]})

            df_s = df.to_string()

            # Python 2.5 just wants me to be sad. And debian 32-bit
            #sys.version_info[0] == 2 and sys.version_info[1] < 6:
            if _three_digit_exp():
                expected = ('              x\n0  0.00000e+000\n1  2.50000e-001\n'
                            '2  3.45600e+003\n3  1.20000e+046\n4  1.64000e+006\n'
                            '5  1.70000e+008\n6  1.25346e+000\n7  3.14159e+000\n'
                            '8 -1.00000e+006')
            else:
                expected = ('             x\n0  0.00000e+00\n1  2.50000e-01\n'
                            '2  3.45600e+03\n3  1.20000e+46\n4  1.64000e+06\n'
                            '5  1.70000e+08\n6  1.25346e+00\n7  3.14159e+00\n'
                            '8 -1.00000e+06')
            assert(df_s == expected)

            df = DataFrame({'x' : [3234, 0.253]})
            df_s = df.to_string()

            expected = ('          x\n'
                        '0  3234.000\n'
                        '1     0.253')
            assert(df_s == expected)

            fmt.reset_printoptions()
            self.assertEqual(get_option("display.precision"), 7)

            df = DataFrame({'x': [1e9, 0.2512]})
            df_s = df.to_string()
            # Python 2.5 just wants me to be sad. And debian 32-bit
            #sys.version_info[0] == 2 and sys.version_info[1] < 6:
            if _three_digit_exp():
                expected = ('               x\n'
                            '0  1.000000e+009\n'
                            '1  2.512000e-001')
            else:
                expected = ('              x\n'
                            '0  1.000000e+09\n'
                            '1  2.512000e-01')
            assert(df_s == expected)
        finally:
            fmt.reset_printoptions()
def test_case_study(year = 2013, verbose = False):
    """Check that _salbrut inverts the simulated brut-to-imposable salary.

    For each salary category, simulate from a "salbrut" amount, feed the
    simulated 'sal' values to ``_salbrut`` and require both tables to agree
    to within .01 on 'sal' and 'salbrut'.

    year -- year passed to the simulation configuration.
    verbose -- when true, print both result tables for every category.
    """
    # Imported once here instead of on every loop iteration.
    from openfisca_france.model.cotisations_sociales.travail import _salbrut
    from pandas import DataFrame

    for type_sal_category in ['prive_non_cadre', 'prive_cadre', 'public_titulaire_etat']:

        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 1, x_axis = 'salbrut')
        # Add husband/wife on the same tax sheet (foyer).
        # simulation.scenario.addIndiv(1, datetime.date(1975, 1, 1), 'conj', 'part')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev

        simulation.set_param()

        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)

        # Brut -> imposable, as computed by the simulation.
        df_b2i = df.transpose()
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        if verbose:
            print(df_b2i.to_string())

        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # The former ``primes`` local re-read the 'hsup' column and was never
        # used, so it has been dropped.

        defaultP = simulation.P_default
        # Imposable -> brut, through the inversion function under test.
        df_i2b = DataFrame({'sal': sali, 'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })

        if verbose:
            print(df_i2b.to_string())

        for var in ['sal', 'salbrut']:
            test = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()

            if (not test) or type_sal_category in ['public_titulaire_etat']:
                print((df_b2i / 12).to_string())
                print((df_i2b / 12).to_string())

            assert test, "difference in %s for %s" % (var, type_sal_category)
Example #15
0
 def test_to_string_with_formatters_unicode(self):
     df = DataFrame({u'c/\u03c3':[1,2,3]})
     result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x})
     self.assertEqual(result, (u'  c/\u03c3\n'
                                '0   1\n'
                                '1   2\n'
                                '2   3'))
Example #16
0
    def test_to_string_no_header(self):
        df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        df_s = df.to_string(header=False)
        expected = "0  1  4\n1  2  5\n2  3  6"

        assert df_s == expected
Example #17
0
    def test_to_string_int_formatting(self):
        df = DataFrame({"x": [-15, 20, 25, -35]})
        self.assert_(issubclass(df["x"].dtype.type, np.integer))

        output = df.to_string()
        expected = "    x\n" "0 -15\n" "1  20\n" "2  25\n" "3 -35"
        self.assertEqual(output, expected)
Example #18
0
    def test_to_string_with_formatters(self):
        """Check per-column formatters given by name and by position.

        The same three formatters are passed once as a dict keyed by column
        name and once as a plain sequence; both renderings must match.
        """
        data = {'int': [1, 2, 3],
                'float': [1.0, 2.0, 3.0],
                'object': [(1,2), True, False]}
        frame = DataFrame(data, columns=['int', 'float', 'object'])

        formatters = [('int', lambda value: '0x%x' % value),
                      ('float', lambda value: '[% 4.1f]' % value),
                      ('object', lambda value: '-%s-' % str(value))]
        by_name = frame.to_string(formatters=dict(formatters))
        by_position = frame.to_string(formatters=lzip(*formatters)[1])

        wanted = ('  int  float    object\n'
                  '0 0x1 [ 1.0]  -(1, 2)-\n'
                  '1 0x2 [ 2.0]    -True-\n'
                  '2 0x3 [ 3.0]   -False-')
        self.assertEqual(by_name, wanted)
        self.assertEqual(by_name, by_position)
Example #19
0
    def test_to_string_no_index(self):
        """Check that ``index=False`` omits the row labels."""
        frame = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        rendered = frame.to_string(index=False)
        wanted = (
            " x  y\n"
            " 1  4\n"
            " 2  5\n"
            " 3  6"
        )

        assert rendered == wanted
Example #20
0
    def test_to_string_float_index(self):
        """Check how a float-valued index is rendered by ``to_string``."""
        labels = Index([1.5, 2, 3, 4, 5])
        frame = DataFrame(range(5), index=labels)

        rendered = frame.to_string()
        wanted = (
            "     0\n"
            "1.5  0\n"
            "2    1\n"
            "3    2\n"
            "4    3\n"
            "5    4"
        )
        self.assertEqual(rendered, wanted)
Example #21
0
    def test_to_string_small_float_values(self):
        df = DataFrame({'a': [1.5, 1e-17, -5.5e-7]})

        result = df.to_string()
        expected = ('              a\n'
                    '0  1.500000e+00\n'
                    '1  1.000000e-17\n'
                    '2 -5.500000e-07')
        self.assertEqual(result, expected)

        # but not all exactly zero
        df = df * 0
        result = df.to_string()
        expected = ('   0\n'
                    '0  0\n'
                    '1  0\n'
                    '2 -0')
Example #22
0
 def test_to_string_left_justify_cols(self):
     """Check that ``justify='left'`` left-aligns the column header."""
     fmt.reset_printoptions()
     frame = DataFrame({'x' : [3234, 0.253]})
     rendered = frame.to_string(justify='left')
     wanted = ('   x       \n'
               '0  3234.000\n'
               '1     0.253')
     assert rendered == wanted
Example #23
0
    def test_to_string_with_formatters(self):
        """Check per-column formatters given by name and by position.

        The same three formatters are passed once as a dict keyed by column
        name and once as a plain sequence; both renderings must match.
        """
        data = {"int": [1, 2, 3], "float": [1.0, 2.0, 3.0], "object": [(1, 2), True, False]}
        frame = DataFrame(data, columns=["int", "float", "object"])

        formatters = [
            ("int", lambda value: "0x%x" % value),
            ("float", lambda value: "[% 4.1f]" % value),
            ("object", lambda value: "-%s-" % str(value)),
        ]
        by_name = frame.to_string(formatters=dict(formatters))
        by_position = frame.to_string(formatters=lzip(*formatters)[1])

        wanted = (
            "  int  float    object\n"
            "0 0x1 [ 1.0]  -(1, 2)-\n"
            "1 0x2 [ 2.0]    -True-\n"
            "2 0x3 [ 3.0]   -False-"
        )
        self.assertEqual(by_name, wanted)
        self.assertEqual(by_name, by_position)
Example #24
0
    def test_to_string_float_formatting(self):
        """Check float rendering with custom and with default print options.

        The first two frames are rendered with precision=6 and
        column_space=12; after resetting, the default precision is checked
        to be 7 and a third frame is rendered with the defaults.
        """
        fmt.reset_printoptions()
        fmt.set_printoptions(precision=6, column_space=12, notebook_repr_html=False)

        # The options set above are global, so always restore the defaults,
        # even if one of the assertions below fails.
        try:
            df = DataFrame({"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]})

            df_s = df.to_string()

            # Python 2.5 just wants me to be sad. And debian 32-bit
            # sys.version_info[0] == 2 and sys.version_info[1] < 6:
            if _three_digit_exp():
                expected = (
                    "              x\n0  0.00000e+000\n1  2.50000e-001\n"
                    "2  3.45600e+003\n3  1.20000e+046\n4  1.64000e+006\n"
                    "5  1.70000e+008\n6  1.25346e+000\n7  3.14159e+000\n"
                    "8 -1.00000e+006"
                )
            else:
                expected = (
                    "             x\n0  0.00000e+00\n1  2.50000e-01\n"
                    "2  3.45600e+03\n3  1.20000e+46\n4  1.64000e+06\n"
                    "5  1.70000e+08\n6  1.25346e+00\n7  3.14159e+00\n"
                    "8 -1.00000e+06"
                )
            assert df_s == expected

            df = DataFrame({"x": [3234, 0.253]})
            df_s = df.to_string()

            expected = "          x\n" "0  3234.000\n" "1     0.253"
            assert df_s == expected

            fmt.reset_printoptions()
            self.assertEqual(fmt.print_config.precision, 7)

            df = DataFrame({"x": [1e9, 0.2512]})
            df_s = df.to_string()
            # Python 2.5 just wants me to be sad. And debian 32-bit
            # sys.version_info[0] == 2 and sys.version_info[1] < 6:
            if _three_digit_exp():
                expected = "               x\n" "0  1.000000e+009\n" "1  2.512000e-001"
            else:
                expected = "              x\n" "0  1.000000e+09\n" "1  2.512000e-01"
            assert df_s == expected
        finally:
            fmt.reset_printoptions()
    def run(self):
        """Render a random frame as text and store it under one S3 key.

        The frame has ``self.nrows`` rows and ``self.ncolumns`` columns named
        after the first entries of ALPHABET.
        """
        values = np.random.rand(self.nrows, self.ncolumns)
        column_names = ALPHABET[0:self.ncolumns]
        frame = DataFrame(values, columns=column_names)
        frame.index.name = 'index'
        payload = frame.to_string()

        # NOTE: the key is named '.csv' but holds the ``to_string`` rendering.
        bucket = S3Connection(ACCESS_KEY, ACCESS_SECRET).get_bucket(BUCKET)
        s3_key = Key(bucket)
        s3_key.key = 'random_numbers.csv'
        s3_key.set_contents_from_string(payload)
Example #26
0
def getAllChampionContestRates(minGames=1):
    """Return a text report of the contest rate of every champion.

    The report starts with the total number of games, followed by a table
    indexed by champion.  ``minGames`` is forwarded to
    ``stats.getAllChampionContestRates``.
    """
    header = "Total Games: %d\n\n" % stats.getTotalGames()

    columnNames = ["champion", "picked", "banned", "contested", "contestRate"]
    rates = stats.getAllChampionContestRates(minGames=minGames)
    table = DataFrame(rates, columns=columnNames).set_index("champion")

    return header + table.to_string()
Example #27
0
 def __repr__(self):
     """Return the id value plus a table of the 20 largest clusters.

     Clusters are ordered by 'Non_red_size', largest first; when more than
     20 exist, a trailing line reports how many were left out.
     """
     shown = 20
     table = DataFrame(self.cluster_info,
                       columns=['Cluster_no', 'Cluster_size', 'Description', 'Non_red_size'])
     # Reorder the columns for display, then keep the largest clusters.
     table = table[['Cluster_no', 'Cluster_size', 'Non_red_size', 'Description']]
     table = table.sort_values('Non_red_size', ascending=False)[:shown]

     parts = ["Id value for clustering: {}\n\n".format(self.id_val),
              table.to_string(index=False)]
     hidden = len(self.cluster_info) - shown
     if hidden > 0:
         parts.append("\n... {} more entries...".format(hidden))
     return "".join(parts)
Example #28
0
    def test_eng_float_formatter(self):
        """Check ``to_string`` output under each engineering float format.

        Exercises the default engineering notation, ``use_eng_prefix=True``
        and ``accuracy=0`` in turn.
        """
        df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})

        # The format set below affects later ``to_string`` calls without
        # being passed to them, so undo it in ``finally``: a failing
        # assertion must not leave it active for whatever runs next.
        try:
            fmt.set_eng_float_format()
            result = df.to_string()
            expected = "             A\n" "0    1.410E+00\n" "1  141.000E+00\n" "2   14.100E+03\n" "3    1.410E+06"
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(use_eng_prefix=True)
            result = df.to_string()
            expected = "         A\n" "0    1.410\n" "1  141.000\n" "2  14.100k\n" "3   1.410M"
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(accuracy=0)
            result = df.to_string()
            expected = "         A\n" "0    1E+00\n" "1  141E+00\n" "2   14E+03\n" "3    1E+06"
            self.assertEqual(result, expected)
        finally:
            fmt.reset_printoptions()
Example #29
0
    def test_to_string_int_formatting(self):
        df = DataFrame({'x' : [-15, 20, 25, -35]})
        self.assert_(issubclass(df['x'].dtype.type, np.integer))

        output = df.to_string()
        expected = ('    x\n'
                    '0 -15\n'
                    '1  20\n'
                    '2  25\n'
                    '3 -35')
        self.assertEqual(output, expected)
Example #30
0
    def test_to_string_index_formatter(self):
        df = DataFrame([range(5), range(5, 10), range(10, 15)])

        rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]})

        xp = """\
    0   1   2   3   4
a   0   1   2   3   4
b   5   6   7   8   9
c  10  11  12  13  14\
"""
        self.assertEqual(rs, xp)