def test_eng_float_formatter(self):
    # Engineering notation: exponents snap to multiples of 3 (E+00/E+03/E+06).
    df = DataFrame({'A' : [1.41, 141., 14100, 1410000.]})
    fmt.set_eng_float_format()
    result = df.to_string()
    expected = (' A\n'
                '0 1.410E+00\n'
                '1 141.000E+00\n'
                '2 14.100E+03\n'
                '3 1.410E+06')
    self.assertEqual(result, expected)

    # SI prefixes (k, M, ...) replace explicit exponents.
    fmt.set_eng_float_format(use_eng_prefix=True)
    result = df.to_string()
    expected = (' A\n'
                '0 1.410\n'
                '1 141.000\n'
                '2 14.100k\n'
                '3 1.410M')
    self.assertEqual(result, expected)

    # accuracy=0 drops every digit after the decimal point.
    fmt.set_eng_float_format(accuracy=0)
    result = df.to_string()
    expected = (' A\n'
                '0 1E+00\n'
                '1 141E+00\n'
                '2 14E+03\n'
                '3 1E+06')
    self.assertEqual(result, expected)

    # restore the global display options so later tests are unaffected
    fmt.reset_printoptions()
def test_to_string_repr_unicode(self):
    # to_string()/repr() must not choke on non-ASCII data, and every rendered
    # row must be padded to the same display width.
    buf = StringIO()

    unicode_values = [u'\u03c3'] * 10
    unicode_values = np.array(unicode_values, dtype=object)
    df = DataFrame({'unicode': unicode_values})
    df.to_string(col_space=10, buf=buf)

    # it works!
    repr(df)

    idx = Index(['abc', u'\u03c3a', 'aegdvg'])
    ser = Series(np.random.randn(len(idx)), idx)
    rs = repr(ser).split('\n')
    line_len = len(rs[0])
    for line in rs[1:]:
        try:
            # Python 2: repr() may return encoded bytes; decode before measuring
            line = line.decode(get_option("display.encoding"))
        except:
            pass
        # data rows share one width; the trailing 'Dtype:' footer is exempt
        if not line.startswith('Dtype:'):
            self.assert_(len(line) == line_len)

    # it works even if sys.stdin in None
    _stdin = sys.stdin
    try:
        sys.stdin = None
        repr(df)
    finally:
        sys.stdin = _stdin
def test_to_string_repr_unicode(self):
    """to_string()/repr() must handle unicode data and survive sys.stdin being None."""
    buf = StringIO()

    unicode_values = [u'\u03c3'] * 10
    unicode_values = np.array(unicode_values, dtype=object)
    df = DataFrame({'unicode' : unicode_values})
    df.to_string(col_space=10, buf=buf)

    # it works!
    repr(df)

    idx = Index(['abc', u'\u03c3a', 'aegdvg'])
    ser = Series(np.random.randn(len(idx)), idx)
    rs = repr(ser).split('\n')
    line_len = len(rs[0])
    for line in rs[1:]:
        try:
            # Python 2: repr() may return encoded bytes; decode before measuring
            line = line.decode('utf-8')
        except:
            pass
        # every rendered row must be padded to the same display width
        self.assert_(len(line) == line_len)

    # it works even if sys.stdin is None.
    # BUG FIX: restore stdin in a finally block -- previously a failure in
    # repr() left sys.stdin = None for every subsequent test.
    _stdin = sys.stdin
    try:
        sys.stdin = None
        repr(df)
    finally:
        sys.stdin = _stdin
def test_to_string_format_na(self):
    """NaN entries render as 'NaN' in both float and object columns."""
    fmt.reset_printoptions()

    # mixed-precision floats alongside strings
    frame = DataFrame({'A' : [np.nan, -1, -2.1234, 3, 4],
                       'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
    expected = (' A B\n'
                '0 NaN NaN\n'
                '1 -1.0000 foo\n'
                '2 -2.1234 foooo\n'
                '3 3.0000 fooooo\n'
                '4 4.0000 bar')
    self.assertEqual(frame.to_string(), expected)

    # whole-valued floats print without a fractional part
    frame = DataFrame({'A' : [np.nan, -1., -2., 3., 4.],
                       'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
    expected = (' A B\n'
                '0 NaN NaN\n'
                '1 -1 foo\n'
                '2 -2 foooo\n'
                '3 3 fooooo\n'
                '4 4 bar')
    self.assertEqual(frame.to_string(), expected)
class ToString(object): def setup(self): self.df = DataFrame(np.random.randn(100, 10)) def time_to_string_floats(self): self.df.to_string()
def test_repr_embedded_ndarray(self): arr = np.empty(10, dtype=[('err', object)]) for i in range(len(arr)): arr['err'][i] = np.random.randn(i) df = DataFrame(arr) repr(df['err']) repr(df) df.to_string()
def update_with_predictions(self, context, x, actuals, predictions):
    # Print a confusion matrix for this batch of predictions, labelling
    # rows/columns with factor names when the target provides them.
    cm = metrics.confusion_matrix(actuals, predictions)
    self.config.target.context = context
    # factors: presumably an iterable of (name, ...) tuples -- TODO confirm
    factors = self.config.target.get_prep_data()
    if factors:
        names = [f[0] for f in factors]
        df = DataFrame(cm, columns=names, index=names)
        print df.to_string()
    else:
        # no labels available; fall back to the raw ndarray
        print cm
def test_to_string_unicode_columns(self):
    # A unicode column label must not break to_string()/info().
    df = DataFrame({u'\u03c3' : np.arange(10.)})

    buf = StringIO()
    df.to_string(buf=buf)
    buf.getvalue()

    buf = StringIO()
    df.info(buf=buf)
    buf.getvalue()

    # Python 2: to_string() is expected to return a unicode object
    result = self.frame.to_string()
    self.assert_(isinstance(result, unicode))
def test_cho_rst(year = 2014, verbose = False):
    # Tests that _chobrut which computes "chômage brut" from "imposable" yields an amount compatbe
    # with the one obtained from running openfisca satrting with a "chômage brut"
    period = periods.period(year)
    # map each net-ish variable to its gross ("brut") counterpart
    remplacement = {'cho': 'chobrut', 'rst': 'rstbrut'}
    for var, varbrut in remplacement.iteritems():
        maxrev = 24000
        # single adult simulated over a range of gross incomes [0, maxrev]
        simulation = base.tax_benefit_system.new_scenario().init_single_entity(
            axes = [dict(name = varbrut, max = maxrev, min = 0, count = 11)],
            period = period,
            parent1 = dict(
                birth = datetime.date(year - 40, 1, 1),
                ),
            ).new_simulation(debug = True)
        # brut -> imposable direction, computed by the full simulation
        df_b2i = DataFrame({
            var: simulation.calculate(var),
            varbrut: simulation.calculate(varbrut),
            })
        vari = df_b2i[var].get_values()
        # constant CSG replacement-income bracket (3) for every row
        csg_rempl = vari * 0 + 3
        defaultP = simulation.get_reference_compact_legislation(period.start)
        if var == "cho":
            _vari_to_brut = inversion_revenus._chobrut_from_choi
        elif var == "rst":
            _vari_to_brut = inversion_revenus._rstbrut_from_rsti
        else:
            assert False, u'Unsupported value for var: {!r}'.format(var)
        # imposable -> brut direction, via the inversion helper under test
        df_i2b = DataFrame({var: vari, varbrut: _vari_to_brut(vari, csg_rempl, defaultP)})
        if verbose:
            print df_i2b.to_string()
            print df_b2i.to_string()
        # the round trip must agree to within 1 euro (annual amounts)
        for variable in [var, varbrut]:
            passed = ((df_b2i[variable] - df_i2b[variable]).abs() < 1).all()
            if (not passed) or verbose:
                print "Brut to imposable"
                print (df_b2i[[varbrut, var]] / 12).to_string()
                print "Imposable to brut"
                print (df_i2b / 12).to_string()
            assert passed, "difference in %s " % (var)
def test_case_study(year = 2013, verbose = False):
    '''
    Tests that _salbrut which computes "salaire brut" from "imposable" yields an amount compatbe
    with the one obtained from running openfisca satrting with a "salaire brut"
    '''
    for type_sal_category in ['prive_non_cadre', 'prive_cadre']:  # , 'public_titulaire_etat']:
        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 11,
                              maxrev = maxrev, x_axis = 'salbrut')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            # civil servants get a statutory bonus proportional to gross pay
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev
        simulation.set_param()
        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)
        from openfisca_france.model.inversion_revenus import _salbrut
        # brut -> imposable, as computed by the full simulation
        df_b2i = df.transpose()
        if verbose:
            print df_b2i.to_string()
        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # NOTE(review): 'primes' is read from the 'hsup' column, not 'primes',
        # and is never used below -- confirm whether this is intentional.
        primes = simulation.input_table.table['hsup'].get_values()
        defaultP = simulation.P_default
        from pandas import DataFrame
        # imposable -> brut, via the inversion helper under test
        df_i2b = DataFrame({'sal': sali,
                            'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })
        if verbose:
            print df_i2b.to_string()
        # the round trip must agree to within one cent
        for var in ['sal', 'salbrut']:
            passed = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()
            if (not passed) or type_sal_category in ['public_titulaire_etat']:
                print (df_b2i / 12).to_string()
                print (df_i2b / 12).to_string()
            assert passed, "difference in %s for %s" % (var, type_sal_category)
def test_to_string_repr_unicode(self): buf = StringIO() unicode_values = [u'\u03c3'] * 10 unicode_values = np.array(unicode_values, dtype=object) df = DataFrame({'unicode' : unicode_values}) df.to_string(col_space=10, buf=buf) # it works! repr(df) # it works even if sys.stdin in None sys.stdin = None repr(df) sys.stdin = sys.__stdin__
def test_to_string_small_float_values(self):
    # Very small/large magnitudes switch to scientific notation.
    df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]})

    result = df.to_string()
    # sadness per above
    # (platforms whose printf emits 3-digit exponents, e.g. old MSVC)
    if "%.4g" % 1.7e8 == "1.7e+008":
        expected = " a\n" "0 1.500000e+000\n" "1 1.000000e-017\n" "2 -5.500000e-007"
    else:
        expected = " a\n" "0 1.500000e+00\n" "1 1.000000e-17\n" "2 -5.500000e-07"
    self.assertEqual(result, expected)

    # but not all exactly zero
    df = df * 0
    result = df.to_string()
    # NOTE(review): 'expected' below is assigned but never compared against
    # 'result' -- dead code, probably a missing self.assertEqual(result,
    # expected). Confirm intent before asserting it.
    expected = " 0\n" "0 0\n" "1 0\n" "2 -0"
def test_case_study(year = 2013, verbose = False):
    # Round-trip check: "salaire brut" -> "imposable" (full simulation) must
    # invert "imposable" -> "brut" (_salbrut helper) to within one cent.
    for type_sal_category in ['prive_non_cadre', 'prive_cadre', 'public_titulaire_etat']:
        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 1,
                              x_axis = 'salbrut')
        # Add husband/wife on the same tax sheet (foyer).
        # simulation.scenario.addIndiv(1, datetime.date(1975, 1, 1), 'conj', 'part')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            # civil servants get a statutory bonus proportional to gross pay
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev
        simulation.set_param()
        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)
        from openfisca_france.model.cotisations_sociales.travail import _salbrut
        # brut -> imposable, as computed by the full simulation
        df_b2i = df.transpose()
        if verbose:
            print df_b2i.to_string()
        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # NOTE(review): 'primes' is read from the 'hsup' column and never used
        # below -- confirm whether this is intentional.
        primes = simulation.input_table.table['hsup'].get_values()
        defaultP = simulation.P_default
        from pandas import DataFrame
        # imposable -> brut, via the inversion helper under test
        df_i2b = DataFrame({'sal': sali,
                            'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })
        if verbose:
            print df_i2b.to_string()
        for var in ['sal', 'salbrut']:
            test = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()
            if (not test) or type_sal_category in ['public_titulaire_etat']:
                print (df_b2i / 12).to_string()
                print (df_i2b / 12).to_string()
            assert test, "difference in %s for %s" % (var, type_sal_category)
def test_to_string_float_formatting(self):
    # Exercise the precision/column_space print options for float columns.
    fmt.reset_printoptions()
    fmt.set_printoptions(precision=6, column_space=12,
                         notebook_repr_html=False)

    df = DataFrame({'x' : [0, 0.25, 3456.000, 12e+45, 1.64e+6, 1.7e+8,
                           1.253456, np.pi, -1e6]})

    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    #sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        expected = (' x\n0 0.00000e+000\n1 2.50000e-001\n'
                    '2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n'
                    '5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n'
                    '8 -1.00000e+006')
    else:
        expected = (' x\n0 0.00000e+00\n1 2.50000e-01\n'
                    '2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n'
                    '5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n'
                    '8 -1.00000e+06')
    assert(df_s == expected)

    # fixed-point rendering for moderate magnitudes
    df = DataFrame({'x' : [3234, 0.253]})
    df_s = df.to_string()
    expected = (' x\n'
                '0 3234.000\n'
                '1 0.253')
    assert(df_s == expected)

    # reset_printoptions() must restore the default precision of 7
    fmt.reset_printoptions()
    self.assertEqual(get_option("display.precision"), 7)

    df = DataFrame({'x': [1e9, 0.2512]})
    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    #sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        expected = (' x\n'
                    '0 1.000000e+009\n'
                    '1 2.512000e-001')
    else:
        expected = (' x\n'
                    '0 1.000000e+09\n'
                    '1 2.512000e-01')
    assert(df_s == expected)
def test_to_string_float_index(self):
    """A float-valued index renders without superfluous decimals."""
    idx = Index([1.5, 2, 3, 4, 5])
    frame = DataFrame(range(5), index=idx)
    expected = " 0\n" "1.5 0\n" "2 1\n" "3 2\n" "4 3\n" "5 4"
    self.assertEqual(frame.to_string(), expected)
def test_to_string_no_index(self):
    """index=False suppresses the row-label column."""
    frame = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    rendered = frame.to_string(index=False)
    assert rendered == " x y\n 1 4\n 2 5\n 3 6"
def test_to_string_with_formatters(self):
    # Per-column formatters may be given as a dict keyed by column name or
    # as a positional list in column order; both must produce the same text.
    df = DataFrame({'int': [1, 2, 3],
                    'float': [1.0, 2.0, 3.0],
                    'object': [(1,2), True, False]},
                   columns=['int', 'float', 'object'])
    formatters = [('int', lambda x: '0x%x' % x),
                  ('float', lambda x: '[% 4.1f]' % x),
                  ('object', lambda x: '-%s-' % str(x))]
    result = df.to_string(formatters=dict(formatters))
    # same formatters, passed positionally (lzip transposes the pairs)
    result2 = df.to_string(formatters=lzip(*formatters)[1])
    self.assertEqual(result, (' int float object\n'
                              '0 0x1 [ 1.0] -(1, 2)-\n'
                              '1 0x2 [ 2.0] -True-\n'
                              '2 0x3 [ 3.0] -False-'))
    self.assertEqual(result, result2)
def test_to_string_with_formatters_unicode(self): df = DataFrame({u'c/\u03c3':[1,2,3]}) result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x}) self.assertEqual(result, (u' c/\u03c3\n' '0 1\n' '1 2\n' '2 3'))
def test_to_string_no_header(self):
    """header=False drops the column-name row but keeps the index."""
    frame = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    rendered = frame.to_string(header=False)
    assert rendered == "0 1 4\n1 2 5\n2 3 6"
def test_to_string_int_formatting(self):
    """Integer columns keep an integer dtype and right-aligned rendering."""
    frame = DataFrame({"x": [-15, 20, 25, -35]})
    self.assert_(issubclass(frame["x"].dtype.type, np.integer))
    rendered = frame.to_string()
    expected = " x\n" "0 -15\n" "1 20\n" "2 25\n" "3 -35"
    self.assertEqual(rendered, expected)
def test_to_string_small_float_values(self):
    # Tiny magnitudes force scientific notation.
    df = DataFrame({'a': [1.5, 1e-17, -5.5e-7]})

    result = df.to_string()
    expected = (' a\n'
                '0 1.500000e+00\n'
                '1 1.000000e-17\n'
                '2 -5.500000e-07')
    self.assertEqual(result, expected)

    # but not all exactly zero
    df = df * 0
    result = df.to_string()
    # NOTE(review): 'expected' is assigned but never compared against
    # 'result' -- dead code, probably a missing self.assertEqual(result,
    # expected). Confirm intent before asserting it.
    expected = (' 0\n'
                '0 0\n'
                '1 0\n'
                '2 -0')
def test_to_string_left_justify_cols(self):
    """justify='left' left-aligns headers and values."""
    fmt.reset_printoptions()
    frame = DataFrame({'x' : [3234, 0.253]})
    rendered = frame.to_string(justify='left')
    expected = (' x \n'
                '0 3234.000\n'
                '1 0.253')
    assert(rendered == expected)
def test_to_string_with_formatters(self):
    # Per-column formatters may be supplied as a dict keyed by column name
    # or as a positional list in column order; both must agree.
    df = DataFrame(
        {"int": [1, 2, 3], "float": [1.0, 2.0, 3.0], "object": [(1, 2), True, False]},
        columns=["int", "float", "object"],
    )
    formatters = [
        ("int", lambda x: "0x%x" % x),
        ("float", lambda x: "[% 4.1f]" % x),
        ("object", lambda x: "-%s-" % str(x)),
    ]
    result = df.to_string(formatters=dict(formatters))
    # same formatters passed positionally (lzip transposes the pairs)
    result2 = df.to_string(formatters=lzip(*formatters)[1])
    self.assertEqual(
        result,
        (" int float object\n"
         "0 0x1 [ 1.0] -(1, 2)-\n"
         "1 0x2 [ 2.0] -True-\n"
         "2 0x3 [ 3.0] -False-"),
    )
    self.assertEqual(result, result2)
def test_to_string_float_formatting(self):
    # Exercise the precision/column_space print options for float columns.
    fmt.reset_printoptions()
    fmt.set_printoptions(precision=6, column_space=12, notebook_repr_html=False)

    df = DataFrame({"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8,
                          1.253456, np.pi, -1e6]})

    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        # platforms whose printf emits 3-digit exponents (e.g. old MSVC)
        expected = (
            " x\n0 0.00000e+000\n1 2.50000e-001\n"
            "2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n"
            "5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n"
            "8 -1.00000e+006"
        )
    else:
        expected = (
            " x\n0 0.00000e+00\n1 2.50000e-01\n"
            "2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n"
            "5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n"
            "8 -1.00000e+06"
        )
    assert df_s == expected

    # fixed-point rendering for moderate magnitudes
    df = DataFrame({"x": [3234, 0.253]})
    df_s = df.to_string()
    expected = " x\n" "0 3234.000\n" "1 0.253"
    assert df_s == expected

    # reset_printoptions() must restore the default precision of 7
    fmt.reset_printoptions()
    self.assertEqual(fmt.print_config.precision, 7)

    df = DataFrame({"x": [1e9, 0.2512]})
    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        expected = " x\n" "0 1.000000e+009\n" "1 2.512000e-001"
    else:
        expected = " x\n" "0 1.000000e+09\n" "1 2.512000e-01"
    assert df_s == expected
def getAllChampionContestRates(minGames=1):
    """Build a text report of per-champion contest rates.

    Champions seen in fewer than *minGames* games are excluded by the
    underlying stats query.
    """
    header = "Total Games: %d\n\n" % stats.getTotalGames()
    rates = stats.getAllChampionContestRates(minGames=minGames)
    table = DataFrame(rates, columns=(["champion", "picked", "banned",
                                       "contested", "contestRate"]))
    # champion names become the row labels of the report
    table = table.set_index("champion")
    return header + table.to_string()
def run(self):
    # Generate a random numeric table and upload its text rendering to S3.
    df = DataFrame(np.random.rand(self.nrows, self.ncolumns),
                   columns=ALPHABET[0:self.ncolumns])
    df.index.name = 'index'
    output = df.to_string()
    conn = S3Connection(ACCESS_KEY, ACCESS_SECRET)
    bucket = conn.get_bucket(BUCKET)
    file = Key(bucket)
    # NOTE(review): the key name says .csv but the payload is to_string()
    # output, not CSV -- confirm whether to_csv() was intended.
    file.key = 'random_numbers.csv'
    file.set_contents_from_string(output)
def test_eng_float_formatter(self):
    # Engineering notation: exponents snap to multiples of 3 (E+00/E+03/E+06).
    df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})

    fmt.set_eng_float_format()
    result = df.to_string()
    expected = " A\n" "0 1.410E+00\n" "1 141.000E+00\n" "2 14.100E+03\n" "3 1.410E+06"
    self.assertEqual(result, expected)

    # SI prefixes (k, M, ...) replace explicit exponents
    fmt.set_eng_float_format(use_eng_prefix=True)
    result = df.to_string()
    expected = " A\n" "0 1.410\n" "1 141.000\n" "2 14.100k\n" "3 1.410M"
    self.assertEqual(result, expected)

    # accuracy=0 drops every digit after the decimal point
    fmt.set_eng_float_format(accuracy=0)
    result = df.to_string()
    expected = " A\n" "0 1E+00\n" "1 141E+00\n" "2 14E+03\n" "3 1E+06"
    self.assertEqual(result, expected)

    # restore the global display options so later tests are unaffected
    fmt.reset_printoptions()
def __repr__(self):
    """Summarise the 20 largest clusters (by non-redundant size) as a table."""
    table = DataFrame(self.cluster_info,
                      columns=['Cluster_no', 'Cluster_size', 'Description', 'Non_red_size'])
    # reorder the columns for display, then keep the top 20 by non-redundant size
    table = table[['Cluster_no', 'Cluster_size', 'Non_red_size', 'Description']]
    table = table.sort_values('Non_red_size', ascending=False)[:20]
    out = "Id value for clustering: {}\n\n".format(self.id_val)
    out += table.to_string(index=False)
    # say how many clusters were truncated from the display
    if len(self.cluster_info) > 20:
        out += "\n... {} more entries...".format(len(self.cluster_info)-20)
    return out
def test_to_string_int_formatting(self):
    """Integer column renders right-aligned with no decimal point."""
    frame = DataFrame({'x' : [-15, 20, 25, -35]})
    self.assert_(issubclass(frame['x'].dtype.type, np.integer))
    expected = (' x\n'
                '0 -15\n'
                '1 20\n'
                '2 25\n'
                '3 -35')
    self.assertEqual(frame.to_string(), expected)
def test_to_string_index_formatter(self):
    # The reserved '__index__' formatter key reformats the row labels
    # (here integer positions 0/1/2 become letters a/b/c).
    df = DataFrame([range(5), range(5, 10), range(10, 15)])

    rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]})

    xp = """\
 0 1 2 3 4
a 0 1 2 3 4
b 5 6 7 8 9
c 10 11 12 13 14\
"""
    self.assertEqual(rs, xp)
def test_repr_tuples():
    """An object column of tuples must survive repr() and to_string()."""
    sink = StringIO()
    frame = DataFrame({"tups": list(zip(range(10), range(10)))})
    repr(frame)
    frame.to_string(col_space=10, buf=sink)
def test_to_string_unicode_two():
    """An empty frame with a unicode column name renders without error."""
    frame = DataFrame({"c/\u03c3": []})
    sink = StringIO()
    frame.to_string(sink)
def test_to_string_unicode_three():
    """A lone high-bit character renders without error."""
    frame = DataFrame(["\xc2"])
    sink = StringIO()
    frame.to_string(sink)
def test_unicode_problem_decoding_as_ascii(self):
    """Converting to_string() output to unicode must not hit ASCII decode errors."""
    frame = DataFrame({u'c/\u03c3': Series({'test': np.NaN})})
    # Python 2: would raise UnicodeDecodeError if implicit ASCII decoding kicked in
    unicode(frame.to_string())
def test_truncation_no_index(max_cols, max_rows, expected): df = DataFrame([[0] * 11] * 4) assert df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected
def test_to_string_unicode_two(self): dm = DataFrame({u'c/\u03c3': []}) buf = StringIO() dm.to_string(buf)
class ToString: def setup(self): self.df = DataFrame(np.random.randn(100, 10)) def time_to_string_floats(self): self.df.to_string()
# # formatters['Done'] = "%s" # formatters['settings_name'] = "%s" # formatters['te_AreaUnderROC'] = "%s" # formatters['te_F1'] = "%s" # formatters['test_programs'] = "%s" # formatters['tr_AreaUnderROC'] = "%s" # # formatters['tr_F1'] = "%s" # formatters['training_programs'] = "%s" # # formatters = {key: lambda x: val % x for key, val in formatters.items()} # # print(table.to_string(formatters=formatters)) # Print table print(table.to_string()) print("\n\nLatex:\n") scores = ["AreaUnderROC", "F1", "TPR"] rows = sorted(list(table.iterrows())) for name, row in rows: row_str = name for val in scores: row_str += " & \\perfsplit{{{:.2f}}}{{{:.2f}}}"\ .format(row.get("tr_" + val), row.get("te_" + val)) print(row_str + "\\\\")
def saveHistory_txt(self, path) -> None:
    """Save the history attribute as a text table.

    The '.txt' extension may be omitted from *path*; it is appended
    when missing.
    """
    # BUG FIX: the old check compared the last THREE characters of the path
    # to the four-character string '.txt', which can never match, so every
    # call appended another '.txt' (e.g. 'log.txt' -> 'log.txt.txt').
    if not path.endswith('.txt'):
        path += '.txt'
    df = DataFrame(self.history)
    with open(path, 'w') as txt:
        txt.write(df.to_string(index=False))
def test_repr_tuples(self):
    """A tuple-valued column must survive repr() and to_string()."""
    sink = StringIO()
    # Python 2: zip() returns a list, so this is a list-of-tuples column
    frame = DataFrame({'tups': zip(range(10), range(10))})
    repr(frame)
    frame.to_string(col_space=10, buf=sink)
initial_value.append(i) for i in [ "Infectious_initial(I0)", "number of students(N)", "beta", "sigma", "gamma" ]: for j in range(0, 4): parameters_name.append(i) for i in range(0, 20): amount_before.append(456) for i in range(0, 20): difference_rate.append(results_change[i] / 456) pd.set_option('display.max_rows', None) pd.set_option('display.max_columns', None) data = { "parameters": parameters_name, "initial value": initial_value, "change rate": rate_change, "value after change": value_afterchange, "the number of affected students after change": results, "the number of affected students before change": amount_before, "difference value": results_change, "difference rate": difference_rate } df = DataFrame(data) print(df.to_string(justify='center', index=False)) writer = pd.ExcelWriter("Sensitivity_Analysis(R0=3.2).xlsx") df.to_excel(writer, sheet_name="Sensitivity Analysis") writer.save()
def _save_embedding(file_name: str, emb: pd.DataFrame):
    """Write *emb* in word2vec-style text format.

    The first line is '<rows> <cols>'; each following line is an index
    label followed by its vector components, with runs of whitespace
    collapsed to single spaces.
    """
    header = " ".join(map(str, emb.shape)) + "\n"
    # BUG FIX: the pattern must be a raw string -- "\s" in a plain literal is
    # an invalid escape sequence (DeprecationWarning today, a SyntaxError in
    # future CPython versions).
    emb_string = header + re.sub(r"\s\s+", " ", emb.to_string(header=False, index=True))
    with open(file_name, "w+") as file:
        file.write(emb_string)
def pretty2d(arr):
    """Print a 2-D array-like as an aligned table, without index or header."""
    table = DataFrame(arr)
    print(table.to_string(index=False, header=False))
def print_rich_table(
    df: pd.DataFrame,
    show_index: bool = False,
    title: str = "",
    index_name: str = "",
    headers: Union[List[str], pd.Index] = None,
    floatfmt: Union[str, List[str]] = ".2f",
):
    """Prepare a table from df in rich

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe to turn into table
    show_index: bool
        Whether to include index
    title: str
        Title for table
    index_name : str
        Title for index column
    headers: List[str]
        Titles for columns
    floatfmt: str
        Format string for float cells, e.g. '.2f'; a list gives one
        format per column
    """
    if gtff.USE_TABULATE_DF:
        table = Table(title=title, show_lines=True)

        if show_index:
            table.add_column(index_name)

        if headers is not None:
            if isinstance(headers, pd.Index):
                headers = list(headers)
            # header count must line up with the data columns
            if len(headers) != len(df.columns):
                log_and_raise(
                    ValueError("Length of headers does not match length of DataFrame")
                )
            for header in headers:
                table.add_column(str(header))
        else:
            for column in df.columns:
                table.add_column(str(column))

        if isinstance(floatfmt, list):
            if len(floatfmt) != len(df.columns):
                log_and_raise(
                    ValueError(
                        "Length of floatfmt list does not match length of DataFrame columns."
                    )
                )
        # a single format string is broadcast to every column
        if isinstance(floatfmt, str):
            floatfmt = [floatfmt for _ in range(len(df.columns))]

        for idx, values in zip(df.index.tolist(), df.values.tolist()):
            row = [str(idx)] if show_index else []
            # floats go through the per-column format; everything else via str()
            # NOTE(review): 'idx' in the comprehension shadows the row label
            # from the outer loop -- it works (it is the column position here)
            # but is confusing; confirm before renaming.
            row += [
                str(x) if not isinstance(x, float) else f"{x:{floatfmt[idx]}}"
                for idx, x in enumerate(values)
            ]
            table.add_row(*row)

        console.print(table)
    else:
        # plain-text fallback when rich tables are disabled
        console.print(df.to_string(col_space=0))
def test_to_string(self):
    from pandas import read_table
    import re

    # big mixed: a float column and a string column, with NaNs up front
    biggie = DataFrame({'A': randn(200),
                        'B': tm.makeStringIndex(200)},
                       index=range(200))
    biggie['A'][:20] = nan
    biggie['B'][:20] = nan

    s = biggie.to_string()

    # buf=... must write exactly the same text and return None
    buf = StringIO()
    retval = biggie.to_string(buf=buf)
    self.assert_(retval is None)
    self.assertEqual(buf.getvalue(), s)

    self.assert_(isinstance(s, basestring))

    # print in right order
    result = biggie.to_string(columns=['B', 'A'], col_space=17,
                              float_format='%.5f'.__mod__)
    lines = result.split('\n')
    header = lines[0].strip().split()
    # normalise whitespace so read_table can round-trip the rendered text
    joined = '\n'.join([re.sub('\s+', ' ', x).strip() for x in lines[1:]])
    recons = read_table(StringIO(joined), names=header, sep=' ')
    tm.assert_series_equal(recons['B'], biggie['B'])
    self.assertEqual(recons['A'].count(), biggie['A'].count())
    self.assert_((np.abs(recons['A'].dropna() - biggie['A'].dropna()) < 0.1).all())

    # expected = ['B', 'A']
    # self.assertEqual(header, expected)

    # column subsetting keeps only the requested header
    result = biggie.to_string(columns=['A'], col_space=17)
    header = result.split('\n')[0].strip().split()
    expected = ['A']
    self.assertEqual(header, expected)

    # smoke tests: formatters / float_format / col_space combinations
    biggie.to_string(columns=['B', 'A'],
                     formatters={'A': lambda x: '%.1f' % x})
    biggie.to_string(columns=['B', 'A'], float_format=str)
    biggie.to_string(columns=['B', 'A'], col_space=12, float_format=str)

    # a frame with an index but no columns still renders
    frame = DataFrame(index=np.arange(200))
    frame.to_string()
def test_to_string_left_justify_cols(self):
    """justify='left' pads headers/values on the right instead of the left."""
    fmt.reset_printoptions()
    frame = DataFrame({'x': [3234, 0.253]})
    rendered = frame.to_string(justify='left')
    expected = (' x \n'
                '0 3234.000\n'
                '1 0.253')
    assert (rendered == expected)
def test_to_string_unicode_three(self): dm = DataFrame(['\xc2']) buf = StringIO() dm.to_string(buf)
def test_dict_entries(self): df = DataFrame({'A': [{'a':1, 'b':2}]}) val = df.to_string() self.assertTrue("{'a': 1, 'b': 2}" in val)
def print_report_cpfc(aim: Cpfc, real: Cpfc):
    """Print the aim-vs-real CPFC report as an index-less table."""
    rows = report_cpfc(aim, real)
    # first row carries the column names, the rest the data
    table = DataFrame(columns=rows[0], data=rows[1:])
    print(table.to_string(index=False))
def test_to_string_with_formatters_unicode(self): df = DataFrame({u'c/\u03c3': [1, 2, 3]}) result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x}) self.assertEqual(result, (u' c/\u03c3\n' '0 1\n' '1 2\n' '2 3'))
def test_truncation_col_placement_no_index(max_cols, expected): df = DataFrame([[0] * 11] * 2) assert df.to_string(index=False, max_cols=max_cols).split("\n") == expected
def test_frame_index_to_string(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") frame = DataFrame(np.random.randn(3, 4), index=index) # it works! frame.to_string()
def writeDataFrameTextFile(self, filenameSuffix, df: pd.DataFrame):
    """Write df's to_string() rendering to a .df.txt file and return its path."""
    target = self.path(filenameSuffix, extensionToAdd="df.txt", validOtherExtensions="txt")
    self._log.info(f"Saving data frame text file {target}")
    with open(target, "w") as f:
        f.write(df.to_string())
    return target
def test_to_string_with_formatters_unicode(): df = DataFrame({"c/\u03c3": [1, 2, 3]}) result = df.to_string(formatters={"c/\u03c3": str}) assert result == " c/\u03c3\n" + "0 1\n1 2\n2 3"
def main():
    # Run the vbench suite at a baseline and a target commit, store timings in
    # the benchmark DB, and write a ratio report (target/baseline) to a log.
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    # default to the hard-coded baseline when none is given
    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT
    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(
            os.path.join(REPO_PATH, 'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    try:
        logfile = open(args.log_file, 'w')

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint ("Slaughtering kittens..." )
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

        # target defaults to the most recent commit
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                repo.messages.get(h_baseline, "")))

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')
        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        # per-benchmark slowdown factor: >1 means the target is slower
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        # ignore below threshold
        totals = totals.ix[totals.t_head > args.min_duration]
        # sort in ascending order
        totals = totals.dropna().sort("ratio").set_index('name')

        s = "\n\nResults:\n"
        s += totals.to_string(
            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means the target commit is 30% slower then the baseline.\n\n"
        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % args.log_file)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        # NOTE(review): if open(args.log_file) itself failed, 'logfile' is
        # unbound here and this raises NameError; it is also a double close
        # on the success path. Confirm before relying on this cleanup.
        logfile.close()
def test_frame_index_to_string(self): index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') frame = DataFrame(np.random.randn(3, 4), index=index) # it works! frame.to_string()
# build the dataframe dict_cohort = {} icol = 0 for i in colnames: dict_cohort[i] = list(occ_matrix[1:, icol]) icol = icol + 1 cohort = DataFrame(dict_cohort, columns=colnames, index=rownames) # we round the entries cohort = cohort.round(2) # and include the number of occurrences in each row as an integer cohort[colnames[0]] = pd.to_numeric(cohort[colnames[0]], downcast='integer') # we see the result print(cohort.to_string()) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # export to latex with open('myCohort.tex', 'w') as op: op.write(cohort.to_latex(bold_rows=True, index_names=True)) # check sum of numbers is at most as large as the number of ID's check = cohort.loc[:, 'number'] check = check.sum() print('\nThis should be <=', number_of_IDs, '\n') print(check) # check sum of percentages check = cohort check.drop('number', axis=1, inplace=True)
def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() lines = rep_str.split('\n') self.assert_(len(lines[1]) == len(lines[2]))
class TestMultiLevel(unittest.TestCase):
    """Tests for Series/DataFrame behavior under a hierarchical (MultiIndex)
    axis: indexing (.ix/xs/getitem), stacking/unstacking, level-wise
    aggregation and groupby, sorting, and name preservation.

    Fixtures built in setUp:
      - self.frame: 10x3 frame over a ('first', 'second') MultiIndex
      - self.single_level: one-level MultiIndex
      - self.series: 8-element series over a 2-level MultiIndex (one NaN)
      - self.ymd: time frame summed by (year, month, day) 3-level index
    """

    def setUp(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        self.frame = DataFrame(np.random.randn(10, 3), index=index,
                               columns=Index(['A', 'B', 'C'], name='exp'))

        self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
                                       labels=[[0, 1, 2, 3]],
                                       names=['first'])

        # create test series object
        arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        tuples = zip(*arrays)
        index = MultiIndex.from_tuples(tuples)
        s = Series(randn(8), index=index)
        s[3] = np.NaN
        self.series = s

        tm.N = 100
        self.tdf = tm.makeTimeDataFrame()
        self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month,
                                     lambda x: x.day]).sum()

        # use Int64Index, to make sure things work
        self.ymd.index.levels = [lev.astype('i8')
                                 for lev in self.ymd.index.levels]
        self.ymd.index.names = ['year', 'month', 'day']

    def test_append(self):
        a, b = self.frame[:5], self.frame[5:]

        result = a.append(b)
        tm.assert_frame_equal(result, self.frame)

        result = a['A'].append(b['A'])
        tm.assert_series_equal(result, self.frame['A'])

    def test_reindex_level(self):
        # axis=0
        month_sums = self.ymd.sum(level='month')
        result = month_sums.reindex(self.ymd.index, level=1)
        expected = self.ymd.groupby(level='month').transform(np.sum)
        assert_frame_equal(result, expected)

        # Series
        result = month_sums['A'].reindex(self.ymd.index, level=1)
        expected = self.ymd['A'].groupby(level='month').transform(np.sum)
        assert_series_equal(result, expected)

        # axis=1
        month_sums = self.ymd.T.sum(axis=1, level='month')
        result = month_sums.reindex(columns=self.ymd.index, level=1)
        expected = self.ymd.groupby(level='month').transform(np.sum).T
        assert_frame_equal(result, expected)

    def test_binops_level(self):
        # a level-aware binop must equal the explicit broadcast-then-op form
        def _check_op(opname):
            op = getattr(DataFrame, opname)
            month_sums = self.ymd.sum(level='month')
            result = op(self.ymd, month_sums, level='month')
            broadcasted = self.ymd.groupby(level='month').transform(np.sum)
            expected = op(self.ymd, broadcasted)
            assert_frame_equal(result, expected)

            # Series
            op = getattr(Series, opname)
            result = op(self.ymd['A'], month_sums['A'], level='month')
            broadcasted = self.ymd['A'].groupby(level='month').transform(
                np.sum)
            expected = op(self.ymd['A'], broadcasted)
            assert_series_equal(result, expected)

        _check_op('sub')
        _check_op('add')
        _check_op('mul')
        _check_op('div')

    def test_pickle(self):
        import cPickle

        def _test_roundtrip(frame):
            pickled = cPickle.dumps(frame)
            unpickled = cPickle.loads(pickled)
            assert_frame_equal(frame, unpickled)

        _test_roundtrip(self.frame)
        _test_roundtrip(self.frame.T)
        _test_roundtrip(self.ymd)
        _test_roundtrip(self.ymd.T)

    def test_reindex(self):
        reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]]
        expected = self.frame.ix[[0, 3]]
        assert_frame_equal(reindexed, expected)

    def test_reindex_preserve_levels(self):
        new_index = self.ymd.index[::10]
        chunk = self.ymd.reindex(new_index)
        self.assert_(chunk.index is new_index)

        chunk = self.ymd.ix[new_index]
        self.assert_(chunk.index is new_index)

        ymdT = self.ymd.T
        chunk = ymdT.reindex(columns=new_index)
        self.assert_(chunk.columns is new_index)

        chunk = ymdT.ix[:, new_index]
        self.assert_(chunk.columns is new_index)

    def test_sort_index_preserve_levels(self):
        result = self.frame.sort_index()
        self.assertEquals(result.index.names, self.frame.index.names)

    def test_repr_to_string(self):
        # smoke tests: repr/to_string must not raise for frames and transposes
        repr(self.frame)
        repr(self.ymd)
        repr(self.frame.T)
        repr(self.ymd.T)

        buf = StringIO()
        self.frame.to_string(buf=buf)
        self.ymd.to_string(buf=buf)
        self.frame.T.to_string(buf=buf)
        self.ymd.T.to_string(buf=buf)

    def test_getitem_simple(self):
        df = self.frame.T

        col = df['foo', 'one']
        assert_almost_equal(col.values, df.values[:, 0])
        self.assertRaises(KeyError, df.__getitem__, ('foo', 'four'))
        self.assertRaises(KeyError, df.__getitem__, 'foobar')

    def test_series_getitem(self):
        s = self.ymd['A']

        result = s[2000, 3]
        result2 = s.ix[2000, 3]
        expected = s.reindex(s.index[42:65])
        expected.index = expected.index.droplevel(0).droplevel(0)
        assert_series_equal(result, expected)

        result = s[2000, 3, 10]
        expected = s[49]
        self.assertEquals(result, expected)

        # fancy
        result = s.ix[[(2000, 3, 10), (2000, 3, 13)]]
        expected = s.reindex(s.index[49:51])
        assert_series_equal(result, expected)

        # key error
        self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4))

    def test_series_getitem_corner(self):
        s = self.ymd['A']

        # don't segfault, GH #495
        # out of bounds access
        self.assertRaises(IndexError, s.__getitem__, len(self.ymd))

        # generator
        result = s[(x > 0 for x in s)]
        expected = s[s > 0]
        assert_series_equal(result, expected)

    def test_series_setitem(self):
        s = self.ymd['A']

        s[2000, 3] = np.nan
        self.assert_(isnull(s.values[42:65]).all())
        self.assert_(notnull(s.values[:42]).all())
        self.assert_(notnull(s.values[65:]).all())

        s[2000, 3, 10] = np.nan
        self.assert_(isnull(s[49]))

    def test_series_slice_partial(self):
        pass

    def test_frame_getitem_setitem_slice(self):
        # getitem
        result = self.frame.ix[:4]
        expected = self.frame[:4]
        assert_frame_equal(result, expected)

        # setitem
        cp = self.frame.copy()
        cp.ix[:4] = 0

        self.assert_((cp.values[:4] == 0).all())
        self.assert_((cp.values[4:] != 0).all())

    def test_frame_getitem_setitem_multislice(self):
        levels = [['t1', 't2'], ['a', 'b', 'c']]
        labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
        midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id'])
        df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)

        result = df.ix[:, 'value']
        assert_series_equal(df['value'], result)

        result = df.ix[1:3, 'value']
        assert_series_equal(df['value'][1:3], result)

        result = df.ix[:, :]
        assert_frame_equal(df, result)

        result = df
        df.ix[:, 'value'] = 10
        result['value'] = 10
        assert_frame_equal(df, result)

        df.ix[:, :] = 10
        assert_frame_equal(df, result)

    def test_getitem_tuple_plus_slice(self):
        # GH #671
        df = DataFrame({'a': range(10),
                        'b': range(10),
                        'c': np.random.randn(10),
                        'd': np.random.randn(10)})

        idf = df.set_index(['a', 'b'])

        result = idf.ix[(0, 0), :]
        expected = idf.ix[0, 0]
        expected2 = idf.xs((0, 0))

        assert_series_equal(result, expected)
        assert_series_equal(result, expected2)

    def test_xs(self):
        xs = self.frame.xs(('bar', 'two'))
        xs2 = self.frame.ix[('bar', 'two')]

        assert_series_equal(xs, xs2)
        assert_almost_equal(xs.values, self.frame.values[4])

    def test_xs_partial(self):
        result = self.frame.xs('foo')
        result2 = self.frame.ix['foo']
        expected = self.frame.T['foo'].T
        assert_frame_equal(result, expected)
        assert_frame_equal(result, result2)

    def test_xs_level(self):
        result = self.frame.xs('two', level='second')
        expected = self.frame[self.frame.index.get_level_values(1) == 'two']
        expected.index = expected.index.droplevel(1)

        assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'),
                                        ('p', 'q', 'r')])
        df = DataFrame(np.random.randn(3, 5), index=index)
        result = df.xs('c', level=2)
        expected = df[1:2]
        expected.index = expected.index.droplevel(2)
        assert_frame_equal(result, expected)

    def test_xs_level_multiple(self):
        from pandas import read_table
        from StringIO import StringIO
        # NOTE(review): the original line breaks inside this literal were lost
        # in reformatting; reconstructed as one row per record — verify against
        # upstream history.
        text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

        df = read_table(StringIO(text), sep='\s+')

        result = df.xs(('a', 4), level=['one', 'four'])
        expected = df.xs('a').xs(4, level='four')
        assert_frame_equal(result, expected)

    def test_xs_level0(self):
        from pandas import read_table
        from StringIO import StringIO
        # NOTE(review): same reconstructed literal as test_xs_level_multiple.
        text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

        df = read_table(StringIO(text), sep='\s+')

        result = df.xs('a', level=0)
        expected = df.xs('a')
        self.assertEqual(len(result), 2)
        assert_frame_equal(result, expected)

    def test_xs_level_series(self):
        s = self.frame['A']
        result = s[:, 'two']
        expected = self.frame.xs('two', level=1)['A']
        assert_series_equal(result, expected)

        s = self.ymd['A']
        result = s[2000, 5]
        expected = self.ymd.ix[2000, 5]['A']
        assert_series_equal(result, expected)

        # not implementing this for now
        self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4)))

        # result = s[2000, 3:4]
        # lv =s.index.get_level_values(1)
        # expected = s[(lv == 3) | (lv == 4)]
        # expected.index = expected.index.droplevel(0)
        # assert_series_equal(result, expected)

        # can do this though

    def test_get_loc_single_level(self):
        s = Series(np.random.randn(len(self.single_level)),
                   index=self.single_level)
        # every label must be retrievable without raising
        for k in self.single_level.values:
            s[k]

    def test_getitem_toplevel(self):
        df = self.frame.T

        result = df['foo']
        expected = df.reindex(columns=df.columns[:3])
        expected.columns = expected.columns.droplevel(0)
        assert_frame_equal(result, expected)

        result = df['bar']
        result2 = df.ix[:, 'bar']
        expected = df.reindex(columns=df.columns[3:5])
        expected.columns = expected.columns.droplevel(0)
        assert_frame_equal(result, expected)
        assert_frame_equal(result, result2)

    def test_getitem_setitem_slice_integers(self):
        index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
                           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        frame = DataFrame(np.random.randn(len(index), 4), index=index,
                          columns=['a', 'b', 'c', 'd'])
        res = frame.ix[1:2]
        exp = frame.reindex(frame.index[2:])
        assert_frame_equal(res, exp)

        frame.ix[1:2] = 7
        self.assert_((frame.ix[1:2] == 7).values.all())

        series = Series(np.random.randn(len(index)), index=index)
        res = series.ix[1:2]
        exp = series.reindex(series.index[2:])
        assert_series_equal(res, exp)

        series.ix[1:2] = 7
        self.assert_((series.ix[1:2] == 7).values.all())

    def test_getitem_int(self):
        levels = [[0, 1], [0, 1, 2]]
        labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        index = MultiIndex(levels=levels, labels=labels)

        frame = DataFrame(np.random.randn(6, 2), index=index)

        result = frame.ix[1]
        expected = frame[-3:]
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(result, expected)

        # raises exception
        self.assertRaises(KeyError, frame.ix.__getitem__, 3)

        # however this will work
        result = self.frame.ix[2]
        expected = self.frame.xs(self.frame.index[2])
        assert_series_equal(result, expected)

    def test_getitem_partial(self):
        ymd = self.ymd.T
        result = ymd[2000, 2]

        expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1])
        expected.columns = expected.columns.droplevel(0).droplevel(0)
        assert_frame_equal(result, expected)

    def test_getitem_slice_not_sorted(self):
        df = self.frame.sortlevel(1).T

        # buglet with int typechecking
        result = df.ix[:, :np.int32(3)]
        expected = df.reindex(columns=df.columns[:3])
        assert_frame_equal(result, expected)

    def test_setitem_change_dtype(self):
        dft = self.frame.T
        s = dft['foo', 'two']
        dft['foo', 'two'] = s > s.median()
        assert_series_equal(dft['foo', 'two'], s > s.median())
        self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex))

        reindexed = dft.reindex(columns=[('foo', 'two')])
        assert_series_equal(reindexed['foo', 'two'], s > s.median())

    def test_frame_setitem_ix(self):
        self.frame.ix[('bar', 'two'), 'B'] = 5
        self.assertEquals(self.frame.ix[('bar', 'two'), 'B'], 5)

        # with integer labels
        df = self.frame.copy()
        df.columns = range(3)
        df.ix[('bar', 'two'), 1] = 7
        self.assertEquals(df.ix[('bar', 'two'), 1], 7)

    def test_fancy_slice_partial(self):
        result = self.frame.ix['bar':'baz']
        expected = self.frame[3:7]
        assert_frame_equal(result, expected)

        result = self.ymd.ix[(2000, 2):(2000, 4)]
        lev = self.ymd.index.labels[1]
        expected = self.ymd[(lev >= 1) & (lev <= 3)]
        assert_frame_equal(result, expected)

    def test_sortlevel(self):
        # sortlevel must fail on a non-hierarchical index
        df = self.frame.copy()
        df.index = np.arange(len(df))
        self.assertRaises(Exception, df.sortlevel, 0)

        # axis=1

        # series
        a_sorted = self.frame['A'].sortlevel(0)
        self.assertRaises(Exception,
                          self.frame.reset_index()['A'].sortlevel)

        # preserve names
        self.assertEquals(a_sorted.index.names, self.frame.index.names)

    def test_delevel_infer_dtype(self):
        # NOTE: the loop variable shadows the builtin `tuple`
        tuples = [tuple for tuple in cart_product(['foo', 'bar'],
                                                  [10, 20], [1.0, 1.1])]
        index = MultiIndex.from_tuples(tuples,
                                       names=['prm0', 'prm1', 'prm2'])
        df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                       index=index)
        deleveled = df.reset_index()
        self.assert_(com.is_integer_dtype(deleveled['prm1']))
        self.assert_(com.is_float_dtype(deleveled['prm2']))

    def test_reset_index_with_drop(self):
        deleveled = self.ymd.reset_index(drop=True)
        self.assertEquals(len(deleveled.columns), len(self.ymd.columns))

        deleveled = self.series.reset_index()
        self.assert_(isinstance(deleveled, DataFrame))
        self.assert_(len(deleveled.columns) ==
                     len(self.series.index.levels) + 1)

        deleveled = self.series.reset_index(drop=True)
        self.assert_(isinstance(deleveled, Series))

    def test_sortlevel_by_name(self):
        self.frame.index.names = ['first', 'second']
        result = self.frame.sortlevel(level='second')
        expected = self.frame.sortlevel(level=1)
        assert_frame_equal(result, expected)

    def test_sortlevel_mixed(self):
        sorted_before = self.frame.sortlevel(1)

        df = self.frame.copy()
        df['foo'] = 'bar'
        sorted_after = df.sortlevel(1)
        assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1))

        dft = self.frame.T
        sorted_before = dft.sortlevel(1, axis=1)
        dft['foo', 'three'] = 'bar'
        sorted_after = dft.sortlevel(1, axis=1)
        assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1),
                           sorted_after.drop([('foo', 'three')], axis=1))

    def test_count_level(self):
        # level-wise count must match groupby-count at every level
        def _check_counts(frame, axis=0):
            index = frame._get_axis(axis)
            for i in range(index.nlevels):
                result = frame.count(axis=axis, level=i)
                expected = frame.groupby(axis=axis, level=i).count(axis=axis)
                expected = expected.reindex_like(result).astype('i8')
                assert_frame_equal(result, expected)

        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan
        self.ymd.ix[1, [1, 2]] = np.nan
        self.ymd.ix[7, [0, 1]] = np.nan

        _check_counts(self.frame)
        _check_counts(self.ymd)
        _check_counts(self.frame.T, axis=1)
        _check_counts(self.ymd.T, axis=1)

        # can't call with level on regular DataFrame
        df = tm.makeTimeDataFrame()
        self.assertRaises(Exception, df.count, level=0)

        self.frame['D'] = 'foo'
        result = self.frame.count(level=0, numeric_only=True)
        assert_almost_equal(result.columns, ['A', 'B', 'C'])

    def test_count_level_series(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz'],
                                   ['one', 'two', 'three', 'four']],
                           labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]])

        s = Series(np.random.randn(len(index)), index=index)

        result = s.count(level=0)
        expected = s.groupby(level=0).count()
        assert_series_equal(result.astype('f8'),
                            expected.reindex(result.index).fillna(0))

        result = s.count(level=1)
        expected = s.groupby(level=1).count()
        assert_series_equal(result.astype('f8'),
                            expected.reindex(result.index).fillna(0))

    def test_count_level_corner(self):
        # counting an empty slice must still return a zero-filled result
        s = self.frame['A'][:0]
        result = s.count(level=0)
        expected = Series(0, index=s.index.levels[0])
        assert_series_equal(result, expected)

        df = self.frame[:0]
        result = df.count(level=0)
        expected = DataFrame({}, index=s.index.levels[0],
                             columns=df.columns).fillna(0).astype(int)
        assert_frame_equal(result, expected)

    def test_unstack(self):
        # just check that it works for now
        unstacked = self.ymd.unstack()
        unstacked2 = unstacked.unstack()

        # test that ints work
        unstacked = self.ymd.astype(int).unstack()

    def test_stack(self):
        # regular roundtrip
        unstacked = self.ymd.unstack()
        restacked = unstacked.stack()
        assert_frame_equal(restacked, self.ymd)

        unlexsorted = self.ymd.sortlevel(2)

        unstacked = unlexsorted.unstack(2)
        restacked = unstacked.stack()
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        unlexsorted = unlexsorted[::-1]
        unstacked = unlexsorted.unstack(1)
        restacked = unstacked.stack().swaplevel(1, 2)
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        unlexsorted = unlexsorted.swaplevel(0, 1)
        unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1)
        restacked = unstacked.stack(0).swaplevel(1, 2)
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        # columns unsorted
        unstacked = self.ymd.unstack()
        unstacked = unstacked.sort(axis=1, ascending=False)
        restacked = unstacked.stack()
        assert_frame_equal(restacked, self.ymd)

        # more than 2 levels in the columns
        unstacked = self.ymd.unstack(1).unstack(1)

        result = unstacked.stack(1)
        expected = self.ymd.unstack()
        assert_frame_equal(result, expected)

        result = unstacked.stack(2)
        expected = self.ymd.unstack(1)
        assert_frame_equal(result, expected)

        result = unstacked.stack(0)
        expected = self.ymd.stack().unstack(1).unstack(1)
        assert_frame_equal(result, expected)

        # not all levels present in each echelon
        unstacked = self.ymd.unstack(2).ix[:, ::3]
        stacked = unstacked.stack().stack()
        ymd_stacked = self.ymd.stack()
        assert_series_equal(stacked, ymd_stacked.reindex(stacked.index))

        # stack with negative number
        result = self.ymd.unstack(0).stack(-2)
        expected = self.ymd.unstack(0).stack(0)

    def test_stack_mixed_dtype(self):
        df = self.frame.T
        df['foo', 'four'] = 'foo'
        df = df.sortlevel(1, axis=1)

        stacked = df.stack()
        assert_series_equal(stacked['foo'], df['foo'].stack())
        self.assert_(stacked['bar'].dtype == np.float_)

    def test_unstack_bug(self):
        df = DataFrame({'state': ['naive', 'naive', 'naive', 'activ',
                                  'activ', 'activ'],
                        'exp': ['a', 'b', 'b', 'b', 'a', 'a'],
                        'barcode': [1, 2, 3, 4, 1, 3],
                        'v': ['hi', 'hi', 'bye', 'bye', 'bye', 'peace'],
                        'extra': np.arange(6.)})

        result = df.groupby(['state', 'exp', 'barcode', 'v']).apply(len)
        unstacked = result.unstack()
        restacked = unstacked.stack()
        assert_series_equal(restacked,
                            result.reindex(restacked.index).astype(float))

    def test_stack_unstack_preserve_names(self):
        unstacked = self.frame.unstack()
        self.assertEquals(unstacked.index.name, 'first')
        self.assertEquals(unstacked.columns.names, ['exp', 'second'])

        restacked = unstacked.stack()
        self.assertEquals(restacked.index.names, self.frame.index.names)

    def test_unstack_level_name(self):
        result = self.frame.unstack('second')
        expected = self.frame.unstack(level=1)
        assert_frame_equal(result, expected)

    def test_stack_level_name(self):
        unstacked = self.frame.unstack('second')
        result = unstacked.stack('exp')
        expected = self.frame.unstack().stack(0)
        assert_frame_equal(result, expected)

        result = self.frame.stack('exp')
        expected = self.frame.stack()
        assert_series_equal(result, expected)

    def test_stack_unstack_multiple(self):
        unstacked = self.ymd.unstack(['year', 'month'])
        expected = self.ymd.unstack('year').unstack('month')
        assert_frame_equal(unstacked, expected)
        self.assertEquals(unstacked.columns.names, expected.columns.names)

        # series
        s = self.ymd['A']
        s_unstacked = s.unstack(['year', 'month'])
        assert_frame_equal(s_unstacked, expected['A'])

        restacked = unstacked.stack(['year', 'month'])
        restacked = restacked.swaplevel(0, 1).swaplevel(1, 2)
        restacked = restacked.sortlevel(0)

        assert_frame_equal(restacked, self.ymd)
        self.assertEquals(restacked.index.names, self.ymd.index.names)

        # GH #451
        unstacked = self.ymd.unstack([1, 2])
        expected = self.ymd.unstack(1).unstack(1)
        assert_frame_equal(unstacked, expected)

        unstacked = self.ymd.unstack([2, 1])
        expected = self.ymd.unstack(2).unstack(1)
        assert_frame_equal(unstacked, expected)

    def test_groupby_transform(self):
        s = self.frame['A']
        grouper = s.index.get_level_values(0)

        grouped = s.groupby(grouper)

        applied = grouped.apply(lambda x: x * 2)
        expected = grouped.transform(lambda x: x * 2)
        assert_series_equal(applied.reindex(expected.index), expected)

    def test_groupby_corner(self):
        midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']],
                          labels=[[0], [0], [0]],
                          names=['one', 'two', 'three'])
        df = DataFrame([np.random.rand(4)], columns=['a', 'b', 'c', 'd'],
                       index=midx)
        # should work
        df.groupby(level='three')

    def test_join(self):
        a = self.frame.ix[:5, ['A']]
        b = self.frame.ix[2:, ['B', 'C']]

        joined = a.join(b, how='outer').reindex(self.frame.index)
        expected = self.frame.copy()
        expected.values[np.isnan(joined.values)] = np.nan

        self.assert_(not np.isnan(joined.values).all())

        assert_frame_equal(joined, expected)

    def test_swaplevel(self):
        swapped = self.frame['A'].swaplevel(0, 1)
        swapped2 = self.frame['A'].swaplevel('first', 'second')
        self.assert_(not swapped.index.equals(self.frame.index))
        assert_series_equal(swapped, swapped2)

        back = swapped.swaplevel(0, 1)
        back2 = swapped.swaplevel('second', 'first')
        self.assert_(back.index.equals(self.frame.index))
        assert_series_equal(back, back2)

        ft = self.frame.T
        swapped = ft.swaplevel('first', 'second', axis=1)
        exp = self.frame.swaplevel('first', 'second').T
        assert_frame_equal(swapped, exp)

    def test_swaplevel_panel(self):
        panel = Panel({'ItemA': self.frame,
                       'ItemB': self.frame * 2})

        result = panel.swaplevel(0, 1, axis='major')
        expected = panel.copy()
        expected.major_axis = expected.major_axis.swaplevel(0, 1)
        tm.assert_panel_equal(result, expected)

    def test_reorder_levels(self):
        result = self.ymd.reorder_levels(['month', 'day', 'year'])
        expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2)
        assert_frame_equal(result, expected)

        result = self.ymd['A'].reorder_levels(['month', 'day', 'year'])
        expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2)
        assert_series_equal(result, expected)

        result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1)
        expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1)
        assert_frame_equal(result, expected)

        self.assertRaises(Exception,
                          self.ymd.index.reorder_levels, [1, 2, 3])

    def test_insert_index(self):
        df = self.ymd[:5].T
        df[2000, 1, 10] = df[2000, 1, 7]
        self.assert_(isinstance(df.columns, MultiIndex))
        self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all())

    def test_alignment(self):
        x = Series(data=[1, 2, 3],
                   index=MultiIndex.from_tuples([("A", 1), ("A", 2),
                                                 ("B", 3)]))

        y = Series(data=[4, 5, 6],
                   index=MultiIndex.from_tuples([("Z", 1), ("Z", 2),
                                                 ("B", 3)]))

        res = x - y
        exp_index = x.index.union(y.index)
        exp = x.reindex(exp_index) - y.reindex(exp_index)
        assert_series_equal(res, exp)

        # hit non-monotonic code path
        res = x[::-1] - y[::-1]
        exp_index = x.index.union(y.index)
        exp = x.reindex(exp_index) - y.reindex(exp_index)
        assert_series_equal(res, exp)

    def test_is_lexsorted(self):
        levels = [[0, 1], [0, 1, 2]]

        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 0, 1, 1, 1],
                                   [0, 1, 2, 0, 1, 2]])
        self.assert_(index.is_lexsorted())

        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 0, 1, 1, 1],
                                   [0, 1, 2, 0, 2, 1]])
        self.assert_(not index.is_lexsorted())

        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 1, 0, 1, 1],
                                   [0, 1, 0, 2, 2, 1]])
        self.assert_(not index.is_lexsorted())
        self.assert_(index.lexsort_depth == 0)

    def test_frame_getitem_view(self):
        df = self.frame.T
        df['foo'].values[:] = 0
        self.assert_((df['foo'].values == 0).all())

        # but not if it's mixed-type
        df['foo', 'four'] = 'foo'
        df = df.sortlevel(0, axis=1)
        df['foo']['one'] = 2
        self.assert_((df['foo', 'one'] == 0).all())

    def test_frame_getitem_not_sorted(self):
        df = self.frame.T
        df['foo', 'four'] = 'foo'
        arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())]

        result = df['foo']
        result2 = df.ix[:, 'foo']
        expected = df.reindex(columns=df.columns[arrays[0] == 'foo'])
        expected.columns = expected.columns.droplevel(0)
        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)

        df = df.T
        result = df.xs('foo')
        result2 = df.ix['foo']
        expected = df.reindex(df.index[arrays[0] == 'foo'])
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)

    def test_series_getitem_not_sorted(self):
        arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        tuples = zip(*arrays)
        index = MultiIndex.from_tuples(tuples)
        s = Series(randn(8), index=index)

        arrays = [np.array(x) for x in zip(*index.get_tuple_index())]

        result = s['qux']
        result2 = s.ix['qux']

        expected = s[arrays[0] == 'qux']
        expected.index = expected.index.droplevel(0)
        assert_series_equal(result, expected)
        assert_series_equal(result2, expected)

    # aggregation methods exercised by the grouped-vs-level tests below
    AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
                     'mad', 'std', 'var']

    def test_series_group_min_max(self):
        for op, level, skipna in cart_product(self.AGG_FUNCTIONS,
                                              range(2),
                                              [False, True]):
            grouped = self.series.groupby(level=level)
            aggf = lambda x: getattr(x, op)(skipna=skipna)
            # skipna=True
            leftside = grouped.agg(aggf)
            rightside = getattr(self.series, op)(level=level, skipna=skipna)
            assert_series_equal(leftside, rightside)

    def test_frame_group_ops(self):
        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan

        for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
                                                    range(2), range(2),
                                                    [False, True]):
            if axis == 0:
                frame = self.frame
            else:
                frame = self.frame.T

            grouped = frame.groupby(level=level, axis=axis)

            aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis)
            leftside = grouped.agg(aggf)
            rightside = getattr(frame, op)(level=level, axis=axis,
                                           skipna=skipna)

            # for good measure, groupby detail
            level_index = frame._get_axis(axis).levels[level]

            self.assert_(leftside._get_axis(axis).equals(level_index))
            self.assert_(rightside._get_axis(axis).equals(level_index))

            assert_frame_equal(leftside, rightside)

    def test_frame_series_agg_multiple_levels(self):
        result = self.ymd.sum(level=['year', 'month'])
        expected = self.ymd.groupby(level=['year', 'month']).sum()
        assert_frame_equal(result, expected)

        result = self.ymd['A'].sum(level=['year', 'month'])
        expected = self.ymd['A'].groupby(level=['year', 'month']).sum()
        assert_series_equal(result, expected)

    def test_groupby_multilevel(self):
        result = self.ymd.groupby(level=[0, 1]).mean()

        k1 = self.ymd.index.get_level_values(0)
        k2 = self.ymd.index.get_level_values(1)

        expected = self.ymd.groupby([k1, k2]).mean()

        assert_frame_equal(result, expected)
        self.assertEquals(result.index.names, self.ymd.index.names[:2])

        result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean()
        assert_frame_equal(result, result2)

    def test_groupby_multilevel_with_transform(self):
        pass

    def test_multilevel_consolidate(self):
        index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'),
                                        ('bar', 'one'), ('bar', 'two')])
        df = DataFrame(np.random.randn(4, 4), index=index, columns=index)
        df['Totals', ''] = df.sum(1)
        df = df.consolidate()

    def test_ix_preserve_names(self):
        result = self.ymd.ix[2000]
        result2 = self.ymd['A'].ix[2000]
        self.assertEquals(result.index.names, self.ymd.index.names[1:])
        self.assertEquals(result2.index.names, self.ymd.index.names[1:])

        result = self.ymd.ix[2000, 2]
        result2 = self.ymd['A'].ix[2000, 2]
        self.assertEquals(result.index.name, self.ymd.index.names[2])
        self.assertEquals(result2.index.name, self.ymd.index.names[2])

    def test_partial_set(self):
        # GH #397
        df = self.ymd.copy()
        exp = self.ymd.copy()
        df.ix[2000, 4] = 0
        exp.ix[2000, 4].values[:] = 0
        assert_frame_equal(df, exp)

        df['A'].ix[2000, 4] = 1
        exp['A'].ix[2000, 4].values[:] = 1
        assert_frame_equal(df, exp)

        df.ix[2000] = 5
        exp.ix[2000].values[:] = 5
        assert_frame_equal(df, exp)

        # this works...for now
        df['A'].ix[14] = 5
        self.assertEquals(df['A'][14], 5)

    def test_unstack_preserve_types(self):
        # GH #403
        self.ymd['E'] = 'foo'
        self.ymd['F'] = 2

        unstacked = self.ymd.unstack('month')
        self.assert_(unstacked['A', 1].dtype == np.float64)
        self.assert_(unstacked['E', 1].dtype == np.object_)
        self.assert_(unstacked['F', 1].dtype == np.float64)

    def test_getitem_lowerdim_corner(self):
        self.assertRaises(KeyError, self.frame.ix.__getitem__,
                          (('bar', 'three'), 'B'))

        self.assertRaises(KeyError, self.frame.ix.__setitem__,
                          (('bar', 'three'), 'B'), 0)

    #----------------------------------------------------------------------
    # AMBIGUOUS CASES!

    def test_partial_ix_missing(self):
        raise nose.SkipTest

        result = self.ymd.ix[2000, 0]
        expected = self.ymd.ix[2000]['A']
        assert_series_equal(result, expected)

        # need to put in some work here

        # self.ymd.ix[2000, 0] = 0
        # self.assert_((self.ymd.ix[2000]['A'] == 0).all())

        self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6))
        self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0)

    def test_fancy_2d(self):
        raise nose.SkipTest

        result = self.frame.ix['foo', 'B']
        expected = self.frame.xs('foo')['B']
        assert_series_equal(result, expected)

        ft = self.frame.T
        result = ft.ix['B', 'foo']
        expected = ft.xs('B')['foo']
        assert_series_equal(result, expected)

    #----------------------------------------------------------------------

    def test_to_html(self):
        # smoke test: to_html with a named columns index must not raise
        self.ymd.columns.name = 'foo'
        self.ymd.to_html()
        self.ymd.T.to_html()

    def test_level_with_tuples(self):
        index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0),
                                    ('foo', 'qux', 0)],
                                   [0, 1]],
                           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        series = Series(np.random.randn(6), index=index)
        frame = DataFrame(np.random.randn(6, 4), index=index)

        result = series[('foo', 'bar', 0)]
        result2 = series.ix[('foo', 'bar', 0)]
        expected = series[:2]
        expected.index = expected.index.droplevel(0)
        assert_series_equal(result, expected)
        assert_series_equal(result2, expected)

        self.assertRaises(KeyError, series.__getitem__,
                          (('foo', 'bar', 0), 2))

        result = frame.ix[('foo', 'bar', 0)]
        result2 = frame.xs(('foo', 'bar', 0))
        expected = frame[:2]
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)

        index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'),
                                    ('foo', 'qux')],
                                   [0, 1]],
                           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        series = Series(np.random.randn(6), index=index)
        frame = DataFrame(np.random.randn(6, 4), index=index)

        result = series[('foo', 'bar')]
        result2 = series.ix[('foo', 'bar')]
        expected = series[:2]
        expected.index = expected.index.droplevel(0)
        assert_series_equal(result, expected)
        assert_series_equal(result2, expected)

        result = frame.ix[('foo', 'bar')]
        result2 = frame.xs(('foo', 'bar'))
        expected = frame[:2]
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)

    def test_int_series_slicing(self):
        s = self.ymd['A']
        result = s[5:]
        expected = s.reindex(s.index[5:])
        assert_series_equal(result, expected)

        exp = self.ymd['A'].copy()
        s[5:] = 0
        exp.values[5:] = 0
        self.assert_(np.array_equal(s.values, exp.values))

        result = self.ymd[5:]
        expected = self.ymd.reindex(s.index[5:])
        assert_frame_equal(result, expected)

    def test_mixed_depth_get(self):
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        tuples = zip(*arrays)
        tuples.sort()
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df['a']
        expected = df['a', '', '']
        assert_series_equal(result, expected)
        self.assertEquals(result.name, 'a')

        result = df['routine1', 'result1']
        expected = df['routine1', 'result1', '']
        assert_series_equal(result, expected)
        self.assertEquals(result.name, ('routine1', 'result1'))

    def test_mixed_depth_insert(self):
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        tuples = zip(*arrays)
        tuples.sort()
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df.copy()
        expected = df.copy()
        result['b'] = [1, 2, 3, 4]
        expected['b', '', ''] = [1, 2, 3, 4]
        assert_frame_equal(result, expected)

    def test_mixed_depth_drop(self):
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        tuples = zip(*arrays)
        tuples.sort()
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df.drop('a', axis=1)
        expected = df.drop([('a', '', '')], axis=1)
        assert_frame_equal(expected, result)

        result = df.drop(['top'], axis=1)
        expected = df.drop([('top', 'OD', 'wx')], axis=1)
        expected = expected.drop([('top', 'OD', 'wy')], axis=1)
        assert_frame_equal(expected, result)

    def test_mixed_depth_pop(self):
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        tuples = zip(*arrays)
        tuples.sort()
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        df1 = df.copy()
        df2 = df.copy()
        result = df1.pop('a')
        expected = df2.pop(('a', '', ''))
        assert_series_equal(expected, result)
        assert_frame_equal(df1, df2)
        self.assertEquals(result.name, 'a')

        expected = df1['top']
        df1 = df1.drop(['top'], axis=1)
        result = df2.pop('top')
        assert_frame_equal(expected, result)
        assert_frame_equal(df1, df2)
Query_Classificiation = pd.concat( [Query_Classificiation, Query_temp_frame]) else: type = 'PuUbiq' hit_data_1 = list(chr_dict.values())[0][0] Query_class_list = [[ query, type, unique_hit_locations, 'Not assessed' ]] Query_temp_frame = DataFrame(Query_class_list, columns=classification_cols) Query_Classificiation = pd.concat( [Query_Classificiation, Query_temp_frame]) current_pct = ((tracker_count / total) * 100) update = '%dpct done, on record %d of %d.' % (current_pct, tracker_count, total) print(update) print('Writing output files') Location_by_hit.to_string() Query_Classificiation.to_string() Hit_location_out = 'Hit_Locations_' + args.input Location_by_hit.to_csv(Hit_location_out, sep='\t', index=False) Query_Classificiation_out = 'Hit_Classifications_' + args.input Query_Classificiation.to_csv(Query_Classificiation_out, sep='\t', index=False) print('Done!')