def test_summary_fns(dataframe):
    """Exercise the summary helpers and check which axis each one registers on.

    Per the assertions below: the default axis adds one summary row,
    ``axis=1`` adds one summary column, ``axis=None`` adds one of each, and
    chained calls accumulate their summaries.
    """
    def counts(pretty):
        # (number of summary rows, number of summary columns)
        return len(pretty.summary_rows), len(pretty.summary_cols)

    # Smoke check: every summary helper can be called with default arguments.
    for helper in ('total', 'average', 'median', 'max', 'min'):
        getattr(PrettyPandas(dataframe), helper)()

    assert counts(PrettyPandas(dataframe).total()) == (1, 0)
    assert counts(PrettyPandas(dataframe).total(axis=1)) == (0, 1)
    assert counts(PrettyPandas(dataframe).total(axis=None)) == (1, 1)
    assert counts(PrettyPandas(dataframe).min().max()) == (2, 0)
    assert counts(PrettyPandas(dataframe).min().max(axis=1)) == (1, 1)
def test_summary(dataframe):
    """The last row returned by ``_apply_summaries`` must equal the column sums."""
    pretty = PrettyPandas(dataframe).total()
    # Reference values computed straight from the wrapped data.
    expected = list(pretty.data.sum())

    summarised = pretty._apply_summaries()
    last_row = summarised.iloc[-1]

    matches = last_row == expected
    assert matches.all()
def test_data_safety(dataframe):
    """Applying summaries must not modify the caller's frame or ``.data``.

    A deep copy of the input is taken up front and compared against both the
    original frame and the wrapper's ``data`` attribute afterwards.
    """
    snapshot = copy.deepcopy(dataframe)
    df = PrettyPandas(dataframe)
    df.total()._apply_summaries()

    # The builtin all() applied to a DataFrame iterates over its column
    # labels, not its values, so ``all(a == b)`` passes even when the data
    # differs.  DataFrame.equals compares the actual contents.
    assert dataframe.equals(snapshot)
    assert df.data.equals(snapshot)
def test_data_safety(dataframe):
    """Rendering via ``_translate`` must not modify the caller's frame or ``.data``.

    A deep copy of the input is taken up front and compared against both the
    original frame and the wrapper's ``data`` attribute afterwards.
    """
    snapshot = copy.deepcopy(dataframe)
    df = PrettyPandas(dataframe)
    df.total()._translate()

    # The builtin all() applied to a DataFrame iterates over its column
    # labels, not its values, so ``all(a == b)`` passes even when the data
    # differs.  DataFrame.equals compares the actual contents.
    assert dataframe.equals(snapshot)
    assert df.data.equals(snapshot)
def test_summary(dataframe):
    """The translated output's row 10 must hold the column sums of the data."""
    pretty = PrettyPandas(dataframe).total()
    expected = list(pretty.data.sum())

    translated = pretty._translate()
    # NOTE(review): index 10 is presumably the summary row that follows the
    # fixture's data rows -- confirm against the ``dataframe`` fixture.
    data_cells = [cell for cell in translated['body'][10] if cell['type'] == 'td']
    # Order the cells by their 'id' before comparing with the sums.
    data_cells.sort(key=itemgetter('id'))

    assert [cell['value'] for cell in data_cells] == expected
def test_creation(dataframe):
    """Check construction, rejection of ``None``, and the initial attributes.

    A freshly built ``PrettyPandas`` is expected to start with empty
    ``summary_rows``, ``summary_cols`` and ``formatters``; passing
    ``summary_rows`` to the constructor must set only that attribute.
    """
    PrettyPandas(dataframe)

    # Constructing from None must raise TypeError.  The else branch makes the
    # test fail when nothing is raised; a bare ``assert True`` inside the
    # except clause would let that case pass silently.
    try:
        PrettyPandas(None)
    except TypeError:
        pass
    else:
        raise AssertionError("PrettyPandas(None) did not raise TypeError")

    p1 = PrettyPandas(dataframe)
    assert p1.summary_rows == []
    assert p1.summary_cols == []
    assert p1.formatters == []

    # Check the remaining attributes on p2 itself, not on p1 again.
    p2 = PrettyPandas(dataframe, summary_rows=['test'])
    assert p2.summary_rows == ['test']
    assert p2.summary_cols == []
    assert p2.formatters == []
def present(df):
    """Print dataframe *df* to stdout as HTML, styled by PrettyPandas if possible.

    ``df.tz_localize(None)`` is applied first.  If PrettyPandas fails to
    render the result, the plain ``DataFrame.to_html()`` output is printed
    instead.
    """
    from prettypandas import PrettyPandas

    localized = df.tz_localize(None)

    # prettypandas won't work with repeated indices such as during the
    # CST/CDT transition, so fall back to plain HTML when rendering fails.
    try:
        html = PrettyPandas(localized).render()
    except Exception:
        # Deliberately broad best-effort fallback, but no longer a bare
        # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        html = localized.to_html()

    print(html)
def test_mulitindex():
    """Row totals (``axis=1``) must translate correctly for a two-level index."""
    frame = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'D': [4, 3], 'C': [6, 7]})
    indexed = frame.set_index(['A', 'B'])
    output = PrettyPandas(indexed).total(axis=1)._translate()

    # With A and B moved into the index, D + C is 10 on both rows; the total
    # is expected in the last cell of every body row.
    for cells in output['body']:
        assert cells[-1]['value'] == 10

    # A table style targeting the fifth cell must be present.
    # NOTE(review): presumably 2 index cells + 2 data cells + the total -- confirm.
    assert any(
        style['selector'] == 'td:nth-child(5)'
        for style in output['table_styles']
    )
def test_mulitindex():
    """Totals along ``axis=1`` on a two-level-indexed frame must raise ValueError."""
    frame = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'D': [4, 3], 'C': [6, 7]})
    index_columns = ['A', 'B']

    # The whole expression stays inside the context manager so a ValueError
    # from any step of the chain satisfies the expectation; the result is
    # never used, so it is not bound to a name.
    with pytest.raises(ValueError):
        PrettyPandas(
            frame.set_index(index_columns)
        ).total(axis=1)._apply_summaries()
def prettyframe(dataframe):
    """Wrap *dataframe* in a ``PrettyPandas`` object and return it."""
    wrapped = PrettyPandas(dataframe)
    return wrapped
female.head(2) # #### Sex Distribution # In[9]: print("{} males ({:.1%}), {} females ({:.1%})".format(len(male), len(male) / len(d), len(female), len(female) / len(d))) # In[10]: # Ignore columns with "essay" in the name (they are long) PrettyPandas(d # Prettyprints pandas dataframes .head(10) # Sample the first 10 rows [[c for c in d.columns if "essay" not in c] ]) # Ignore columns with "essay" in the name (they are long) # ### Age Distribution # In[11]: print("Age statistics:\n{}".format(d["age"].describe())) print() print("There are {} users older than 80".format((d["age"] > 80).sum())) # ### Find the age outliers # Apparently we have one 110-year-old user, and only another one over-80. They might be outliers, let's have a look at their data. # In[12]:
] aggResult = collection.aggregate(pipeline) female = pd.DataFrame(list(aggResult)) female.head(2) print("{} males ({:.1%}), {} females ({:.1%})".format(len(male), len(male) / len(d), len(female), len(female) / len(d))) # Ignore columns with "essay" in the name (they are long) # Prettyprints pandas dataframes # Sample the first 10 rows PrettyPandas(d.head(10)[[ c for c in d.columns if "essay" not in c ]]) # Ignore columns with "essay" in the name (they are long) d["age"].describe() print("Age statistics:\n{}".format(d["age"].describe())) print() print("There are {} users older than 80".format((d["age"] > 80).sum())) collection.find({"age": {"$gt": 80}}).count() ##Let's assume the 110-year-old lady and the athletic 109-year-old gentleman (who's working on a masters program) are outliers: we get rid of them so the following plots look better. They didn't say much else about themselves, anyway. ##We then remove them collection.delete_many({"age": {"$gt": 80}}) collection.find({"age": {"$gt": 80}}).count() print("The dataset now contains {} records".format(
# $dbh=mysql_connect('tabs1.gerg.tamu.edu','tabsweb','tabs') # engine = create_engine('postgresql://*****:*****@localhost:5432/mydatabase') # query = "SELECT * FROM tabs_D_ven WHERE (date BETWEEN '2016-1-1' - interval 0 day AND '2016-1-1' + interval 0 day) order by obs_time" # query = args.query engine = create_engine( 'mysql+mysqldb://tabsweb:[email protected]/tabsdb') #engine = create_engine('mysql+mysqldb://tabsweb:[email protected]/tabsdb') df = pd.read_sql_query(query, engine, index_col=['obs_time']) # remove extra date/time columns df = df.drop(['date', 'time'], axis=1) df.columns = [ 'East [cm/s]', 'North [cm/s]', 'Dir to [°T]', 'WaterT [°C]', 'Tx', 'Ty' ] df.index.name = 'Dates [UTC]' # df['Speed [cm/s]'] # ALSO ROTATED # # df = pd.read_table('tmp/Dven0kjrpw', delim_whitespace=True, index_col=[0], # df = pd.read_table(args.result, delim_whitespace=True, index_col=[0], # header=0, names=['Date', 'Time', 'East [cm/s]', 'North [cm/s]', 'Speed [cm/s]', # 'Dir to [°T]', 'WaterT [°C]'], # parse_dates={'Dates [UTC]': ['Date', 'Time']}) # html = df.to_html() # import pdb; pdb.set_trace() # print(html) print(PrettyPandas(df).render()) # return table # print(df) print('e')