#!/usr/bin/env python
"""Build a small labelled DataFrame and slice it by column labels and by row labels."""
import pandas as pd
from printheader import print_header

cols = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']  # <1>
indices = ['a', 'b', 'c', 'd', 'e', 'f']  # <2>
values = [  # <3>
    [100, 110, 120, 130, 140],
    [200, 210, 220, 230, 240],
    [300, 310, 320, 330, 340],
    [400, 410, 420, 430, 440],
    [500, 510, 520, 530, 540],
    [600, 610, 620, 630, 640],
]

print_header('cols')
print(cols, '\n')

print_header('indices')
print(indices, '\n')

print_header('values')
print(values, '\n')

df = pd.DataFrame(values, index=indices, columns=cols)  # <4>

print_header('DataFrame df')
print(df, '\n')

# A bare df[start:stop] always slices ROWS, even when given column labels,
# so a column range has to go through .loc with an explicit "all rows" slice.
print_header("df.loc[:, 'alpha':'gamma']")
print(df.loc[:, 'alpha':'gamma'], '\n')  # <5>

# Label slices are inclusive at both ends: rows b, c, d and e.
print_header("df.loc['b':'e']")
print(df.loc['b':'e'])  # <5>
#!/usr/bin/env python
"""Demonstrate set-style operations (intersection, union, difference) on pandas Index objects."""
import pandas as pd
from printheader import print_header

index1 = pd.Index(['a', 'b', 'c'], name='letters')  # <1>
index2 = pd.Index(['b', 'a', 'c'])
index3 = pd.Index(['b', 'c', 'd'])
index4 = pd.Index(['red', 'blue', 'green'], name='colors')

print_header("index1, index2, index3", 70)  # <2>
print(index1)
print(index2)
print(index3)
print()

# Since pandas 2.0 the & and | operators on an Index are element-wise
# logical operations, not set operations, so the explicit methods are
# the only reliable way to intersect or union two indexes.
print_header("index2.intersection(index3)", 70)
print(index2.intersection(index3))  # <3>
print()

print_header("index2.union(index3)", 70)
print(index2.union(index3))  # <4>
print()

print_header("index1.difference(index3)", 70)
print(index1.difference(index3))  # <5>
print()
Created on Sun May 19 20:42:32 2013 ''' from pandas.core.frame import DataFrame from printheader import print_header cols = ['alpha','beta','gamma','delta','epsilon'] index = ['a','b','c','d','e','f'] values = [ [100, 110, 120, 130, 140], [200, 210, 220, 230, 240], [300, 310, 320, 330, 340], [400, 410, 420, 430, 440], [500, 510, 520, 530, 540], [600, 610, 620, 630, 640], ] print_header('values:') print(values, '\n\n') df = DataFrame(values, index=index, columns=cols) print_header('DataFrame df') print(df, '\n') df2 = df.drop(['beta','delta'], axis=1) print_header("After dropping beta and delta:") print(df2, '\n') print_header("After dropping rows b, c, and e") df3 = df.drop(['b','c','e'], axis=0) print(df3)
from pandas.core.frame import DataFrame
from printheader import print_header  # <1>

# NOTE(review): `np` is used below but its import is not visible in this
# excerpt -- presumably `import numpy as np` appears earlier; confirm.

# Two frames that only partly overlap: they share the columns apple/banana
# and the rows orange/purple/blue, but each has one column the other lacks
# and df2 has an extra row.
grid_3x3 = np.arange(9.0).reshape(3, 3)  # <2>
df1 = DataFrame(  # <3>
    grid_3x3,
    index=['orange', 'purple', 'blue'],
    columns=['apple', 'banana', 'mango'],
)

grid_4x3 = np.arange(12.0).reshape(4, 3)  # <2>
df2 = DataFrame(  # <3>
    grid_4x3,
    index=['orange', 'purple', 'blue', 'brown'],
    columns=['apple', 'banana', 'kiwi'],
)

print_header('df1')
print(df1)  # <4>
print()

print_header('df2')
print(df2)  # <4>
print()

# Plain addition aligns on both labels; cells missing from either side are NaN.
print_header('df1 + df2')
print(df1 + df2)  # <5>

# With fill_value=0 a cell missing from only one frame is treated as 0.
print_header('df1.add(df2, fill_value=0)')
print(df1.add(df2, fill_value=0))  # <6>
from pandas.core.frame import DataFrame
from printheader import print_header

# Sample frame: rows a-f, columns alpha-epsilon, and cell values of
# 100 * (row number) + 10 * (column position).
column_labels = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
row_labels = ['a', 'b', 'c', 'd', 'e', 'f']
cell_values = [
    [100 * row + 10 * col for col in range(5)]
    for row in range(1, 7)
]

df = DataFrame(cell_values, index=row_labels, columns=column_labels)

print_header('DataFrame df')
print(df, '\n')

# select a column
print_header("df['alpha']:")
print(df['alpha'], '\n')

# same as above
print_header("df.alpha:")
print(df.alpha, '\n')

# slice rows
print_header("df['b':'e']")
print(df['b':'e'], '\n')

# single row
"""
@author: jstrick
Created on Sat May 18 11:36:57 2013

Load the airport boardings CSV and show column selection, filtered
row selection, and column totals.
"""
import pandas as pd
from printheader import print_header

# data from
# http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/
# national_transportation_statistics/html/table_01_44.html

# thousands=',' tells read_csv to treat commas as digit-group separators.
airports_df = pd.read_csv('../DATA/airport_boardings.csv', thousands=',')

print_header("ENTIRE DATAFRAME")
print(airports_df, "\n")

print_header("ONLY COLUMN 'CODE'")
print(airports_df['Code'], "\n")

print_header("SELECTED COLUMNS WITH FILTERED ROWS")
columns_wanted = ['Code', '2001 Total', 'Airport']
filter_col = '2001 Total'
max_val = 20000000
# Boolean mask: True for rows whose filter_col value exceeds max_val.
selector = airports_df[filter_col] > max_val
# One .loc call picks rows and columns together instead of chaining
# two separate [] lookups.
print(airports_df.loc[selector, columns_wanted])

print_header("COLUMN TOTALS")
print(airports_df[['2001 Total', '2010 Total']].sum(), "\n")
#!/usr/bin/env python
"""Load the airport boardings CSV using its second column as the index, then select and total."""
import pandas as pd
from printheader import print_header

# data from
# http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/
# national_transportation_statistics/html/table_01_44.html

# index_col=1 makes the second CSV column the row index;
# thousands=',' treats commas as digit-group separators.
airports_df = pd.read_csv(
    '../DATA/airport_boardings.csv',
    thousands=',',
    index_col=1,
)  # <1>

print_header("HEAD OF DATAFRAME")
print(airports_df.head(), "\n")

print_header("SELECTED COLUMNS WITH FILTERED ROWS")
columns_wanted = ['2001 Total', 'Airport']
filter_col = '2001 Total'
max_val = 20000000
# Boolean mask: True for rows whose filter_col value exceeds max_val.
selector = airports_df[filter_col] > max_val
# One .loc call picks rows and columns together instead of chaining
# two separate [] lookups.
selected = airports_df.loc[selector, columns_wanted]
print(selected)

print_header("COLUMN TOTALS")
print(airports_df[['2001 Total', '2010 Total']].sum(), "\n")

# print_header("'CODE' COLUMN SET AS INDEX")
# airports_df.set_index('Code')
# print(airports_df)

print_header("FIRST FIVE ROWS")
print(airports_df.iloc[:5])
print(index, "\n")

# NOTE(review): index, cols, Series, DataFrame and print_header are not
# defined in this excerpt -- presumably set up earlier in the script; confirm.
values = [
    [100, 110, 120, 930, 140],
    [250, 210, 120, 130, 840],
    [300, 310, 520, 430, 340],
    [275, 410, 420, 330, 777],
    [300, 510, 120, 730, 540],
    [150, 610, 320, 690, 640],
]

ser1 = Series([.1, .2, .3, .4, .5])

df = DataFrame(values, index=index, columns=cols)

print_header("Basic DataFrame:")
print(df)
print()

# Scalar multiplication applies to every cell and returns a new frame.
print_header("Triple each column")
tripled = df * 3
print(tripled)
print()

# Only the gamma column is rescaled; the result is stored back into df.
print_header("Multiply column gamma by 1.5")
df['gamma'] = df['gamma'] * 1.5
print(df)
print()

# print_header("Multiply by strings")
# df['gamma'] *= 'spam'
# print(df)
#!/usr/bin/env python
"""Read the energy-use CSV with explicit column names, then select one column and one row."""
import pandas as pd
from printheader import print_header

col_names = [
    "Desc",
    "1960", "1965", "1970", "1975", "1980", "1985", "1990",
    "1991", "1992", "1993", "1994", "1995", "1996", "1997",
    "1998", "1999", "2000", "2001", "2002", "2003", "2004",
    "2005", "2006", "2007", "2008", "2009", "(R) 2010", "2011",
]  # <1>

# header=None: every line of the file is read as data, and the labels
# come from col_names; the "Desc" column becomes the row index.
df = pd.read_csv(  # <2>
    '../DATA/energy_use_quad.csv',
    names=col_names,
    header=None,
    index_col="Desc",
)

print_header("database header")
print(df.head(), "\n")
print("-" * 60)

# Direct column lookup; a leading df[:] full-row slice adds nothing.
print_header("Only column 2003")
print(df['2003'], "\n")  # <3>
print("-" * 60)

print_header('Only row "Transportation as percent..."')
print(df.loc['Transportation as percent of total energy consumption'])