CPI.Average: This is the average Consumer Price Index associated with that year. It was used to calculate 2018-equivalent values. High.2018: This is the 2018-equivalent dollars for High.Value. Low.2018: This is the 2018-equivalent dollars for Low.Value. ''' # Displaying all columns without '...' pd.options.display.width = 0 df = pd.read_csv('datasets/Minimum Wage Data.csv', encoding="latin") df.to_csv('datasets/minwage.csv', encoding="utf-8") df = pd.read_csv('datasets/minwage.csv') hdr("df.head()") print(df.head()) # Now we use group-by: gb = df.groupby('State') hdr("gb.get_group('Alabama').set_index('Year').head()") print(gb.get_group('Alabama').set_index('Year').head()) # The above is the same as the following syntax, but using pandas instead of regular Python # print(df[df['State'] == 'Alabama'].set_index('Year').head()) # Iterating over groups act_min_wage = pd.DataFrame() for name, group in df.groupby('State'):
import pandas as pd
import matplotlib.pyplot as plt

from helpers import hdr

# df is a dataframe
df = pd.read_csv("datasets/avocado.csv")

# First x rows
hdr("df.head(4)")
first_rows = df.head(4)
print(first_rows)

# Last x rows
hdr("df.tail(5)")
last_rows = df.tail(5)
print(last_rows)

# Get the column AveragePrice (bracket access returns it as a Series).
# The dot notation is uncommon! (df.AveragePrice.head())
hdr("df['AveragePrice'].head()")
average_price = df['AveragePrice']
print(average_price.head())

# Get all rows where region == Albany: build a boolean mask, then index
# with it. The result is a new dataframe.
is_albany = df['region'] == "Albany"
albany_df = df[is_albany]
hdr("albany_df.head()")
print(albany_df.head())

# Show the indexes of the dataframe
hdr("albany_df.index")
print(albany_df.index)
from helpers import hdr

# Display-width option; 0 is meant to show all columns without '...'.
pd.options.display.width = 0

df = pd.read_csv('datasets/minwage.csv')

# Build one wide frame indexed by Year with one column per state, each
# holding that state's 'Low.2018' values.
act_min_wage = pd.DataFrame()
for name, group in df.groupby('State'):
    # Single-column frame for this state, renamed to the state name.
    state_column = group.set_index('Year')[['Low.2018']].rename(
        columns={'Low.2018': name})
    if act_min_wage.empty:
        # The first state seeds the frame (and its Year index).
        act_min_wage = state_column
    else:
        # Later states are joined on the Year index.
        act_min_wage = act_min_wage.join(state_column)

hdr('act_min_wage.head()')
print(act_min_wage.head())

# Turn zeros into NaN so that dropna(axis=1) removes every state column
# containing at least one of them, then correlate the remaining states.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
min_wage_corr = act_min_wage.replace(0, np.nan).dropna(axis=1).corr()

hdr('min_wage_corr.head()')
print(min_wage_corr.head())

plt.matshow(min_wage_corr)
plt.show()

# Two-letter upper-case label taken from the start of each column name.
labels = [c[:2].upper() for c in min_wage_corr.columns]


def show_heatmap(labels):
    fig = plt.figure(figsize=(12, 12))
    # <- all the subplots in the figure are in a 1 by 1 grid, and this is number one (only 1 graph)
import pandas as pd
import matplotlib.pyplot as plt

from helpers import hdr

df = pd.read_csv("datasets/avocado.csv")

# Convert the Date column to datetime objects
df['Date'] = pd.to_datetime(df['Date'])
hdr("df.head()")
print(df.head())

# Filter first, then copy: only the Albany rows are duplicated (instead of
# the whole frame), and albany_df is an independent frame that can be
# modified without touching df.
albany_df = df[df['region'] == "Albany"].copy()
albany_df = albany_df.set_index("Date")

albany_df["AveragePrice"].plot()
plt.show()

# Moving average of size 25
albany_df["AveragePrice"].rolling(25).mean().plot()
plt.show()

# It doesn't look right.
# We need to sort the dataset by the Date, which is the index:
albany_df = albany_df.sort_index()
albany_df["AveragePrice"].rolling(25).mean().plot(
    title="AveragePrice with rolling average=25")
plt.show()

# Put this data as a new column in our dataframe:
import pandas as pd from helpers import hdr pd.options.display.width = 0 df = pd.read_csv("datasets/diamonds.csv", index_col=0) hdr("df.head()") print(df.head()) # Starting with linear regression # It would be ideal that our string classifications are linear, meaning they have a meaningful order. # Let's see what all of our cuts are, for example hdr("df['cut'].unique()") print(df['cut'].unique()) # Now we hard-code the order: cut_class_dict = { 'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5 } # Let's see clarity hdr("df['clarity'].unique()") print(df['clarity'].unique()) clarity_dict = { "I3": 1,
import pandas as pd
import numpy as np

from helpers import hdr

pd.set_option('display.width', 0)

# In this lesson we'll try to find correlation between unemployment and the minimum wage
unemp_county = pd.read_csv('datasets/unemployment-by-county-us/output.csv')
hdr('unemp_county.head()')
print(unemp_county.head())

df = pd.read_csv('datasets/minwage.csv')

# Build a Year-indexed frame with one column per state, each holding that
# state's 'Low.2018' values.
act_min_wage = pd.DataFrame()
for name, group in df.groupby('State'):
    # Single-column frame for this state, with the column renamed to the state.
    state_column = group.set_index('Year')[['Low.2018']].rename(
        columns={'Low.2018': name})
    if act_min_wage.empty:
        # The first state seeds the frame (and its Year index).
        act_min_wage = state_column
    else:
        # Remaining states are joined on the Year index.
        act_min_wage = act_min_wage.join(state_column)

hdr('act_min_wage.head()')
print(act_min_wage.head())