import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): `pd`, `np` and `frf` are used below but are not imported in
# the visible part of this file -- confirm they are imported elsewhere.

# Load the exported project data frame, using column 8 as the index.
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0, so
# pd.read_csv is used instead. from_csv's implicit parse_dates=True is
# deliberately not carried over: the index values are converted with float()
# further down, so they must stay as plain year.month values.
df = pd.read_csv('FDA_FoodRecall_DB.csv', index_col=8)

# Column order for the chart; per the original note this matches the pie
# chart so that the colors line up.
CHARTCOLS = [
    'Undeclared Soy',
    'Undeclared Eggs',
    'Undeclared Nuts',
    'Allergen',
    'Undeclared Milk and/or Pasteurization Issues',
    'Other Bacterial/Viral Contaminants',
    'Salmonella',
]

# Positional locations of the columns above, in the desired order.
COLUMN_RANGE = [df.columns.get_loc(col) for col in CHARTCOLS]

# Build the category frame, then total every column per distinct index value
# (the year.month key). GroupBy.sum() replaces aggregate(np.sum), which newer
# pandas versions warn about, and avoids rebinding dfchart to a GroupBy.
dfchart = frf.make_cat_df(df, COLUMN_RANGE)
dfchart = dfchart.groupby(dfchart.index).sum()

# The aggregated index supplies the x values.
yrmo = dfchart.index.tolist()

# Convert the year.month values to numbers, then rescale the two-digit month
# held in the decimal part to a fraction of a year. For example February is
# month 2 of 12, so 2009.02 becomes 2009 + 0.02 * 100 / 12 = 2009.167.
# np.trunc mirrors the int() truncation of the original element-wise loop.
x = np.array([float(value) for value in yrmo])
x = (x - np.trunc(x)) * 100 / 12 + np.trunc(x)
# NOTE(review): `i` comes from a loop whose header is not visible in this part
# of the file; the statement is kept at the indentation it has here.
# Parses the first 16 characters of DATE with "%a, %d %b %Y" and stores the
# result formatted as "%Y.%m" (year.month).
# NOTE(review): this is a chained-indexing assignment, which pandas documents
# as unreliable; consider DF.loc[i, "YearMonth"] once the index type is
# confirmed.
DF["YearMonth"][i] = datetime.datetime.strptime(
    DF["DATE"][i][0:16], "%a, %d %b %Y"
).strftime("%Y.%m")

# Category name -> "|"-separated keyword pattern handed to
# frf.add_categories (presumably matched against the recall text -- confirm
# against that helper).
CATADD = {
    "Salmonella": "salmonella",
    "Other Bacterial/Viral Contaminants":
        "listeria|clostridium|botulism|coli|bacteri|staph|coliform|norovirus|bacillus",
    "Undeclared Milk and/or Pasteurization Issues": "milk|dairy|pasteuri",
    "Undeclared Nuts":
        "peanut|almond|walnut|cashew|hazel|tree nut|pecan|pistachio|pine nut|macadamia|treenut",
    "Undeclared Eggs": "egg",
    "Undeclared Soy": "soy",
    "Allergen": "allergen|allergic",
}
DF = frf.add_categories(DF, CATADD)

# Build a data frame of category occurrences from column positions 9..15.
# Per the original note, 1 marks an occurrence and an "Other" category is
# added by the helper.
COLUMN_RANGE = [*range(9, 16)]
DF2 = frf.make_cat_df(DF, COLUMN_RANGE)
DF2  # bare expression; has no effect when run as a script

# Sum the categories.
CATSUM = {
    "Salmonella": "salmonella",
    "Other Bacterial/Viral Contaminants": "bacteria",
    "Undeclared Milk and/or Pasteurization Issues": "milk",
    "Undeclared Nuts": "nut",
    "Undeclared Eggs": "egg",
    "Undeclared Soy": "soy",
    "Allergen": "allergen",
    "Other": "other",
}
SUMCAT = frf.sum_categories(DF2, CATSUM)

# Label names.
LABEL = [
    "Salmonella",
    "Other Bacterial/Viral Contaminants",
    "Other",
    "Undeclared Milk and/or Pasteurization Issues",
    "Allergen",
    "Undeclared Nuts",
    "Undeclared Eggs",
    "Undeclared Soy",
]