# Example 1
# 0
import matplotlib.pyplot as plt
import seaborn as sns

# Load the exported project data frame, using the column at position 8 as
# the index.
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in pandas 1.0;
# pd.read_csv is the supported replacement.
# NOTE(review): from_csv defaulted to parse_dates=True. It is left out here
# because the index values are passed to float() further down - confirm the
# index column holds year.month values such as 2009.02.
df = pd.read_csv('FDA_FoodRecall_DB.csv', index_col=8)

# Column order for the chart; it mirrors the pie chart so the colors match.
CHARTCOLS = ['Undeclared Soy', 'Undeclared Eggs', 'Undeclared Nuts', 'Allergen',
             'Undeclared Milk and/or Pasteurization Issues',
             'Other Bacterial/Viral Contaminants', 'Salmonella']

# Positional locations of those columns, in the order listed above.
COLUMN_RANGE = [df.columns.get_loc(col) for col in CHARTCOLS]

# Aggregate the recall columns per index value (the year.month key).
# .sum() replaces .aggregate(np.sum), which newer pandas versions warn about.
dfchart = frf.make_cat_df(df, COLUMN_RANGE)
dfchart = dfchart.groupby(dfchart.index).sum()

# The aggregated index supplies the x values - keep it as a plain list.
yrmo = dfchart.index.tolist()

# Convert the year.month values to numbers, then rescale the decimal part so
# the month becomes a fraction of the year.
# For example, Feb is the 2nd of 12 months,
# so 2009.02 is converted to 2009 + .02/12*100 = 2009.167
x = np.array([float(value) for value in yrmo])
years = np.trunc(x)  # whole-year part; truncates like int() did
x = (x - years) * 100 / 12 + years
# Example 2
# 0
    DF["YearMonth"][i] = datetime.datetime.strptime(DF["DATE"][i][0:16], "%a, %d %b %Y").strftime("%Y.%m")

# Category name -> search pattern used to add the category columns.
# NOTE(review): the values look like "|"-separated regex alternations;
# confirm against frf.add_categories.
# Key order is kept exactly as before in case column positions depend on it.
CATADD = {
    "Salmonella": "salmonella",
    "Other Bacterial/Viral Contaminants":
        "listeria|clostridium|botulism|coli|bacteri|staph|coliform|norovirus|bacillus",
    "Undeclared Milk and/or Pasteurization Issues": "milk|dairy|pasteuri",
    "Undeclared Nuts":
        "peanut|almond|walnut|cashew|hazel|tree nut|pecan|pistachio|pine nut|macadamia|treenut",
    "Undeclared Eggs": "egg",
    "Undeclared Soy": "soy",
    "Allergen": "allergen|allergic",
}

# Add the category columns described by CATADD to the data frame.
DF = frf.add_categories(DF, CATADD)

# Build a data frame of categories flagging occurrences (1 indicates an
# occurrence); this step also adds the Other category.
# The columns passed in are positions 9 through 15.
COLUMN_RANGE = [*range(9, 16)]
DF2 = frf.make_cat_df(DF, COLUMN_RANGE)
DF2

# Category name -> short key used when summing the categories.
# NOTE(review): how frf.sum_categories uses the values is not visible here.
CATSUM = {
    "Salmonella": "salmonella",
    "Other Bacterial/Viral Contaminants": "bacteria",
    "Undeclared Milk and/or Pasteurization Issues": "milk",
    "Undeclared Nuts": "nut",
    "Undeclared Eggs": "egg",
    "Undeclared Soy": "soy",
    "Allergen": "allergen",
    "Other": "other",
}

# Sum the occurrence frame per category using the CATSUM mapping.
# NOTE(review): the return shape of frf.sum_categories is not visible here.
SUMCAT = frf.sum_categories(DF2, CATSUM)


# Label names, in the order they are listed here.
LABEL = [
    "Salmonella",
    "Other Bacterial/Viral Contaminants",
    "Other",
    "Undeclared Milk and/or Pasteurization Issues",
    "Allergen",
    "Undeclared Nuts",
    "Undeclared Eggs",
    "Undeclared Soy",
]