def create_filter(): """ Build a list urls to the inspectors' detailed reports that will get scraped. Read in records from database of earlier reports, create df to filter against new reports in df above. """ #db_directory = os.path.dirname(os.path.abspath(__file__)) #db_file = os.path.join(db_directory, "rinspect.sqlite") db_file = "rinspect.sqlite" conn = sqlite3.connect(db_file) df_insp = joined_df() # Access result from function above df = pd.read_sql_query("select * from fdinsp;", conn) # Get old info unique_vals = df_insp[~df_insp.visitid.isin(df.visitid)] # Filter conn.close() # Build list of urls for detailed reports # Takes LicenseID and VisitID, passes it into the urls for detailed reports later result = [] for index, rows in unique_vals.iterrows(): visitid = rows['visitid'] licid = rows['licid'] urls = f"https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID={visitid}&id={licid}" urls = urls.replace(' ', '') result.append(urls) urlList = result urlList.pop(0) # get rid of first "Null" from append above """ Put new summary report info into the database. Would like to make this a separate function, but unique_vals gets returned by function as tuple rather than a dataframe, for some reason. I saw something on Stack Overflow that makes me think it may be a bug related to timestamps in dataframes. """ unique_vals = create_filter() # Access result from function above var = list(unique_vals.itertuples(index='visitid', name=None)) db_file = "rinspect.sqlite" conn = sqlite3.connect(db_file, detect_types=sqlite3.PARSE_DECLTYPES) c = conn.cursor() c.executemany( '''INSERT OR IGNORE INTO fdinsp (librow, county, licnum, sitename, streetaddy, cityaddy, zip, inspnum, insptype, inspdispos, inspdate, totalvio, highvio, licid, visitid) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', var) conn.commit() conn.close() c.close() return unique_vals, urlList
def graphWeights(): df = pd.read_sql_query("SELECT * FROM weights ORDER BY weight_date", app.config['SQLALCHEMY_DATABASE_URI']) _y = df["weight"] _x = df["weight_date"] box = BoxSelectTool() wheel = WheelZoomTool() panTool = PanTool() boxZoomTool = BoxZoomTool() resetTool = ResetTool() hover = HoverTool(show_arrow=True, point_policy='follow_mouse', tooltips=[ ("Date", " @x{%F}"), ("Weight", " @y Kg")], formatters={ # use 'datetime' formatter for '@x' field '@x' : 'datetime'}) TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool] _plot = figure(title=("Historic data showing variations of my weight " "since I moved to London"), plot_width=650, plot_height=450, sizing_mode='scale_both', x_axis_label='Dates', y_axis_label='Kg', x_axis_type='datetime', tools=TOOLS) _plot.line(_x, _y, legend_label="My weight of life", line_width=5) cdn_javascript = CDN.js_files[0] bokehScriptComponent, bokehDivComponent = components(_plot) graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent] return graph
insp.streetaddy = insp.streetaddy.str.replace(r'2Nd ', '2nd ') insp.streetaddy = insp.streetaddy.str.replace(r'3Rd ', '3rd ') insp.streetaddy = insp.streetaddy.str.replace(r' Us ', ' US ') insp.cityaddy = insp.cityaddy.str.title() insp = insp.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None) insp['visitid'] = insp['visitid'].apply(int) # so it can be filtered against df insp.inspdate = pd.to_datetime(insp.inspdate) insp.inspdate = insp.inspdate.dt.strftime('%Y, %m, %d') # READ IN records from database of earlier reports, create df to filter against # new reports in df above. #db_directory = os.path.dirname(os.path.abspath(__file__)) #db_file = os.path.join(db_directory, "rinspect.sqlite") db_file = "rinspect.sqlite" conn = sqlite3.connect(db_file) df = pd.read_sql_query("select * from fdinsp;", conn) unique_vals = insp[~insp.visitid.isin(df.visitid)] #filter new_vals = len(unique_vals) # BUILD A LIST of URLs to the inspectors detailed reports that will get scraped result = [] result_for_urls = result # for url list result_for_list = result.append("NULL") # field for potential later user input # takes LicenseID and VisitID, passes it into the urls for detailed reports later for index, rows in unique_vals.iterrows(): visitid = rows['visitid'] licid = rows['licid'] urls = "https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID={:d}&id={:s}".format(visitid, licid) urls = urls.replace(' ', '') result.append(urls)
def tripsPlot(): weights_y = None df_solo_trips = [] df_companion_trips = [] try: df_solo_trips = pd.read_sql_query("select * from trips where passenger_companion = '' ", app.config['SQLALCHEMY_DATABASE_URI']) except Exception as e: pass try: df_companion_trips = pd.read_sql_query("select * from trips where passenger_companion != '' ", app.config['SQLALCHEMY_DATABASE_URI']) except Exception as e: pass if (len(df_solo_trips) == 0 and len(df_companion_trips) == 0): return ["","",""] # Make sure we have a default bar height # in case there is no Weight data to show weights_y = weights_y if weights_y is not None else [0,9] highestWeight_y = int(sorted(weights_y)[-1]) lowestWeight_y = int(sorted(weights_y)[0]) # tripsBarHeight = height of the vertical bar # which depends on max height of weights_y # + 10% # tripsBarStart = left start of the bar # tripsBarEnd = right end of the bar # tripsBarColors = global if one "color", # individual if an array # ["color_1", "color_2" [,...]] # Solo trips horizontal bars data tripsBarHeight = (highestWeight_y - lowestWeight_y) + (highestWeight_y - lowestWeight_y)*0.1 tripsBarStart = df_solo_trips['departure_date'] tripsBarEnd = df_solo_trips['return_date'] tripsBarColors = "red" # Or individual colors for each value ["Cyan", "red",...] # Group trips horizontal bars data tripsBarGroupStart = df_companion_trips['departure_date'] tripsBarGroupEnd = df_companion_trips['return_date'] tripsBarGroupColors = "blue" # designing the plot style and information hover = HoverTool(show_arrow=True, point_policy='follow_mouse', tooltips=[ ("Trip from", " @left{%F}"), ("to", " @right{%F}")], formatters={ # use 'datetime' formatter for 'date' fields # default 'numeral' formatter for other fields '@left' : 'datetime', '@right' : 'datetime'}) box = BoxSelectTool() wheel = WheelZoomTool() panTool = PanTool() boxZoomTool = BoxZoomTool() resetTool = ResetTool() TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool] fig = figure(title = 'Trips dates', x_axis_label='Dates', x_axis_type="datetime", plot_width=650, plot_height=450, sizing_mode='scale_both', tools=TOOLS) fig.hbar(name = "red", y = highestWeight_y*0.96, height = tripsBarHeight, left = tripsBarStart, right = tripsBarEnd, color = tripsBarColors) fig.hbar(name = "blue", y = highestWeight_y*0.96, height = tripsBarHeight, left = tripsBarGroupStart, right = tripsBarGroupEnd, color = tripsBarGroupColors) # Technicalities to show the graph curdoc().add_root(fig) cdn_javascript = CDN.js_files[0] bokehScriptComponent, bokehDivComponent = components(fig) graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent] return graph
def plotWeightsAndTrips(): weights_y = None # Weights and dates df_weights = pd.read_sql_query( "SELECT * FROM weights ORDER BY weight_date", app.config['SQLALCHEMY_DATABASE_URI']) weightDates_x = df_weights['weight_date'] weights_y = df_weights['weight'] line_color = "black" line_width = 4 df_solo_trips = pd.read_sql_query( "select * from trips where passenger_companion = '' ", app.config['SQLALCHEMY_DATABASE_URI']) df_companion_trips = pd.read_sql_query( "select * from trips where passenger_companion != '' ", app.config['SQLALCHEMY_DATABASE_URI']) if (df_solo_trips.empty == True and df_companion_trips.empty == True and df_weights.empty == True): return ["", "", ""] # Make sure we have a default bar height # in case there is no Weight data to show weights_y = weights_y if weights_y is not None else [0, 9] highestWeight_y = int(sorted(weights_y)[-1]) lowestWeight_y = int(sorted(weights_y)[0]) # tripsBarHeight = height of the vertical bar # which depends on max height of weights_y # + 10% # tripsBarStart = left start of the bar # tripsBarEnd = right end of the bar # tripsBarColors = global if one "color", # individual if an array # ["color_1", "color_2" [,...]] # Solo trips horizontal bars data tripsBarHeight = (highestWeight_y - lowestWeight_y ) + (highestWeight_y - lowestWeight_y) * 0.1 tripsBarStart = df_solo_trips['departure_date'] tripsBarEnd = df_solo_trips['return_date'] tripsBarColors = "red" # Or individual colors for each value ["Cyan", "red",...] # Group trips horizontal bars data tripsBarGroupStart = df_companion_trips['departure_date'] tripsBarGroupEnd = df_companion_trips['return_date'] tripsBarGroupColors = "blue" # designing the plot style and information hover = HoverTool( show_arrow=True, point_policy='follow_mouse', tooltips=[("Trip from", " @left{%F}"), ("to", " @right{%F}"), ("Date", " @x{%F}"), ("Weight", " @y Kg")], formatters={ # use 'datetime' formatter for 'date' fields # default 'numeral' formatter for other fields '@x': 'datetime', '@left': 'datetime', '@right': 'datetime' }) box = BoxSelectTool() wheel = WheelZoomTool() panTool = PanTool() boxZoomTool = BoxZoomTool() resetTool = ResetTool() TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool] _plot = figure(title="Personal data registered since June 2014.", x_axis_label='Dates', x_axis_type='datetime', y_axis_label='Kilograms', y_axis_type='auto', plot_width=650, plot_height=450, sizing_mode='scale_both', tools=TOOLS) _plot.hbar(name="red", y=highestWeight_y * 0.96, height=tripsBarHeight, left=tripsBarStart, right=tripsBarEnd, color=tripsBarColors, legend_label="Solo trips") _plot.hbar(name="blue", y=highestWeight_y * 0.96, height=tripsBarHeight, left=tripsBarGroupStart, right=tripsBarGroupEnd, color=tripsBarGroupColors, legend_label="Group trips") # add weights' line to the figure _plot.line(weightDates_x, weights_y, color=line_color, width=line_width, legend_label="Weights", line_width=5) # Technicalities to show the graph curdoc().add_root(_plot) cdn_javascript = CDN.js_files[0] bokehScriptComponent, bokehDivComponent = components(_plot) graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent] return graph