Ejemplo n.º 1
0
def create_filter():
    """
    Build a list urls to the inspectors' detailed reports that will get scraped.
    Read in records from database of earlier reports, create df to filter against
    new reports in df above.
    """

    #db_directory = os.path.dirname(os.path.abspath(__file__))
    #db_file = os.path.join(db_directory, "rinspect.sqlite")
    db_file = "rinspect.sqlite"
    conn = sqlite3.connect(db_file)
    df_insp = joined_df()  # Access result from function above
    df = pd.read_sql_query("select * from fdinsp;", conn)  # Get old info
    unique_vals = df_insp[~df_insp.visitid.isin(df.visitid)]  # Filter
    conn.close()

    # Build list of urls for detailed reports
    # Takes LicenseID and VisitID, passes it into the urls for detailed reports later
    result = []
    for index, rows in unique_vals.iterrows():
        visitid = rows['visitid']
        licid = rows['licid']
        urls = f"https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID={visitid}&id={licid}"
        urls = urls.replace(' ', '')
        result.append(urls)
    urlList = result
    urlList.pop(0)  # get rid of first "Null" from append above
    """
    Put new summary report info into the database.

    Would like to make this a separate     function, but unique_vals
    gets returned by function as tuple rather than a dataframe, for some reason.
    I saw something on Stack Overflow that makes me think
    it may be a bug related to timestamps in dataframes.
    """

    unique_vals = create_filter()  # Access result from function above
    var = list(unique_vals.itertuples(index='visitid', name=None))

    db_file = "rinspect.sqlite"
    conn = sqlite3.connect(db_file, detect_types=sqlite3.PARSE_DECLTYPES)
    c = conn.cursor()
    c.executemany(
        '''INSERT OR IGNORE INTO fdinsp (librow, county, licnum, sitename,
                  streetaddy, cityaddy, zip, inspnum, insptype, inspdispos,
                  inspdate, totalvio, highvio, licid, visitid)
                  VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', var)
    conn.commit()
    conn.close()
    c.close()

    return unique_vals, urlList
Ejemplo n.º 2
0
def graphWeights():

    df = pd.read_sql_query("SELECT * FROM weights ORDER BY weight_date",
                           app.config['SQLALCHEMY_DATABASE_URI'])
    _y = df["weight"]
    _x = df["weight_date"]

    box = BoxSelectTool()
    wheel = WheelZoomTool()
    panTool = PanTool()
    boxZoomTool = BoxZoomTool()
    resetTool = ResetTool()
    hover = HoverTool(show_arrow=True,
                      point_policy='follow_mouse',
                      tooltips=[
                      ("Date", " @x{%F}"),
                      ("Weight", " @y Kg")],
                      formatters={
                        # use 'datetime' formatter for '@x' field
                        '@x' : 'datetime'})

    TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool]

    _plot = figure(title=("Historic data showing variations of my weight "
                          "since I moved to London"),
                    plot_width=650,
                    plot_height=450,
                    sizing_mode='scale_both',
                    x_axis_label='Dates',
                    y_axis_label='Kg',
                    x_axis_type='datetime',
                    tools=TOOLS)

    _plot.line(_x,
               _y,
               legend_label="My weight of life",
               line_width=5)

    cdn_javascript = CDN.js_files[0]
    bokehScriptComponent, bokehDivComponent = components(_plot)

    graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent]

    return graph
Ejemplo n.º 3
0
insp.streetaddy = insp.streetaddy.str.replace(r'2Nd ', '2nd ')
insp.streetaddy = insp.streetaddy.str.replace(r'3Rd ', '3rd ')
insp.streetaddy = insp.streetaddy.str.replace(r' Us ', ' US ')
insp.cityaddy = insp.cityaddy.str.title()
insp = insp.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
insp['visitid'] = insp['visitid'].apply(int) # so it can be filtered against df
insp.inspdate = pd.to_datetime(insp.inspdate)
insp.inspdate = insp.inspdate.dt.strftime('%Y, %m, %d')

# READ IN records from database of earlier reports, create df to filter against
# new reports in df above.
#db_directory = os.path.dirname(os.path.abspath(__file__))
#db_file = os.path.join(db_directory, "rinspect.sqlite")
db_file = "rinspect.sqlite"
conn = sqlite3.connect(db_file)
df = pd.read_sql_query("select * from fdinsp;", conn)
unique_vals = insp[~insp.visitid.isin(df.visitid)] #filter
new_vals = len(unique_vals)

# BUILD A LIST of URLs to the inspectors detailed reports that will get scraped
result = []
result_for_urls = result # for url list
result_for_list = result.append("NULL") # field for potential later user input

# takes LicenseID and VisitID, passes it into the urls for detailed reports later
for index, rows in unique_vals.iterrows():
    visitid = rows['visitid']
    licid = rows['licid']
    urls = "https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID={:d}&id={:s}".format(visitid, licid)
    urls = urls.replace(' ', '')
    result.append(urls)
def tripsPlot():

    weights_y = None

    df_solo_trips = []
    df_companion_trips = []

    try:
        df_solo_trips = pd.read_sql_query("select * from trips where passenger_companion = '' ",
                                 app.config['SQLALCHEMY_DATABASE_URI'])
    except Exception as e:
        pass

    try:
        df_companion_trips = pd.read_sql_query("select * from trips where passenger_companion != '' ",
                                 app.config['SQLALCHEMY_DATABASE_URI'])
    except Exception as e:
        pass


    if (len(df_solo_trips) == 0 
        and len(df_companion_trips) == 0):
        return ["","",""]

    # Make sure we have a default bar height
    # in case there is no Weight data to show
    weights_y = weights_y if weights_y is not None else [0,9]
    highestWeight_y = int(sorted(weights_y)[-1])
    lowestWeight_y = int(sorted(weights_y)[0])

    # tripsBarHeight = height of the vertical bar 
    #                   which depends on max height of weights_y
    #                   + 10%
    # tripsBarStart = left start of the bar
    # tripsBarEnd = right end of the bar
    # tripsBarColors = global if one "color",
    #                   individual if an array
    #                   ["color_1", "color_2" [,...]]

    # Solo trips horizontal bars data
    tripsBarHeight = (highestWeight_y - lowestWeight_y) + (highestWeight_y - lowestWeight_y)*0.1
    tripsBarStart = df_solo_trips['departure_date']
    tripsBarEnd = df_solo_trips['return_date']
    tripsBarColors = "red" # Or individual colors for each value ["Cyan", "red",...]

    # Group trips horizontal bars data
    tripsBarGroupStart = df_companion_trips['departure_date']
    tripsBarGroupEnd = df_companion_trips['return_date']
    tripsBarGroupColors = "blue"

    # designing the plot style and information
    hover = HoverTool(show_arrow=True,
                      point_policy='follow_mouse',
                      tooltips=[
                      ("Trip from", " @left{%F}"),
                      ("to", " @right{%F}")],
                      formatters={
                        # use 'datetime' formatter for 'date' fields
                        # default 'numeral' formatter for other fields            
                        '@left' : 'datetime',
                        '@right' : 'datetime'})
    box = BoxSelectTool()
    wheel = WheelZoomTool()
    panTool = PanTool()
    boxZoomTool = BoxZoomTool()
    resetTool = ResetTool()

    TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool]

    fig = figure(title = 'Trips dates',
                 x_axis_label='Dates',
                 x_axis_type="datetime",
                 plot_width=650,
                 plot_height=450,
                 sizing_mode='scale_both',
                 tools=TOOLS)

    fig.hbar(name = "red",
             y = highestWeight_y*0.96,
             height = tripsBarHeight,
             left = tripsBarStart,
             right = tripsBarEnd,
             color = tripsBarColors)

    fig.hbar(name = "blue",
             y = highestWeight_y*0.96,
             height = tripsBarHeight,
             left = tripsBarGroupStart,
             right = tripsBarGroupEnd,
             color = tripsBarGroupColors)

    # Technicalities to show the graph
    curdoc().add_root(fig)

    cdn_javascript = CDN.js_files[0]
    bokehScriptComponent, bokehDivComponent = components(fig)
    graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent]

    return graph
def plotWeightsAndTrips():

    weights_y = None

    # Weights and dates
    df_weights = pd.read_sql_query(
        "SELECT * FROM weights ORDER BY weight_date",
        app.config['SQLALCHEMY_DATABASE_URI'])
    weightDates_x = df_weights['weight_date']
    weights_y = df_weights['weight']
    line_color = "black"
    line_width = 4

    df_solo_trips = pd.read_sql_query(
        "select * from trips where passenger_companion = '' ",
        app.config['SQLALCHEMY_DATABASE_URI'])

    df_companion_trips = pd.read_sql_query(
        "select * from trips where passenger_companion != '' ",
        app.config['SQLALCHEMY_DATABASE_URI'])

    if (df_solo_trips.empty == True and df_companion_trips.empty == True
            and df_weights.empty == True):
        return ["", "", ""]

    # Make sure we have a default bar height
    # in case there is no Weight data to show
    weights_y = weights_y if weights_y is not None else [0, 9]
    highestWeight_y = int(sorted(weights_y)[-1])
    lowestWeight_y = int(sorted(weights_y)[0])

    # tripsBarHeight = height of the vertical bar
    #                   which depends on max height of weights_y
    #                   + 10%
    # tripsBarStart = left start of the bar
    # tripsBarEnd = right end of the bar
    # tripsBarColors = global if one "color",
    #                   individual if an array
    #                   ["color_1", "color_2" [,...]]

    # Solo trips horizontal bars data
    tripsBarHeight = (highestWeight_y - lowestWeight_y
                      ) + (highestWeight_y - lowestWeight_y) * 0.1
    tripsBarStart = df_solo_trips['departure_date']
    tripsBarEnd = df_solo_trips['return_date']
    tripsBarColors = "red"  # Or individual colors for each value ["Cyan", "red",...]

    # Group trips horizontal bars data
    tripsBarGroupStart = df_companion_trips['departure_date']
    tripsBarGroupEnd = df_companion_trips['return_date']
    tripsBarGroupColors = "blue"

    # designing the plot style and information
    hover = HoverTool(
        show_arrow=True,
        point_policy='follow_mouse',
        tooltips=[("Trip from", " @left{%F}"), ("to", " @right{%F}"),
                  ("Date", " @x{%F}"), ("Weight", " @y Kg")],
        formatters={
            # use 'datetime' formatter for 'date' fields
            # default 'numeral' formatter for other fields
            '@x': 'datetime',
            '@left': 'datetime',
            '@right': 'datetime'
        })
    box = BoxSelectTool()
    wheel = WheelZoomTool()
    panTool = PanTool()
    boxZoomTool = BoxZoomTool()
    resetTool = ResetTool()

    TOOLS = [hover, box, wheel, panTool, boxZoomTool, resetTool]

    _plot = figure(title="Personal data registered since June 2014.",
                   x_axis_label='Dates',
                   x_axis_type='datetime',
                   y_axis_label='Kilograms',
                   y_axis_type='auto',
                   plot_width=650,
                   plot_height=450,
                   sizing_mode='scale_both',
                   tools=TOOLS)

    _plot.hbar(name="red",
               y=highestWeight_y * 0.96,
               height=tripsBarHeight,
               left=tripsBarStart,
               right=tripsBarEnd,
               color=tripsBarColors,
               legend_label="Solo trips")

    _plot.hbar(name="blue",
               y=highestWeight_y * 0.96,
               height=tripsBarHeight,
               left=tripsBarGroupStart,
               right=tripsBarGroupEnd,
               color=tripsBarGroupColors,
               legend_label="Group trips")

    # add weights' line to the figure
    _plot.line(weightDates_x,
               weights_y,
               color=line_color,
               width=line_width,
               legend_label="Weights",
               line_width=5)

    # Technicalities to show the graph
    curdoc().add_root(_plot)

    cdn_javascript = CDN.js_files[0]
    bokehScriptComponent, bokehDivComponent = components(_plot)
    graph = [cdn_javascript, bokehScriptComponent, bokehDivComponent]

    return graph