def generate_career_spvfp_scatter():
    """
    Generates average (across career) start position vs finish position scatter plot.

    One dot is drawn per driver in the module-level `drivers` table. The averages are computed from the rows of the
    module-level `results` table belonging to that driver whose `grid` value is positive.

    :return: SP v FP scatter plot layout
    """
    logging.info("Generating start position vs finish position scatter")

    color_gen = itertools.cycle(Turbo256[25:220])
    # Collect plain dicts and build the frame once: `DataFrame.append` in a loop is quadratic and no longer exists
    # in pandas >= 2.0.
    rows = []
    for did in drivers.index.values:
        driver_results = results[(results["driverId"] == did) & (results["grid"] > 0)]
        num_races = driver_results.shape[0]
        rows.append({
            "driver_name": get_driver_name(did),
            "avg_sp": driver_results["grid"].mean(),
            # NOTE(review): the subtitle states DNFs are ignored; presumably "position" is NaN for DNFs and is
            # therefore skipped by mean() - confirm against the data loader.
            "avg_fp": driver_results["position"].mean(),
            "num_races": num_races,
            "color": next(color_gen),
            # Sub-linear growth keeps long careers from producing huge dots
            "size": math.pow(num_races, 0.3) + 2,
        })
    source = pd.DataFrame(rows, columns=["driver_name", "avg_sp", "avg_fp", "num_races", "color", "size"])

    spvfp_scatter = figure(title=u"Average Starting Position vs Finish Position \u2014 Saturday vs Sunday performance",
                           x_axis_label="Career Avg. Grid Position",
                           y_axis_label="Career Avg. Finishing Position (Official Classification)",
                           x_range=Range1d(0, 35, bounds=(0, 60)),
                           y_range=Range1d(0, 35, bounds=(0, 60)))
    spvfp_scatter.xaxis.ticker = FixedTicker(ticks=np.arange(5, 61, 5).tolist() + [1])
    spvfp_scatter.yaxis.ticker = FixedTicker(ticks=np.arange(5, 61, 5).tolist() + [1])
    # The tickers go up to and including 60, so the ordinal overrides must cover 60 as well
    spvfp_scatter.xaxis.major_label_overrides = {i: int_to_ordinal(i) for i in range(1, 61)}
    spvfp_scatter.yaxis.major_label_overrides = {i: int_to_ordinal(i) for i in range(1, 61)}

    subtitle = "Average is taken across the driver's whole career. Dot size is calculated based on the number of " \
               "races the driver entered. DNFs not considered in either calculation."
    spvfp_scatter.add_layout(Title(text=subtitle, text_font_style="italic"), "above")

    spvfp_scatter.scatter(x="avg_sp", y="avg_fp", source=source, color="color", size="size", alpha=0.7)
    # Faint diagonal marking "finishes where they started"
    spvfp_scatter.line(x=[-60, 60], y=[-60, 60], color="white", line_alpha=0.2)
    # Regression line (hard-coded end points)
    spvfp_scatter.line(x=[0, 60], y=[2.58240774, 29.96273108], color="white", line_alpha=0.6)

    label_kwargs = dict(render_mode="canvas",
                        text_color="white",
                        text_font_size="10pt",
                        border_line_color="white",
                        border_line_alpha=0.7)
    labels = [
        Label(x=32, y=16, text=" Regression Line ", **label_kwargs),
        Label(x=26, y=1, text=" Driver tends to make up many places ", **label_kwargs),
        Label(x=1, y=25, text=" Driver tends to lose many places ", **label_kwargs),
    ]
    for label in labels:
        spvfp_scatter.add_layout(label)

    spvfp_scatter.add_tools(HoverTool(show_arrow=False, tooltips=[
        ("Driver", "@driver_name"),
        ("Avg. Starting Pos.", "@avg_sp"),
        ("Avg. Finish Pos.", "@avg_fp (DNFs not considered)"),
        ("Races Entered", "@num_races")
    ]))
    spvfp_scatter.add_tools(CrosshairTool(dimensions="both", line_color="white", line_alpha=0.6))

    return spvfp_scatter
def generate_sp_position_scatter():
    """
    Generates a scatter plot of average (across career) start position vs WDC position.

    One dot is drawn per driver in the module-level `drivers` table. The start position average comes from the
    driver's rows in `results` with a positive `grid`; the WDC average comes from the driver's rows in
    `wdc_final_positions`.

    :return: SP v WDC position scatter plot layout
    """
    logging.info("Generating start position vs WDC finish position scatter")

    color_gen = itertools.cycle(Turbo256[25:220])
    # Collect plain dicts and build the frame once: `DataFrame.append` in a loop is quadratic and no longer exists
    # in pandas >= 2.0.
    rows = []
    for did in drivers.index.values:
        driver_results = results[(results["driverId"] == did) & (results["grid"] > 0)]
        driver_wdc_positions = wdc_final_positions[wdc_final_positions["driverId"] == did]["position"]
        num_races = driver_results.shape[0]
        rows.append({
            "driver_name": get_driver_name(did),
            "avg_sp": driver_results["grid"].mean(),
            "avg_wdc_position": driver_wdc_positions.mean(),
            "num_races": num_races,
            "color": next(color_gen),
            # Sub-linear growth keeps long careers from producing huge dots
            "size": math.pow(num_races, 0.3) + 2,
        })
    source = pd.DataFrame(rows,
                          columns=["driver_name", "avg_sp", "avg_wdc_position", "num_races", "color", "size"])

    sp_wdc_pos_scatter = figure(title=u"Average Starting Position vs WDC Finish Position \u2014 Saturday vs Sunday "
                                      u"performance, a less outlier-prone perspective",
                                x_axis_label="Career Avg. Grid Position",
                                y_axis_label="Career Avg. WDC Finish Position",
                                x_range=Range1d(0, 35, bounds=(0, 60)),
                                y_range=Range1d(0, 35, bounds=(0, 200)))
    sp_wdc_pos_scatter.xaxis.ticker = FixedTicker(ticks=np.arange(5, 61, 5).tolist() + [1])
    sp_wdc_pos_scatter.yaxis.ticker = FixedTicker(ticks=np.arange(5, 201, 5).tolist() + [1])
    sp_wdc_pos_scatter.xaxis.major_label_overrides = {i: int_to_ordinal(i) for i in range(1, 70)}
    # The y ticker goes up to and including 200, so the ordinal overrides must cover 200 as well
    sp_wdc_pos_scatter.yaxis.major_label_overrides = {i: int_to_ordinal(i) for i in range(1, 201)}

    subtitle = "Average is taken across the driver's whole career. Dot size is calculated based on the number of " \
               "races the driver entered."
    sp_wdc_pos_scatter.add_layout(Title(text=subtitle, text_font_style="italic"), "above")

    sp_wdc_pos_scatter.scatter(x="avg_sp", y="avg_wdc_position", source=source, color="color", size="size",
                               alpha=0.7)
    sp_wdc_pos_scatter.line(x=[-60, 60], y=[-60, 60], color="white", line_alpha=0.2)
    # The regression line runs from (0, 7.65286499) to (200, 345.88496942); it is deliberately not drawn because
    # it isn't very helpful on this plot.

    label_kwargs = dict(render_mode="canvas",
                        text_color="white",
                        text_font_size="10pt",
                        border_line_color="white",
                        border_line_alpha=0.7)
    # TODO are these really the right labels?
    labels = [
        Label(x=10, y=1, text=" Driver tends to make up many places ", **label_kwargs),
        Label(x=1, y=25, text=" Driver tends to lose many places ", **label_kwargs),
    ]
    for label in labels:
        sp_wdc_pos_scatter.add_layout(label)

    sp_wdc_pos_scatter.add_tools(HoverTool(show_arrow=False, tooltips=[
        ("Driver", "@driver_name"),
        ("Avg. Starting Pos.", "@avg_sp"),
        ("Avg. WDC Pos.", "@avg_wdc_position"),
        ("Races Entered", "@num_races")
    ]))
    sp_wdc_pos_scatter.add_tools(CrosshairTool(dimensions="both", line_color="white", line_alpha=0.6))

    return sp_wdc_pos_scatter
def generate_stats_layout(cd_years, cd_races, cd_results, cd_fastest_lap_data, positions_source, circuit_id, driver_id,
                          constructor_id=None):
    """
    Stats div including:
    - Years
    - Num. races
    - Num. wins
    - Num. podiums
    - Best result (only shown when there are no wins)
    - Average start position
    - Average finish position
    - Average lap time
    - Fastest lap time
    - Num mechanical DNFs and mechanical DNF rate
    - Num crash DNFs and crash DNF rate
    - Num finishes and finish rate
    - Comparison of the average finish position here vs overall

    :param cd_years: CD years
    :param cd_races: CD races (indexed by race ID, needs a `year` column)
    :param cd_results: CD results
    :param cd_fastest_lap_data: CD fastest lap data
    :param positions_source: Positions source
    :param circuit_id: Circuit ID
    :param driver_id: Driver ID
    :param constructor_id: If set to anything but None, will do constructor mode
    :return: Stats div layout, an empty Div if there are no results
    """
    logging.info("Generating stats div")
    num_races = cd_results.shape[0]
    if num_races == 0:
        return Div()

    # Wins. The count stays an int so that it can be tested below; the display string is kept separately.
    win_results = cd_results[cd_results["positionOrder"] == 1]
    num_wins = win_results.shape[0]
    num_wins_str = str(num_wins)
    if num_wins > 0:
        win_years = sorted(cd_races.loc[win_results["raceId"].values, "year"].astype(str).values.tolist(),
                           reverse=True)
        num_wins_str += " (" + ", ".join(win_years) + ")"

    # Podiums
    podium_results = cd_results[cd_results["positionOrder"] <= 3]
    num_podiums = podium_results.shape[0]
    num_podiums_str = str(num_podiums)
    if num_podiums > 0:
        podium_years = list(set(cd_races.loc[podium_results["raceId"].values, "year"].values.tolist()))
        num_podiums_str += " (" + rounds_to_str(podium_years) + ")"
        if len(num_podiums_str) > 120:
            # Too long for one line: break in the middle and indent the continuation
            split = num_podiums_str.split(" ")
            split.insert(int(len(split) / 2), "<br> " + "".ljust(20))
            num_podiums_str = " ".join(split)

    # Best result, only interesting if there was never a win
    best_result = None
    if num_wins == 0:
        # Drop missing values first so idxmin() can neither raise nor hand back NaN
        position_orders = cd_results["positionOrder"].dropna()
        if not position_orders.empty:
            idxmin = position_orders.idxmin()
            rid = cd_results.loc[idxmin, "raceId"]
            year = cd_races.loc[rid, "year"]
            best_result = int_to_ordinal(int(cd_results.loc[idxmin, "positionOrder"])) + f" ({year})"

    mean_sp = round(cd_results["grid"].mean(), 1)
    mean_fp = round(cd_results["positionOrder"].mean(), 1)

    avg_lap_time = cd_fastest_lap_data["avg_lap_time_millis"].mean()
    fastest_lap_time = cd_fastest_lap_data["fastest_lap_time_millis"].min()

    # Outcome breakdown. num_races is known to be positive here, so the rates are always defined.
    classifications = cd_results["statusId"].apply(get_status_classification)
    num_mechanical_dnfs = classifications[classifications == "mechanical"].shape[0]
    num_crash_dnfs = classifications[classifications == "crash"].shape[0]
    num_finishes = classifications[classifications == "finished"].shape[0]
    mechanical_dnfs_str = str(num_mechanical_dnfs) + " (" + str(round(100 * num_mechanical_dnfs / num_races, 1)) + "%)"
    crash_dnfs_str = str(num_crash_dnfs) + " (" + str(round(100 * num_crash_dnfs / num_races, 1)) + "%)"
    finishes_str = str(num_finishes) + " (" + str(round(100 * num_finishes / num_races, 1)) + "%)"

    has_positions = positions_source.shape[0] > 0
    if has_positions:
        avg_finish_pos_overall = positions_source["avg_finish_pos"].mean()
        avg_finish_pos_here = positions_source["finish_position_int"].mean()
        diff = avg_finish_pos_here - avg_finish_pos_overall
        avg_finish_pos_overall = round(avg_finish_pos_overall, 1)
        avg_finish_pos_here = round(avg_finish_pos_here, 1)
        # A numerically smaller position is a better ("higher") finish
        w = "higher" if diff < 0 else "lower"
        finish_pos_diff_str = f"Finished on average {round(abs(diff), 1)} place(s) {w} than average " \
                              f"(pos. {avg_finish_pos_here} here vs pos. {avg_finish_pos_overall} average overall)"
    else:
        finish_pos_diff_str = ""

    # Construct the HTML
    header_template = """ <h2 style="text-align: center;"><b>{}</b></h2> """
    template = """ <pre><b>{}</b> {}<br></pre> """

    if constructor_id:
        name = get_constructor_name(constructor_id, include_flag=False)
    else:
        name = get_driver_name(driver_id, include_flag=False, just_last=True)

    cd_stats = header_template.format(f"{name} at {get_circuit_name(circuit_id, include_flag=False)} Stats")
    cd_stats += template.format("Years: ".ljust(22), rounds_to_str(cd_years))
    cd_stats += template.format("Num Races: ".ljust(22), str(num_races))
    cd_stats += template.format("Num Wins: ".ljust(22), num_wins_str)
    cd_stats += template.format("Num Podiums: ".ljust(22), str(num_podiums_str))
    if best_result:
        cd_stats += template.format("Best Result: ".ljust(22), str(best_result))
    cd_stats += template.format("Avg. Start Pos.: ".ljust(22), mean_sp)
    cd_stats += template.format("Avg. Finish Pos.: ".ljust(22), mean_fp)
    if not np.isnan(avg_lap_time):
        cd_stats += template.format("Avg. Lap Time: ".ljust(22), millis_to_str(avg_lap_time))
        cd_stats += template.format("Fastest Lap Time: ".ljust(22), millis_to_str(fastest_lap_time))
    cd_stats += template.format("Num. Mechanical DNFs: ".ljust(22), mechanical_dnfs_str)
    cd_stats += template.format("Num. Crash DNFs: ".ljust(22), crash_dnfs_str)
    cd_stats += template.format("Num Finishes".ljust(22), finishes_str)
    if has_positions:
        cd_stats += template.format("Compared to Average: ".ljust(22), finish_pos_diff_str)

    return Div(text=cd_stats)
def generate_all_drivers_win_plot():
    """
    Generates a win plot including every driver.

    For every driver with at least one win in the module-level `results` table, a line is drawn showing the
    cumulative win percentage after each year the driver raced in (years without races are skipped, so the x axis is
    "years of experience").

    :return: All drivers win plot layout
    """
    logging.info("Generating all drivers win plot")

    winner_dids = results[results["position"] == 1]["driverId"].unique()

    win_plot = figure(
        title="Win Percentage of All Winning Drivers vs Years Experience",
        x_axis_label="Years of Experience (excluding breaks)",
        y_axis_label="Win Percentage",
        y_range=Range1d(0, 1, bounds=(0, 1))
    )
    win_plot.yaxis.formatter = NumeralTickFormatter(format="0.0%")
    win_plot.xaxis.ticker = FixedTicker(ticks=np.arange(0, 30, 1))

    # Spread the drivers evenly over the palette indices [40, 220). Written as a comprehension so that zero
    # winners simply yields no colors instead of dividing by zero.
    n_drivers = len(winner_dids)
    colors = [Turbo256[int(40 + k * 180 / n_drivers)] for k in range(n_drivers)]
    color_gen = ColorDashGenerator(colors=colors, driver_only_mode=True)

    max_years_in = 0
    for did in winner_dids:
        driver_name = get_driver_name(did)  # Loop invariant, no need to look it up for every year
        driver_results = results[results["driverId"] == did]
        driver_races = races.loc[driver_results["raceId"]]
        driver_years = driver_races["year"].unique()
        driver_years.sort()

        wins = 0
        num_races = 0
        rows = []
        for years_in, year in enumerate(driver_years, start=1):
            year_races = driver_races[driver_races["year"] == year]
            year_results = driver_results[driver_results["raceId"].isin(year_races.index.values)]
            # Both counters are cumulative over the career so far
            num_races += year_results.shape[0]
            wins += year_results[year_results["position"] == 1].shape[0]
            win_pct = wins / num_races
            rows.append({
                "driver_name": driver_name,
                "years_in": years_in,
                "wins": wins,
                "win_pct": win_pct,
                "n_races": num_races,
                # Scale to percent *before* rounding, otherwise e.g. 23.4% would be shown as 20.0%
                "wins_str": str(wins) + " (" + str(round(100 * win_pct, 1)) + "%)"
            })
        max_years_in = max(max_years_in, len(driver_years))

        # Build the frame once per driver instead of appending row by row (`DataFrame.append` is gone in pandas 2)
        source = pd.DataFrame(rows, columns=["driver_name", "years_in", "wins", "win_pct", "wins_str", "n_races"])
        color, _ = color_gen.get_color_dash(did, did)
        win_plot.line(x="years_in", y="win_pct", source=source, color=color, alpha=0.6, line_width=2,
                      muted_alpha=0.0)

    win_plot.x_range = Range1d(1, max_years_in, bounds=(1, max_years_in))

    win_plot.add_tools(HoverTool(show_arrow=False, tooltips=[
        ("Driver", "@driver_name"),
        ("Years into Career", "@years_in"),
        ("Wins", "@wins_str"),
        ("Number of Races", "@n_races")
    ]))
    win_plot.add_tools(CrosshairTool(dimensions="both", line_color="white", line_alpha=0.6))

    return win_plot
def generate_lap_time_plot(cd_lap_times, cd_rids, circuit_id, driver_id, constructor_id=None):
    """
    Plot lap time distribution of the driver at this circuit along with the lap time distribution of all drivers at
    this circuit during the time period to show how fast and consistent he is.

    Both distributions are restricted to lap times between 1 standard deviation below and 2 standard deviations
    above the mean lap time of the whole field, then binned into 50-bin histograms normalized by the number of laps.

    :param cd_lap_times: Circuit driver lap times
    :param cd_rids: Circuit driver race IDs
    :param circuit_id: Circuit ID
    :param driver_id: Driver ID, can be set to None if using constructor mode
    :param constructor_id: Constructor ID, set to None if not using constructor mode
    :return: Lap time plot layout, or a Div with an explanatory message if no lap times remain after filtering
    """
    logging.info("Generating lap time distribution plot")

    def build_pdf_source(hist, edges):
        # One row per histogram bin: bin center, fraction of laps, and the two strings shown in the hover tooltip
        pdf_source = pd.DataFrame({"x": 0.5 * (edges[:-1] + edges[1:]), "pdf": hist})
        pdf_source["lap_time_str"] = pdf_source["x"].apply(millis_to_str)
        pdf_source["pct_str"] = pdf_source["pdf"].apply(lambda pdf: str(round(100 * pdf, 1)) + "%")
        return pdf_source

    # Collect data on everyone during these years
    all_times = lap_times[lap_times["raceId"].isin(cd_rids)]
    field_mean = all_times["milliseconds"].mean()
    field_std = all_times["milliseconds"].std()
    # Asymmetric window: the slow tail is kept longer than the fast one
    millis_range_min = field_mean - 1 * field_std
    millis_range_max = field_mean + 2 * field_std

    cd_lap_times = cd_lap_times[(cd_lap_times["milliseconds"] > millis_range_min) &
                                (cd_lap_times["milliseconds"] < millis_range_max)]
    if cd_lap_times.shape[0] == 0:
        return Div(text="Unfortunately, we do not yet have lap time data on this driver at this circuit.")
    all_times = all_times[(all_times["milliseconds"] > millis_range_min) &
                          (all_times["milliseconds"] < millis_range_max)]

    cd_hist, cd_edges = np.histogram(cd_lap_times["milliseconds"], bins=50)
    all_hist, all_edges = np.histogram(all_times["milliseconds"], bins=50)
    # Normalize counts to the fraction of laps falling in each bin
    cd_hist = cd_hist / cd_lap_times.shape[0]
    all_hist = all_hist / all_times.shape[0]

    # Each source derives its tooltip strings from its *own* bin centers (the driver's source used to pick up the
    # field's lap time strings)
    all_pdf_source = build_pdf_source(all_hist, all_edges)
    cd_pdf_source = build_pdf_source(cd_hist, cd_edges)

    if constructor_id:
        name = get_constructor_name(constructor_id, include_flag=False)
    else:
        name = get_driver_name(driver_id, include_flag=False, just_last=True)
    circuit_name = get_circuit_name(circuit_id, include_flag=False)
    title = u"Lap Time Distribution \u2014 " + name + "'s lap times at " + circuit_name + \
            " vs the rest of the field during their years"

    max_y = 0.02 + max(cd_pdf_source["pdf"].max(), all_pdf_source["pdf"].max())
    min_x = all_edges[0] - 500
    max_x = all_edges[-1] + 500
    time_dist = figure(title=title,
                       y_axis_label="% Occurrence",
                       x_axis_label="Lap Time",
                       y_range=Range1d(0, max_y, bounds=(0, max_y)),
                       x_range=Range1d(min_x, max_x, bounds=(min_x, max_x + 3)),
                       tools="pan,box_zoom,wheel_zoom,reset,save")

    # Subtitle wording matches the filter window and the (vertical) mean markers drawn below
    subtitle = "Only lap times between 1 standard deviation below and 2 standard deviations above the mean are " \
               "shown, means marked with vertical line"
    time_dist.add_layout(Title(text=subtitle, text_font_style="italic"), "above")

    time_dist.xaxis.formatter = DatetimeTickFormatter(**DATETIME_TICK_KWARGS)
    time_dist.yaxis.formatter = NumeralTickFormatter(format="0.0%")

    cd_quad = time_dist.quad(top=cd_hist, bottom=0, left=cd_edges[:-1], right=cd_edges[1:],
                             fill_color="orange", line_alpha=0, alpha=0.1, muted_alpha=0)

    line_kwargs = dict(x="x", y="pdf", line_alpha=0.9, line_width=2, muted_line_alpha=0.05)
    cd_pdf_line = time_dist.line(source=cd_pdf_source, color="orange", **line_kwargs)
    all_pdf_line = time_dist.line(source=all_pdf_source, color="white", **line_kwargs)

    # Mark means with vertical lines that extend far beyond the visible y range
    line_kwargs = dict(y=[-100, 100], line_alpha=0.9, line_width=2, muted_alpha=0.05)
    all_mean = all_times["milliseconds"].mean()
    cd_mean = cd_lap_times["milliseconds"].mean()
    cd_mean_line = time_dist.line(x=[cd_mean] * 2, line_color="orange", **line_kwargs)
    all_mean_line = time_dist.line(x=[all_mean] * 2, line_color="white", **line_kwargs)

    # Legend
    legend = [
        LegendItem(label=f"{name}'s Dist.", renderers=[cd_pdf_line, cd_quad, cd_mean_line]),
        LegendItem(label="All Drivers Dist.", renderers=[all_pdf_line, all_mean_line])
    ]
    legend = Legend(items=legend, location="top_right", glyph_height=15, spacing=2, inactive_fill_color="gray")
    time_dist.add_layout(legend, "right")
    time_dist.legend.click_policy = "mute"
    time_dist.legend.label_text_font_size = "12pt"

    # Hover tooltip
    time_dist.add_tools(HoverTool(show_arrow=False, renderers=[all_pdf_line, cd_pdf_line], tooltips=[
        ("Lap Time", "@lap_time_str"),
        ("Percent of Laps", "@pct_str")
    ]))

    # Crosshair tooltip
    time_dist.add_tools(CrosshairTool(line_color="white", line_alpha=0.6))

    return time_dist
def generate_stats_layout(positions_source, yc_results, comparison_source, year_id, constructor_id):
    """
    Year summary div for one constructor in one season, including:
    - WCC place
    - Highest race finish (only shown when there are no wins)
    - Number of races
    - Points and points per race
    - Number of wins and where were they
    - Number of podiums and where were they
    - Drivers
    - Mean grid position
    - Mean finish position
    - DNF info
    - Mean gap between the two drivers in positions

    :param positions_source: Positions source
    :param yc_results: YC results
    :param comparison_source: Comparison source
    :param year_id: Year ID
    :param constructor_id: Constructor ID
    :return: Stats layout, an empty Div if `positions_source` has no rows
    """
    logging.info("Generating year constructor stats layout")
    if positions_source.shape[0] == 0:
        return Div(text="")

    def break_if_long(text):
        # Strings over 120 characters get an HTML line break in the middle, followed by indentation
        if len(text) > 120:
            split = text.split(" ")
            split.insert(int(len(split) / 2), "<br> " + "".ljust(20))
            text = " ".join(split)
        return text

    wcc_final_standing = positions_source["wcc_final_standing"].mode()
    if wcc_final_standing.shape[0] > 0:
        wcc_final_standing_str = int_to_ordinal(wcc_final_standing.values[0])
    else:
        wcc_final_standing_str = ""

    # Drop missing values before idxmin(): on an empty or all-NaN column idxmin() raises (or returns NaN, depending
    # on the pandas version), which an isnan check after the fact cannot handle reliably.
    finish_orders = yc_results["positionOrder"].dropna()
    if finish_orders.empty:
        highest_race_finish_str = ""
    else:
        highest_race_finish_idx = finish_orders.idxmin()
        highest_race_finish = yc_results.loc[highest_race_finish_idx, "positionOrder"]
        round_name = get_race_name(yc_results.loc[highest_race_finish_idx, "raceId"])
        highest_race_finish_str = int_to_ordinal(highest_race_finish) + " at " + round_name

    num_races = positions_source["race_id"].unique().shape[0]
    num_races_str = str(num_races)

    points = positions_source["points"].max()
    if np.isnan(points):
        points_str = ""
    elif points <= 0:
        points_str = str(points) + " (0 pts/race)"
    else:
        points_str = str(points) + " (" + str(round(points / num_races, 1)) + " pts/race)"

    wins_slice = yc_results[yc_results["positionOrder"] == 1]
    num_wins = wins_slice.shape[0]
    wins_str = str(num_wins)
    if num_wins > 0:
        wins_str += " (" + ", ".join(wins_slice["raceId"].apply(get_race_name)) + ")"
        wins_str = break_if_long(wins_str)

    podiums_slice = yc_results[yc_results["positionOrder"] <= 3]
    num_podiums = podiums_slice.shape[0]
    podiums_str = str(num_podiums)
    if num_podiums > 0:
        # Each race is named once even if both cars were on the podium
        race_names = ", ".join([get_race_name(rid) for rid in podiums_slice["raceId"].unique()])
        podiums_str += " (" + race_names + ")"
        podiums_str = break_if_long(podiums_str)

    driver_names = ", ".join([get_driver_name(did) for did in yc_results["driverId"].unique()])

    mean_grid_pos = yc_results["grid"].replace("", np.nan).mean()
    mean_grid_pos_str = "" if np.isnan(mean_grid_pos) else str(round(mean_grid_pos, 1))

    mean_finish_pos = yc_results["positionOrder"].mean()
    mean_finish_pos_str = "" if np.isnan(mean_finish_pos) else str(round(mean_finish_pos, 1))

    classifications = yc_results["statusId"].apply(get_status_classification)
    num_mechanical_dnfs = classifications[classifications == "mechanical"].shape[0]
    num_crash_dnfs = classifications[classifications == "crash"].shape[0]
    # NOTE(review): DNFs are counted per result row but divided by the number of distinct races, so with more than
    # one car per race the percentage can exceed 100% - confirm whether the rate should be per entry instead.
    if num_races > 0:
        num_mechanical_dnfs_str = str(num_mechanical_dnfs) + " (" + \
                                  str(round(100 * num_mechanical_dnfs / num_races, 1)) + "%)"
        num_crash_dnfs_str = str(num_crash_dnfs) + " (" + str(round(100 * num_crash_dnfs / num_races, 1)) + "%)"
    else:
        num_mechanical_dnfs_str = ""
        num_crash_dnfs_str = ""

    mean_teammate_gap_pos = (comparison_source["driver1_fp"] - comparison_source["driver2_fp"]).mean()
    if np.isnan(mean_teammate_gap_pos):
        mean_teammate_gap_pos_str = ""
    else:
        # A negative gap means driver 1 had the numerically smaller (better) finishing positions
        driver1_ahead = mean_teammate_gap_pos < 0
        mean_teammate_gap_pos_str = "Driver {} finished {} places better than driver {} on average"
        mean_teammate_gap_pos_str = mean_teammate_gap_pos_str.format("1" if driver1_ahead else "2",
                                                                     str(abs(round(mean_teammate_gap_pos, 1))),
                                                                     "2" if driver1_ahead else "1")

    # Construct the HTML
    header_template = """ <h2 style="text-align: left;"><b>{}</b></h2> """
    template = """ <pre><b>{}</b> {}<br></pre> """

    constructor_name = get_constructor_name(constructor_id, include_flag=False)
    constructor_stats = header_template.format(f"{constructor_name}'s Stats for the {year_id} Season")
    constructor_stats += template.format("WCC Final Pos.: ".ljust(20), wcc_final_standing_str)
    constructor_stats += template.format("Num. Races: ".ljust(20), num_races_str)
    if num_wins == 0:
        constructor_stats += template.format("Best Finish Pos.: ".ljust(20), highest_race_finish_str)
    constructor_stats += template.format("Wins: ".ljust(20), wins_str)
    constructor_stats += template.format("Podiums: ".ljust(20), podiums_str)
    constructor_stats += template.format("Points: ".ljust(20), points_str)
    constructor_stats += template.format("Drivers(s): ".ljust(20), driver_names)
    constructor_stats += template.format("Avg. Grid Pos.: ".ljust(20), mean_grid_pos_str)
    constructor_stats += template.format("Avg. Finish Pos.: ".ljust(20), mean_finish_pos_str)
    constructor_stats += template.format("Mechanical DNFs: ".ljust(20), num_mechanical_dnfs_str)
    constructor_stats += template.format("Crash DNFs: ".ljust(20), num_crash_dnfs_str)
    constructor_stats += template.format("Avg. Driver Gap: ".ljust(20), mean_teammate_gap_pos_str)

    return Div(text=constructor_stats)
def generate_results_table(yc_results, yc_fastest_lap_data, year_results, year_fastest_lap_data, year_only=False,
                           height=None, include_driver_name=True, include_constructor_name=False):
    """
    Generates a table of results at each race, including quali position, finish position (or reason for DNF), time,
    gap to leader, fastest lap time and gap to fastest lap (of all drivers), average lap time and gap to fastest
    average lap time (of all drivers).

    :param yc_results: YC results
    :param yc_fastest_lap_data: YC fastest lap data
    :param year_results: Year results
    :param year_fastest_lap_data: Year fastest lap data
    :param year_only: Whether to set the race name row to just the year
    :param height: Plot height, if None the height is derived from the number of rows in `yc_results`
    :param include_driver_name: If True, will include a driver name column
    :param include_constructor_name: If True, will include a constructor name column
    :return: Table layout, source
    """
    # TODO this might be able to be refactored with yeardriver or year, but it is kind of unique
    logging.info("Generating results table")

    source_columns = ["race_name", "driver_name", "driver_id", "race_id", "year", "constructor_name",
                      "quali_pos_str", "finish_pos_str", "time_str", "fastest_lap_time_str", "avg_lap_time_str"]
    # Rows are collected as dicts and turned into a frame once: `DataFrame.append` in a loop is quadratic and no
    # longer exists in pandas >= 2.0.
    rows = []
    for _, results_row in yc_results.sort_values(by=["raceId", "driverId"]).iterrows():
        rid = results_row["raceId"]
        driver_id = results_row["driverId"]
        constructor_id = results_row["constructorId"]
        driver_name = get_driver_name(driver_id)
        constructor_name = get_constructor_name(constructor_id)
        race_results = year_results[year_results["raceId"] == rid]
        race_fastest_lap_data = year_fastest_lap_data[year_fastest_lap_data["raceId"] == rid]
        race_driver_fastest_lap_data = yc_fastest_lap_data[(yc_fastest_lap_data["raceId"] == rid) &
                                                           (yc_fastest_lap_data["driver_id"] == driver_id)]
        race_name = get_race_name(rid)

        # Grid position
        grid = results_row["grid"]
        quali_pos_str = "DNQ" if grid == -1 else int_to_ordinal(grid)

        # Finish position
        status_id = results_row["statusId"]
        finish_pos_str, finish_pos = result_to_str(results_row["positionOrder"], status_id)

        # Race time and gap to the winner
        time = results_row["milliseconds"]
        winner = race_results[race_results["positionOrder"] == 1]
        if winner.shape[0] > 0 and winner["driverId"].values[0] != driver_id \
                and not np.isnan(time) and not np.isnan(results_row["position"]):
            time_gap = millis_to_str(time - winner["milliseconds"].values[0])
            time_str = millis_to_str(time) + " (+" + time_gap + ")"
            if status_id != 1 and get_status_classification(status_id) == "finished":
                # Classified as a finisher but with a status other than ID 1: show the status text as well
                time_str = millis_to_str(time) + " (+" + time_gap + ", " + status.loc[status_id, "status"] + ")"
        elif finish_pos == 1:
            time_str = millis_to_str(time)
        else:
            time_str = "Not Set"

        # Fastest lap and average lap time, each with the gap to the best of all drivers in that race
        if race_driver_fastest_lap_data.shape[0] > 0:
            fastest_lap_time = race_driver_fastest_lap_data["fastest_lap_time_millis"].values[0]
            fastest_lap_time_str = millis_to_str(fastest_lap_time)
            if race_driver_fastest_lap_data["rank"].values[0] == " 1":
                fastest_lap_time_str = fastest_lap_time_str + " (Fastest)"
            else:
                fastest_time = race_fastest_lap_data[race_fastest_lap_data["rank"] == " 1"]["fastest_lap_time_millis"]
                if fastest_time.shape[0] > 0 and not np.isnan(fastest_lap_time):
                    fastest_time = fastest_time.values[0]
                    fastest_gap = millis_to_str(fastest_lap_time - fastest_time)
                    fastest_lap_time_str = millis_to_str(fastest_lap_time) + " (+" + fastest_gap + ")"
            if fastest_lap_time_str == "":
                fastest_lap_time_str = "Not Set"

            avg_lap_time = race_driver_fastest_lap_data["avg_lap_time_millis"].values[0]
            if np.isnan(avg_lap_time):
                avg_lap_time_str = "Not Set"
            else:
                # Only look for the race's best average once it is known to be needed, and ignore missing values
                # so idxmin() always yields a usable label.
                race_avg_lap_times = race_fastest_lap_data["avg_lap_time_millis"].dropna()
                if race_avg_lap_times.empty:
                    # Nothing to compare against, show the bare time
                    avg_lap_time_str = millis_to_str(avg_lap_time)
                else:
                    fastest_avg_idx = race_avg_lap_times.idxmin()
                    if race_fastest_lap_data.loc[fastest_avg_idx, "driver_id"] == driver_id:
                        avg_lap_time_str = millis_to_str(avg_lap_time) + " (Fastest Avg.)"
                    else:
                        fastest_avg_time = race_fastest_lap_data.loc[fastest_avg_idx, "avg_lap_time_millis"]
                        avg_gap = millis_to_str(avg_lap_time - fastest_avg_time)
                        avg_lap_time_str = millis_to_str(avg_lap_time) + " (+" + avg_gap + ")"
        else:
            fastest_lap_time_str = "Not Set"
            avg_lap_time_str = "Not Set"

        rows.append({
            "race_name": race_name,
            "race_id": rid,
            "driver_name": driver_name,
            "driver_id": driver_id,
            "constructor_name": constructor_name,
            "year": races.loc[rid, "year"],
            "quali_pos_str": quali_pos_str,
            "finish_pos_str": finish_pos_str,
            "time_str": time_str,
            "fastest_lap_time_str": fastest_lap_time_str,
            "avg_lap_time_str": avg_lap_time_str
        })

    source = pd.DataFrame(rows, columns=source_columns)
    # Stable sort so that rows sharing a year keep their race/driver order from the loop above
    source = source.sort_values(by="year", ascending=False, kind="mergesort")

    results_columns = [
        TableColumn(field="quali_pos_str", title="Grid Pos.", width=75),
        TableColumn(field="finish_pos_str", title="Finish Pos.", width=75),
        TableColumn(field="time_str", title="Time", width=100),
        TableColumn(field="fastest_lap_time_str", title="Fastest Lap Time", width=75),
        TableColumn(field="avg_lap_time_str", title="Avg. Lap Time", width=75),
    ]
    # Optional columns are pushed to the front, so the last one inserted ends up left-most
    if include_driver_name:
        results_columns.insert(0, TableColumn(field="driver_name", title="Driver Name", width=100))
    if include_constructor_name:
        results_columns.insert(0, TableColumn(field="constructor_name", title="Constructor Name", width=100))
    if year_only:
        results_columns.insert(0, TableColumn(field="year", title="Year", width=50))
    else:
        results_columns.insert(0, TableColumn(field="race_name", title="Race Name", width=100))

    results_table = DataTable(source=ColumnDataSource(data=source), columns=results_columns, index_position=None,
                              height=28 * yc_results.shape[0] if height is None else height)

    title = Div(text=f"<h2><b>Results for each race</b></h2><br><i>The fastest lap time and average lap time gaps "
                     f"shown are calculated based on the gap to the fastest of all drivers and fastest average of "
                     f"all drivers in that race respectively.</i>")

    return column([title, row([results_table], sizing_mode="stretch_width")], sizing_mode="stretch_width"), source
Very fragile scraping method """ html = requests.get(wiki_url) b = BeautifulSoup(html.text, "lxml") imgs = b.find_all(name="img") for img_tag in imgs: if "src" in img_tag.attrs: if surname not in img_tag.attrs["src"]: continue return img_tag.attrs["src"] return "" drivers = load_drivers() driver_img_src = pd.DataFrame(columns=["driverId", "imgUrl"]) i = 0 for did, row in drivers.iterrows(): url = get_driver_image(row["url"], row["surname"]).strip("//") driver_img_src = driver_img_src.append({ "imgUrl": url, "driverId": did }, ignore_index=True) i += 1 name = get_driver_name(did, include_flag=False) print(f"{name}: {url}") print(f"{i} / {drivers.shape[0]}") print("=" * 20) driver_img_src = driver_img_src.set_index("driverId") driver_img_src.to_csv("data/static_data/driver_image_urls.csv")
def generate_circuits_table():
    """
    Generates a table with information on every circuit.

    For every row of the module-level `circuits` table this lists the name, location, number of races, the years
    raced, and the driver(s) and constructor(s) with the most wins there (all of them in case of a tie).

    :return: Circuits table layout
    """

    def top_winners_str(winner_ids, name_func):
        # Names everyone tied for the most wins; empty string if no winner is recorded at all
        win_counts = winner_ids.value_counts()
        if win_counts.empty:
            return ""
        # value_counts() is sorted descending, so the first entry holds the highest count
        leaders = win_counts[win_counts == win_counts.iloc[0]]
        return ", ".join(name_func(winner_id) + " (" + str(num_wins) + " wins)"
                         for winner_id, num_wins in leaders.items())

    # Rows are collected as dicts and turned into a frame once: `DataFrame.append` in a loop is quadratic and no
    # longer exists in pandas >= 2.0.
    rows = []
    for cid, circuit_row in circuits.iterrows():
        circuit_races = races[races["circuitId"] == cid]
        years = circuit_races["year"].unique()
        years.sort()
        circuit_winners = results[(results["raceId"].isin(circuit_races.index)) & (results["position"] == 1)]
        rows.append({
            "circuit_name": get_circuit_name(cid),
            "location": circuit_row["location"] + ", " + circuit_row["country"],
            "num_races": circuit_races.shape[0],
            "years": rounds_to_str(years),
            "top_driver": top_winners_str(circuit_winners["driverId"], get_driver_name),
            "top_constructor": top_winners_str(circuit_winners["constructorId"], get_constructor_name)
        })

    source = pd.DataFrame(rows, columns=["circuit_name", "location", "num_races", "years", "top_driver",
                                         "top_constructor"])
    source = source.sort_values(by="num_races", ascending=False)

    circuits_columns = [
        TableColumn(field="circuit_name", title="Circuit Name", width=150),
        TableColumn(field="location", title="Location", width=100),
        TableColumn(field="num_races", title="Num. Races", width=50),
        TableColumn(field="years", title="Years", width=130),
        TableColumn(field="top_driver", title="Top Winner(s) (Driver)", width=275),
        TableColumn(field="top_constructor", title="Top Winner(s) (Constructor)", width=200),
    ]

    circuits_table = DataTable(source=ColumnDataSource(data=source), columns=circuits_columns, index_position=None)

    title = Div(text="<h2><b>All Circuits</b></h2>")

    return column([title, circuits_table], sizing_mode="stretch_width")