def avgofffastlap():
    """Scatter-plot race rating against the average gap to each driver's best lap.

    For every rated race, each driver's mean lap time and fastest lap time (both
    in the ``milliseconds`` column) are computed and averaged over the drivers
    of that race. The difference between those two per-race averages is plotted
    on the x axis against the race rating on the y axis, and the figure is
    written to a fixed PNG path.

    Returns:
        None. The only effect is drawing on the current matplotlib figure and
        saving it to disk.
    """
    raceratings = da.getraceratings()
    laptimes = da.laptimes()

    # Right join keeps every lap row; laps belonging to races without a rating
    # are discarded immediately afterwards.
    racelapratings = pd.merge(raceratings, laptimes, how="right", on="raceId")
    racelapratings = racelapratings.dropna(subset=["rating"])

    perdriver = racelapratings.groupby(["raceId", "driverId"])

    # numeric_only=True: pandas >= 2.0 raises TypeError when mean() meets a
    # non-numeric column instead of silently dropping it as older versions did.
    avglaptimerace = perdriver.mean(numeric_only=True).groupby("raceId").mean()

    # Only the lap time is needed from the per-driver minimum, so select it up
    # front rather than averaging every column of the min() result.
    avgbestlaptime = perdriver["milliseconds"].min().groupby("raceId").mean()

    avgdeltarace = avglaptimerace["milliseconds"] - avgbestlaptime

    racenames = racelapratings.drop_duplicates(subset=["raceId"]).name.to_numpy()
    cmp = makecmp(raceratings, racenames)

    # NOTE(review): the plotted points are ordered by raceId (groupby sorts its
    # keys) while racenames follows first appearance in the lap data. If
    # cmp.colors is positional, confirm both orderings agree.
    plt.scatter(avgdeltarace,
                avglaptimerace["rating"],
                alpha=0.8,
                color=cmp.colors)
    plt.ylabel("Score")
    plt.xlabel("Average ms off fastest lap")
    plt.title("Rating (1-10) compared to average delta per driver per race")
    plt.tight_layout()

    # Raw string: the backslashes are path separators, not escape sequences.
    plt.savefig(r"D:\Semester3\ADS-A\Challenge\Charts\AvgOffFastLapBig.png")
def getdata():
    """Assemble one row per race with rating, crash count, pit stops and lap delta.

    Three per-race features are derived and joined onto the race ratings:

    * ``crashcount``   - number of result rows whose ``statusId`` is greater
      than 1 and outside the range 10..14 (which statuses those codes stand
      for is not visible here - presumably non-finishes; confirm against the
      status table).
    * ``averagestops`` - pit-stop rows per driver, averaged over the drivers
      of the race.
    * ``delta``        - per-race average of each driver's mean lap time minus
      the per-race average of each driver's fastest lap time (``milliseconds``).

    Returns:
        DataFrame with columns ``name``, ``rating``, ``crashcount``,
        ``averagestops`` and ``delta``; rows with any missing value are dropped.
    """
    raceratings = da.getraceratings()
    raceratings = raceratings.drop(
        columns=['year', 'round', 'circuitId', 'date', 'time', 'race'])

    # --- crash count per race -------------------------------------------
    results = da.results()
    raceresults = pd.merge(raceratings, results, how='right', on="raceId")
    status = raceresults["statusId"]
    crashers = raceresults[(status > 1) & ((status < 10) | (status > 14))]

    # Count an explicitly named column instead of keeping "column number 18"
    # of the count() table. statusId is non-null for every filtered row, so
    # this is the number of matching result rows per race.
    ccount = crashers.groupby("raceId")["statusId"].count().to_frame("crashcount")

    # Left join so that rated races without any matching row are kept and get
    # a crash count of 0; a right join would drop them and make the fillna
    # below a no-op.
    raceratings = pd.merge(raceratings, ccount, how="left", on="raceId")
    raceratings["crashcount"] = raceratings["crashcount"].fillna(0)

    # --- average pit stops per driver -----------------------------------
    stops = da.pitstops()
    racestops = pd.merge(raceratings, stops, how='left', on="raceId")
    racestopsraceavg = racestops.groupby(
        ["raceId", "driverId"]).count().groupby("raceId").mean()
    # Exactly one column must survive this drop for the rename below to work.
    racestopsraceavg = racestopsraceavg.drop(columns=[
        'name', 'rating', 'crashcount', 'lap', 'time', 'duration',
        'milliseconds'
    ])
    racestopsraceavg.columns = ["averagestops"]
    raceratings = pd.merge(raceratings,
                           racestopsraceavg,
                           how="right",
                           on="raceId")

    # --- average gap between mean lap and fastest lap --------------------
    laptimes = da.laptimes()
    racelapratings = pd.merge(raceratings, laptimes, how="right", on="raceId")
    racelapratings = racelapratings.dropna(subset=["rating"])

    # Aggregate only the lap-time column: mean() over the whole frame raises
    # TypeError on pandas >= 2.0 because non-numeric columns are present.
    driverlaps = racelapratings.groupby(["raceId", "driverId"])["milliseconds"]
    avglaptimerace = driverlaps.mean().groupby("raceId").mean()
    avgbestlaptime = driverlaps.min().groupby("raceId").mean()
    avgdeltarace = avglaptimerace - avgbestlaptime

    raceratings = pd.merge(raceratings, avgdeltarace, how="left", on="raceId")
    raceratings.columns = [
        'raceId', 'name', 'rating', 'crashcount', 'averagestops', 'delta'
    ]
    raceratings = raceratings.drop(columns=['raceId'])
    return raceratings.dropna()