def getPitStops(filepath):
    """
    Read the pit-stop file and keep the rows that pass the driver/race filter.

    A row is kept when ``ifExist`` accepts its driverId against the list
    returned by ``getRecentDrivers(2000)``; it is appended once for every
    circuit in "races.csv" whose race list contains the row's raceId.

    Parameters
    ----------
    filepath: string
        The directory to the file relative to the parentPath
        # [raceId, driverId, stop, lap, time, duration, milliseconds]

    Returns
    -------
    tuple
        (headings, data, dictionary) where dictionary maps
        "raceId,driverId" -> {stop: column 6 of the row}
    """
    recent_drivers = getRecentDrivers(2000)
    races_by_circuit = getCircuitRaceList("races.csv")
    circuit_ids = races_by_circuit.keys()

    reader = getDataset(filepath)
    # The first record of the dataset is the heading row.
    header = next(reader)

    kept_rows = []
    for row in reader:
        if not ifExist(row[1], recent_drivers):
            continue
        for circuit_id in circuit_ids:
            if row[0] in races_by_circuit[circuit_id]:
                kept_rows.append(row)

    print("Data from file: " + str(header))

    # Missing stops read back as 0.0.
    stops_by_race_driver = defaultdict(lambda: defaultdict(float))
    for row in kept_rows:
        pair_key = ",".join((str(row[0]), str(row[1])))
        stops_by_race_driver[pair_key][row[2]] = row[6]

    return (header, kept_rows, stops_by_race_driver)
def getPreprocessedData(filepath):
    """
    Read a preprocessed dataset and keep the rows that pass the driver/race filter.

    A row is kept when ``ifExist`` accepts its driverId against the list
    returned by ``getRecentDrivers(2000)``; it is appended once for every
    circuit in "races.csv" whose race list contains the row's raceId.

    Parameters
    ----------
    filepath: string
        The directory to the file relative to the parentPath
        # [raceId, driverId, constructorId, rank, points, grid, position,
        #  positionOrder, positionText, firstLapChange, pitStops]

    Returns
    -------
    tuple
        (headings, data)
    """
    recent_drivers = getRecentDrivers(2000)
    races_by_circuit = getCircuitRaceList("races.csv")
    circuit_ids = races_by_circuit.keys()

    reader = getDataset(filepath)
    # The first record of the dataset is the heading row.
    header = next(reader)

    kept_rows = []
    for row in reader:
        if not ifExist(row[1], recent_drivers):
            continue
        for circuit_id in circuit_ids:
            if row[0] in races_by_circuit[circuit_id]:
                kept_rows.append(row)

    print("Data from file: " + str(header))
    return (header, kept_rows)
def getDataFromResults(filepath):
    """
    Read the results file and keep the rows that pass the driver/race filter.

    A row is kept when ``ifExist`` accepts its driverID against the list
    returned by ``getRecentDrivers(2000)``; it is appended once for every
    circuit in "races.csv" whose race list contains the row's raceID.

    Parameters
    ----------
    filepath: string
        The directory to the file relative to the parentPath
        # [raceID, driverID, constructorID, rank, points, grid, position,
        #  positionOrder]

    Returns
    -------
    tuple
        (headings, data)
    """
    recent_drivers = getRecentDrivers(2000)
    races_by_circuit = getCircuitRaceList("races.csv")
    circuit_ids = races_by_circuit.keys()

    reader = getDataset(filepath)
    # The first record of the dataset is the heading row.
    header = next(reader)

    kept_rows = []
    for row in reader:
        if not ifExist(row[1], recent_drivers):
            continue
        for circuit_id in circuit_ids:
            if row[0] in races_by_circuit[circuit_id]:
                kept_rows.append(row)

    print("Data from file: " + str(header))
    return (header, kept_rows)
def getLapTimes(filepath):
    """
    Read the lap-time file, filter it, and index the lap times by race and lap.

    A row is kept when ``ifExist`` accepts its driverId against the list
    returned by ``getRecentDrivers(2000)``; it is appended once for every
    circuit in "races.csv" whose race list contains the row's raceId.

    Parameters
    ----------
    filepath: string
        The directory to the file relative to the parentPath
        # [raceId, driverId, lap, position, time, milliseconds]

    Returns
    -------
    tuple
        (headings, data, dictionary, dictionary of dictionary)
        dictionary: {raceID: [np.array([driverID, lap#, milliseconds])]}
        dictionary of dictionary:
            {raceID: {lap#: [[driverID, milliseconds]]}}, each inner list
            sorted ascending by milliseconds
    """
    recent_drivers = getRecentDrivers(2000)
    races_by_circuit = getCircuitRaceList("races.csv")
    circuit_ids = races_by_circuit.keys()

    reader = getDataset(filepath)
    # The first record of the dataset is the heading row.
    header = next(reader)

    kept_rows = []
    for row in reader:
        if not ifExist(row[1], recent_drivers):
            continue
        for circuit_id in circuit_ids:
            if row[0] in races_by_circuit[circuit_id]:
                kept_rows.append(row)

    print("Data from file: " + str(header))

    laps_by_race = defaultdict(list)
    laps_by_race_and_lap = defaultdict(lambda: defaultdict(list))
    for row in kept_rows:
        race_id = int(row[0])
        driver_id = int(row[1])
        lap_number = int(row[2])
        lap_millis = int(row[5])
        laps_by_race[race_id].append(
            np.array([driver_id, lap_number, lap_millis]))
        laps_by_race_and_lap[race_id][lap_number].append(
            [driver_id, lap_millis])

    # Order every lap's entries by lap time (second element of each pair).
    for laps_of_race in laps_by_race_and_lap.values():
        for lap_entries in laps_of_race.values():
            lap_entries.sort(key=itemgetter(1))

    return (header, kept_rows, laps_by_race, laps_by_race_and_lap)
def getCircuitSpecificStatisticsOfFirstLapChange(headings):
    """
    Save per-driver, per-circuit statistics of the first-lap position change.

    Rows of "PreprocessedDataset1.csv" are grouped by driverId (column 1) and
    by the circuit whose race list in "races.csv" contains the row's raceId
    (column 0). For every (driver, circuit) group the firstLapChange values
    (column 9) are summarised; values equal to "-999" are skipped
    (presumably a missing-value marker -- confirm against the preprocessing
    step). The resulting table is passed to ``saveLListAsCSV`` under the name
    "PreprocessedDataset3".

    Parameters
    ----------
    headings: list
        Heading row to extend. It is modified IN PLACE: the twelve column
        names produced here are appended, and the same list object becomes
        the first row of the saved table.

    Returns
    -------
    None

    Notes
    -----
    * The value list and all statistics are computed independently for each
      (driver, circuit) row; a group with no valid value gets an empty list
      and 0.0 statistics.
    * Mean and standard deviation use true (floating point) division; the
      standard deviation is the population one (divides by N).
    * Quartiles and median are picked by index from the sorted values
      (positions N//4, N//2 and N//4*3), not interpolated.
    """
    headings.extend([
        "DriverId",
        "DriverName",
        "CircuitId",
        "CircuitName",
        "FirstLapChangeList",
        "FirstLapChangeMin",
        "FirstLapChangeMax",
        "FirstLapChangeFirstQuartile",
        "FirstLapChangeMedian",
        "FirstLapChangeThirdQuartile",
        "FirstLapChangeMean",
        "FirstLapChangeSd",
    ])

    circuitRaceList = getCircuitRaceList("races.csv")
    # Only the data rows are needed; the heading row of the input is ignored.
    _, PreprocessedDataset = getPreprocessedData("PreprocessedDataset1.csv")
    circuitNameDict = getRecentTracksName()
    driverIDNameDict = getDriverIDName("drivers.csv")

    # {driverId: {circuitId: [first lap change, as read from the file]}}
    changes_by_driver = defaultdict(lambda: defaultdict(list))
    circuitKeys = circuitRaceList.keys()
    for data in PreprocessedDataset:
        for circuitKey in circuitKeys:
            if data[0] in circuitRaceList[circuitKey]:
                changes_by_driver[data[1]][circuitKey].append(data[9])

    # The heading row goes first, followed by one row per (driver, circuit).
    _ret_list = [headings]
    for driver_id in sorted(changes_by_driver, key=int):
        driver_name = driverIDNameDict[driver_id]
        changes_by_circuit = changes_by_driver[driver_id]

        for circuit_id in sorted(changes_by_circuit, key=int):
            circuit_name = circuitNameDict[circuit_id]

            valid_values = [
                value for value in changes_by_circuit[circuit_id]
                if value != "-999"
            ]
            # Same textual format as before ("a,b,c," with trailing comma),
            # but rebuilt for every circuit instead of accumulating across
            # all circuits of the driver.
            change_list = "".join(value + "," for value in valid_values)
            changes = sorted(int(value) for value in valid_values)

            # Defaults are reset for every row so a circuit without valid
            # values never inherits the previous circuit's statistics.
            change_min = 0.0
            change_max = 0.0
            first_quartile = 0.0
            median = 0.0
            third_quartile = 0.0
            mean = 0.0
            std_dev = 0.0

            if changes:
                count = len(changes)
                sample = np.array(changes)
                change_min = changes[0]
                change_max = changes[-1]
                # Explicit floor division keeps the indices integral.
                first_quartile = changes[count // 4]
                median = changes[count // 2]
                third_quartile = changes[count // 4 * 3]
                # np.mean / np.std always divide in floating point, unlike
                # "/" on integers under Python 2; np.std defaults to the
                # population standard deviation (ddof=0).
                mean = np.mean(sample)
                std_dev = np.std(sample)

            _ret_list.append([
                driver_id,
                driver_name,
                circuit_id,
                circuit_name,
                change_list,
                change_min,
                change_max,
                first_quartile,
                median,
                third_quartile,
                mean,
                std_dev,
            ])

    saveLListAsCSV("PreprocessedDataset3", _ret_list)
def getDriverIDConstructorAveragePointsRanking(dataset):
    """
    Build one output row per input row with average points and a ranking group.

    Parameters
    ----------
    dataset: list
        Rows of [raceID, driverID, constructorID, rank, points]

    Returns
    -------
    dict
        {"listofList": rows}; the first row is the heading
        ["ConstructorName", "DriverID", "DriverName", "CircuitName",
         "AveragePoints", "Ranking Groups"].

    Notes
    -----
    * "AveragePoints" is the driver's summed points over ``dataset`` divided
      by the length of that driver's entry in the mapping returned by
      ``convertRaceToYear``.
    * The value categorised for "Ranking Groups" is column 3 minus column 1
      of the matching entry from
      ``getRaceID_Grid_FirstLapPosition_FinalPosition`` (0 when no entry
      matches the raceID).
    """

    def returnCategoryNumber(rank):
        # 1 -> 4, 2..3 -> 3, 4..10 -> 2, anything else -> 1
        value = int(rank)
        if value == 1:
            return 4
        if 1 < value < 4:
            return 3
        if 3 < value < 11:
            return 2
        return 1

    table = [["ConstructorName", "DriverID", "DriverName", "CircuitName",
              "AveragePoints", "Ranking Groups"]]

    # {driverID: total points}
    points_by_driver = defaultdict(float)

    circuitRaceList = getCircuitRaceList("races.csv")
    sorted_circuit_ids = sorted(circuitRaceList.keys(), key=int)
    circuitIDNameDict = getRecentTracksName()

    # {driverID: [raceID]}
    races_by_driver = defaultdict(list)
    for row in dataset:
        points_by_driver[row[1]] += float(row[4])
        races_by_driver[row[1]].append(row[0])

    yearDict_raceList = getYearRaceID("races.csv")
    ConstructorIDName = getConstructorIDName("constructors.csv")
    DriverName = getDriverIDName("drivers.csv")

    # {driverID: [year]}
    years_by_driver = convertRaceToYear(races_by_driver, yearDict_raceList)

    # {driverID: [[raceID, starting position, position after first lap,
    #              ending position]]}
    positions_by_driver = getRaceID_Grid_FirstLapPosition_FinalPosition()

    for row in dataset:
        race_id = row[0]
        driver_id = row[1]

        # First entry of this driver that belongs to the row's race.
        position_delta = next(
            (int(stats[3]) - int(stats[1])
             for stats in positions_by_driver[driver_id]
             if stats[0] == race_id),
            0)

        # First circuit (in ascending numeric id order) hosting the race.
        circuit_id = next(
            (candidate for candidate in sorted_circuit_ids
             if race_id in circuitRaceList[candidate]),
            "")

        table.append([
            ConstructorIDName[row[2]],
            driver_id,
            DriverName[driver_id],
            circuitIDNameDict[circuit_id],
            points_by_driver[driver_id] / len(years_by_driver[driver_id]),
            returnCategoryNumber(position_delta),
        ])

    return {"listofList": table}
def getDriverTopPerformingCircuits():
    """
    List, per driver and finishing category, the circuits hit most often.

    The per-driver entries returned by
    ``getRaceID_Grid_FirstLapPosition_FinalPosition`` are matched to circuits
    through the race lists of "races.csv". Column 3 of each entry is bucketed
    into "1", "2_3", "4_10" or "others", and occurrences are counted per
    (driver, category, circuit).

    Returns
    -------
    list
        Rows of [driverID, category, circuits] where ``circuits`` is up to
        three circuit ids with the highest counts, each prefixed with "_"
        (e.g. "_3_14_7").
    """

    def returnCategory(rank):
        value = int(rank)
        if value == 1:
            return "1"
        if 1 < value < 4:
            return "2_3"
        if 3 < value < 11:
            return "4_10"
        return "others"

    # {driverID: {category: {circuitId: count}}}
    counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    # {driverID: {category: [[circuitId, count]]}}
    ranked = defaultdict(lambda: defaultdict(list))

    # {circuitID: raceList}
    circuitRaceList = getCircuitRaceList("races.csv")
    # {driverID: [[raceID, starting position, position after first lap,
    #              final position]]}
    RGFF_dict = getRaceID_Grid_FirstLapPosition_FinalPosition()

    driver_ids = sorted(RGFF_dict.keys(), key=int)
    circuit_ids = sorted(circuitRaceList.keys(), key=int)

    for driver_id in driver_ids:
        for circuit_id in circuit_ids:
            races_here = circuitRaceList[circuit_id]
            for entry in RGFF_dict[driver_id]:
                if entry[0] in races_here:
                    counts[driver_id][returnCategory(entry[3])][circuit_id] += 1

    for driver_id in sorted(counts.keys(), key=int):
        for category in counts[driver_id].keys():
            per_circuit = counts[driver_id][category]
            for circuit_id in per_circuit.keys():
                ranked[driver_id][category].append(
                    [circuit_id, per_circuit[circuit_id]])
            # Highest count first; the sort is stable, so ties keep their
            # insertion order.
            ranked[driver_id][category].sort(key=itemgetter(1), reverse=True)

    result = []
    for driver_id in sorted(ranked.keys(), key=int):
        for category in ranked[driver_id].keys():
            top_three = ranked[driver_id][category][:3]
            label = "".join("_" + str(pair[0]) for pair in top_three)
            result.append([driver_id, category, label])

    return result