Example #1
0
def getPitStops(filepath):
    """Load pit-stop rows, filter them, and index pit times by race and driver.

    A row is kept when its driverId (column 1) passes ``ifExist`` against
    ``getRecentDrivers(2000)`` and its raceId (column 0) appears in at least
    one of the race lists returned by ``getCircuitRaceList("races.csv")``.
    Each matching row is kept exactly once.

    Parameters
    ----------
    filepath: string
        Passed unchanged to ``getDataset``; per the original notes, the
        location of the file relative to the parentPath. Expected columns:
        [raceId, driverId, stop, lap, time, duration, milliseconds]

    Returns
    -------
    tuple
        (headings, data, dictionary) where ``headings`` is the first item
        yielded by the dataset, ``data`` is the list of kept rows, and
        ``dictionary`` maps "raceId,driverId" -> {stop: milliseconds}, with
        the values stored exactly as they were read from the row.
    """
    RecentDriversList = getRecentDrivers(2000)
    circuitRaceList = getCircuitRaceList("races.csv")
    # Materialise the race lists once; they are scanned for every row.
    raceLists = list(circuitRaceList.values())
    PitStopsData = getDataset(filepath)
    # The builtin next() works on Python 2.6+ and Python 3 iterators alike.
    dataObtained = next(PitStopsData)
    # any() stops at the first circuit containing the race, so a row can
    # never be appended more than once.
    Data = [
        row for row in PitStopsData
        if ifExist(row[1], RecentDriversList)
        and any(row[0] in races for races in raceLists)
    ]
    print("Data from file: " + str(dataObtained))
    # {"raceId,driverId": {stop: pit time in milliseconds}}
    _ret_dict = defaultdict(lambda: defaultdict(lambda: 0.0))
    for row in Data:
        _ret_dict[str(row[0]) + "," + str(row[1])][row[2]] = row[6]
    return (dataObtained, Data, _ret_dict)
Example #2
0
def getPreprocessedData(filepath):
    """Load the preprocessed result rows and keep only the relevant ones.

    A row is kept when its driverId (column 1) passes ``ifExist`` against
    ``getRecentDrivers(2000)`` and its raceId (column 0) appears in at least
    one of the race lists returned by ``getCircuitRaceList("races.csv")``.
    Each matching row is kept exactly once.

    Parameters
    ----------
    filepath: string
        Passed unchanged to ``getDataset``; per the original notes, the
        location of the file relative to the parentPath. Expected columns:
        [raceId, driverId, constructorId, rank, points, grid, position,
         positionOrder, positionText, firstLapChange, pitStops]

    Returns
    -------
    tuple
        (headings, data): the first item yielded by the dataset and the
        list of kept rows.
    """
    RecentDriversList = getRecentDrivers(2000)
    circuitRaceList = getCircuitRaceList("races.csv")
    # Materialise the race lists once; they are scanned for every row.
    raceLists = list(circuitRaceList.values())
    PreprocessedData = getDataset(filepath)
    # The builtin next() works on Python 2.6+ and Python 3 iterators alike.
    dataObtained = next(PreprocessedData)
    # any() stops at the first circuit containing the race, so a row can
    # never be appended more than once.
    Data = [
        row for row in PreprocessedData
        if ifExist(row[1], RecentDriversList)
        and any(row[0] in races for races in raceLists)
    ]
    print("Data from file: " + str(dataObtained))

    return (dataObtained, Data)
Example #3
0
def getDataFromResults(filepath):
    """Load race-result rows and keep only the relevant ones.

    A row is kept when its driverId (column 1) passes ``ifExist`` against
    ``getRecentDrivers(2000)`` and its raceId (column 0) appears in at least
    one of the race lists returned by ``getCircuitRaceList("races.csv")``.
    Each matching row is kept exactly once.

    Parameters
    ----------
    filepath: string
        Passed unchanged to ``getDataset``; per the original notes, the
        location of the file relative to the parentPath. Expected columns:
        [raceID, driverID, constructorID, rank, points, grid, position,
         positionOrder]

    Returns
    -------
    tuple
        (headings, data): the first item yielded by the dataset and the
        list of kept rows.
    """
    RecentDriversList = getRecentDrivers(2000)
    circuitRaceList = getCircuitRaceList("races.csv")
    # Materialise the race lists once; they are scanned for every row.
    raceLists = list(circuitRaceList.values())
    ResultData = getDataset(filepath)
    # The builtin next() works on Python 2.6+ and Python 3 iterators alike.
    dataObtained = next(ResultData)
    # any() stops at the first circuit containing the race, so a row can
    # never be appended more than once.
    Data = [
        row for row in ResultData
        if ifExist(row[1], RecentDriversList)
        and any(row[0] in races for races in raceLists)
    ]
    print("Data from file: " + str(dataObtained))
    return (dataObtained, Data)
Example #4
0
def getLapTimes(filepath):
    """Load lap-time rows, filter them, and index them by race and by lap.

    A row is kept when its driverId (column 1) passes ``ifExist`` against
    ``getRecentDrivers(2000)`` and its raceId (column 0) appears in at least
    one of the race lists returned by ``getCircuitRaceList("races.csv")``.
    Each matching row is kept exactly once.

    Parameters
    ----------
    filepath: string
        Passed unchanged to ``getDataset``; per the original notes, the
        location of the file relative to the parentPath. Expected columns:
        [raceId, driverId, lap, position, time, milliseconds]

    Returns
    -------
    tuple
        (headings, data, dictionary, dictionary of dictionary) where
        ``dictionary`` is {raceId: [np.array([driverId, lap, milliseconds])]}
        and ``dictionary of dictionary`` is
        {raceId: {lap: [[driverId, milliseconds], ...]}} with every per-lap
        list sorted by milliseconds ascending. All keys and values in both
        dictionaries are converted with ``int``.
    """
    RecentDriversList = getRecentDrivers(2000)
    circuitRaceList = getCircuitRaceList("races.csv")
    # Materialise the race lists once; they are scanned for every row.
    raceLists = list(circuitRaceList.values())
    LapTimesData = getDataset(filepath)
    # The builtin next() works on Python 2.6+ and Python 3 iterators alike.
    dataObtained = next(LapTimesData)
    # any() stops at the first circuit containing the race, so a row can
    # never be appended more than once.
    Data = [
        row for row in LapTimesData
        if ifExist(row[1], RecentDriversList)
        and any(row[0] in races for races in raceLists)
    ]
    print("Data from file: " + str(dataObtained))
    # {raceID: [[driverID, lap#, laptime]]}
    _ret_dict = defaultdict(list)
    # {raceID: {lap#: [driverID, laptime]}}
    _ret_dict_dict = defaultdict(lambda: defaultdict(list))
    for row in Data:
        raceId = int(row[0])
        driverId = int(row[1])
        lap = int(row[2])
        milliseconds = int(row[5])
        _ret_dict[raceId].append(np.array([driverId, lap, milliseconds]))
        _ret_dict_dict[raceId][lap].append([driverId, milliseconds])
    # Order every lap's entries by lap time, fastest first.
    for lapsOfRace in _ret_dict_dict.values():
        for lapEntries in lapsOfRace.values():
            lapEntries.sort(key=itemgetter(1))
    return (dataObtained, Data, _ret_dict, _ret_dict_dict)
Example #5
0
def getCircuitSpecificStatisticsOfFirstLapChange(headings):
    """Write per-driver, per-circuit statistics of the first-lap change to CSV.

    Reads the rows of "PreprocessedDataset1.csv" through
    ``getPreprocessedData``, groups column 9 of each row by driverId
    (column 1) and by every circuit whose race list contains the row's raceId
    (column 0), and emits one output row per (driver, circuit) pair. The
    result, with ``headings`` as its first row, is handed to
    ``saveLListAsCSV("PreprocessedDataset3", ...)``.

    Parameters
    ----------
    headings: list
        Mutated in place: the twelve output column names are appended to it,
        and the same list object becomes the first row of the output.

    Returns
    -------
    None

    Notes
    -----
    * Values equal to "-999" are skipped (presumably a missing-value marker;
      confirm against the preprocessing step).
    * The value list and every statistic are computed per (driver, circuit)
      pair; a pair with no usable values gets an empty list string and 0.0
      for every statistic.
    * FirstLapChangeList keeps the trailing-comma format ("1,2,").
    * Quartiles and median are taken by index from the sorted values
      (n // 4, n // 2, (n // 4) * 3) without interpolation.
    * Mean and standard deviation (population, i.e. divided by n) are
      computed in floating point.
    """
    headings.extend([
        "DriverId",
        "DriverName",
        "CircuitId",
        "CircuitName",
        "FirstLapChangeList",
        "FirstLapChangeMin",
        "FirstLapChangeMax",
        "FirstLapChangeFirstQuartile",
        "FirstLapChangeMedian",
        "FirstLapChangeThirdQuartile",
        "FirstLapChangeMean",
        "FirstLapChangeSd",
    ])

    circuitRaceList = getCircuitRaceList("races.csv")
    _, PreprocessedDataset = getPreprocessedData("PreprocessedDataset1.csv")
    circuitNameDict = getRecentTracksName()
    driverIDNameDict = getDriverIDName("drivers.csv")

    # Materialise the (circuitId, races) pairs once instead of once per row.
    circuitRaces = list(circuitRaceList.items())
    # {driverId: {circuitId: [first lap change]}}
    changesByDriverCircuit = defaultdict(lambda: defaultdict(list))
    for data in PreprocessedDataset:
        for circuitKey, races in circuitRaces:
            if data[0] in races:
                changesByDriverCircuit[data[1]][circuitKey].append(data[9])

    rows = [headings]
    for driverID in sorted(changesByDriverCircuit, key=int):
        driverName = driverIDNameDict[driverID]
        perCircuit = changesByDriverCircuit[driverID]
        for circuitID in sorted(perCircuit, key=int):
            circuitName = circuitNameDict[circuitID]
            validValues = [
                value for value in perCircuit[circuitID] if value != "-999"
            ]
            changeList = "".join(value + "," for value in validValues)
            ordered = sorted(int(value) for value in validValues)

            # Start from neutral defaults for every circuit so nothing leaks
            # over from the previously processed circuit of the same driver.
            minimum = maximum = 0.0
            firstQuartile = median = thirdQuartile = 0.0
            mean = sd = 0.0
            if ordered:
                count = len(ordered)
                minimum = ordered[0]
                maximum = ordered[-1]
                # Floor division keeps the indices integral on Python 2 and 3.
                firstQuartile = ordered[count // 4]
                median = ordered[count // 2]
                thirdQuartile = ordered[(count // 4) * 3]
                # np.mean / np.std work in floating point; np.std defaults to
                # the population formula (ddof=0).
                mean = float(np.mean(ordered))
                sd = float(np.std(ordered))

            rows.append([
                driverID, driverName, circuitID, circuitName,
                changeList, minimum, maximum,
                firstQuartile, median, thirdQuartile,
                mean, sd,
            ])

    saveLListAsCSV("PreprocessedDataset3", rows)
Example #6
0
def getDriverIDConstructorAveragePointsRanking(dataset):
    """Build one summary row per dataset entry with names, average points and a category.

    Parameters:
    -----------
    dataset:
        Re-iterable collection of rows laid out as
        #[raceID, driverID, constructorID, rank, points]
        It is traversed twice: once to accumulate totals, once to emit rows.

    Returns:
    --------
    dict
        {"listofList": rows} where rows[0] is the header
        ["ConstructorName", "DriverID", "DriverName", "CircuitName",
         "AveragePoints", "Ranking Groups"] and each later row describes one
        dataset entry.
    """

    def returnCategoryNumber(rank):
        # 1 -> 4, 2..3 -> 3, 4..10 -> 2, anything else -> 1
        value = int(rank)
        if value == 1:
            return 4
        if 1 < value < 4:
            return 3
        if 3 < value < 11:
            return 2
        return 1

    rows = [["ConstructorName", "DriverID", "DriverName", "CircuitName", "AveragePoints", "Ranking Groups"]]
    # {driverID: total points over the whole dataset}
    pointsPerDriver = defaultdict(float)
    circuitRaceList = getCircuitRaceList("races.csv")
    orderedCircuitIDs = sorted(circuitRaceList.keys(), key=int)
    circuitIDNameDict = getRecentTracksName()

    # {driverID: [raceID]}
    racesPerDriver = defaultdict(list)
    for entry in dataset:
        pointsPerDriver[entry[1]] += float(entry[4])
        racesPerDriver[entry[1]].append(entry[0])

    yearDict_raceList = getYearRaceID("races.csv")
    ConstructorIDName = getConstructorIDName("constructors.csv")
    DriverName = getDriverIDName("drivers.csv")
    # presumably {driverID: [year]}; verify against convertRaceToYear
    yearsPerDriver = convertRaceToYear(racesPerDriver, yearDict_raceList)

    # {driverID: [raceID, starting position, position after first lap, ending position]}
    positionsPerDriver = getRaceID_Grid_FirstLapPosition_FinalPosition()

    for entry in dataset:
        raceID = entry[0]
        driverID = entry[1]

        # Ending position minus starting position for this race; 0 when the
        # driver has no record for it.
        # NOTE(review): this difference, not the finishing rank, is what
        # gets categorised below; confirm that is intended.
        positionChange = next(
            (int(stats[3]) - int(stats[1])
             for stats in positionsPerDriver[driverID]
             if stats[0] == raceID),
            0)

        # First circuit (in numeric id order) whose race list has this race;
        # "" when none does.
        circuitID = next(
            (candidate for candidate in orderedCircuitIDs
             if raceID in circuitRaceList[candidate]),
            "")

        constructorName = ConstructorIDName[entry[2]]
        driverName = DriverName[driverID]
        circuitName = circuitIDNameDict[circuitID]
        averagePoints = pointsPerDriver[driverID] / len(yearsPerDriver[driverID])
        rows.append([
            constructorName,
            driverID,
            driverName,
            circuitName,
            averagePoints,
            returnCategoryNumber(positionChange),
        ])

    return {"listofList": rows}
Example #7
0
def getDriverTopPerformingCircuits():
    """List, per driver and finishing category, the circuits where it happened most.

    Final positions come from ``getRaceID_Grid_FirstLapPosition_FinalPosition``
    ({driverID: [[raceID, starting position, position after first lap,
    final position], ...]}) and races are mapped to circuits through
    ``getCircuitRaceList("races.csv")`` ({circuitID: raceList}).

    Returns
    -------
    list
        Entries of the form [driverID, category, circuits] where category is
        one of "1", "2_3", "4_10", "others" and circuits is a string made of
        up to three circuit ids, each prefixed with "_", ordered by
        descending count. Drivers appear in ascending numeric id order.
    """

    def returnCategory(rank):
        # 1 -> "1", 2..3 -> "2_3", 4..10 -> "4_10", anything else -> "others"
        position = int(rank)
        if position == 1:
            return "1"
        if 1 < position < 4:
            return "2_3"
        if 3 < position < 11:
            return "4_10"
        return "others"

    # phase one: {driverID: {category: {circuitID: count}}}
    finishCounts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    # phase two: {driverID: {category: [[circuitID, count], ...]}}
    rankedCircuits = defaultdict(lambda: defaultdict(list))
    # result: [driverID, category, "_c1_c2_c3"]
    topCircuits = []

    circuitRaceList = getCircuitRaceList("races.csv")
    RGFF_dict = getRaceID_Grid_FirstLapPosition_FinalPosition()

    driverOrder = sorted(RGFF_dict.keys(), key=int)
    circuitOrder = sorted(circuitRaceList.keys(), key=int)
    for driverID in driverOrder:
        for circuitID in circuitOrder:
            for raceStats in RGFF_dict[driverID]:
                if raceStats[0] not in circuitRaceList[circuitID]:
                    continue
                category = returnCategory(raceStats[3])
                finishCounts[driverID][category][circuitID] += 1

    # Turn every count mapping into a list ordered by descending count; the
    # sort is stable, so ties keep the mapping's iteration order.
    for driverID in sorted(finishCounts.keys(), key=int):
        for category in finishCounts[driverID].keys():
            ranked = rankedCircuits[driverID][category]
            for circuitID, count in finishCounts[driverID][category].items():
                ranked.append([circuitID, count])
            ranked.sort(key=itemgetter(1), reverse=True)

    for driverID in sorted(rankedCircuits.keys(), key=int):
        for category in rankedCircuits[driverID].keys():
            leaders = rankedCircuits[driverID][category][:3]
            label = "".join("_" + str(pair[0]) for pair in leaders)
            topCircuits.append([driverID, category, label])
    return topCircuits