def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector, lotid, lotLocation, final_dest):
    '''
    Predicts the availNum and price at |time| by averaging the prediction
    result over the records whose time of day lies between
    (time-timeBuffer) and (time), inclusive.

    filename                : appended verbatim to "../data" to locate the
                              lot's record file
    time                    : (hour, minute) of arrival
    AvailNum_weightsVector  : sparse weights used for the availability score
    Price_weightsVector     : sparse weights used for the price score
    lotid, lotLocation      : passed through unchanged into the result
    final_dest              : location handed to util.calculateDistance
                              together with lotLocation

    Returns a tuple of the prediction result:
        (lotid, lotLocation, rounded availNum estimate, price estimate, dist)

    Only records that yield a usable feature vector (non-empty phi and
    non-negative availNum / price) take part in the average. If no record
    qualifies, both estimates are reported as 0 instead of raising
    ZeroDivisionError.
    '''
    HourMin = time[0]*60+time[1]
    timeBuffer = 10 # the time (in minutes), ahead of arrival_time, that we will consider when we do prediction
    count = 0
    availNumEstimate = 0.0
    priceEstimate = 0.0
    # 'with' guarantees the file is closed even if parsing a line raises.
    with open("../data"+filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            minutesBefore = HourMin - (currHour*60+currMin)
            # Keep only records inside [time - timeBuffer, time]; a negative
            # difference means the record is later than the arrival time.
            if minutesBefore < 0 or minutesBefore > timeBuffer:
                continue
            phi, availNum, price = model.extractRecordFeatures(line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue
            # Count a record only once it is known to contribute, so skipped
            # records do not drag the average towards zero.
            count += 1
            availNumEstimate += round(util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(Price_weightsVector, phi)
    # averaging (skipped when nothing matched, leaving both estimates at 0.0)
    if count:
        availNumEstimate /= count
        priceEstimate /= count
    dist = util.calculateDistance(final_dest, lotLocation) # distance between lot and final location
    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
def filterLotsByMaxDist(final_dest, max_dist):
    '''
    Returns the names of the entries in '../data' whose location lies within
    |max_dist| of |final_dest|.

    Each directory entry name is used as a key into util.locDict; its
    coordinates are converted to floats and measured against |final_dest|
    with util.calculateDistance. |max_dist| is compared directly with that
    result, so it must be in whatever unit util.calculateDistance returns.
    The result keeps the order produced by os.listdir.
    '''
    entries = os.listdir('../data')  # one entry per lot in the data directory
    locations = util.locDict

    def distanceToDest(lotName):
        # Coordinates may be stored as strings, hence the float conversion.
        coords = tuple(float(part) for part in locations[lotName])
        return util.calculateDistance(final_dest, coords)

    return [lotName for lotName in entries if distanceToDest(lotName) <= max_dist]
def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector,
               lotid, lotLocation, final_dest):
    '''
    Predicts the availNum and price at |time| by averaging the prediction
    result over the records whose time of day lies between
    (time-timeBuffer) and (time), inclusive.

    filename                : appended verbatim to "../data" to locate the
                              lot's record file
    time                    : (hour, minute) of arrival
    AvailNum_weightsVector  : sparse weights used for the availability score
    Price_weightsVector     : sparse weights used for the price score
    lotid, lotLocation      : passed through unchanged into the result
    final_dest              : location handed to util.calculateDistance
                              together with lotLocation

    Returns a tuple of the prediction result:
        (lotid, lotLocation, rounded availNum estimate, price estimate, dist)

    Only records that yield a usable feature vector (non-empty phi and
    non-negative availNum / price) take part in the average. If no record
    qualifies, both estimates are reported as 0 instead of raising
    ZeroDivisionError.
    '''
    HourMin = time[0] * 60 + time[1]
    timeBuffer = 10  # the time (in minutes), ahead of arrival_time, that we will consider when we do prediction
    count = 0
    availNumEstimate = 0.0
    priceEstimate = 0.0
    # 'with' guarantees the file is closed even if parsing a line raises.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            minutesBefore = HourMin - (currHour * 60 + currMin)
            # Keep only records inside [time - timeBuffer, time]; a negative
            # difference means the record is later than the arrival time.
            if not 0 <= minutesBefore <= timeBuffer:
                continue
            phi, availNum, price = model.extractRecordFeatures(
                line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue
            # Count a record only once it is known to contribute, so skipped
            # records do not drag the average towards zero.
            count += 1
            availNumEstimate += round(
                util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(
                Price_weightsVector, phi)
    # averaging (skipped when nothing matched, leaving both estimates at 0.0)
    if count:
        availNumEstimate /= count
        priceEstimate /= count
    dist = util.calculateDistance(
        final_dest, lotLocation)  # distance between lot and final location
    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
def extractRecordFeatures(x, locationDict, eventDict):
    """
    Extract the features and labels from a string line

    @param string 'x' : one comma-separated record. Field 0 is the
        timestamp, field 1 the key looked up in locationDict, fields 2 and 3
        are integers whose difference (field 3 - field 2) is the
        availability label, and the last field is the price.
    @param dict 'locationDict' : maps a lot key to a (lat, lng) pair; a
        missing or empty entry falls back to a distance of 0.5
    @param dict 'eventDict' : parallel sequences under the keys 'ST', 'ET'
        and 'NAME'; an event is active when ST < timestamp < ET
    @return tuple (featureDict, avlNum, currPrice)
        featureDict is a Counter. avlNum is -99 when either count field is
        negative. For records whose hour is outside
        [initialHour, finalHour) the result is (empty Counter, 0, 0).

    Feature items:
        day     - one of 'Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'
        time    - '<startHour>:<startMin>-<endHour>:<endMin>' for the first
                  matching interval of timeFeature20min and of timeFeature2hr
        'Dist'  - smallest util.calculateDistance from the lot to any
                  centroid in util.CENTROIDS (0.5 if the lot has no location)
        events  - the NAME of every event active at the record's timestamp
    Price is returned as a label only; no price features are emitted.
    """
    featureDict = Counter()
    fields = [item.strip(' \"|\r|\n ') for item in x.split(',')]

    # build feature vector phi
    day, hour, minute = util.convertTimeStampToDate(fields[0])

    # if earlier than initialHour or not before finalHour, return empty feature
    if hour < initialHour or hour >= finalHour:
        return (featureDict, 0, 0)

    dayDict = {
        0: 'Mon',
        1: 'Tues',
        2: 'Wed',
        3: 'Thu',
        4: 'Fri',
        5: 'Sat',
        6: 'Sun'
    }
    assert day in dayDict
    # update the day feature
    featureDict[dayDict[day]] = 1

    # update time feature: at most one interval per granularity
    actualHourMin = hour * 60 + minute
    for timeFeature in (timeFeature20min, timeFeature2hr):
        for (startHour, startMin), (endHour, endMin) in timeFeature:
            startHourMin = startHour * 60 + startMin
            finalHourMin = endHour * 60 + endMin
            if startHourMin <= actualHourMin < finalHourMin:
                timeFeatureKey = (str(startHour) + ':' + str(startMin) + '-' +
                                  str(endHour) + ':' + str(endMin))
                featureDict[timeFeatureKey] = 1
                break

    # distance feature; .get() lets an unknown lot reach the 0.5 fallback
    # instead of raising KeyError (or inserting a key into a defaultdict)
    location = locationDict.get(fields[1])
    if location:
        lat, lng = location
        lotLocation = (float(lat), float(lng))
        # util.CENTROIDS holds two parallel sequences, indexed per centroid
        dist = min(
            util.calculateDistance(lotLocation, centroid)
            for centroid in zip(util.CENTROIDS[0], util.CENTROIDS[1]))
    else:
        dist = 0.5
    featureDict['Dist'] = dist

    currPrice = float(fields[-1])  # current price

    # event features: the bounds are exclusive on both sides
    currentTS = float(fields[0])
    for startTS, endTS, eventName in zip(eventDict['ST'], eventDict['ET'],
                                         eventDict['NAME']):
        if startTS < currentTS < endTS:
            featureDict[eventName] = 1

    # extract label y; a negative count marks the record as unusable
    upperCount = int(fields[3])
    lowerCount = int(fields[2])
    if upperCount < 0 or lowerCount < 0:
        avlNum = -99
    else:
        avlNum = upperCount - lowerCount
    return (featureDict, avlNum, currPrice)
def extractRecordFeatures(x, locationDict, eventDict):
    """
    Extract the features and labels from a string line

    @param string 'x' : one comma-separated record. Field 0 is the
        timestamp, field 1 the key looked up in locationDict, fields 2 and 3
        are integers whose difference (field 3 - field 2) is the
        availability label, and the last field is the price.
    @param dict 'locationDict' : maps a lot key to a (lat, lng) pair; a
        missing or empty entry falls back to a distance of 0.5
    @param dict 'eventDict' : parallel sequences under the keys "ST", "ET"
        and "NAME"; an event is active when ST < timestamp < ET
    @return tuple (featureDict, avlNum, currPrice)
        featureDict is a Counter. avlNum is -99 when either count field is
        negative. For records whose hour is outside
        [initialHour, finalHour) the result is (empty Counter, 0, 0).

    Feature items:
        day     - one of "Mon", "Tues", "Wed", "Thu", "Fri", "Sat", "Sun"
        time    - "<startHour>:<startMin>-<endHour>:<endMin>" for the first
                  matching interval of timeFeature20min and of timeFeature2hr
        "Dist"  - smallest util.calculateDistance from the lot to any
                  centroid in util.CENTROIDS (0.5 if the lot has no location)
        events  - the NAME of every event active at the record's timestamp
    Price is returned as a label only; no price features are emitted.
    """
    featureDict = Counter()
    fields = [item.strip(' "|\r|\n ') for item in x.split(",")]

    # build feature vector phi
    day, hour, minute = util.convertTimeStampToDate(fields[0])

    # if earlier than initialHour or not before finalHour, return empty feature
    if hour < initialHour or hour >= finalHour:
        return (featureDict, 0, 0)

    dayDict = {0: "Mon", 1: "Tues", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
    assert day in dayDict
    # update the day feature
    featureDict[dayDict[day]] = 1

    # update time feature: at most one interval per granularity
    actualHourMin = hour * 60 + minute
    for timeFeature in (timeFeature20min, timeFeature2hr):
        for (startHour, startMin), (endHour, endMin) in timeFeature:
            startHourMin = startHour * 60 + startMin
            finalHourMin = endHour * 60 + endMin
            if startHourMin <= actualHourMin < finalHourMin:
                timeFeatureKey = str(startHour) + ":" + str(startMin) + "-" + str(endHour) + ":" + str(endMin)
                featureDict[timeFeatureKey] = 1
                break

    # distance feature; .get() lets an unknown lot reach the 0.5 fallback
    # instead of raising KeyError (or inserting a key into a defaultdict)
    location = locationDict.get(fields[1])
    if location:
        lat, lng = location
        lotLocation = (float(lat), float(lng))
        # util.CENTROIDS holds two parallel sequences, indexed per centroid
        dist = min(
            util.calculateDistance(lotLocation, centroid)
            for centroid in zip(util.CENTROIDS[0], util.CENTROIDS[1])
        )
    else:
        dist = 0.5
    featureDict["Dist"] = dist

    currPrice = float(fields[-1])  # current price

    # event features: the bounds are exclusive on both sides
    currentTS = float(fields[0])
    for startTS, endTS, eventName in zip(eventDict["ST"], eventDict["ET"], eventDict["NAME"]):
        if startTS < currentTS < endTS:
            featureDict[eventName] = 1

    # extract label y; a negative count marks the record as unusable
    upperCount = int(fields[3])
    lowerCount = int(fields[2])
    if upperCount < 0 or lowerCount < 0:
        avlNum = -99
    else:
        avlNum = upperCount - lowerCount
    return (featureDict, avlNum, currPrice)