def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector, lotid, lotLocation, final_dest):
    '''
    Predicts the availNum and price of one lot at |time| by averaging the
    per-record predictions over the records read from "../data" + filename.

    filename: suffix appended verbatim to "../data" (no separator is inserted)
    time: indexable value; time[0] is used as the hour and time[1] as the minute
    AvailNum_weightsVector: weights used for the availNum prediction
    Price_weightsVector: weights used for the price prediction
    lotid, lotLocation: copied unchanged into the result
    final_dest: passed with lotLocation to util.calculateDistance

    Returns a tuple
        (lotid, lotLocation, rounded availNum estimate, price estimate, dist)
    Both estimates are 0.0 when no usable record falls inside the window.
    '''
    # NOTE(review): predictLot is defined a second time further down in this
    # file; at import time the later definition is the one that stays bound.
    HourMin = time[0] * 60 + time[1]
    timeBuffer = 10  # the time (in minutes), ahead of arrival_time, that we will consider when we do prediction
    count = 0
    availNumEstimate = 0.0
    priceEstimate = 0.0

    # The context manager closes the file even if parsing a record raises.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            currHourMin = currHour * 60 + currMin
            # NOTE(review): this test also accepts every record that is LATER
            # than |time| (negative difference), although the intent appears
            # to be the window (time - timeBuffer) .. time -- confirm.
            if HourMin - currHourMin > timeBuffer:
                continue
            phi, availNum, price = model.extractRecordFeatures(line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue
            # Count only records that actually contribute to the sums, so
            # skipped records do not drag the average towards zero.
            count += 1
            availNumEstimate += round(util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(Price_weightsVector, phi)

    # averaging (guarded: an empty window must not raise ZeroDivisionError)
    if count > 0:
        availNumEstimate /= count
        priceEstimate /= count
    dist = util.calculateDistance(final_dest, lotLocation)  # distance between lot and final location
    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
def readFileUpdateWeight(AvailNum_weightsVector, Price_weightsVector, filepath='NA', locDict='NA', eventDict='NA', alpha=.5, eta_0=1.):
    '''
    Reads each line in the filepath and updates both weight vectors in place
    by stochastic gradient descent, for predicting both AvailNum and Price:
        w[i] = w[i] + eta*(y - dotProd(w, phi))*phi[i]
    The learning rate decays as eta = eta_0 / t^alpha, where t is the
    1-based line number (lines that are skipped still advance t).

    AvailNum_weightsVector, Price_weightsVector: weight mappings that are
        mutated in place and also returned
    filepath: path of the training file
    locDict, eventDict: forwarded to model.extractRecordFeatures
    alpha, eta_0: learning-rate schedule parameters

    Returns the tuple (AvailNum_weightsVector, Price_weightsVector).
    Raises IOError if filepath does not exist.
    '''
    if not os.path.exists(filepath):
        # Raising a plain string is itself a TypeError on Python 2.6+ and
        # Python 3, so raise a real exception carrying the same message.
        raise IOError("File doesn't exist!!")

    # The context manager closes the file even if a record fails to parse.
    with open(filepath, 'r') as fp:
        for t, line in enumerate(fp):
            eta = eta_0 / (t + 1) ** alpha
            phi, availNum, currPrice = model.extractRecordFeatures(line, locDict, eventDict)
            if len(phi) <= 0 or availNum < 0 or currPrice < 0:
                # no usable features or target on this line; leave the weights alone
                continue
            # Both predictions are taken before any weight is touched, so
            # every coordinate update uses the same residual.
            AvailNum_dotProd = util.sparseVectorDotProduct(phi, AvailNum_weightsVector)
            Price_dotProd = util.sparseVectorDotProduct(phi, Price_weightsVector)
            for key in phi:
                AvailNum_weightsVector[key] += eta * (availNum - AvailNum_dotProd) * phi[key]
                Price_weightsVector[key] += eta * (currPrice - Price_dotProd) * phi[key]
    return (AvailNum_weightsVector, Price_weightsVector)
def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector, lotid, lotLocation, final_dest):
    '''
    Predicts the availNum and price of one lot at |time| by averaging the
    per-record predictions over the records read from "../data" + filename.

    filename: suffix appended verbatim to "../data" (no separator is inserted)
    time: indexable value; time[0] is used as the hour and time[1] as the minute
    AvailNum_weightsVector: weights used for the availNum prediction
    Price_weightsVector: weights used for the price prediction
    lotid, lotLocation: copied unchanged into the result
    final_dest: passed with lotLocation to util.calculateDistance

    Returns a tuple
        (lotid, lotLocation, rounded availNum estimate, price estimate, dist)
    Both estimates are 0.0 when no usable record falls inside the window.
    '''
    # NOTE(review): an earlier definition of predictLot exists above in this
    # file; this later definition is the one that stays bound at import time.
    timeBuffer = 10  # the time (in minutes), ahead of arrival_time, that we will consider when we do prediction
    HourMin = time[0] * 60 + time[1]
    count = 0
    availNumEstimate = 0.0
    priceEstimate = 0.0

    # The context manager closes the file even if parsing a record raises.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            currHourMin = currHour * 60 + currMin
            # NOTE(review): this test also accepts every record that is LATER
            # than |time| (negative difference), although the intent appears
            # to be the window (time - timeBuffer) .. time -- confirm.
            if HourMin - currHourMin > timeBuffer:
                continue
            phi, availNum, price = model.extractRecordFeatures(
                line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue
            # Count only records that actually contribute to the sums, so
            # skipped records do not drag the average towards zero.
            count += 1
            availNumEstimate += round(
                util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(
                Price_weightsVector, phi)

    # averaging (guarded: an empty window must not raise ZeroDivisionError)
    if count > 0:
        availNumEstimate /= count
        priceEstimate /= count
    dist = util.calculateDistance(
        final_dest, lotLocation)  # distance between lot and final location
    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
def computeError(weights, y, count, sumErr, Yvec, estimateVec, rounding=1, featureVec=None):
    '''
    Scores one sample: predicts with |weights| and folds the relative error
    |y - estimate| / y into the running totals.

    weights: weight vector passed to util.sparseVectorDotProduct
    y: true target value of the sample
    count, sumErr: running totals; updated copies are returned (the caller's
        variables are not modified)
    Yvec, estimateVec: lists that are appended to in place
    rounding: when truthy, the estimate is rounded before scoring
    featureVec: feature vector of the sample. When omitted, the module-level
        name `phi` is used, which is what this function relied on before the
        parameter existed.

    Returns the tuple (sumErr, count). A sample with y <= 0 whose estimate is
    not within 1e-3 of y changes neither total.
    '''
    # NOTE(review): computeError is defined a second time further down in this
    # file; at import time the later definition is the one that stays bound.
    if featureVec is None:
        # Legacy path: `phi` is not a parameter, so it must exist as a global;
        # otherwise this lookup raises NameError exactly as it did before.
        featureVec = phi
    estimate = util.sparseVectorDotProduct(weights, featureVec)
    if rounding:
        estimate = round(estimate)

    if abs(y - estimate) < 1e-3:
        # Treated as an exact hit: counted, contributes zero error.
        count += 1
    elif y > 0:
        count += 1
        sumErr += abs(y - estimate) / float(y)

    # Recorded for every scored sample, including ones left out of the error
    # sum. NOTE(review): confirm this is the intended placement.
    Yvec.append(y)
    estimateVec.append(estimate)
    return (sumErr, count)
def computeError(weights, y, count, sumErr, Yvec, estimateVec, rounding=1, featureVec=None):
    '''
    Scores one sample: predicts with |weights| and folds the relative error
    |y - estimate| / y into the running totals.

    weights: weight vector passed to util.sparseVectorDotProduct
    y: true target value of the sample
    count, sumErr: running totals; updated copies are returned (the caller's
        variables are not modified)
    Yvec, estimateVec: lists that are appended to in place
    rounding: when truthy, the estimate is rounded before scoring
    featureVec: feature vector of the sample. When omitted, the module-level
        name `phi` is used, which is what this function relied on before the
        parameter existed.

    Returns the tuple (sumErr, count). A sample with y <= 0 whose estimate is
    not within 1e-3 of y changes neither total.
    '''
    # NOTE(review): an earlier definition of computeError exists above in this
    # file; this later definition is the one that stays bound at import time.
    if featureVec is None:
        # Legacy path: `phi` is not a parameter, so it must exist as a global;
        # otherwise this lookup raises NameError exactly as it did before.
        featureVec = phi
    estimate = util.sparseVectorDotProduct(weights, featureVec)
    if rounding:
        estimate = round(estimate)

    if abs(y - estimate) < 1e-3:
        # Treated as an exact hit: counted, contributes zero error.
        count += 1
    elif y > 0:
        count += 1
        sumErr += abs(y - estimate) / float(y)

    # Recorded for every scored sample, including ones left out of the error
    # sum. NOTE(review): confirm this is the intended placement.
    Yvec.append(y)
    estimateVec.append(estimate)
    return (sumErr, count)
def test2(filename, locDict, eventDict, weightsVector, plotting=0):
    '''
    Old testing function, for just testing AvailNum.

    Reads "../data" + filename, predicts a rounded AvailNum for every usable
    record and prints the average relative error |y - estimate| / y.

    filename: suffix appended verbatim to "../data" (no separator is inserted)
    locDict, eventDict: forwarded to model.extractRecordFeatures
    weightsVector: weights used for the AvailNum prediction
    plotting: accepted for compatibility with existing callers; not used here

    Returns None. Prints nan as the average when no record could be scored.
    '''
    # NOTE(review): test2 is defined a second time further down in this file;
    # at import time the later definition is the one that stays bound.
    count = 0
    sumErr = 0

    # The context manager closes the file even if a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, y, _ = model.extractRecordFeatures(line, locDict, eventDict)
            if len(phi) <= 0 or y < 0:
                continue
            estimate = round(util.sparseVectorDotProduct(weightsVector, phi))
            if abs(y - estimate) < 1e-3:
                # exact hit: counted, contributes zero error
                count += 1
            elif y > 0:
                count += 1
                sumErr += abs(y - estimate) / float(y)
            # A wrong estimate for y == 0 has no defined relative error and
            # is left out of both totals.

    # Guarded so an empty or fully skipped file does not raise ZeroDivisionError.
    avgErr = sumErr / count if count else float('nan')
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("--average error rate-- %s" % avgErr)
def test2(filename, locDict, eventDict, weightsVector, plotting=0):
    '''
    Old testing function, for just testing AvailNum.

    Reads "../data" + filename, predicts a rounded AvailNum for every usable
    record and prints the average relative error |y - estimate| / y.

    filename: suffix appended verbatim to "../data" (no separator is inserted)
    locDict, eventDict: forwarded to model.extractRecordFeatures
    weightsVector: weights used for the AvailNum prediction
    plotting: accepted for compatibility with existing callers; not used here

    Returns None. Prints nan as the average when no record could be scored.
    '''
    # NOTE(review): an earlier definition of test2 exists above in this file;
    # this later definition is the one that stays bound at import time.
    sumErr = 0
    count = 0

    # The context manager closes the file even if a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, y, _ = model.extractRecordFeatures(line, locDict, eventDict)
            if len(phi) <= 0 or y < 0:
                continue
            estimate = round(util.sparseVectorDotProduct(weightsVector, phi))
            if abs(y - estimate) < 1e-3:
                # exact hit: counted, contributes zero error
                count += 1
            elif y > 0:
                count += 1
                sumErr += abs(y - estimate) / float(y)
            # A wrong estimate for y == 0 has no defined relative error and
            # is left out of both totals.

    # Guarded so an empty or fully skipped file does not raise ZeroDivisionError.
    avgErr = sumErr / count if count else float('nan')
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("--average error rate-- %s" % avgErr)