예제 #1
0
def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector, lotid, lotLocation, final_dest):
    '''
    Predict a lot's available-spot count and price at |time|.

    Every usable record in "../data" + filename whose time of day lies in
    the window [time - timeBuffer, time] is scored with both weight vectors,
    and the scores are averaged over the records that were actually scored.

    Args:
        filename: appended to "../data" to build the path of the record file.
        time: indexable whose first two items are (hour, minute).
        AvailNum_weightsVector: weights of the availability model.
        Price_weightsVector: weights of the price model.
        lotid: returned unchanged as the first tuple item.
        lotLocation: passed to util.calculateDistance and returned unchanged.
        final_dest: destination passed to util.calculateDistance.

    Returns:
        A tuple (lotid, lotLocation, rounded availability estimate,
        price estimate, distance between final_dest and lotLocation).
        Both estimates are 0 when no usable record falls inside the window.
    '''
    HourMin = time[0] * 60 + time[1]
    timeBuffer = 10  # minutes before |time| that are considered for the prediction
    count = 0

    availNumEstimate = 0.0
    priceEstimate = 0.0

    # "with" guarantees the file is closed even if parsing a line raises.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            minutesBefore = HourMin - (currHour * 60 + currMin)

            # Keep only records inside [time - timeBuffer, time]; a one-sided
            # test would also let in every record later than |time|.
            if not 0 <= minutesBefore <= timeBuffer:
                continue

            phi, availNum, price = model.extractRecordFeatures(line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue

            # Count only records that contribute a prediction; counting the
            # skipped ones as well would drag both averages toward zero.
            count += 1
            availNumEstimate += round(util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(Price_weightsVector, phi)

    # Average; with no usable record the estimates stay at 0 instead of
    # dividing by zero.
    if count:
        availNumEstimate /= count
        priceEstimate /= count

    dist = util.calculateDistance(final_dest, lotLocation)  # distance between lot and final location

    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
예제 #2
0
def readFileUpdateWeight(AvailNum_weightsVector, Price_weightsVector, filepath='NA', locDict='NA', eventDict='NA', alpha=.5, eta_0=1.):
    '''
    Run one pass of stochastic gradient descent over the records in filepath,
    updating both weight vectors (Counter objects representing sparse
    vectors) in place:

        w[i] = w[i] + eta*(y - dotProd(w, phi))*phi[i]

    The learning rate for the t-th line (1-based, counting skipped lines too)
    is eta = eta_0 / t^alpha.

    Args:
        AvailNum_weightsVector: weights predicting the number of available spots.
        Price_weightsVector: weights predicting the price.
        filepath: path of the record file to read.
        locDict: forwarded to model.extractRecordFeatures.
        eventDict: forwarded to model.extractRecordFeatures.
        alpha: decay exponent of the learning rate.
        eta_0: initial learning rate.

    Returns:
        The tuple (AvailNum_weightsVector, Price_weightsVector); these are the
        same objects that were passed in, mutated.

    Raises:
        IOError: if filepath does not exist.
    '''
    if not os.path.exists(filepath):
        # A bare string cannot be raised; use a real exception type.
        raise IOError("File doesn't exist!!")

    # "with" closes the file even when a record fails to parse.
    with open(filepath, 'r') as fp:
        for t, line in enumerate(fp):
            eta = eta_0 / (t + 1) ** alpha

            phi, availNum, currPrice = model.extractRecordFeatures(line, locDict, eventDict)
            if len(phi) <= 0 or availNum < 0 or currPrice < 0:  # unusable record
                continue

            # Both residuals are taken before any weight is touched so every
            # component of the update uses the same pre-update prediction.
            availNumResidual = availNum - util.sparseVectorDotProduct(phi, AvailNum_weightsVector)
            priceResidual = currPrice - util.sparseVectorDotProduct(phi, Price_weightsVector)
            for key in phi:
                AvailNum_weightsVector[key] += eta * availNumResidual * phi[key]
                Price_weightsVector[key] += eta * priceResidual * phi[key]

    return (AvailNum_weightsVector, Price_weightsVector)
예제 #3
0
def predictLot(filename, time, AvailNum_weightsVector, Price_weightsVector,
               lotid, lotLocation, final_dest):
    '''
    Predict a lot's available-spot count and price at |time|.

    Every usable record in "../data" + filename whose time of day lies in
    the window [time - timeBuffer, time] is scored with both weight vectors,
    and the scores are averaged over the records that were actually scored.

    Args:
        filename: appended to "../data" to build the path of the record file.
        time: indexable whose first two items are (hour, minute).
        AvailNum_weightsVector: weights of the availability model.
        Price_weightsVector: weights of the price model.
        lotid: returned unchanged as the first tuple item.
        lotLocation: passed to util.calculateDistance and returned unchanged.
        final_dest: destination passed to util.calculateDistance.

    Returns:
        A tuple (lotid, lotLocation, rounded availability estimate,
        price estimate, distance between final_dest and lotLocation).
        Both estimates are 0 when no usable record falls inside the window.
    '''
    HourMin = time[0] * 60 + time[1]
    timeBuffer = 10  # minutes before |time| that are considered for the prediction
    count = 0

    availNumEstimate = 0.0
    priceEstimate = 0.0

    # "with" guarantees the file is closed even if parsing a line raises.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            parsedLine = util.exactSingleLineFromFine(line)
            _, currHour, currMin = util.convertTimeStampToDate(parsedLine[0])
            minutesBefore = HourMin - (currHour * 60 + currMin)

            # Keep only records inside [time - timeBuffer, time]; a one-sided
            # test would also let in every record later than |time|.
            if not 0 <= minutesBefore <= timeBuffer:
                continue

            phi, availNum, price = model.extractRecordFeatures(
                line, util.locDict, util.eventDict)
            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue

            # Count only records that contribute a prediction; counting the
            # skipped ones as well would drag both averages toward zero.
            count += 1
            availNumEstimate += round(
                util.sparseVectorDotProduct(AvailNum_weightsVector, phi))
            priceEstimate += util.sparseVectorDotProduct(
                Price_weightsVector, phi)

    # Average; with no usable record the estimates stay at 0 instead of
    # dividing by zero.
    if count:
        availNumEstimate /= count
        priceEstimate /= count

    # distance between lot and final location
    dist = util.calculateDistance(final_dest, lotLocation)

    return (lotid, lotLocation, round(availNumEstimate), priceEstimate, dist)
예제 #4
0
def test2(filename, locDict, eventDict, weightsVector, plotting=0):
    '''
    Old testing function, for just testing AvailNum.

    Scores every usable record of "../data" + filename with weightsVector and
    prints the mean relative error |y - estimate| / y of the rounded
    estimates. A record counts toward the mean when the estimate matches y
    (error 0) or when y > 0; a record with y == 0 and a non-matching estimate
    has no defined relative error and is left out.

    Args:
        filename: appended to "../data" to build the path of the record file.
        locDict: forwarded to model.extractRecordFeatures.
        eventDict: forwarded to model.extractRecordFeatures.
        weightsVector: weights of the availability model.
        plotting: accepted for signature compatibility; not used here.

    Returns:
        None. The result is printed; -1 is printed when no record was
        counted, mirroring the (-1, -1) sentinel returned by test().
    '''
    count = 0
    sumErr = 0

    # "with" closes the file even when a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, y, _ = model.extractRecordFeatures(line, locDict, eventDict)

            if len(phi) <= 0 or y < 0:
                continue

            estimate = round(util.sparseVectorDotProduct(weightsVector, phi))
            absErr = abs(y - estimate)

            if absErr < 1e-3:
                # exact hit: contributes an error of 0
                count += 1
            elif y > 0:
                count += 1
                # float() keeps this a true division whatever the types are
                sumErr += absErr / float(y)

    # Guard against an empty or entirely unusable file.
    avgErr = sumErr / count if count else -1
    print("--average error rate-- %s" % (avgErr,))
예제 #5
0
def test2(filename, locDict, eventDict, weightsVector, plotting=0):
    '''
    Old testing function, for just testing AvailNum.

    Scores every usable record of "../data" + filename with weightsVector and
    prints the mean relative error |y - estimate| / y of the rounded
    estimates. A record counts toward the mean when the estimate matches y
    (error 0) or when y > 0; a record with y == 0 and a non-matching estimate
    has no defined relative error and is left out.

    Args:
        filename: appended to "../data" to build the path of the record file.
        locDict: forwarded to model.extractRecordFeatures.
        eventDict: forwarded to model.extractRecordFeatures.
        weightsVector: weights of the availability model.
        plotting: accepted for signature compatibility; not used here.

    Returns:
        None. The result is printed; -1 is printed when no record was
        counted, mirroring the (-1, -1) sentinel returned by test().
    '''
    count = 0
    sumErr = 0

    # "with" closes the file even when a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, y, _ = model.extractRecordFeatures(line, locDict, eventDict)

            if len(phi) <= 0 or y < 0:
                continue

            estimate = round(util.sparseVectorDotProduct(weightsVector, phi))
            absErr = abs(y - estimate)

            if absErr < 1e-3:
                # exact hit: contributes an error of 0
                count += 1
            elif y > 0:
                count += 1
                # float() keeps this a true division whatever the types are
                sumErr += absErr / float(y)

    # Guard against an empty or entirely unusable file.
    avgErr = sumErr / count if count else -1
    print("--average error rate-- %s" % (avgErr,))
예제 #6
0
def test(filename,
         locDict,
         eventDict,
         AvailNum_weightsVector,
         Price_weightsVector,
         plotting=0):
    '''
    Evaluate the availability and price models on "../data" + filename.

    Each usable record is scored with both weight vectors (the availability
    estimate is rounded, the price estimate is not). The error of a record
    is the relative error |y - estimate| / y; a record with y == 0 whose
    estimate does not match has no defined relative error and is not counted.

    Args:
        filename: appended to "../data" to build the path of the record file;
            also used as the plot title.
        locDict: forwarded to model.extractRecordFeatures.
        eventDict: forwarded to model.extractRecordFeatures.
        AvailNum_weightsVector: weights of the availability model.
        Price_weightsVector: weights of the price model.
        plotting: when truthy, show real-vs-predicted plots for both targets.

    Returns:
        (availability error, price error) as mean relative errors, or
        (-1, -1) when either target has no counted record.
    '''
    count = [0, 0]  # [availNum, price]
    sumErr = [0., 0.]  # [availNum, price]

    availNumVec = []
    availNumEstimateVec = []
    priceNumVec = []
    priceEstimateVec = []

    def computeError(weights, phi, y, count, sumErr, Yvec, estimateVec,
                     rounding=1):
        # Score one record, log (y, estimate) for plotting and return the
        # updated (sumErr, count). phi is an explicit argument so the helper
        # does not depend on the enclosing loop's variable.
        estimate = util.sparseVectorDotProduct(weights, phi)
        if rounding:
            estimate = round(estimate)

        absErr = abs(y - estimate)
        if absErr < 1e-3:
            # exact hit: contributes an error of 0
            count += 1
        elif y > 0:
            count += 1
            sumErr += absErr / float(y)

        Yvec.append(y)
        estimateVec.append(estimate)

        return (sumErr, count)

    def plotSeries(actual, predicted, ylabel):
        # Draw real (blue) against predicted (red) values. The x axis is
        # spread evenly over 6..22 -- presumably hours of the day; confirm
        # against the data files.
        timeVec = linspace(6, 22, len(actual))
        plt.plot(timeVec, actual, 'b-')
        plt.plot(timeVec, predicted, 'r-')
        plt.legend(['real', 'prediction'], fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        plt.xlabel('Time', fontsize=14)
        plt.title(filename, fontsize=14)
        plt.show()

    # "with" closes the file even when a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, availNum, price = model.extractRecordFeatures(
                line, locDict, eventDict)

            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue

            sumErr[0], count[0] = computeError(
                AvailNum_weightsVector, phi, availNum, count[0], sumErr[0],
                availNumVec, availNumEstimateVec, 1)
            sumErr[1], count[1] = computeError(
                Price_weightsVector, phi, price, count[1], sumErr[1],
                priceNumVec, priceEstimateVec, 0)

    if count[0] == 0 or count[1] == 0:
        return (-1, -1)
    avgErr = (sumErr[0] / count[0], sumErr[1] / count[1])  # mean relative error

    if plotting:
        plotSeries(availNumVec, availNumEstimateVec,
                   'number of available spot')
        plotSeries(priceNumVec, priceEstimateVec, 'Price ($)')

    return avgErr
예제 #7
0
def test(filename, locDict, eventDict, AvailNum_weightsVector, Price_weightsVector, plotting=0):
    '''
    Evaluate the availability and price models on "../data" + filename.

    Each usable record is scored with both weight vectors (the availability
    estimate is rounded, the price estimate is not). The error of a record
    is the relative error |y - estimate| / y; a record with y == 0 whose
    estimate does not match has no defined relative error and is not counted.

    Args:
        filename: appended to "../data" to build the path of the record file;
            also used as the plot title.
        locDict: forwarded to model.extractRecordFeatures.
        eventDict: forwarded to model.extractRecordFeatures.
        AvailNum_weightsVector: weights of the availability model.
        Price_weightsVector: weights of the price model.
        plotting: when truthy, show real-vs-predicted plots for both targets.

    Returns:
        (availability error, price error) as mean relative errors, or
        (-1, -1) when either target has no counted record.
    '''
    count = [0, 0]  # [availNum, price]
    sumErr = [0., 0.]  # [availNum, price]

    availNumVec = []
    availNumEstimateVec = []
    priceNumVec = []
    priceEstimateVec = []

    def computeError(weights, phi, y, count, sumErr, Yvec, estimateVec, rounding=1):
        # Score one record, log (y, estimate) for plotting and return the
        # updated (sumErr, count). phi is an explicit argument so the helper
        # does not depend on the enclosing loop's variable.
        estimate = util.sparseVectorDotProduct(weights, phi)
        if rounding:
            estimate = round(estimate)

        absErr = abs(y - estimate)
        if absErr < 1e-3:
            # exact hit: contributes an error of 0
            count += 1
        elif y > 0:
            count += 1
            sumErr += absErr / float(y)

        Yvec.append(y)
        estimateVec.append(estimate)

        return (sumErr, count)

    def plotSeries(actual, predicted, ylabel):
        # Draw real (blue) against predicted (red) values. The x axis is
        # spread evenly over 6..22 -- presumably hours of the day; confirm
        # against the data files.
        timeVec = linspace(6, 22, len(actual))
        plt.plot(timeVec, actual, 'b-')
        plt.plot(timeVec, predicted, 'r-')
        plt.legend(['real', 'prediction'], fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        plt.xlabel('Time', fontsize=14)
        plt.title(filename, fontsize=14)
        plt.show()

    # "with" closes the file even when a record fails to parse.
    with open("../data" + filename, 'r') as fp:
        for line in fp:
            phi, availNum, price = model.extractRecordFeatures(line, locDict, eventDict)

            if len(phi) <= 0 or availNum < 0 or price < 0:
                continue

            sumErr[0], count[0] = computeError(AvailNum_weightsVector, phi, availNum, count[0], sumErr[0], availNumVec, availNumEstimateVec, 1)
            sumErr[1], count[1] = computeError(Price_weightsVector, phi, price, count[1], sumErr[1], priceNumVec, priceEstimateVec, 0)

    if count[0] == 0 or count[1] == 0:
        return (-1, -1)
    avgErr = (sumErr[0] / count[0], sumErr[1] / count[1])  # mean relative error

    if plotting:
        plotSeries(availNumVec, availNumEstimateVec, 'number of available spot')
        plotSeries(priceNumVec, priceEstimateVec, 'Price ($)')

    return avgErr