def naiveBayesMC(): binSize = 10; maxPrice = 100; dataSplit = 0.70; print "Loading items... " allItems = getItems(db,filteritems=True,complete=True,sold=True,genre=ROCK); [trainItems,testItems] = splitItemSet(allItems,dataSplit); bins = generateBinArray(binSize,maxPrice); [actualFinalPrice,actualFinalPriceBinned] = getFinalPrices(testItems,bins); ##Only have to make these onece orderedWordList = generateOrderedWordList(allItems,lengthcutoff=5,frequencycutoff=1); [testMatrix,testCategory] = generateMatrixData(orderedWordList,generateItemTitleList(testItems,orderedWordList)); predictedFinalPrices = [-1]*len(testItems); for priceCutOff in bins: print "Price cut off: ", priceCutOff [phi_k_unsold,phi_k_sold,p_y0,p_y1] = trainOnData(trainItems,orderedWordList,priceCutOff); [testingSetPredictions,prob_sell,prob_wontSell] = makePredictions(testMatrix,phi_k_sold,phi_k_unsold,p_y0,p_y1); #predicted category for testItems [0,1] predictedFinalPrices = updatePredictedFinalPrice(testItems,predictedFinalPrices,testingSetPredictions,priceCutOff,binSize,bins); for i in range(len(predictedFinalPrices)): if predictedFinalPrices[i] == -1: predictedFinalPrices[i] = bins[-1]; for i in range(len(predictedFinalPrices)): print i,testItems[i][0],testItems[i][START_PRICE],testItems[i][END_PRICE],getBinOf(bins,float(testItems[i][START_PRICE])), actualFinalPriceBinned[i],predictedFinalPrices[i],"\t\t",testItems[i][TITLE] print "Classification error on testing set is: ", classificationError(predictedFinalPrices,actualFinalPriceBinned);
def updatePredictedFinalPrice(testItems,predictedFinalPrice,testingSetPredictions,priceCutOff,binSize,bins):
    """Fold one cut-off's predictions into the running per-item price bins.

    An item is updated only when its prediction at this cut-off is 0 and it
    has no bin yet (its entry in predictedFinalPrice is still the -1
    sentinel).  Such an item receives priceCutOff - binSize when that value
    is above the item's start price; otherwise it receives the bin of its
    start price as returned by getBinOf.  A negative result is clamped to 0.

    NOTE(review): prediction 0 presumably means "does not sell at this
    cut-off" -- confirm against makePredictions.

    predictedFinalPrice is modified in place and the same list is returned.
    """
    for idx, item in enumerate(testItems):
        # Skip items predicted non-zero here or already assigned a bin.
        if testingSetPredictions[idx] != 0 or predictedFinalPrice[idx] != -1:
            continue

        lowerEdge = priceCutOff - binSize
        startPrice = float(item[START_PRICE])

        if lowerEdge > startPrice:
            predictedFinalPrice[idx] = lowerEdge
        elif lowerEdge <= startPrice:
            # Kept as an explicit elif (not else): a start price that compares
            # false both ways (NaN) takes neither branch.
            predictedFinalPrice[idx] = getBinOf(bins, startPrice)

        # Never report a negative bin.
        if predictedFinalPrice[idx] < 0:
            predictedFinalPrice[idx] = 0

    return predictedFinalPrice