예제 #1
0
def generateTrainCategory(items,priceCutOff):
    """Label every item 1 or 0 relative to a price threshold.

    An item is labelled 1 when its END_PRICE field, parsed as a float, is at
    least ``priceCutOff`` and ``didItemSell`` is truthy for it; every other
    item is labelled 0.

    Returns a list of labels in the same order as ``items``.
    """
    def label(row):
        # The price test runs first, so didItemSell is only consulted for
        # rows that already meet the cut-off.
        reachedCutOff = float(row[END_PRICE]) >= priceCutOff
        return 1 if reachedCutOff and didItemSell(row) else 0

    return [label(row) for row in items]
def getFinalPrices(items,bins,default=0):
    """Collect the final price of every item, both raw and binned.

    For each item the price is ``float(row[END_PRICE])`` when
    ``didItemSell(row)`` is truthy, otherwise ``default``.

    Returns a ``(prices, binnedPrices)`` tuple, where ``binnedPrices`` is
    whatever ``binnedFinalPrice(bins, prices)`` produces.
    """
    prices = [
        float(row[END_PRICE]) if didItemSell(row) else default
        for row in items
    ]
    return prices, binnedFinalPrice(bins, prices)
예제 #3
0
def generateItemTitleList(items,orderedWordList):
    """Encode each item's title as a list of vocabulary indices.

    For every item the result holds one list whose first element is
    ``didItemSell(row)``, followed by the index in ``orderedWordList`` of each
    title word that is longer than two characters and whose lower-cased form
    appears in ``orderedWordList``. The title is split on single spaces and
    words are kept in title order (repeats included).

    Returns the list of per-item lists, in the same order as ``items``.
    """
    # Build the word -> index lookup once instead of scanning the vocabulary
    # twice (count + index) for every title word. setdefault keeps the first
    # occurrence, which is the position list.index() would report.
    wordPosition = {}
    for position, vocabWord in enumerate(orderedWordList):
        wordPosition.setdefault(vocabWord, position)

    item_title_words = []
    for row in items:
        words = [didItemSell(row)]

        for word in row[TITLE].split(" "):
            if len(word) <= 2:
                continue
            position = wordPosition.get(word.lower())
            if position is not None:
                words.append(position)

        item_title_words.append(words)

    return item_title_words
예제 #4
0
def crossValidate(binSize, num):
    """Train at every price cut-off and score the binned price predictions.

    Steps:
      1. Load and filter all items, split them 70/30 into train/test sets,
         and reduce the training set with ``selectNumItems(trainItems, num)``.
      2. Build the price bins with ``generateBinArray(binSize, 100)`` and the
         actual final price of every test item (0 when the item did not sell).
      3. For each cut-off in the bins, train on the training set and predict
         a category for every test item; the first cut-off at which an item
         is predicted as category 0 fixes that item's predicted price.
      4. Write "predicted, actual" rows to ``predictedactualprices.csv`` in
         the current working directory.

    Parameters:
      binSize -- width passed to ``generateBinArray`` and subtracted from the
                 cut-off to obtain the lower edge of the current bin.
      num     -- passed to ``selectNumItems`` to limit the training set.

    Returns whatever ``classificationError`` yields for the predicted prices
    against the binned actual prices.

    NOTE(review): the predicted price is derived from the test item's own
    END_PRICE (see the inner loop), so the reported error is not an
    independent measure of the classifier -- confirm this is intended.
    """
    maxPrice = 100
    dataSplit = 0.70

    allItems = getItems(complete=True,sold=True)   #remove genre parameter to search all genres
    allItems = filterItems(allItems)
    trainItems, testItems = splitItemSet(allItems,dataSplit)
    trainItems = selectNumItems(trainItems, num)

    actualFinalPrice = []
    for row in testItems:
        if didItemSell(row):
            actualFinalPrice.append(float(row[END_PRICE]))
        else:
            actualFinalPrice.append(0)

    bins = generateBinArray(binSize,maxPrice)
    actualFinalPriceBinned = binnedFinalPrice(bins,actualFinalPrice)

    # These only have to be built once; the second value returned by
    # generateMatrixData is not needed here.
    orderedWordList = generateOrderedWordList(allItems)
    testMatrix, _unusedCategory = generateMatrixData(
        orderedWordList, generateItemTitleList(testItems,orderedWordList))

    # -1 marks test items that have not been assigned a predicted price yet.
    predictedFinalPrice = [-1]*len(testItems)
    for priceCutOff in bins:
        # The model has to be retrained at every cut-off increment.
        phi_k_unsold, phi_k_sold = trainOnData(trainItems,orderedWordList,priceCutOff)
        testingSetPredictions = makePredictions(testMatrix,phi_k_sold,phi_k_unsold)
        lowerEdge = priceCutOff-binSize

        for i, row in enumerate(testItems):
            if testingSetPredictions[i] != 0 or predictedFinalPrice[i] != -1:
                continue

            endPrice = float(row[END_PRICE])
            if lowerEdge > endPrice:
                predictedFinalPrice[i] = lowerEdge
            elif lowerEdge <= endPrice:
                # Kept as an explicit elif: a NaN price satisfies neither
                # comparison and leaves the -1 sentinel in place.
                predictedFinalPrice[i] = getBinOf(bins, endPrice)
                if predictedFinalPrice[i] < 0:
                    predictedFinalPrice[i] = 0

    # Items still at the -1 sentinel after all cut-offs get the last bin.
    for i, predicted in enumerate(predictedFinalPrice):
        if predicted == -1:
            predictedFinalPrice[i] = bins[-1]

    # The context manager closes the file even if a write fails.
    with open('predictedactualprices.csv', 'w') as outFile:
        for predicted, actual in zip(predictedFinalPrice, actualFinalPrice):
            outFile.write(str(predicted)+ ", "+ str(actual)+ "\n")

    return classificationError(predictedFinalPrice, actualFinalPriceBinned)