def create_CategoryAUC(categoryList):
    """Map every category to what ``DT.getTopCategory`` returns for it.

    Each entry of ``categoryList`` is passed to ``DT.getTopCategory``
    together with the current number of keys in the module-level
    ``fileTestAlphaNumericStrStemmedDict``.

    Returns a dict keyed by category, sketched by the original author as
    ``{'acq': ['1', '2'], 'cad': ['3', '4'], ...}``.

    NOTE(review): elsewhere in this file ``DT.getTopCategory`` is called
    with three arguments (matrix, category, k) -- confirm that the
    two-argument call used here is intended.
    """
    # The key count is re-read for every category, as in a plain loop.
    return {
        label: DT.getTopCategory(
            label, len(fileTestAlphaNumericStrStemmedDict.keys()))
        for label in categoryList
    }
Example #2
0
def create_CategoryAUC(categoryList):
    """Build a ``{category: DT.getTopCategory(...)}`` lookup.

    ``categoryList`` is iterated once; for each category the helper is
    invoked with that category and the number of keys currently held by
    the module-level ``fileTestAlphaNumericStrStemmedDict``.

    The result has the shape the original comment describes:
    ``{'acq': ['1', '2'], 'cad': ['3', '4'], ...}``.

    NOTE(review): other call sites in this file pass three arguments to
    ``DT.getTopCategory`` (matrix, category, k); verify this two-argument
    form against the helper's signature.
    """
    def top_for(category):
        # Evaluated per category so the key count is always current.
        return DT.getTopCategory(
            category, len(fileTestAlphaNumericStrStemmedDict.keys()))

    return dict((category, top_for(category)) for category in categoryList)
Example #3
0
    np.set_printoptions(threshold='nan')

    print "\nNow Printing Confusion Matrix...\n"
    print repr(confusionMatrix)


# Stages that are currently switched off:
#   createConfusionMatrix()                    (Execute ConfusionMatrix)
#   naiveBayes(termFrequencyPerCategoryList)   (Execute NaiveBayes algorithm)

# Build the file/category matrix from the classifier and the per-category
# test-set frequencies, then query it for category 'alum' with 3 as the
# final argument.
file_category_matrix = DT.get_file_category_matrix(
    clf,
    frequencyInFilePerCategoryInTestSetList,
)
top_k_file = DT.getTopCategory(file_category_matrix, 'alum', 3)

# Single-argument parenthesised print behaves the same as the statement form.
print(str(top_k_file))

# AUC PR curve
print("Drawing PR curve")


# test phase for AUC of tfidf
def create_CategoryAUC(categoryList):
    """Collect per-category results, report timing, and dump the matrix.

    For each entry of ``categoryList`` this stores the value returned by
    ``DT.getTopCategory`` (called with the category and the number of
    entries in the module-level ``fileTestAlphaNumericStrStemmedDict``).
    It then prints the seconds elapsed since ``startCMTime`` and the full
    ``repr`` of the module-level ``confusionMatrix``.

    Returns the ``{category: result}`` dict, e.g.
    ``{'acq': ['1', '2'], 'cad': ['3', '4'], ...}``.  The mapping used to
    be built and then discarded; it is now returned, matching the other
    definitions of this function in the file.

    Side effect: changes NumPy's global print threshold.

    NOTE(review): elsewhere in this file ``DT.getTopCategory`` is called
    with three arguments (matrix, category, k) -- confirm that the
    two-argument call used here is intended.
    """
    import sys  # local import: only needed for the print threshold below

    categoryAssigFileTFIDF = {}

    for cat in categoryList:
        categoryAssigFileTFIDF[cat] = DT.getTopCategory(
            cat, len(fileTestAlphaNumericStrStemmedDict))
    print("Time for Testing:\t" + str(round(time.time() - startCMTime, 3)))

    # threshold='nan' is rejected by current NumPy; sys.maxsize is the
    # documented way to always print arrays in full.
    np.set_printoptions(threshold=sys.maxsize)

    print("\nNow Printing Confusion Matrix...\n")
    print(repr(confusionMatrix))

    return categoryAssigFileTFIDF

# Stages that are currently switched off:
#   createConfusionMatrix()                    (Execute ConfusionMatrix)
#   naiveBayes(termFrequencyPerCategoryList)   (Execute NaiveBayes algorithm)

# Build the file/category matrix from the classifier and the per-category
# test-set frequencies, then query it for category 'alum' with 3 as the
# final argument.
file_category_matrix = DT.get_file_category_matrix(
    clf,
    frequencyInFilePerCategoryInTestSetList,
)
top_k_file = DT.getTopCategory(file_category_matrix, 'alum', 3)

# Single-argument parenthesised print behaves the same as the statement form.
print(str(top_k_file))


# AUC PR curve
print("Drawing PR curve")
# test phase for AUC of tfidf
def create_CategoryAUC(categoryList):
    """Return a dict of ``DT.getTopCategory`` results keyed by category.

    Every category in ``categoryList`` is looked up via
    ``DT.getTopCategory``, which receives the category and the number of
    keys in the module-level ``fileTestAlphaNumericStrStemmedDict``.

    The original author's sketch of the result:
    ``{'acq': ['1', '2'], 'cad': ['3', '4'], ...}``.

    NOTE(review): other call sites in this file pass three arguments to
    ``DT.getTopCategory`` (matrix, category, k); verify this two-argument
    form against the helper's signature.
    """
    assignments = {}

    for category in categoryList:
        # Resolve the helper first, then the size, then call -- the same
        # evaluation order as a single inline call expression.
        pick_top = DT.getTopCategory
        entry_count = len(fileTestAlphaNumericStrStemmedDict.keys())
        assignments[category] = pick_top(category, entry_count)

    return assignments