Beispiel #1
0
def fullConcWeight(weight, numSent, numberOfReviews=950):
    """Return the accuracy of ``conclusionWeight`` on the Ebert review folders.

    Reviews loaded from the ``4-0`` and ``3-5`` folders are expected to make
    ``conclusionWeight`` return a truthy value; reviews from the ``0-0``,
    ``0-5``, ``1-0`` and ``1-5`` folders are expected to make it return a
    falsy value.  Every review meeting its expectation counts as correct.

    Args:
        weight: Forwarded unchanged to ``conclusionWeight``.
        numSent: Forwarded unchanged to ``conclusionWeight``.
        numberOfReviews: Upper slice bound applied to each class, i.e. at
            most this many of the first loaded reviews are evaluated per
            class.  Defaults to 950, the value that was previously
            hard-coded in the body.

    Returns:
        ``correct / total`` over all evaluated reviews.

    Raises:
        ZeroDivisionError: If no review was loaded for either class.
    """
    goodWords, badWords = utils.getUniqueGoodandBadWords()

    # Disabled alternative corpus:
    #   'dataset/txt_sentoken/pos/' and 'dataset/txt_sentoken/neg/'
    positiveDirs = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeDirs = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )

    # Accumulate into fresh lists so the loader's return values are never
    # mutated in place.
    posReviews = []
    for directory in positiveDirs:
        posReviews += utils.loadAllTextFiles(directory)
    negReviews = []
    for directory in negativeDirs:
        negReviews += utils.loadAllTextFiles(directory)

    posReviews = posReviews[:numberOfReviews]
    negReviews = negReviews[:numberOfReviews]

    # A positive review is right when the classifier says "positive" ...
    correct = sum(
        1 for review in posReviews
        if conclusionWeight(review, weight, goodWords, badWords, numSent)
    )
    # ... and a negative review is right when it does not.
    correct += sum(
        1 for review in negReviews
        if not conclusionWeight(review, weight, goodWords, badWords, numSent)
    )

    return correct / (len(posReviews) + len(negReviews))
Beispiel #2
0
def fullPosNegTest(numberOfReviews=950):
    """Score ``posminusneg`` on the Ebert review folders, printing progress.

    Reviews from the ``4-0`` and ``3-5`` folders are expected to make
    ``posminusneg`` return a truthy value; reviews from the ``0-0``,
    ``0-5``, ``1-0`` and ``1-5`` folders are expected to make it return a
    falsy value.  After every review one line is printed with the verdict
    for that review, the running accuracy in percent and the number of
    reviews processed so far.  ``"halfway there!"`` is printed between the
    two classes.

    Args:
        numberOfReviews: Upper slice bound applied to each class, i.e. at
            most this many of the first loaded reviews are evaluated per
            class.  Defaults to 950, the value that was previously
            hard-coded in the body.

    Returns:
        ``correct / count`` over all evaluated reviews.

    Raises:
        ZeroDivisionError: If no review was loaded for either class.
    """
    goodWords, badWords = utils.getUniqueGoodandBadWords()

    # Disabled alternative corpus:
    #   'dataset/txt_sentoken/pos/' and 'dataset/txt_sentoken/neg/'
    positiveDirs = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeDirs = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )

    # Accumulate into fresh lists so the loader's return values are never
    # mutated in place.
    posReviews = []
    for directory in positiveDirs:
        posReviews += utils.loadAllTextFiles(directory)
    negReviews = []
    for directory in negativeDirs:
        negReviews += utils.loadAllTextFiles(directory)

    posReviews = posReviews[:numberOfReviews]
    negReviews = negReviews[:numberOfReviews]

    correct = 0
    count = 0
    # Both classes share the same scoring loop; only the expected verdict
    # differs.  Positives are processed first, then negatives.
    for reviews, expectPositive in ((posReviews, True), (negReviews, False)):
        if not expectPositive:
            print("halfway there!")
        for review in reviews:
            count += 1
            predictedPositive = bool(posminusneg(review, goodWords, badWords))
            if predictedPositive == expectPositive:
                correct += 1
                s = "correct!"
            else:
                s = "wrong :("
            # To report only every tenth review, guard this print with
            # ``if count % 10 == 0:``.
            print(s + "  {:.2f}%  ".format(correct / count * 100) + str(count))

    return correct / count
def getSuperGoodBadAvg(iterations, topNum, numberOfReviews=950):
    """Run ``getSuperGoodBad`` repeatedly and print the running mean result.

    Review texts are loaded from the Ebert review folders (``4-0`` and
    ``3-5`` as the positive class; ``0-0``, ``0-5``, ``1-0`` and ``1-5`` as
    the negative class) and paired element-wise with the entries loaded
    from the matching ``pos*.txt`` files.  The pairs are handed to
    ``getSuperGoodBad`` once per iteration, and after each iteration the
    mean of all results so far is printed with four decimals.

    Args:
        iterations: Number of times ``getSuperGoodBad`` is invoked.
        topNum: Forwarded unchanged to ``getSuperGoodBad``.
        numberOfReviews: Upper slice bound applied to each of the four
            loaded lists, i.e. at most this many of the first loaded items
            are used per class.  Defaults to 950, the value that was
            previously hard-coded in the body.
    """
    # Presumably part-of-speech tags (adjective / noun / adverb in Penn
    # Treebank notation) -- TODO confirm against utils.loadPosList.
    posList = ['JJ', 'NN', 'RB']
    inclusion = True

    # Disabled alternative corpus:
    #   'dataset/txt_sentoken/pos/' and 'dataset/txt_sentoken/neg/' with
    #   'dataset/txt_sentoken/negposlist.txt' / '.../posposlist.txt'
    positiveDirs = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeDirs = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )
    positiveTagFiles = (
        'dataset/ebert_reviews/pos4-0.txt',
        'dataset/ebert_reviews/pos3-5.txt',
    )
    negativeTagFiles = (
        'dataset/ebert_reviews/pos0-0.txt',
        'dataset/ebert_reviews/pos0-5.txt',
        'dataset/ebert_reviews/pos1-0.txt',
        'dataset/ebert_reviews/pos1-5.txt',
    )

    # Accumulate into fresh lists so the loaders' return values are never
    # mutated in place.  Folder order and tag-file order must stay in sync
    # because the two lists are zipped together below.
    posReviews = []
    for directory in positiveDirs:
        posReviews += utils.loadAllTextFiles(directory)
    negReviews = []
    for directory in negativeDirs:
        negReviews += utils.loadAllTextFiles(directory)

    posposList = []
    for tagFile in positiveTagFiles:
        posposList += utils.loadPosList(tagFile, posList, inclusion)
    negposList = []
    for tagFile in negativeTagFiles:
        negposList += utils.loadPosList(tagFile, posList, inclusion)

    posReviews = posReviews[:numberOfReviews]
    negReviews = negReviews[:numberOfReviews]
    posposList = posposList[:numberOfReviews]
    negposList = negposList[:numberOfReviews]

    # Pair every review text with its entry from the tag files.
    posTuples = list(zip(posReviews, posposList))
    negTuples = list(zip(negReviews, negposList))

    dataSetGoodWords, dataSetBadWords = utils.getUniqueGoodandBadWords()

    totalacc = 0.0
    for i in range(iterations):
        totalacc += getSuperGoodBad(topNum, posTuples, negTuples, dataSetGoodWords, dataSetBadWords)
        # Mean over the i + 1 runs completed so far.
        print("accuracy : " + "{:.4f}".format(totalacc / (i + 1)))