def fullConcWeight(weight, numSent):
    """Return the fraction of Ebert reviews that conclusionWeight labels correctly.

    Reviews loaded from the 4-0 and 3-5 folders form the positive set and
    reviews from the 0-0, 0-5, 1-0 and 1-5 folders form the negative set;
    each set is truncated to its first 950 entries.  A positive review is
    counted as correct when conclusionWeight returns a truthy value, a
    negative review when it returns a falsy one.

    weight and numSent are passed through to conclusionWeight unchanged.

    Raises ZeroDivisionError if no reviews were loaded at all.
    """
    # Alternative corpus (currently unused): 'dataset/txt_sentoken/pos/'
    # and 'dataset/txt_sentoken/neg/'.
    positiveFolders = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeFolders = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )
    reviewCap = 950

    goodWords, badWords = utils.getUniqueGoodandBadWords()

    positives = []
    for folder in positiveFolders:
        positives += utils.loadAllTextFiles(folder)
    negatives = []
    for folder in negativeFolders:
        negatives += utils.loadAllTextFiles(folder)

    positives = positives[:reviewCap]
    negatives = negatives[:reviewCap]

    def judgedPositive(text):
        # Single place that fixes the argument order for the classifier.
        return conclusionWeight(text, weight, goodWords, badWords, numSent)

    hits = sum(1 for text in positives if judgedPositive(text))
    hits += sum(1 for text in negatives if not judgedPositive(text))

    return hits / (len(positives) + len(negatives))
def fullPosNegTest():
    """Run posminusneg over the Ebert reviews and return its accuracy.

    The positive set comes from the 4-0 and 3-5 folders, the negative set
    from the 0-0, 0-5, 1-0 and 1-5 folders; each set is truncated to its
    first 950 reviews.  A positive review is correct when posminusneg
    returns a truthy value, a negative review when it returns a falsy one.

    For every review a progress line is printed with the outcome, the
    running accuracy in percent and the number of reviews seen so far;
    "halfway there!" is printed once the positive set is finished.

    Returns correct / total.  Raises ZeroDivisionError if no reviews were
    loaded.
    """
    # Alternative corpus (currently unused): 'dataset/txt_sentoken/pos/'
    # and 'dataset/txt_sentoken/neg/'.
    positiveFolders = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeFolders = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )
    reviewCap = 950

    goodWords, badWords = utils.getUniqueGoodandBadWords()

    positives = []
    for folder in positiveFolders:
        positives += utils.loadAllTextFiles(folder)
    negatives = []
    for folder in negativeFolders:
        negatives += utils.loadAllTextFiles(folder)

    positives = positives[:reviewCap]
    negatives = negatives[:reviewCap]

    hits = 0
    seen = 0
    # Positive reviews are scored first, then negative ones.
    for batch, wantPositive in ((positives, True), (negatives, False)):
        for text in batch:
            seen += 1
            saysPositive = bool(posminusneg(text, goodWords, badWords))
            if saysPositive == wantPositive:
                hits += 1
                verdict = "correct!"
            else:
                verdict = "wrong :("
            # Reported for every review; the "every 10th review" throttle
            # (count % 10 == 0) is left disabled.
            print(verdict + " {:.2f}% ".format(hits / seen * 100) + str(seen))
        if wantPositive:
            print("halfway there!")

    return hits / seen
def getSuperGoodBadAvg(iterations, topNum):
    """Call getSuperGoodBad repeatedly and print the running mean accuracy.

    Review texts are loaded with utils.loadAllTextFiles and the matching
    tag lists with utils.loadPosList (tag filter ['JJ', 'NN', 'RB'],
    inclusion=True; presumably part-of-speech tags -- confirm against
    utils).  Folders/files 4-0 and 3-5 form the positive set, 0-0, 0-5, 1-0
    and 1-5 the negative set.  Each list is truncated to its first 950
    entries and reviews are paired with tag lists by position.

    getSuperGoodBad is then called `iterations` times with topNum, the
    paired data and the word lists from utils.getUniqueGoodandBadWords;
    after each call the mean of the results so far is printed as
    "accuracy : x.xxxx".

    Nothing is returned.
    """
    # Alternative corpus (currently unused): reviews under
    # 'dataset/txt_sentoken/pos/' and 'dataset/txt_sentoken/neg/', with tag
    # lists 'dataset/txt_sentoken/posposlist.txt' and
    # 'dataset/txt_sentoken/negposlist.txt'.
    posList = ['JJ', 'NN', 'RB']
    inclusion = True
    reviewCap = 950

    positiveFolders = (
        'dataset/ebert_reviews/4-0/',
        'dataset/ebert_reviews/3-5/',
    )
    negativeFolders = (
        'dataset/ebert_reviews/0-0/',
        'dataset/ebert_reviews/0-5/',
        'dataset/ebert_reviews/1-0/',
        'dataset/ebert_reviews/1-5/',
    )
    positiveTagFiles = (
        'dataset/ebert_reviews/pos4-0.txt',
        'dataset/ebert_reviews/pos3-5.txt',
    )
    negativeTagFiles = (
        'dataset/ebert_reviews/pos0-0.txt',
        'dataset/ebert_reviews/pos0-5.txt',
        'dataset/ebert_reviews/pos1-0.txt',
        'dataset/ebert_reviews/pos1-5.txt',
    )

    # Review texts first, then tag lists, positive before negative.
    positiveTexts = []
    for folder in positiveFolders:
        positiveTexts += utils.loadAllTextFiles(folder)
    negativeTexts = []
    for folder in negativeFolders:
        negativeTexts += utils.loadAllTextFiles(folder)

    positiveTags = []
    for tagFile in positiveTagFiles:
        positiveTags += utils.loadPosList(tagFile, posList, inclusion)
    negativeTags = []
    for tagFile in negativeTagFiles:
        negativeTags += utils.loadPosList(tagFile, posList, inclusion)

    # Pair each review with its tag list by position; zip stops at the
    # shorter of the two truncated lists.
    posTuples = list(zip(positiveTexts[:reviewCap], positiveTags[:reviewCap]))
    negTuples = list(zip(negativeTexts[:reviewCap], negativeTags[:reviewCap]))

    dataSetGoodWords, dataSetBadWords = utils.getUniqueGoodandBadWords()

    runningTotal = 0.0
    for completed in range(1, iterations + 1):
        runningTotal += getSuperGoodBad(
            topNum, posTuples, negTuples, dataSetGoodWords, dataSetBadWords
        )
        print("accuracy : " + "{:.4f}".format(runningTotal / completed))