def showFeatures(IDsFilename=REVIEW_IDS_FILENAME):
    """Extract features from the corpus identified by *IDsFilename* and
    display how often each feature occurs."""
    reviewSet = Corpus(IDsFilename)
    featureList, vectors = extractFeatures(
        reviewSet.reviewIDs, reviewSet.reviews, features=None
    )
    showFeatureOccurrence(featureList, vectors)
def applyRules(IDsFilename):
    """Use the rule based approach to classify the reviews from the given set.

    Builds a `Corpus` from *IDsFilename*, extracts features, classifies every
    review, prints the feature-occurrence table, and finally prints the
    performance against the corpus gold standard.

    Args:
        IDsFilename: Name of the ID-list file (resolved relative to
            CORPUS_PATH) describing which reviews make up the set.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH, file=IDsFilename))
    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs, dataSet.reviews)

    gold = dataSet.goldStandard
    classification = classify(features, featureVectors)
    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Align gold labels and predicted labels by review ID before scoring.
    reviewOrder = list(gold)
    targets = [gold[ID] for ID in reviewOrder]
    cls = [classification[ID] for ID in reviewOrder]
    showPerformance(targets, cls)
def testRules():
    """Use the rule based approach to classify the generated test reviews.

    Mirrors `applyRules`, but runs on the synthetic reviews produced by
    `createTestReviews` instead of a corpus loaded from disk.
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    features, featureVectors = extractFeatures(ironicIDs + regularIDs, reviews)
    gold = {ID: reviews[ID].ironic for ID in ironicIDs + regularIDs}
    # NOTE(review): this uses ruleClassify while applyRules uses classify —
    # confirm both names are intended to coexist.
    classification = ruleClassify(features, featureVectors)
    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Consistency fix: the sibling functions pass ID-aligned lists to
    # showPerformance, not raw dicts — do the same here.
    reviewOrder = list(gold)
    targets = [gold[ID] for ID in reviewOrder]
    cls = [classification[ID] for ID in reviewOrder]
    showPerformance(targets, cls)
def applySingleRules(IDsFilename):
    """Apply each feature on its own to the given corpus and report performance.

    Originally meant to apply just one rule; now it classifies the set once
    per feature, so the output shows how well each individual feature
    separates ironic from regular reviews.

    Args:
        IDsFilename: Name of the ID-list file (resolved relative to
            CORPUS_PATH) describing which reviews make up the set.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH, file=IDsFilename))
    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs, dataSet.reviews)
    showFeatureOccurrence(features, featureVectors)
    gold = dataSet.goldStandard

    # Evaluate every feature in isolation.
    decisiveFeatureNames = [f.name for f in features]
    for featureName in decisiveFeatureNames:
        classification = classify(features, featureVectors, [featureName])
        # Align gold labels and predictions by review ID before scoring.
        reviewOrder = list(gold)
        targets = [gold[ID] for ID in reviewOrder]
        cls = [classification[ID] for ID in reviewOrder]
        print("\nClassifying by rule: ", featureName)
        showPerformance(targets, cls)
def testFeatures():
    """Check that feature extraction runs on the generated test reviews
    by printing the feature-occurrence table."""
    ironicIDs, regularIDs, reviews = createTestReviews()
    allIDs = ironicIDs + regularIDs
    extracted, vectors = extractFeatures(allIDs, reviews)
    showFeatureOccurrence(extracted, vectors)