コード例 #1
0
ファイル: main.py プロジェクト: kbuschme/irony-detection
def showFeatures(IDsFilename=REVIEW_IDS_FILENAME):
    """Extracts the features of a review set and shows their occurrence.

    A Corpus is built from IDsFilename, its review IDs and reviews are
    handed to extractFeatures (with features=None) and the result is passed
    on to showFeatureOccurrence.
    """
    reviewCorpus = Corpus(IDsFilename)
    IDs = reviewCorpus.reviewIDs
    reviews = reviewCorpus.reviews

    features, featureVectors = extractFeatures(IDs, reviews, features=None)
    showFeatureOccurrence(features, featureVectors)
コード例 #2
0
ファイル: rules.py プロジェクト: kbuschme/irony-detection
def applyRules(IDsFilename):
    """Classifies the reviews of the given set with the rule based approach.

    The corpus is created from IDsFilename (located via CORPUS_PATH), its
    features are extracted and classified, and both the feature occurrence
    and the performance against the gold standard are shown.
    """
    setLocation = "Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                           file=IDsFilename)
    print(setLocation)

    print("Creating reviews...(this may take a while)")
    # NOTE: a previously pickled corpus could be used instead, e.g.
    # Corpus.loadCorpus(filename="training_set.pk").
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    print("Extracting features...")
    extracted = extractFeatures(dataSet.reviewIDs, dataSet.reviews)
    features, featureVectors = extracted

    gold = dataSet.goldStandard
    classification = classify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Gold labels and predictions as two parallel lists, aligned by review ID.
    targets = [label for _, label in gold.items()]
    cls = [classification[ID] for ID, _ in gold.items()]

    showPerformance(targets, cls)
コード例 #3
0
ファイル: rules.py プロジェクト: romanklinger/irony-detection
def applyRules(IDsFilename):
    """Runs the rule based classifier on the reviews of the given set.

    Creates the corpus for IDsFilename (using CORPUS_PATH), extracts and
    classifies its features, then shows the feature occurrence and the
    performance of the classification against the gold standard.
    """
    print("Using the set at '{path}{file}'".format(file=IDsFilename,
                                                   path=CORPUS_PATH))

    print("Creating reviews...(this may take a while)")
    # NOTE: loading a pickled corpus is an alternative to creating it, e.g.
    # Corpus.loadCorpus(filename="training_set.pk").
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    print("Extracting features...")
    reviewIDs = dataSet.reviewIDs
    reviews = dataSet.reviews
    features, featureVectors = extractFeatures(reviewIDs, reviews)

    gold = dataSet.goldStandard
    classification = classify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Pair every gold label with the prediction for the same review ID,
    # then split the pairs into the two parallel lists.
    labelPairs = [(label, classification[ID]) for ID, label in gold.items()]
    targets = [label for label, _ in labelPairs]
    cls = [predicted for _, predicted in labelPairs]

    showPerformance(targets, cls)
コード例 #4
0
ファイル: main.py プロジェクト: romanklinger/irony-detection
def testRules():
    """Uses rule based approach to classify the test reviews.

    The reviews come from createTestReviews(); their features are extracted,
    classified with ruleClassify() and the outcome is handed to
    showFeatureOccurrence() and showPerformance().
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    allIDs = ironicIDs + regularIDs
    features, featureVectors = extractFeatures(allIDs, reviews)

    gold = {ID: reviews[ID].ironic for ID in allIDs}
    classification = ruleClassify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, classification)

    # showPerformance is handed two parallel label lists aligned by review
    # ID, which is the shape the rule runners in rules.py pass. Iterating the
    # ID-keyed dicts directly would yield the IDs instead of the labels.
    # NOTE(review): confirm showPerformance expects sequences of labels.
    targets = [label for _, label in gold.items()]
    cls = [classification[ID] for ID, _ in gold.items()]

    showPerformance(targets, cls)
コード例 #5
0
ファイル: main.py プロジェクト: kbuschme/irony-detection
def testRules():
    """Uses rule based approach to classify the test reviews.

    Creates the test reviews, extracts their features, classifies them with
    ruleClassify() and passes the results to showFeatureOccurrence() and
    showPerformance().
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    allIDs = ironicIDs + regularIDs
    features, featureVectors = extractFeatures(allIDs, reviews)

    gold = {ID: reviews[ID].ironic for ID in allIDs}
    classification = ruleClassify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Pass gold labels and predictions as parallel lists aligned by review
    # ID, matching how applyRules/applySingleRules call showPerformance.
    # The dicts themselves iterate over IDs, not over labels.
    # NOTE(review): confirm showPerformance expects sequences of labels.
    targets = []
    cls = []
    for ID, label in gold.items():
        targets.append(label)
        cls.append(classification[ID])

    showPerformance(targets, cls)
コード例 #6
0
ファイル: rules.py プロジェクト: romanklinger/irony-detection
def applySingleRules(IDsFilename):
    """
    Classifies the given corpus once per feature, using only that feature.

    Should originally just apply one rule.
    Is now used to apply one feature at a time to the given corpus.
    So it basically shows how often each feature occurs in ironic and regular
    reviews, followed by the performance of every feature used on its own.

    IDsFilename -- name of the review ID set; it is passed to Corpus together
                   with CORPUS_PATH.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                   file=IDsFilename))

    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)
    # NOTE: a pickled corpus could be loaded instead via
    # Corpus.loadCorpus(filename=...). The former "Loading reviews..."
    # message was removed because nothing is loaded on this path.

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs,
                                               dataSet.reviews)

    showFeatureOccurrence(features, featureVectors)

    gold = dataSet.goldStandard

    # Every extracted feature is applied on its own. An earlier version
    # restricted this to a hand-picked subset of feature names
    # (e.g. "Scare quotes", "Interrobang").
    decisiveFeatureNames = [f.name for f in features]

    # The gold labels do not depend on the feature being applied, so the ID
    # order and the target list are built once instead of once per feature.
    goldItems = list(gold.items())
    reviewIDs = [ID for ID, _ in goldItems]
    targets = [label for _, label in goldItems]

    for featureName in decisiveFeatureNames:
        classification = classify(features, featureVectors, [featureName])
        cls = [classification[ID] for ID in reviewIDs]

        print("\nClassifying by rule: ", featureName)

        showPerformance(targets, cls)
コード例 #7
0
ファイル: rules.py プロジェクト: kbuschme/irony-detection
def applySingleRules(IDsFilename):
    """
    Applies each feature on its own as a rule to the given corpus.

    Should originally just apply one rule.
    Is now used to apply one feature at a time to the given corpus.
    So it basically shows how often each feature occurs in ironic and regular
    reviews and then the performance of each single-feature classification.

    IDsFilename -- name of the review ID set; it is passed to Corpus together
                   with CORPUS_PATH.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                   file=IDsFilename))

    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)
    # NOTE: the corpus is always created here. The misleading
    # "Loading reviews..." message was removed; loading a pickled corpus
    # would go through Corpus.loadCorpus(filename=...).

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs,
                                               dataSet.reviews)

    showFeatureOccurrence(features, featureVectors)

    gold = dataSet.goldStandard

    # All extracted features are used. A hand-picked list of feature names
    # (e.g. "Scare quotes", "Onomatopoeia") was used here previously.
    decisiveFeatureNames = [f.name for f in features]

    # Loop-invariant: the gold standard is the same for every feature, so
    # the aligned ID and target lists are computed a single time.
    reviewIDs = []
    targets = []
    for ID, label in gold.items():
        reviewIDs.append(ID)
        targets.append(label)

    for featureName in decisiveFeatureNames:
        classification = classify(features, featureVectors, [featureName])
        cls = [classification[ID] for ID in reviewIDs]

        print("\nClassifying by rule: ", featureName)

        showPerformance(targets, cls)
コード例 #8
0
ファイル: main.py プロジェクト: romanklinger/irony-detection
def showFeatures(IDsFilename=REVIEW_IDS_FILENAME):
    """Shows the feature occurrence for the reviews of the given ID set.

    Builds a Corpus from IDsFilename, runs extractFeatures on its review IDs
    and reviews (with features=None) and passes the result to
    showFeatureOccurrence.
    """
    corpus = Corpus(IDsFilename)
    extracted = extractFeatures(
        corpus.reviewIDs,
        corpus.reviews,
        features=None,
    )
    features, featureVectors = extracted

    showFeatureOccurrence(features, featureVectors)
コード例 #9
0
ファイル: main.py プロジェクト: romanklinger/irony-detection
def testFeatures():
    """Runs the feature extraction on the test reviews and shows the result.

    The ironic and regular review IDs from createTestReviews() are combined
    and passed with the reviews to extractFeatures(); the extracted features
    go to showFeatureOccurrence().
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    allIDs = ironicIDs + regularIDs

    extracted = extractFeatures(allIDs, reviews)
    features, featureVectors = extracted

    showFeatureOccurrence(features, featureVectors)
コード例 #10
0
ファイル: main.py プロジェクト: kbuschme/irony-detection
def testFeatures():
    """Checks the features by extracting them from the test reviews.

    Ironic IDs come first, followed by the regular IDs; the extraction
    result is displayed with showFeatureOccurrence().
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    reviewIDs = ironicIDs + regularIDs
    features, featureVectors = extractFeatures(reviewIDs, reviews)

    showFeatureOccurrence(features, featureVectors)