Ejemplo n.º 1
0
            classifier_file.close()

        # setup word stemmer
        self.stemmer = SnowballStemmer("english")

    def classify(self, tree):
        """Score a parsed sentence against every known category.

        The leaves of ``tree`` are stemmed and lower-cased, then each
        category's classifier is asked how probable that category is given
        a bag-of-words feature dict (feature word -> present in sentence).

        Args:
            tree: object exposing ``leaves()`` that returns the sentence's
                word tokens (presumably an ``nltk.Tree`` -- confirm against
                callers).

        Returns:
            dict mapping each category name to the probability its
            classifier assigns to that category.
        """
        # A set gives O(1) membership tests; the feature lists are scanned
        # once per category, so a list here made this quadratic.
        words = {self.stemmer.stem(word).lower() for word in tree.leaves()}
        categories = {}
        for category, classifier in self.classifiers.items():
            # Presence/absence of each of this category's feature words.
            features = {
                feature: (feature in words)
                for feature in self.word_features[category]
            }
            categories[category] = classifier.prob_classify(features).prob(category)
        return categories

# Module-level instances used by the analysis loop below.
classifier = Classifier()
predictor = SentimentPredictor()

# Documents whose "analyzed" field already equals this value are excluded by
# the query below.
ANALYZER_VERSION = 3

# iterate over confessions and predict categories and sentiment
# NOTE(review): db.parses looks like a MongoDB collection, in which case this
# fetches at most 500 not-yet-analyzed documents per run -- confirm.
confessions = db.parses.find({
    "analyzed": { "$ne": ANALYZER_VERSION }
}, limit=500)
# Minimum probability a category must exceed to be kept for a sentence.
threshold = 0.2
for confession in confessions:
    for tree_id, raw_tree in enumerate(confession["trees"]):
        # Skip entries stored as the literal string "None" (presumably
        # sentences that failed to parse -- verify against the writer).
        if raw_tree == "None": continue

        # get sentence categories
        tree = Tree.fromstring(raw_tree)
        # Keep only (category, probability) pairs strictly above the threshold.
        categories = [(category, prob) for (category, prob) in classifier.classify(tree).items() if prob > threshold]
Ejemplo n.º 2
0
def score_accuracy(data):
    """Return the fraction of clearly polar items predicted with the right sign.

    Items whose gold sentiment has magnitude <= 0.5 are treated as neutral
    and ignored. For the rest, a prediction is accurate when it is strictly
    negative for a negative gold value, or strictly positive for a positive
    one (a predicted sentiment of exactly 0 is always inaccurate).

    Args:
        data: iterable of dicts with numeric "gold_sentiment" and
            "sentiment" entries.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when no item is polar enough to
        be scored (previously this raised ZeroDivisionError).
    """
    accurate = 0
    scored = 0
    for datum in data:
        gold = datum["gold_sentiment"]
        if abs(gold) <= 0.5:
            continue  # too close to neutral to judge
        scored += 1
        predicted = datum["sentiment"]
        if gold < 0:
            hit = predicted < 0
        else:
            hit = predicted > 0
        if hit:
            accurate += 1
    if not scored:
        # Nothing to score: avoid dividing by zero.
        return 0.0
    return float(accurate) / scored

# predict sentence sentiments
# `sentences` maps (parse_id, tree_id) to a collection of numeric sentiment
# ratings; their mean is used as the gold label for that sentence.
predictor = SentimentPredictor()
for (parse_id, tree_id), sentiments in sentences.items():
    # Look up the stored parse document to get the raw tree for this sentence.
    confession = db.parses.find_one({ "_id": ObjectId(parse_id) })
    tree = confession["trees"][tree_id]
    # Multiplying by 1.0 forces float division (this file is Python 2).
    sentiment = sum(sentiments)*1.0/len(sentiments)
    predictor.add_tree({
        "raw_tree": tree,
        "gold_sentiment": sentiment
    })
# NOTE(review): run() presumably adds a "sentiment" entry to each queued tree,
# since score_accuracy reads datum["sentiment"] -- confirm in SentimentPredictor.
predictor.run()
print score_accuracy(predictor.trees)

# predict confession sentiments
predictor = SentimentPredictor()
for parse_id, sentiments in confessions.items():
    confession = db.parses.find_one({ "_id": ObjectId(parse_id) })