Ejemplo n.º 1
0
def main():
    """Evaluate several part-of-speech tag sets and plot their accuracies.

    For each tag set a feature extractor is built with
    ``make_pos_extractor``, ``q21.evaluate_features`` is run with it, and
    the accuracy computed from ``q21.maintest`` / ``q21.testClassify`` is
    recorded.  The accuracies are then shown as a bar chart whose x
    positions are the 1-based index of each tag set.
    """
    pos_sets = [
        ['N', 'VG', 'ADJ', 'ADV'],
        # NOTE(review): 'VN' appears twice here; kept as in the original
        # because the effect of duplicates on make_pos_extractor is unknown.
        ['N', 'V', 'VG', 'VN', 'VN', 'ADJ', 'ADV'],
        ['V', 'ADJ', 'ADV'],
        ['ADJ', 'ADV'],
        ['N', 'ADJ', 'ADV'],
    ]

    q21 = q2_1()
    x = []
    y = []
    for index, pos in enumerate(pos_sets, 1):
        # Parenthesised single-argument print behaves the same as the
        # original print statement.
        print(pos)
        extractor = make_pos_extractor(pos)
        # The returned classifier is not needed; the call is made for the
        # state it leaves on q21, which is read on the lines below.
        q21.evaluate_features(extractor, 10)
        x.append(index)
        y.append(accuracy(q21.maintest, q21.testClassify))

    pylab.bar(x, y, width=0.02, facecolor='blue', align='center')
    pylab.xlabel('POS')
    pylab.ylabel("Accuracy")
    pylab.title("Accuracy for each pos set")
    pylab.grid(False)
    pylab.show()
Ejemplo n.º 2
0
def main():
    """Run the POS-set experiment for five tag sets and chart the results.

    Every tag set goes through the same steps: print it, build an
    extractor with ``make_pos_extractor``, call ``q21.evaluate_features``
    and store the accuracy computed from ``q21.maintest`` and
    ``q21.testClassify``.  A bar chart of accuracy per tag-set index
    (1..5) is displayed at the end.
    """
    # The experiments, in plotting order.
    # NOTE(review): the second set lists 'VN' twice, exactly as in the
    # original code; confirm whether that duplication is intentional.
    experiments = (
        ['N', 'VG', 'ADJ', 'ADV'],
        ['N', 'V', 'VG', 'VN', 'VN', 'ADJ', 'ADV'],
        ['V', 'ADJ', 'ADV'],
        ['ADJ', 'ADV'],
        ['N', 'ADJ', 'ADV'],
    )

    q21 = q2_1()
    x = []
    y = []
    for number, pos in enumerate(experiments, 1):
        print(pos)  # same output as the original print statement
        q21.evaluate_features(make_pos_extractor(pos), 10)
        x.append(number)
        # maintest/testClassify are read from q21 after the evaluation call.
        y.append(accuracy(q21.maintest, q21.testClassify))

    pylab.bar(x, y, width=0.02, facecolor='blue', align='center')
    pylab.xlabel('POS')
    pylab.ylabel("Accuracy")
    pylab.title("Accuracy for each pos set")
    pylab.grid(False)
    pylab.show()
Ejemplo n.º 3
0
 def evaluate_features(self,feature_extractor, N):
     self.negative = movie_reviews.fileids('neg') #list of all names of the documents under neg folder
     self.positive = movie_reviews.fileids('pos') #list of all names of the documents under pos folder
     self.maintrain, self.maintest = self.stratifiedSplit(self.negative, self.positive, N)
     lst = []
     trainvocabulary = []
     for doc,lbl in self.maintrain:
         x = (feature_extractor(movie_reviews.words(fileids=[doc])),lbl)
         lst.append(x)
         trainvocabulary = trainvocabulary + x[0].keys()
     trainvocabulary = set(trainvocabulary)
     if q2_1.W == 0:
         q2_1.W = len(trainvocabulary)
     print "no. of features in train:", self.W
     nb = classifier.train(lst)
     self.testClassify = self.classifyTest(self.maintest, nb, feature_extractor)
     print "accuracy = ", accuracy(self.maintest, self.testClassify)
     print "Negative:"
     print "    precision = ", self.calcPrec('neg', self.maintest, self.testClassify)
     print "    recall = ", self.calcRecall('neg', self.maintest, self.testClassify)
     print "    f measure = ", self.calcFMeasur('neg', self.maintest, self.testClassify)
     print "Positive:"
     print "    precision = ", self.calcPrec('pos', self.maintest, self.testClassify)
     print "    recall = ", self.calcRecall('pos', self.maintest, self.testClassify)
     print "    f measure = ", self.calcFMeasur('pos', self.maintest, self.testClassify)
     nb.show_most_informative_features()
     return nb
Ejemplo n.º 4
0
def accuracy(rtetagger, gold):
    """
    Compute the accuracy of an RTE tagger on a gold standard.

    Every pair in C{gold} contributes a C{(gid, value)} reference entry
    and a C{(gid, rtetagger.tag(pair))} prediction entry; the two lists
    are compared with C{evaluate.accuracy}.

    @type rtetagger: ???
    @param rtetagger: The rtetagger being evaluated.
    @type gold: C{list} of L{RTEPair}
    @param gold: The list of tagged text-hypothesis pairs to score the tagger on.
    @rtype: C{float}
    """
    expected = []
    for pair in gold:
        expected.append((pair.gid, pair.value))
    predicted = [(pair.gid, rtetagger.tag(pair)) for pair in gold]
    return evaluate.accuracy(expected, predicted)
Ejemplo n.º 5
0
def accuracy(rtetagger, gold):
    """
    Measure how well an RTE tagger agrees with the gold standard.

    The reference list pairs each C{gid} with the pair's own C{value};
    the prediction list pairs each C{gid} with C{rtetagger.tag(pair)}.
    Both lists are handed to C{evaluate.accuracy}.

    @type rtetagger: ???
    @param rtetagger: The rtetagger being evaluated.
    @type gold: C{list} of L{RTEPair}
    @param gold: The list of tagged text-hypothesis pairs to score the tagger on.
    @rtype: C{float}
    """
    reference = [(item.gid, item.value) for item in gold]
    guesses = [(item.gid, rtetagger.tag(item)) for item in gold]
    return evaluate.accuracy(reference, guesses)
Ejemplo n.º 6
0
def plotGraph(q21, K):
    x = []
    y = []
    for i in range(1, 6):
        newK = kVal(q21, i, K)
        extractor = make_topK_non_stopword_extractor(newK, stopset)
        print "top K without stops words, K = ", newK, ":"
        classifier = q21.evaluate_features(extractor, 10)
        x.append(float(newK) / float(q21.W))
        acc = accuracy(q21.maintest, q21.testClassify)
        y.append(acc)
    pylab.bar(x, y, width=0.02, facecolor='blue', align='center')
    pylab.xlabel('K/W')
    pylab.ylabel("Accuracy")
    pylab.title("Accuracy for each K/W value")
    pylab.grid(False)
    pylab.show()
    return
Ejemplo n.º 7
0
def plotGraph(q21, K):
    x = []
    y = []
    for i in range(1,6):
        newK = kVal(q21, i, K)
        extractor = make_topK_non_stopword_extractor(newK, stopset)
        print "top K without stops words, K = ", newK, ":"
        classifier = q21.evaluate_features(extractor, 10)
        x.append(float(newK)/float(q21.W))
        acc = accuracy(q21.maintest, q21.testClassify)
        y.append(acc)
    pylab.bar(x, y, width=0.02, facecolor='blue', align='center')
    pylab.xlabel('K/W')
    pylab.ylabel("Accuracy")
    pylab.title("Accuracy for each K/W value")
    pylab.grid(False)
    pylab.show()
    return
Ejemplo n.º 8
0
def accuracy(tagger, gold):
    """
    Compute how accurately a tagger reproduces a gold-standard tagging.

    Each gold sentence is stripped of its tags with C{untag}, retagged
    with C{tagger.tag}, and the retagged tokens are compared against the
    original ones via C{evaluate.accuracy}.

    @type tagger: C{TaggerI}
    @param tagger: The tagger being evaluated.
    @type gold: C{list} of C{Token}
    @param gold: The list of tagged tokens to score the tagger on.
    @rtype: C{float}
    """
    expected = []
    produced = []
    for sentence in gold:
        # Materialise the sentence once so it can be used twice.
        reference = list(sentence)
        expected.extend(reference)
        produced.extend(list(tagger.tag(untag(reference))))
    return evaluate.accuracy(expected, produced)
Ejemplo n.º 9
0
def accuracy(chunker, gold):
    """
    Compute the accuracy of a chunker against gold-standard chunk trees.

    Each gold tree is flattened and re-parsed by the chunker; the gold
    tree and the re-parsed tree are both converted with
    C{tree2conlltags}, and the two tag sequences are compared with
    C{evaluate.accuracy}.

    @type chunker: C{ChunkParserI}
    @param chunker: The chunker being evaluated.
    @type gold: C{tree}
    @param gold: The chunk structures to score the chunker on.
    @rtype: C{float}
    """
    expected = []
    produced = []
    for reference in gold:
        rechunked = chunker.parse(reference.flatten())
        expected.extend(tree2conlltags(reference))
        produced.extend(tree2conlltags(rechunked))
    return evaluate.accuracy(expected, produced)
Ejemplo n.º 10
0
def accuracy(chunker, gold):
    """
    Score a chunker by re-chunking the gold standard.

    For every gold tree the chunk structure is removed with
    C{flatten()}, the chunker parses the result, and the tags from
    C{tree2conlltags} for both trees are accumulated.  The final score
    is C{evaluate.accuracy} over the accumulated gold and test tags.

    @type chunker: C{ChunkParserI}
    @param chunker: The chunker being evaluated.
    @type gold: C{tree}
    @param gold: The chunk structures to score the chunker on.
    @rtype: C{float}
    """
    gold_side = []
    test_side = []
    for tree in gold:
        # Parse first, then convert gold and test trees, in that order.
        parsed = chunker.parse(tree.flatten())
        gold_side = gold_side + list(tree2conlltags(tree))
        test_side = test_side + list(tree2conlltags(parsed))
    return evaluate.accuracy(gold_side, test_side)
Ejemplo n.º 11
0
 def evaluate_features(self, feature_extractor, N):
     self.negative = movie_reviews.fileids(
         'neg')  #list of all names of the documents under neg folder
     self.positive = movie_reviews.fileids(
         'pos')  #list of all names of the documents under pos folder
     self.maintrain, self.maintest = self.stratifiedSplit(
         self.negative, self.positive, N)
     lst = []
     trainvocabulary = []
     for doc, lbl in self.maintrain:
         x = (feature_extractor(movie_reviews.words(fileids=[doc])), lbl)
         lst.append(x)
         trainvocabulary = trainvocabulary + x[0].keys()
     trainvocabulary = set(trainvocabulary)
     if q2_1.W == 0:
         q2_1.W = len(trainvocabulary)
     print "no. of features in train:", self.W
     nb = classifier.train(lst)
     self.testClassify = self.classifyTest(self.maintest, nb,
                                           feature_extractor)
     print "accuracy = ", accuracy(self.maintest, self.testClassify)
     print "Negative:"
     print "    precision = ", self.calcPrec('neg', self.maintest,
                                             self.testClassify)
     print "    recall = ", self.calcRecall('neg', self.maintest,
                                            self.testClassify)
     print "    f measure = ", self.calcFMeasur('neg', self.maintest,
                                                self.testClassify)
     print "Positive:"
     print "    precision = ", self.calcPrec('pos', self.maintest,
                                             self.testClassify)
     print "    recall = ", self.calcRecall('pos', self.maintest,
                                            self.testClassify)
     print "    f measure = ", self.calcFMeasur('pos', self.maintest,
                                                self.testClassify)
     nb.show_most_informative_features()
     return nb