class TestClassifierSentenceMulti(unittest.TestCase):
    """Sentence-granularity predictions from a non-binary ``Classifier``."""

    def setUp(self):
        # A fresh classifier is built before every test method.
        self.classifier = Classifier(granularity='sentence', binary=False)

    def test_predict_for_groups(self):
        # The input is a list of 4-tuples instead of a raw sentence.
        # NOTE(review): fields look like (word, stem, POS tag, chunk tag) --
        # confirm against Classifier.predict.
        tokens = [
            ('Its', 'it', 'PRP$', 'B-NP'),
            ('short', 'short', 'JJ', 'I-NP'),
            ('life', 'life', 'NN', 'I-NP'),
            ('span', 'span', 'NN', 'I-NP'),
            ('(', '(', '(', 'O'),
            ('thirty-odd', 'thirty-odd', 'JJ', 'B-NP'),
            ('episodes', 'episod', 'NNS', 'I-NP'),
            (')', ')', ')', 'O'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('in', 'in', 'IN', 'B-PP'),
            ('part', 'part', 'NN', 'B-NP'),
            ('because', 'becaus', 'IN', 'B-PP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('considered', 'consid', 'VBN', 'I-VP'),
            ('too', 'too', 'RB', 'O'),
            ('violent', 'violent', 'JJ', 'B-NP'),
            ('at', 'at', 'IN', 'B-PP'),
            ('the', 'the', 'DT', 'B-NP'),
            ('time', 'time', 'NN', 'I-NP'),
            (',', ',', ',', 'O'),
            ('although', 'although', 'IN', 'O'),
            ('the', 'the', 'DT', 'B-NP'),
            ('violence', 'violenc', 'NN', 'I-NP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('depicted', 'depict', 'VBD', 'B-VP'),
            ('is', 'is', 'VBZ', 'I-VP'),
            ('actually', 'actual', 'RB', 'O'),
            ('mild', 'mild', 'VBN', 'O'),
            ('by', 'by', 'IN', 'O'),
            ('today', 'today', 'NN', 'B-NP'),
            ("'s", "'s", 'POS', 'B-NP'),
            ('television', 'televis', 'NN', 'I-NP'),
            ('standards', 'standard', 'NNS', 'I-NP'),
            ('.', '.', '.', 'O'),
        ]

        prediction = self.classifier.predict(tokens)

        self.assertEqual('D', prediction)

    def test_predict_for_sentence(self):
        # (sentence, expected label) pairs, checked in order; the first
        # mismatch fails the test, exactly as consecutive asserts would.
        cases = (
            (
                'Its short life span (thirty-odd episodes) was in part '
                'because it was considered too violent at the time, '
                'although the violence it depicted is actually mild by '
                'today\'s television standards.',
                'D',
            ),
            (
                'If I\'m human, then this test will pass without failure.',
                'N',
            ),
            (
                'I am certain that this sentence will be certain.',
                'C',
            ),
        )

        for sentence, expected in cases:
            actual = self.classifier.predict(sentence)
            self.assertEqual(expected, actual)
def train(args):
    """Train a ``Classifier`` on the data at ``args.filepath``.

    The classifier is created in binary mode unless ``args.multiclass``
    is truthy. Nothing is returned.
    """
    use_binary = not args.multiclass
    model = Classifier(binary=use_binary)
    model.train(args.filepath)
def predict(args):
    """Print ``args.sentence`` together with the classifier's prediction.

    The classifier uses ``args.granularity`` and runs in binary mode unless
    ``args.multiclass`` is truthy. Output goes to stdout as
    ``<sentence>: <prediction>``.
    """
    model = Classifier(
        granularity=args.granularity,
        binary=not args.multiclass,
    )
    sentence = args.sentence
    prediction = model.predict(sentence)
    print('{}: {}'.format(sentence, prediction))
def uncertainty_features(text):
    """Return the fraction of predictions for *text* that equal ``'U'``.

    A default-configured ``Classifier`` is created on every call and its
    ``predict`` output is treated as a sized collection of labels.

    Args:
        text: Input handed unchanged to ``Classifier.predict``.

    Returns:
        The number of ``'U'`` labels divided by the total number of labels,
        or ``0.0`` when ``predict`` returns no labels at all (previously
        this case raised ``ZeroDivisionError``).
    """
    results = Classifier().predict(text)

    # Explicit length check rather than truthiness: the concrete return type
    # of predict() is not visible here and may not define a simple bool().
    total = len(results)
    if total == 0:
        return 0.0

    uncertain = sum(1 for label in results if label == 'U')
    return uncertain / total
def setUp(self):
    """Build the word-granularity, binary classifier used by the tests."""
    options = {'granularity': 'word', 'binary': True}
    self.classifier = Classifier(**options)
class TestClassifierWordBinary(unittest.TestCase):
    """Word-granularity predictions from a binary ``Classifier``.

    Both tests use the same 35-token sentence and expect one label per
    token: ``'U'`` for 'considered' and ``'C'`` for every other token.
    """

    def setUp(self):
        # A fresh classifier is built before every test method.
        self.classifier = Classifier(granularity='word', binary=True)

    def test_predict_for_groups(self):
        # The input is a list of 4-tuples instead of a raw sentence.
        # NOTE(review): fields look like (word, stem, POS tag, chunk tag) --
        # confirm against Classifier.predict.
        tokens = [
            ('Its', 'it', 'PRP$', 'B-NP'),
            ('short', 'short', 'JJ', 'I-NP'),
            ('life', 'life', 'NN', 'I-NP'),
            ('span', 'span', 'NN', 'I-NP'),
            ('(', '(', '(', 'O'),
            ('thirty-odd', 'thirty-odd', 'JJ', 'B-NP'),
            ('episodes', 'episod', 'NNS', 'I-NP'),
            (')', ')', ')', 'O'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('in', 'in', 'IN', 'B-PP'),
            ('part', 'part', 'NN', 'B-NP'),
            ('because', 'becaus', 'IN', 'B-PP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('considered', 'consid', 'VBN', 'I-VP'),
            ('too', 'too', 'RB', 'O'),
            ('violent', 'violent', 'JJ', 'B-NP'),
            ('at', 'at', 'IN', 'B-PP'),
            ('the', 'the', 'DT', 'B-NP'),
            ('time', 'time', 'NN', 'I-NP'),
            (',', ',', ',', 'O'),
            ('although', 'although', 'IN', 'O'),
            ('the', 'the', 'DT', 'B-NP'),
            ('violence', 'violenc', 'NN', 'I-NP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('depicted', 'depict', 'VBD', 'B-VP'),
            ('is', 'is', 'VBZ', 'I-VP'),
            ('actually', 'actual', 'RB', 'O'),
            ('mild', 'mild', 'VBN', 'O'),
            ('by', 'by', 'IN', 'O'),
            ('today', 'today', 'NN', 'B-NP'),
            ("'s", "'s", 'POS', 'B-NP'),
            ('television', 'televis', 'NN', 'I-NP'),
            ('standards', 'standard', 'NNS', 'I-NP'),
            ('.', '.', '.', 'O'),
        ]
        # One label per token; 'considered' occurs exactly once in the data
        # and is the only token expected to be labelled 'U'.
        expected = [
            'U' if word == 'considered' else 'C'
            for word, _stem, _pos, _chunk in tokens
        ]

        actual = self.classifier.predict(tokens)

        self.assertEqual(expected, actual)

    def test_predict_for_sentence(self):
        sentence = (
            'Its short life span (thirty-odd episodes) was in part '
            'because it was considered too violent at the time, '
            'although the violence it depicted is actually mild by '
            'today\'s television standards.'
        )
        # 35 labels in total: 14 tokens ('Its' .. second 'was') labelled 'C',
        # then 'considered' labelled 'U', then the remaining 20 tokens
        # ('too' .. '.') labelled 'C'.
        expected = ['C'] * 14 + ['U'] + ['C'] * 20

        actual = self.classifier.predict(sentence)

        self.assertEqual(expected, actual)
def setUp(self):
    """Build the sentence-granularity, non-binary classifier for the tests."""
    options = {'granularity': 'sentence', 'binary': False}
    self.classifier = Classifier(**options)
import re import sys import subprocess import traceback import warnings warnings.filterwarnings("ignore", category=FutureWarning) from json import JSONDecodeError from app.lib.helpers import JSON_NULL from app.lib.nlp import analyzers from uncertainty.classifier import Classifier classifier = Classifier(granularity='word', binary=False) class UncertaintyAnalyzer(analyzers.Analyzer): def __init__(self, tokens, root_type='stem'): super(UncertaintyAnalyzer, self).__init__("") self.tokens = tokens self.root = root_type def analyze(self): uncertainty = None try: tok_list = [] if self.root == "stem": for tok in self.tokens: