Example #1
0
class TestClassifierSentenceMulti(unittest.TestCase):
    """Checks ``Classifier`` at sentence granularity with ``binary=False``."""

    def setUp(self):
        """Create the classifier instance used by every test method."""
        self.classifier = Classifier(granularity='sentence', binary=False)

    def test_predict_for_groups(self):
        """A pre-tokenized sentence (4-tuples) is expected to yield 'D'."""
        # One 4-tuple per token; the first element is the surface word.
        tokens = [
            ('Its', 'it', 'PRP$', 'B-NP'),
            ('short', 'short', 'JJ', 'I-NP'),
            ('life', 'life', 'NN', 'I-NP'),
            ('span', 'span', 'NN', 'I-NP'),
            ('(', '(', '(', 'O'),
            ('thirty-odd', 'thirty-odd', 'JJ', 'B-NP'),
            ('episodes', 'episod', 'NNS', 'I-NP'),
            (')', ')', ')', 'O'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('in', 'in', 'IN', 'B-PP'),
            ('part', 'part', 'NN', 'B-NP'),
            ('because', 'becaus', 'IN', 'B-PP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('considered', 'consid', 'VBN', 'I-VP'),
            ('too', 'too', 'RB', 'O'),
            ('violent', 'violent', 'JJ', 'B-NP'),
            ('at', 'at', 'IN', 'B-PP'),
            ('the', 'the', 'DT', 'B-NP'),
            ('time', 'time', 'NN', 'I-NP'),
            (',', ',', ',', 'O'),
            ('although', 'although', 'IN', 'O'),
            ('the', 'the', 'DT', 'B-NP'),
            ('violence', 'violenc', 'NN', 'I-NP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('depicted', 'depict', 'VBD', 'B-VP'),
            ('is', 'is', 'VBZ', 'I-VP'),
            ('actually', 'actual', 'RB', 'O'),
            ('mild', 'mild', 'VBN', 'O'),
            ('by', 'by', 'IN', 'O'),
            ('today', 'today', 'NN', 'B-NP'),
            ("'s", "'s", 'POS', 'B-NP'),
            ('television', 'televis', 'NN', 'I-NP'),
            ('standards', 'standard', 'NNS', 'I-NP'),
            ('.', '.', '.', 'O'),
        ]

        prediction = self.classifier.predict(tokens)

        self.assertEqual('D', prediction)

    def test_predict_for_sentence(self):
        """Raw sentences are expected to yield the labels 'D', 'N' and 'C'."""
        # (sentence, expected label) pairs, checked in order; the first
        # mismatch fails the test, exactly as sequential asserts would.
        cases = (
            (
                'Its short life span (thirty-odd episodes) was in part '
                'because  it was considered too violent at the time, '
                'although the violence it depicted is actually mild by '
                "today's television standards.",
                'D',
            ),
            ("If I'm human, then this test will pass without failure.", 'N'),
            ('I am certain that this sentence will be certain.', 'C'),
        )

        for sentence, wanted in cases:
            got = self.classifier.predict(sentence)
            self.assertEqual(wanted, got)
Example #2
0
def train(args):
    """Train a ``Classifier`` on the file named by ``args.filepath``.

    The classifier is built with ``binary`` set to the negation of
    ``args.multiclass``.
    """
    use_binary = not args.multiclass
    model = Classifier(binary=use_binary)
    model.train(args.filepath)
Example #3
0
def predict(args):
    """Print ``args.sentence`` followed by the classifier's prediction.

    The classifier is built from ``args.granularity`` and with ``binary``
    set to the negation of ``args.multiclass``.
    """
    options = {
        'granularity': args.granularity,
        'binary': not args.multiclass,
    }
    model = Classifier(**options)

    sentence = args.sentence
    label = model.predict(sentence)
    print('{}: {}'.format(sentence, label))
Example #4
0
def uncertainty_features(text):
    """Return the share of predictions for *text* that equal ``'U'``.

    A default-configured ``Classifier`` predicts on *text*; the result is
    iterated and each entry equal to ``'U'`` is counted.

    Args:
        text: Input handed unchanged to ``Classifier.predict``.

    Returns:
        Number of ``'U'`` entries divided by the total number of entries,
        or ``0.0`` when the prediction result is empty.
    """
    cls = Classifier()
    results = cls.predict(text)

    total = len(results)
    if total == 0:
        # Nothing was predicted (e.g. empty input): report "no uncertainty"
        # instead of failing with ZeroDivisionError.
        return 0.0

    # Generator expression avoids building a throwaway list just to sum it.
    uncertain = sum(1 for r in results if r == 'U')
    return uncertain / total
Example #5
0
 def setUp(self):
     """Create the word-granularity, binary classifier used by the tests."""
     settings = {'granularity': 'word', 'binary': True}
     self.classifier = Classifier(**settings)
Example #6
0
class TestClassifierWordBinary(unittest.TestCase):
    """Checks ``Classifier`` at word granularity with ``binary=True``.

    Both tests use the same 35-token sentence and expect one label per
    token: 'U' for "considered" and 'C' for every other token.
    """

    def setUp(self):
        """Create the classifier instance used by every test method."""
        self.classifier = Classifier(granularity='word', binary=True)

    def test_predict_for_groups(self):
        """A pre-tokenized sentence (4-tuples) yields one label per token."""
        # One 4-tuple per token; the first element is the surface word.
        tokens = [
            ('Its', 'it', 'PRP$', 'B-NP'),
            ('short', 'short', 'JJ', 'I-NP'),
            ('life', 'life', 'NN', 'I-NP'),
            ('span', 'span', 'NN', 'I-NP'),
            ('(', '(', '(', 'O'),
            ('thirty-odd', 'thirty-odd', 'JJ', 'B-NP'),
            ('episodes', 'episod', 'NNS', 'I-NP'),
            (')', ')', ')', 'O'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('in', 'in', 'IN', 'B-PP'),
            ('part', 'part', 'NN', 'B-NP'),
            ('because', 'becaus', 'IN', 'B-PP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('was', 'wa', 'VBD', 'B-VP'),
            ('considered', 'consid', 'VBN', 'I-VP'),
            ('too', 'too', 'RB', 'O'),
            ('violent', 'violent', 'JJ', 'B-NP'),
            ('at', 'at', 'IN', 'B-PP'),
            ('the', 'the', 'DT', 'B-NP'),
            ('time', 'time', 'NN', 'I-NP'),
            (',', ',', ',', 'O'),
            ('although', 'although', 'IN', 'O'),
            ('the', 'the', 'DT', 'B-NP'),
            ('violence', 'violenc', 'NN', 'I-NP'),
            ('it', 'it', 'PRP', 'B-NP'),
            ('depicted', 'depict', 'VBD', 'B-VP'),
            ('is', 'is', 'VBZ', 'I-VP'),
            ('actually', 'actual', 'RB', 'O'),
            ('mild', 'mild', 'VBN', 'O'),
            ('by', 'by', 'IN', 'O'),
            ('today', 'today', 'NN', 'B-NP'),
            ("'s", "'s", 'POS', 'B-NP'),
            ('television', 'televis', 'NN', 'I-NP'),
            ('standards', 'standard', 'NNS', 'I-NP'),
            ('.', '.', '.', 'O'),
        ]
        # "considered" occurs exactly once in the fixture, so this yields
        # a single 'U' surrounded by 'C' labels.
        wanted = [
            'U' if word == 'considered' else 'C'
            for word, _, _, _ in tokens
        ]

        got = self.classifier.predict(tokens)

        self.assertEqual(wanted, got)

    def test_predict_for_sentence(self):
        """A raw sentence yields the same per-token labels."""
        sentence = (
            'Its short life span (thirty-odd episodes) was in part '
            'because  it was considered too violent at the time, '
            'although the violence it depicted is actually mild by '
            "today's television standards."
        )
        # 35 labels: "Its" .. second "was" (14 tokens) are 'C',
        # "considered" is 'U', and "too" .. "." (20 tokens) are 'C'.
        wanted = ['C'] * 14 + ['U'] + ['C'] * 20

        got = self.classifier.predict(sentence)

        self.assertEqual(wanted, got)
Example #7
0
 def setUp(self):
     """Create the sentence-granularity, non-binary classifier for tests."""
     settings = {'granularity': 'sentence', 'binary': False}
     self.classifier = Classifier(**settings)
Example #8
0
import re
import sys
import subprocess
import traceback

# Install an "ignore" filter for FutureWarning here, ahead of the imports
# that follow, so it is already active while those modules are loaded.
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

from json import JSONDecodeError

from app.lib.helpers import JSON_NULL
from app.lib.nlp import analyzers

from uncertainty.classifier import Classifier

# Module-level classifier, constructed once at import time with word
# granularity and binary=False.
# NOTE(review): presumably reused by UncertaintyAnalyzer.analyze so it is
# not rebuilt per call — confirm against the rest of the module.
classifier = Classifier(granularity='word', binary=False)


class UncertaintyAnalyzer(analyzers.Analyzer):
    def __init__(self, tokens, root_type='stem'):
        super(UncertaintyAnalyzer, self).__init__("")
        self.tokens = tokens
        self.root = root_type

    def analyze(self):
        uncertainty = None

        try:
            tok_list = []
            if self.root == "stem":
                for tok in self.tokens: