예제 #1
0
    def runTest(self):
        # Scoring of an untagged document: classify_document is expected to
        # raise AssertionError unless tagged=False is passed, and to score
        # nothing when every POS flag is switched off for the call.
        lexicon = sentlex.MobyLexicon()
        self.assertTrue(lexicon.is_loaded,
                        'Test lexicon did not load correctly')

        # scorer with the first two POS flags enabled (a and v, going by the
        # keyword names used in the last call below)
        scorer = sentdoc.BasicDocSentiScore()
        scorer.verbose = False
        scorer.set_active_pos(True, True, False, False)
        scorer.set_lexicon(lexicon)

        # without tagged=False the untagged doc must trip an assertion
        with self.assertRaises(AssertionError):
            scorer.classify_document(TESTDOC_UNTAGGED, verbose=False)

        # declaring the doc as untagged makes scoring succeed
        pos_score, neg_score = scorer.classify_document(
            TESTDOC_UNTAGGED, verbose=False, tagged=False)
        self.assertTrue(pos_score > 0, 'Did not score "good" in untagged doc')

        # same doc again, this time with every POS flag disabled
        all_pos_off = dict(a=False, v=False, n=False, r=False)
        pos_score, neg_score = scorer.classify_document(
            TESTDOC_UNTAGGED, verbose=False, tagged=False, **all_pos_off)
        nothing_scored = pos_score == 0 and neg_score == 0
        self.assertTrue(nothing_scored,
                        'Scprng with no active tags should not happen')
예제 #2
0
    def runTest(self):
        # Smoke test: classify TESTDOC_ADJ with the 'linear' score function.
        # Nothing is asserted about the scores themselves; the call only has
        # to complete and hand back a pair of values.
        lexicon = sentlex.MobyLexicon()
        self.assertTrue(lexicon.is_loaded,
                        'Test lexicon did not load correctly')

        scorer = sentdoc.BasicDocSentiScore()
        scorer.verbose = False
        scorer.set_active_pos(True, True, False, False)
        scorer.set_lexicon(lexicon)
        scorer.set_parameters(score_function='linear')

        # the unpacking still checks that exactly two values come back
        _pos_score, _neg_score = scorer.classify_document(TESTDOC_ADJ,
                                                          verbose=False)
예제 #3
0
    def runTest(self):
        # Checks that set_neg_detection, set_active_pos and
        # set_parameters(**kwargs) store their values on the scorer object.
        # No lexicon or document is involved.
        ds = sentdoc.BasicDocSentiScore()
        ds.verbose = True

        ds.set_neg_detection(True, 15)
        ds.set_active_pos(True, False, False, False)

        # positional flags of set_active_pos land on ds.a, ds.v, ds.n, ds.r
        self.assertEqual((ds.a, ds.v, ds.n, ds.r),
                         (True, False, False, False),
                         'Failed set POS parameters')
        self.assertEqual((ds.negation, ds.negation_window), (True, 15),
                         'Failed set negation')

        ds.set_parameters(score_mode=ds.SCOREONCE,
                          score_freq=True,
                          negation=False)
        # One pre-formatted argument in parentheses: emits the same
        # space-separated line whether print is a statement (Python 2) or a
        # function (Python 3). The bare print statement was a SyntaxError on 3.
        print('%s %s %s' % (ds.score_mode, ds.score_freq, ds.negation))
        self.assertEqual(ds.score_mode, ds.SCOREONCE,
                         'Unable to set parameters via kwards')
        self.assertEqual(ds.score_freq, True,
                         'Unable to set parameters via kwards')
        # negation was True after set_neg_detection; the kwarg must override it
        self.assertEqual(ds.negation, False,
                         'Unable to set parameters via kwards')
예제 #4
0
def test_pos_adjustment():
    """Check that the a_adjust / v_adjust weights shift the score balance.

    TESTDOC_ADJ is scored three times with the UIC lexicon: with both
    weights at 1.0 the positive and negative scores must be equal, with
    a_adjust=0.0 the negative score must win, and with v_adjust=0.0 the
    positive score must win.
    """
    lexicon = sentlex.UICLexicon()
    scorer = sentdoc.BasicDocSentiScore()
    scorer.verbose = False
    scorer.set_lexicon(lexicon)

    # both weights at 1.0: the two scores are expected to balance out
    scorer.set_parameters(a_adjust=1.0, v_adjust=1.0)
    pos_total, neg_total = scorer.classify_document(TESTDOC_ADJ)
    assert pos_total == neg_total

    # a_adjust zeroed: negative side should dominate
    scorer.set_parameters(a_adjust=0.0, v_adjust=1.0)
    pos_total, neg_total = scorer.classify_document(TESTDOC_ADJ,
                                                    verbose=False)
    assert pos_total < neg_total

    # v_adjust zeroed: positive side should dominate
    scorer.set_parameters(a_adjust=1.0, v_adjust=0.0)
    pos_total, neg_total = scorer.classify_document(TESTDOC_ADJ,
                                                    verbose=False)
    assert pos_total > neg_total
예제 #5
0
    def runTest(self):
        # Visual check rather than a real test: scores TESTDOC_ADJ in verbose
        # mode with the 'cosine' and then the 'linear' score function, and
        # prints _score_cosine / _score_linear for i = 1..10 (called with 1.0
        # and 10 as the other two arguments). Only the lexicon load is
        # asserted.
        lexicon = sentlex.MobyLexicon()
        self.assertTrue(lexicon.is_loaded,
                        'Test lexicon did not load correctly')

        ds = sentdoc.BasicDocSentiScore()
        ds.verbose = True
        ds.set_active_pos(True, True, False, False)
        ds.set_lexicon(lexicon)

        # Every print below receives one pre-built string in parentheses,
        # which is valid and prints the same text on Python 2 and Python 3.
        # The former print statements were a SyntaxError on Python 3.
        print('=== cosine ===')
        ds.set_parameters(score_function='cosine')
        (dpos, dneg) = ds.classify_document(TESTDOC_ADJ, verbose=True)
        print('=== linear ===')
        ds.set_parameters(score_function='linear')
        (dpos, dneg) = ds.classify_document(TESTDOC_ADJ, verbose=True)

        # One space-separated row per function. Each row now ends with its
        # own newline, so the 'Linear' heading no longer needs a leading '\n'
        # to start on a fresh line.
        steps = range(1, 11)
        print(' '.join(str(ds._score_cosine(1.0, i, 10)) for i in steps))
        print('Linear')
        print(' '.join(str(ds._score_linear(1.0, i, 10)) for i in steps))
예제 #6
0
    def runTest(self):
        # POS weight adjustment with the UIC lexicon: TESTDOC_ADJ is scored
        # once per (a_adjust, v_adjust) setting and the relation between the
        # positive and negative score is checked each time.
        lexicon = sentlex.UICLexicon()
        self.assertTrue(lexicon.is_loaded,
                        'Test lexicon did not load correctly')

        scorer = sentdoc.BasicDocSentiScore()
        scorer.verbose = False
        scorer.set_active_pos(True, True, False, False)
        scorer.set_lexicon(lexicon)

        # (a_adjust, v_adjust, expected pos/neg relation, failure message)
        cases = [
            (1.0, 1.0, lambda pos, neg: pos == neg,
             'No POS adjustment should have occurred'),
            (0.0, 1.0, lambda pos, neg: pos < neg,
             'Neg score expected after adjustment'),
            (1.0, 0.0, lambda pos, neg: pos > neg,
             'Pos score expected after adjustment'),
        ]
        for a_weight, v_weight, relation_holds, failure_msg in cases:
            scorer.set_parameters(a_adjust=a_weight, v_adjust=v_weight)
            pos_score, neg_score = scorer.classify_document(TESTDOC_ADJ,
                                                            verbose=False)
            self.assertTrue(relation_holds(pos_score, neg_score), failure_msg)
예제 #7
0
    def runTest(self):
        # Exercises _repeated_backoff directly, first with backoff_alpha = 0.0
        # (score must come back unchanged), then with backoff_alpha = 1.0
        # against a small table of expected values, and finally with a repeat
        # count of zero.
        scorer = sentdoc.BasicDocSentiScore()
        scorer.verbose = False

        scorer.set_parameters(score_mode=scorer.SCOREBACKOFF,
                              backoff_alpha=0.0)
        self.assertEqual(scorer.backoff_alpha, 0.0)

        # repeat counts run 1..4 alongside the sample scores
        for repeat, raw in enumerate([1.0, 0.5, 0.0, -1.0], 1):
            backed_off = scorer._repeated_backoff(raw, repeat,
                                                  scorer.backoff_alpha)
            self.assertTrue(raw == backed_off,
                            'This backoff should always return original score')

        scorer.set_parameters(score_mode=scorer.SCOREBACKOFF,
                              backoff_alpha=1.0)
        # (input score, repeat count, expected result) for alpha = 1.0
        expectations = ((0.5, 1.0, 0.5), (0.5, 2.0, 0.25), (0.5, 3.0, 0.125))
        for raw, repeat, expected in expectations:
            actual = scorer._repeated_backoff(raw, repeat,
                                              scorer.backoff_alpha)
            self.assertTrue(expected == actual)

        # bad input: a repeat count of zero has to produce 0.0
        degenerate = scorer._repeated_backoff(1.0, 0.0, 1.0)
        self.assertTrue(degenerate == 0.0)
예제 #8
0
    def runTest(self):
        # End-to-end scoring test with the Moby lexicon: checks tag separator
        # detection, that classify_document fills ds.resultdata, and that the
        # positive, negative and negated sample documents come out with the
        # expected score ordering.
        L = sentlex.MobyLexicon()
        self.assertTrue(L.is_loaded, 'Test lexicon did not load correctly')

        # create a scorer that scores only adjectives (first POS flag only)
        ds = sentdoc.BasicDocSentiScore()
        ds.verbose = False
        ds.set_active_pos(True, False, False, False)
        ds.set_parameters(score_mode=ds.SCOREALL,
                          score_freq=False,
                          negation=False)
        ds.set_lexicon(L)

        # separator ok?
        self.assertEqual(ds._detect_tag(TESTDOC_ADJ), '/',
                         'Unable to detect correct separator')

        # now score!
        (dpos, dneg) = ds.classify_document(TESTDOC_ADJ)

        # Membership is tested with `in` instead of dict.has_key(): has_key
        # was removed in Python 3, while `in` behaves the same on 2 and 3.
        result = ds.resultdata
        expected_keys = ('doc', 'annotated_doc', 'resultpos', 'resultneg')
        self.assertTrue(
            result and all(key in result for key in expected_keys),
            'Did not populate resultdata after scoring doc')

        self.assertTrue(dpos > dneg,
                        'Did not find positive words on positive doc')

        # again, for negative text
        (dpos, dneg) = ds.classify_document(TESTDOC_BADADJ)
        self.assertTrue(dneg > dpos,
                        'Did not find negative words on negative doc')

        # negated text: negation detection is switched on with a window of 15
        # and the document is expected to come out positive
        ds.set_parameters(negation=True)
        ds.set_parameters(negation_window=15)
        (dpos, dneg) = ds.classify_document(TESTDOC_NEGATED)
        self.assertTrue(dpos > dneg,
                        'Did not find positive words on TESTDOC_NEGATED')
def ds():
    """Return a new ``BasicDocSentiScore`` with ``verbose`` switched off.

    No lexicon is attached here.
    NOTE(review): looks like a pytest fixture whose decorator is not visible
    in this excerpt - confirm.
    """
    scorer = sentdoc.BasicDocSentiScore()
    scorer.verbose = False
    return scorer
예제 #10
0
def ds_loaded(moby):
    """Return a new ``BasicDocSentiScore`` that uses ``moby`` as its lexicon.

    The lexicon is attached via ``set_lexicon`` first, then ``verbose`` is
    switched off.
    NOTE(review): ``moby`` is presumably a loaded Moby lexicon supplied by a
    fixture of the same name - confirm against the caller.
    """
    scorer = sentdoc.BasicDocSentiScore()
    scorer.set_lexicon(moby)
    scorer.verbose = False
    return scorer