def test_main(self):
        """Exercise FeatsFromScoredLexicon on a five-term, three-score lexicon.

        Verifies that:
          * constructing the featurizer with the integer ``3`` raises
            ``AssertionError``;
          * ``get_top_model_term_lists()`` is keyed by exactly the three
            score columns of the lexicon;
          * ``get_doc_metadata`` for ``'I abandoned a wallet.'`` yields the
            expected value for each score column.
        """
        score_columns = ['activation', 'imagery', 'pleasantness']
        lexicon_terms = ['a', 'abandon', 'abandoned', 'abandonment', 'abated']
        # One row per term, one column per score.
        lexicon_df = pd.DataFrame(
            {
                'activation': [1.3846, 2.375, 2.1, 2.0, 1.3333],
                'imagery': [1.0, 2.4, 3.0, 1.4, 1.2],
                'pleasantness': [2.0, 1.0, 1.1429, 1.0, 1.6667],
            },
            index=lexicon_terms,
        )

        # Passing the integer 3 instead of the lexicon frame must trip an assertion.
        with self.assertRaises(AssertionError):
            FeatsFromScoredLexicon(3)

        scorer = FeatsFromScoredLexicon(lexicon_df)
        top_term_lists = scorer.get_top_model_term_lists()
        self.assertEqual(set(top_term_lists.keys()), set(score_columns))

        parsed_doc = whitespace_nlp_with_sentences('I abandoned a wallet.')
        features = scorer.get_doc_metadata(parsed_doc)
        # Each expected value equals the average of the 'a' and 'abandoned'
        # rows of the corresponding lexicon column.
        expected_scores = np.array([1.74230, 2.00000, 1.57145])
        np.testing.assert_almost_equal(features[score_columns], expected_scores)
 def test_get_feats(self):
     """Check that UseFullDocAsFeature reports the full text once.

     The feature counts for the parsed document "A a bb cc." are expected
     to be a single entry: the complete document string with a count of 1.
     """
     parsed_doc = whitespace_nlp_with_sentences("A a bb cc.")
     expected_counts = Counter({"A a bb cc.": 1})
     observed_counts = UseFullDocAsFeature().get_feats(parsed_doc)
     self.assertEqual(expected_counts, observed_counts)

# Toy data set: three short documents for each of the categories 'a', 'b'
# and 'c'. The texts mix contractions, hyphens, hashtags and digits.
data = [
    {'text': text, 'category': category}
    for category, texts in (
        ('a', (
            "I don't think you'll want to.",
            "You'll have a didn't a-b #dfs .",
            "You'll shoudn't #have a, didn't a-b #dfs .",
        )),
        ('b', (
            "Can't not get along to didn't.",
            "Can't try aba-ba alo33ng to didn't.",
            "Can't no't g'e't al33ong 3to5.",
        )),
        ('c', (
            "You haven't changed a b'it.",
            "You haven't changed a b'it.",
            "You haven't ch5ng3d a bit.",
        )),
    )
    for text in texts
]

df = pd.DataFrame(data)
# Parse every text, supplying a token-splitter pattern that matches a
# single space.
df['parse'] = df['text'].apply(
    lambda raw_text: st.whitespace_nlp_with_sentences(
        raw_text,
        tok_splitter_re=re.compile('( )'),
    )
)
# Build a corpus from the parsed column, then take its unigram corpus.
corpus = (
    st.CorpusFromParsedDocuments(df, parsed_col='parse', category_col='category')
    .build()
    .get_unigram_corpus()
)

semiotic_square = st.SemioticSquare(
	corpus,
	category_a='a',
	category_b='b',
	neutral_categories=['c'],
	scorer=st.RankDifference(),
	labels={'not_a_and_not_b': 'Plot Descriptions',
	        'a_and_b': 'Reviews',
	        'a_and_not_b': 'Positive',
	        'b_and_not_a': 'Negative',
	        'a':'',
	        'b':'',
	        'not_a':'',