def test_main(self):
    """Exercise FeatsFromScoredLexicon on a five-word, three-score lexicon.

    Verifies that:
      * constructing it with a non-DataFrame argument (``3``) raises
        ``AssertionError``;
      * ``get_top_model_term_lists()`` is keyed by the lexicon's columns;
      * ``get_doc_metadata`` on 'I abandoned a wallet.' yields the expected
        per-column values, which coincide with the average of the lexicon
        rows for 'abandoned' and 'a' (the only tokens present in the
        lexicon).
    """
    score_columns = ['activation', 'imagery', 'pleasantness']
    scored_lexicon = pd.DataFrame({
        'activation': {
            'a': 1.3846,
            'abandon': 2.375,
            'abandoned': 2.1,
            'abandonment': 2.0,
            'abated': 1.3333,
        },
        'imagery': {
            'a': 1.0,
            'abandon': 2.4,
            'abandoned': 3.0,
            'abandonment': 1.4,
            'abated': 1.2,
        },
        'pleasantness': {
            'a': 2.0,
            'abandon': 1.0,
            'abandoned': 1.1429,
            'abandonment': 1.0,
            'abated': 1.6667,
        },
    })

    # Anything other than a lexicon data frame must be rejected up front.
    with self.assertRaises(AssertionError):
        FeatsFromScoredLexicon(3)

    extractor = FeatsFromScoredLexicon(scored_lexicon)
    term_list_names = set(extractor.get_top_model_term_lists().keys())
    self.assertEqual(term_list_names, set(score_columns))

    parsed_doc = whitespace_nlp_with_sentences('I abandoned a wallet.')
    doc_scores = extractor.get_doc_metadata(parsed_doc)
    expected_scores = np.array([1.74230, 2.00000, 1.57145])
    np.testing.assert_almost_equal(doc_scores[score_columns], expected_scores)
def test_get_feats(self):
    """UseFullDocAsFeature.get_feats should count the whole document once.

    The parsed document "A a bb cc." is expected to produce a Counter with
    a single entry: the full document text mapped to 1.
    """
    text = "A a bb cc."
    parsed_doc = whitespace_nlp_with_sentences(text)
    observed = UseFullDocAsFeature().get_feats(parsed_doc)
    expected = Counter({text: 1})
    self.assertEqual(expected, observed)
data = [ {'text': "I don't think you'll want to.", 'category': 'a'}, {'text': "You'll have a didn't a-b #dfs .", 'category': 'a'}, {'text': "You'll shoudn't #have a, didn't a-b #dfs .", 'category': 'a'}, {'text': "Can't not get along to didn't.", 'category': 'b'}, {'text': "Can't try aba-ba alo33ng to didn't.", 'category': 'b'}, {'text': "Can't no't g'e't al33ong 3to5.", 'category': 'b'}, {'text': "You haven't changed a b'it.", 'category': 'c'}, {'text': "You haven't changed a b'it.", 'category': 'c'}, {'text': "You haven't ch5ng3d a bit.", 'category': 'c'} ] df = pd.DataFrame(data) df['parse'] = df.text.apply(lambda x: st.whitespace_nlp_with_sentences(x, tok_splitter_re=re.compile('( )'))) corpus = st.CorpusFromParsedDocuments(df, parsed_col='parse', category_col='category').build().get_unigram_corpus() semiotic_square = st.SemioticSquare( corpus, category_a='a', category_b='b', neutral_categories=['c'], scorer=st.RankDifference(), labels={'not_a_and_not_b': 'Plot Descriptions', 'a_and_b': 'Reviews', 'a_and_not_b': 'Positive', 'b_and_not_a': 'Negative', 'a':'', 'b':'', 'not_a':'',