def get_tweet_tags(tweet):
    """
    Break up a tweet into part-of-speech tagged word parts.

    The text is tokenized with ``TweetTokenizer``, every ``@handle`` token is
    replaced by a name, and the resulting tokens are tagged with
    ``nltk.pos_tag``. A prefix / period / name triple (e.g. ``Mr``, ``.``,
    ``Haas``) is then merged back into a single ``NNP`` entry.

    :param tweet: the text of the tweet to tag
    :return: a list of ``(word, tag)`` tuples
    """
    tknzr = TweetTokenizer()
    tokens = tknzr.tokenize(tweet)

    # replace handles with real names
    for n, tok in enumerate(tokens):
        if not tok.startswith('@'):
            continue
        handle = tok.strip("@")
        if not handle:
            # A lone "@" carries no handle, so there is nothing to look up.
            continue
        if handle in user.students:
            # If we have a database entry for the mentioned user, we can
            # easily substitute a full name.
            usr = user.NPUser(handle)
            tokens[n] = usr.fullname
        else:
            # If there is no database entry, we use the user's alias. While
            # this is the full name in many cases, it is often not reliable
            usr = api.get_user(handle)
            tokens[n] = usr.name

    return _merge_prefixed_names(nltk.pos_tag(tokens))


def _merge_prefixed_names(tagged):
    """
    Join prefix / period / name tag triples into one ``NNP`` tag.

    In nltk, if a teacher's name is written with a period after an
    abbreviated prefix, it is awkwardly broken up into 3 tags. The result is
    built as a new list instead of deleting from ``tagged`` while iterating
    over it, and a period at the very start or very end of the input is left
    untouched because it has no neighbour on one side.

    :param tagged: a list of ``(word, tag)`` pairs
    :return: a new list of ``(word, tag)`` pairs with names merged
    """
    prefixes = ('Mr', 'Ms', 'Mrs', 'Mx')
    merged = []
    i = 0
    while i < len(tagged):
        # The period needs both a previous and a next entry to be part of
        # a split-up name.
        if tagged[i][1] == '.' and merged and i + 1 < len(tagged):
            prev_word, prev_pos = merged[-1][0], merged[-1][1]
            next_word, next_pos = tagged[i + 1][0], tagged[i + 1][1]
            if prev_pos == 'NNP' and next_pos == 'NNP' and prev_word in prefixes:
                # Combine it into the actual name, replacing the prefix
                # entry, and skip over the period and the name.
                merged[-1] = ('{}. {}'.format(prev_word, next_word), 'NNP')
                i += 2
                continue
        merged.append(tagged[i])
        i += 1
    return merged
def test_basics(self):
    """ Test the types and allowed values of basic user attributes """
    student = user.NPUser("1Defenestrator")

    # Both relationship attributes are expected to be lists
    for relation in ("followers", "following"):
        self.assertIsInstance(getattr(student, relation), list)

    class_years = ["Freshman", "Sophomore", "Junior", "Senior"]
    self.assertIn(student.grade, class_years)
    self.assertIsInstance(student.has_graduated, bool)
def test_has_graduated(self):
    """ Test has_graduated by manipulating the stored grade value """
    moshe = user.NPUser("G4_Y5_3X")
    try:
        # NOTE(review): "grade" is presumably a graduation year -- confirm
        moshe.user_info["grade"] = "3000"  # In the future
        self.assertFalse(moshe.has_graduated)
        moshe.user_info["grade"] = "2000"  # In the past
        self.assertTrue(moshe.has_graduated)
    finally:
        # Reset even when an assertion above fails, so the manipulated
        # value is never left behind.
        moshe.user_info["grade"] = "2017"
def test_tweet_tagging(self):
    """ Test tagging of tweets, including replaced and teacher names """
    # Tagging in its most basic form returns a list
    account = user.NPUser("G4_Y5_3X")
    status_text = account.tweepy.status.text
    self.assertIsInstance(tweet.get_tweet_tags(status_text), list)

    # Names are replaced and recognized as proper nouns
    name_tags = tweet.get_tweet_tags(self.sentence)
    leading = name_tags[0]
    self.assertEqual(leading[0], "Luke Taylor")
    self.assertEqual(leading[1], "NNP")

    # Teacher names are appropriately tagged, with no stray period left
    teacher_tags = tweet.get_tweet_tags(self.sentence3)
    self.assertEqual(teacher_tags[0][0], "Mr. Haas")
    self.assertNotEqual(teacher_tags[1][0], ".")
def test_misc(self):
    """ Test that constructing a user from an overlong handle fails """
    overlong_handle = "this_is_too_long_to_be_a_twitter_handle"
    self.assertRaises(ValueError, user.NPUser, overlong_handle)
def test_magicmethod(self):
    """ Test attribute access for known and unknown attribute names """
    student = user.NPUser("bravoc9")

    # Known attributes resolve to their values
    self.assertEqual(student.fullname, "Chris Bravo")
    self.assertEqual(student.sex, "M")

    # Unknown attributes must raise rather than return something
    self.assertRaises(
        AttributeError, getattr, student, "this_is_not_a_valid_attribute"
    )