def printFeaturesStats(tweets):
    """Print the average and maximum of several per-tweet counts.

    For every tweet the following are collected: the values returned by
    ``preprocessing.countHandles``, ``countHashtags``, ``countUrls`` and
    ``countEmoticons`` for the tweet text, the number of
    whitespace-separated words, and the number of characters.  A header
    line followed by one row per feature is written to stdout.

    Args:
        tweets: Sized iterable of 4-item records.  The first item is the
            tweet text (it must support ``split()`` and ``len()``); the
            other three items are unpacked but not used here.

    Returns:
        None.  All output goes to stdout.

    An empty ``tweets`` prints 0.0 / 0 for every feature instead of
    failing: the maximum of a zero-size array is undefined and numpy
    raises ``ValueError`` for it.
    """
    labels = ('Handles', 'Hashtags', 'Urls', 'Emoticons', 'Words', 'Chars')

    # One row per feature, one column per tweet; integer storage.
    counts = numpy.zeros((len(labels), len(tweets)), dtype=int)
    for col, (text, _sent, _subj, _quer) in enumerate(tweets):
        counts[:, col] = (
            preprocessing.countHandles(text),
            preprocessing.countHashtags(text),
            preprocessing.countUrls(text),
            preprocessing.countEmoticons(text),
            len(text.split()),
            len(text),
        )

    # print(<single expression>) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    # In '%-010s' the '-' flag takes precedence over '0' (left-justified).
    print('%-10s %-010s %-4s ' % ('Features', 'Average', 'Maximum'))
    for label, row in zip(labels, counts):
        if row.size:
            average, maximum = row.mean(), row.max()
        else:
            # No tweets: mean()/max() are undefined on an empty row.
            average, maximum = 0.0, 0
        print('%10s %10.6f %10d' % (label, average, maximum))
def printFeaturesStats(tweets):
    """Print the average and maximum of several per-tweet counts.

    For every tweet the following are collected: the values returned by
    ``preprocessing.countHandles``, ``countHashtags``, ``countUrls`` and
    ``countEmoticons`` for the tweet text, the number of
    whitespace-separated words, and the number of characters.  A header
    line followed by one row per feature is written to stdout.

    Args:
        tweets: Sized iterable of 4-item records.  The first item is the
            tweet text (it must support ``split()`` and ``len()``); the
            other three items are unpacked but not used here.

    Returns:
        None.  All output goes to stdout.

    An empty ``tweets`` prints 0.0 / 0 for every feature instead of
    failing: the maximum of a zero-size array is undefined and numpy
    raises ``ValueError`` for it.
    """
    labels = ('Handles', 'Hashtags', 'Urls', 'Emoticons', 'Words', 'Chars')

    # One row per feature, one column per tweet; integer storage.
    counts = numpy.zeros((len(labels), len(tweets)), dtype=int)
    for col, (text, _sent, _subj, _quer) in enumerate(tweets):
        counts[:, col] = (
            preprocessing.countHandles(text),
            preprocessing.countHashtags(text),
            preprocessing.countUrls(text),
            preprocessing.countEmoticons(text),
            len(text.split()),
            len(text),
        )

    # print(<single expression>) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    # In '%-010s' the '-' flag takes precedence over '0' (left-justified).
    print('%-10s %-010s %-4s ' % ('Features', 'Average', 'Maximum'))
    for label, row in zip(labels, counts):
        if row.size:
            average, maximum = row.mean(), row.max()
        else:
            # No tweets: mean()/max() are undefined on an empty row.
            average, maximum = 0.0, 0
        print('%10s %10.6f %10d' % (label, average, maximum))