Beispiel #1
0
def printFeaturesStats(tweets):
    """Print the average and maximum of six per-tweet features as a table.

    For every tweet the handle, hashtag, URL and emoticon counts returned by
    the ``preprocessing`` helpers, the number of whitespace-separated words
    and the number of characters are collected.  A header line and one row
    per feature are then written to standard output.

    Args:
        tweets: A sized sequence of 4-item records whose first item is the
            tweet text.  The other three items are unpacked but not used.

    Returns:
        None.  The table is printed.

    An empty ``tweets`` prints 0 for every average and maximum instead of
    failing on the reduction of a zero-length array.
    """
    labels = ('Handles', 'Hashtags', 'Urls', 'Emoticons', 'Words', 'Chars')
    total = len(tweets)

    # One row per feature, one column per tweet, stored as integers.
    counts = numpy.zeros((len(labels), total), dtype=int)
    for col, (text, sent, subj, quer) in enumerate(tweets):
        # Tuple order must match the order of `labels`.
        counts[:, col] = (
            preprocessing.countHandles(text),
            preprocessing.countHashtags(text),
            preprocessing.countUrls(text),
            preprocessing.countEmoticons(text),
            len(text.split()),
            len(text),
        )

    # Single-argument print(...) parses the same under Python 2 and 3.
    # NOTE: header cells are left-justified while the data rows below are
    # right-justified; the format string is left unchanged on purpose.
    print('%-10s %-010s %-4s ' % ('Features', 'Average', 'Maximum'))
    for label, row in zip(labels, counts):
        if total:
            average, maximum = row.mean(), row.max()
        else:
            # max() of a zero-length array raises ValueError; report zeros.
            average, maximum = 0.0, 0
        print('%10s %10.6f %10d' % (label, average, maximum))
Beispiel #2
0
def printFeaturesStats(tweets):
    """Print a table with the mean and maximum of six features of the tweets.

    The features are the handle, hashtag, URL and emoticon counts returned
    by the ``preprocessing`` helpers, plus the number of whitespace-separated
    words and the number of characters of each tweet text.

    Args:
        tweets: An iterable of 4-item records; the first item is the tweet
            text, the remaining three are unpacked but ignored.

    Returns:
        None.  A header line followed by one line per feature is printed.

    When ``tweets`` is empty every mean and maximum is printed as 0 rather
    than raising on the reduction of an empty array.
    """
    labels = ('Handles', 'Hashtags', 'Urls', 'Emoticons', 'Words', 'Chars')
    samples = [[] for _ in labels]

    for (text, sent, subj, quer) in tweets:
        # Value order must match the order of `labels`.
        values = (
            preprocessing.countHandles(text),
            preprocessing.countHashtags(text),
            preprocessing.countUrls(text),
            preprocessing.countEmoticons(text),
            len(text.split()),
            len(text),
        )
        for column, value in zip(samples, values):
            column.append(value)

    # print(...) with one argument behaves the same on Python 2 and 3.
    # NOTE: the header is left-justified while the rows are right-justified;
    # the format string is kept exactly as it was.
    print('%-10s %-010s %-4s ' % ('Features', 'Average', 'Maximum'))
    for label, column in zip(labels, samples):
        if column:
            # Integer array, so each feature value is stored as an integer.
            data = numpy.array(column, dtype=int)
            mean_value, max_value = data.mean(), data.max()
        else:
            # No tweets: max() of an empty array would raise ValueError.
            mean_value, max_value = 0.0, 0
        print('%10s %10.6f %10d' % (label, mean_value, max_value))