def getWordCloudData(self):
    """Return the affect-annotated word frequencies for ``self.keyword``.

    Pipeline, in order:
      1. ``TweetFinder(self.keyword, 10000).getTweets()`` fetches the tweets
         (the 10000 is presumably a maximum tweet count -- TODO confirm
         against ``TweetFinder``).
      2. ``sanitize.sanitizeList`` cleans the fetched list.
      3. ``wordfreq().freq`` computes frequencies over the cleaned list.
      4. ``Affect().addaffect`` decorates those frequencies; its result is
         returned unchanged.
    """
    raw_tweets = TweetFinder(self.keyword, 10000).getTweets()
    cleaned_tweets = sanitize.sanitizeList(raw_tweets)
    word_counts = wordfreq().freq(cleaned_tweets)
    return Affect().addaffect(word_counts)
Example #2
0
    view = rc[:]
    # Run os.chdir on the engines with this process's working directory
    # (presumably so the relative 'davinci*.txt' paths below resolve the
    # same way on the engines -- confirm against the rest of the script).
    view.apply_sync(os.chdir, os.getcwd())

    if not os.path.exists('davinci.txt'):
        # download from project gutenberg
        print("Downloading Da Vinci's notebooks from Project Gutenberg")
        r = requests.get(davinci_url)
        with io.open('davinci.txt', 'w', encoding='utf8') as f:
            f.write(r.text)

    # Run the serial version
    print("Serial word frequency count:")
    # NOTE(review): the download branch above writes the file as utf8 but it
    # is decoded here as latin1; kept as-is because a pre-existing
    # davinci.txt may not be valid utf8 -- confirm the intended encoding.
    # The context manager closes the handle instead of leaking it.
    with io.open('davinci.txt', encoding='latin1') as f:
        text = f.read()
    tic = time.time()
    freqs = wordfreq(text)
    toc = time.time()
    print_wordfreq(freqs, 10)
    print("Took %.3f s to calculate" % (toc - tic))

    # The parallel version
    print("\nParallel word frequency count:")
    # split the davinci.txt into one file per engine:
    lines = text.splitlines()
    nlines = len(lines)
    n = len(rc)
    block = nlines // n
    for i in range(n):
        # Engine i gets lines [i*block, (i+1)*block). The previous upper
        # bound i*(block+1) made chunk 0 empty and chunk i only i lines long.
        start = i * block
        # Floor division can leave up to n-1 trailing lines unassigned, so
        # the last chunk runs to the end of the text.
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with io.open('davinci%i.txt' % i, 'w', encoding='utf8') as f:
            f.write('\n'.join(chunk))
Example #3
0
if __name__ == '__main__':
    # Script entry point: count word frequencies in Da Vinci's notebooks
    # serially, then split the text into one 'davinci<i>.txt' file per
    # engine in preparation for the parallel count.

    # Create a Client and View
    rc = Client()

    view = rc[:]

    if not os.path.exists('davinci.txt'):
        # download from project gutenberg
        print("Downloading Da Vinci's notebooks from Project Gutenberg")
        urllib.request.urlretrieve(davinci_url, 'davinci.txt')

    # Run the serial version
    print("Serial word frequency count:")
    # Read through a context manager so the file handle is closed promptly.
    with open('davinci.txt') as f:
        text = f.read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)

    # The parallel version
    print("\nParallel word frequency count:")
    # split the davinci.txt into one file per engine:
    lines = text.splitlines()
    nlines = len(lines)
    n = len(rc)
    # Floor division: '/' yields a float on Python 3, and float slice
    # indices raise TypeError.
    block = nlines // n
    for i in range(n):
        # Engine i gets lines [i*block, (i+1)*block). The previous upper
        # bound i*(block+1) made chunk 0 empty and chunk i only i lines long.
        start = i * block
        # The last chunk also takes the nlines % n lines left over by the
        # floor division so no text is dropped.
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with open('davinci%i.txt' % i, 'w') as f:
            f.write('\n'.join(chunk))