Example #1
0
def scan(geosearchclass, q):
    """Continuously poll a geo search and enqueue unseen tweets on *q*.

    Seeds the queue with the initial result set, then polls roughly every
    5 seconds (with ``result_type`` switched to "recent") until the
    module-level ``keep_scanning`` flag is cleared.

    Parameters:
        geosearchclass: object exposing ``search()`` and a settable
            ``result_type`` attribute.
        q: queue-like object with a ``put()`` method.
    """
    global keep_scanning
    search_results = geosearchclass.search()
    old_ids = [sr.id for sr in search_results]
    for s in search_results:
        q.put(s)
    while keep_scanning:
        # Sleep in 1-second slices so a cleared flag stops the scan
        # promptly instead of after a full 5-second wait.
        for i in range(5):
            if keep_scanning:
                time.sleep(1)
            else:
                return
        geosearchclass.result_type = "recent"
        search_results = geosearchclass.search()
        new_search_results = new_tweets(search_results, old_ids)
        if new_search_results:
            for nsr in new_search_results:
                q.put(nsr)
            # BUG FIX: record the ids we just enqueued; otherwise the same
            # tweets are reported as "new" and re-enqueued on every poll.
            old_ids.extend(nsr.id for nsr in new_search_results)
    return
def scan(geosearchclass, q):
    """Poll a geo search and push previously-unseen tweets onto *q*.

    The initial result set is enqueued wholesale; afterwards the search
    is re-run about every 5 seconds (as ``result_type = "recent"``) and
    only tweets whose ids have not been seen are enqueued.  Runs until
    the module-level ``keep_scanning`` flag becomes falsy.

    Parameters:
        geosearchclass: object exposing ``search()`` and a settable
            ``result_type`` attribute.
        q: queue-like object with a ``put()`` method.
    """
    global keep_scanning
    initial_results = geosearchclass.search()
    seen_ids = [tweet.id for tweet in initial_results]
    for tweet in initial_results:
        q.put(tweet)
    while keep_scanning:
        # Wait 5 seconds total, checking the flag once per second so we
        # can return quickly when scanning is switched off.
        for _ in range(5):
            if not keep_scanning:
                return
            time.sleep(1)
        geosearchclass.result_type = "recent"
        latest_results = geosearchclass.search()
        fresh = new_tweets(latest_results, seen_ids)
        if fresh:
            for tweet in fresh:
                q.put(tweet)
            # BUG FIX: remember these ids so the next poll does not treat
            # the same tweets as new and enqueue duplicates.
            seen_ids.extend(tweet.id for tweet in fresh)
    return
Example #3
0
def updating_plot(geosearchclass, number_of_words, grow=True):
    search_results = geosearchclass.search()
    filtered_words = utils.tokenize_and_filter(search_results)
    fdist = utils.get_freq_dist(filtered_words)
    # set up plot
    samples = [item for item, _ in fdist.most_common(number_of_words)]
    freqs = [fdist[sample] for sample in samples]
    plt.grid(True, color="silver")
    plt.plot(freqs, range(len(freqs)))
    plt.yticks(range(len(samples)), [s for s in samples])
    plt.ylabel("Samples")
    plt.xlabel("Counts")
    plt.title("Top Words Frequency Distribution")
    plt.ion()
    plt.show()

    # set up loop
    old_ids = set([s.id for s in search_results])
    for i in xrange(100):
        plt.pause(5)
        # use mixed above, change to recent here
        geosearchclass.result_type = "recent"
        # perturbation study
        # if i%2:  # for testing purposes
        #     # #change location every odd time to nyc
        #     # geosearchclass.latitude =40.734073
        #     # geosearchclass.longitude =-73.990663
        #     # perturb latitude
        #     geosearchclass.latitude =geosearchclass.latitude + .001

        # else:
        #     #now back to sf
        #     # geosearchclass.latitude = 37.7821
        #     # geosearchclass.longitude =  -122.4093
        #     geosearchclass.longitude =geosearchclass.longitude + .001

        search_results = geosearchclass.search()
        new_search_results = utils.new_tweets(search_results, old_ids)
        if new_search_results:
            filtered_words = utils.tokenize_and_filter(new_search_results)
            fdist = update_fdist(fdist, filtered_words)
            if grow:
                newsamples = [
                    item for item, _ in fdist.most_common(number_of_words)
                ]
                s1 = set(newsamples)
                s2 = set(samples)
                s1.difference_update(s2)
                if s1:
                    print "New words: " + str(list(s1))
                    newsamples = list(s1)
                    samples.extend(newsamples)
                    plt.yticks(range(len(samples)), [s for s in samples])
            freqs = [fdist[sample] for sample in samples]
            plt.plot(freqs, range(len(freqs)))
            if grow:
                plt.draw()
            print '%d new tweet(s)' % len(new_search_results)
            old_ids.update(set([s.id for s in new_search_results]))
        else:
            print "no updates"
Example #4
0
def updating_plot(geosearchclass, number_of_words, grow=True):
    search_results = geosearchclass.search()
    filtered_words = utils.tokenize_and_filter(search_results)
    fdist = utils.get_freq_dist(filtered_words)
    # set up plot
    samples = [item for item, _ in fdist.most_common(number_of_words)]
    freqs = [fdist[sample] for sample in samples]
    plt.grid(True, color="silver")
    plt.plot(freqs, range(len(freqs)))
    plt.yticks(range(len(samples)), [s for s in samples])
    plt.ylabel("Samples")
    plt.xlabel("Counts")
    plt.title("Top Words Frequency Distribution")
    plt.ion()
    plt.show()

    # set up loop
    old_ids = set([s.id for s in search_results])
    for i in xrange(100):
        plt.pause(5)
        # use mixed above, change to recent here
        geosearchclass.result_type = "recent"
        # perturbation study
        # if i%2:  # for testing purposes
        #     # #change location every odd time to nyc
        #     # geosearchclass.latitude =40.734073
        #     # geosearchclass.longitude =-73.990663
        #     # perturb latitude
        #     geosearchclass.latitude =geosearchclass.latitude + .001

        # else:
        #     #now back to sf
        #     # geosearchclass.latitude = 37.7821
        #     # geosearchclass.longitude =  -122.4093
        #     geosearchclass.longitude =geosearchclass.longitude + .001

        search_results = geosearchclass.search()
        new_search_results = utils.new_tweets(search_results, old_ids)
        if new_search_results:
            filtered_words = utils.tokenize_and_filter(new_search_results)
            fdist = update_fdist(fdist, filtered_words)
            if grow:
                newsamples = [item
                              for item, _ in fdist.most_common(number_of_words)
                              ]
                s1 = set(newsamples)
                s2 = set(samples)
                s1.difference_update(s2)
                if s1:
                    print "New words: " + str(list(s1))
                    newsamples = list(s1)
                    samples.extend(newsamples)
                    plt.yticks(range(len(samples)), [s for s in samples])
            freqs = [fdist[sample] for sample in samples]
            plt.plot(freqs, range(len(freqs)))
            if grow:
                plt.draw()
            print '%d new tweet(s)' % len(new_search_results)
            old_ids.update(set([s.id for s in new_search_results]))
        else:
            print "no updates"