コード例 #1
0
def main():
    """Print text, chunks and NER output for selected tweets.

    Walks the first 10000 results of ``tweets.find()`` and, for every tweet
    where ``get_day(t['created_at']) == 1`` and both
    ``t['twitter_nlp']['chunks']`` and ``t['twitter_nlp']['ner']`` are
    truthy, prints the UTF-8 encoded text followed by the chunks and the
    NER value.  A tweet whose printing fails is skipped.

    ``tweets`` and ``get_day`` are defined outside this function.
    """
    for t in tweets.find()[0:10000]:
        if get_day(t['created_at']) != 1:
            continue

        nlp = t['twitter_nlp']
        if not (nlp['chunks'] and nlp['ner']):
            continue

        try:
            # Single-argument print(...) parses under both Python 2 and 3
            # and prints the same thing as the print statement on Python 2.
            print(t['text'].encode('utf-8'))
            print(nlp['chunks'])
            print(nlp['ner'])
        except Exception:
            # Best effort: skip a tweet that cannot be printed, but let
            # KeyboardInterrupt / SystemExit propagate so the scan can be
            # interrupted (a bare ``except:`` would swallow those too).
            continue
コード例 #2
0
ファイル: for_demo.py プロジェクト: KeithYue/Twevent
def main():
    """Dump text, chunks and NER output of matching tweets to stdout.

    Only the first 10000 results of ``tweets.find()`` are examined.  A tweet
    is printed when ``get_day(t["created_at"]) == 1`` and both the
    ``"chunks"`` and ``"ner"`` entries of ``t["twitter_nlp"]`` are truthy.
    The text is encoded as UTF-8 before printing.  If printing a tweet
    raises, that tweet is skipped and the loop moves on.

    ``tweets`` and ``get_day`` come from outside this function.
    """
    for t in tweets.find()[0:10000]:
        if get_day(t["created_at"]) != 1:
            continue

        annotations = t["twitter_nlp"]
        if not (annotations["chunks"] and annotations["ner"]):
            continue

        try:
            # print(...) with exactly one argument is valid on Python 2 and
            # Python 3; on Python 2 the output matches the print statement.
            print(t["text"].encode("utf-8"))
            print(annotations["chunks"])
            print(annotations["ner"])
        except Exception:
            # Skip tweets that fail to print.  ``except Exception`` rather
            # than a bare ``except:`` so that Ctrl-C and SystemExit are not
            # silently swallowed.
            continue
コード例 #3
0
def main():
    """Start the segmentation worker and enqueue tweets lacking segments.

    Creates one ``TweetsSegThread`` bound to the shared ``queue``, marks it
    as a daemon and starts it.  Then every document returned by
    ``tweets.find()`` that has no ``'segments'`` key is put on the queue as
    ``(_id, text, index)``, where ``index`` is the document's position in
    the cursor.  Finally waits on ``queue.join()``.

    ``queue``, ``tweets`` and ``TweetsSegThread`` are defined outside this
    function.
    """
    # spawn a pool of threads, and pass them queue instance
    for _ in range(1):
        worker = TweetsSegThread(queue)
        # The ``daemon`` attribute replaces the deprecated setDaemon();
        # assumes TweetsSegThread subclasses threading.Thread -- confirm.
        worker.daemon = True
        worker.start()

    # populate queue with data
    for index, tweet in enumerate(tweets.find()):
        # only tweets that have not been segmented yet; ``in`` replaces
        # dict.has_key(), which no longer exists on Python 3
        if 'segments' not in tweet:
            queue.put((tweet['_id'], tweet['text'], index))

    # NOTE(review): presumably ``queue`` is a standard Queue, in which case
    # this blocks until every queued item has been marked done -- confirm.
    queue.join()
コード例 #4
0
ファイル: twitter_chucnker.py プロジェクト: KeithYue/Twevent
def main():
    """Start the chunker workers and enqueue the tweets of the target day.

    Creates 100 ``TweetsChunkerThread`` instances bound to the shared
    ``queue``, marks each as a daemon and starts it.  Then every document
    returned by ``tweets.find()`` for which
    ``get_day(tweet['created_at']) == day`` is put on the queue as
    ``(tweet, index)``, where ``index`` is the document's position in the
    cursor.  Finally waits on ``queue.join()``.

    ``queue``, ``tweets``, ``day``, ``get_day`` and ``TweetsChunkerThread``
    are defined outside this function.
    """
    # spawn a pool of threads, and pass them queue instance
    for _ in range(100):
        worker = TweetsChunkerThread(queue)
        # The ``daemon`` attribute replaces the deprecated setDaemon();
        # assumes TweetsChunkerThread subclasses threading.Thread -- confirm.
        worker.daemon = True
        worker.start()

    # populate the data into the queue: only tweets created on ``day``
    for index, tweet in enumerate(tweets.find()):
        # filter out the tweets that are not from the selected day
        if get_day(tweet['created_at']) == day:
            queue.put((tweet, index))

    # NOTE(review): presumably ``queue`` is a standard Queue, in which case
    # this blocks until every queued item has been marked done (it does not
    # wait for the worker threads themselves to exit) -- confirm.
    queue.join()
コード例 #5
0
ファイル: twitter_chucnker.py プロジェクト: satadisha/Twevent
def main():
    """Launch the chunker thread pool and feed it the selected day's tweets.

    Starts 100 daemon ``TweetsChunkerThread`` workers that share ``queue``.
    Each document from ``tweets.find()`` whose
    ``get_day(tweet['created_at'])`` equals ``day`` is enqueued as
    ``(tweet, index)``; ``index`` is the document's position in the cursor.
    The function then waits on ``queue.join()``.

    ``queue``, ``tweets``, ``day``, ``get_day`` and ``TweetsChunkerThread``
    are defined outside this function.
    """
    # spawn a pool of threads, and pass them queue instance
    for _ in range(100):
        chunker = TweetsChunkerThread(queue)
        # Set the ``daemon`` attribute instead of calling the deprecated
        # setDaemon(); assumes TweetsChunkerThread subclasses
        # threading.Thread -- confirm.
        chunker.daemon = True
        chunker.start()

    # populate the data into the queue: only tweets created on ``day``
    for index, tweet in enumerate(tweets.find()):
        # filter out the tweets that are not from the selected day
        if get_day(tweet['created_at']) == day:
            queue.put((tweet, index))

    # NOTE(review): presumably ``queue`` is a standard Queue, in which case
    # this blocks until every queued item has been marked done (it does not
    # wait for the worker threads themselves to exit) -- confirm.
    queue.join()