コード例 #1
0
ファイル: twitter_chucnker.py プロジェクト: satadisha/Twevent
    def run(self):
        while True:
            try:
                tweet, index = self.queue.get()
                tweet_text = tweet['text']

                lock.acquire()
                ner_output, chunk_output, event_output = get_ner(
                    tweet_text, index)
                print 'The day', get_day(tweet['created_at'])
                lock.release()
                # print ner_output, chunk_output, event_output
                print bio_parse(ner_output)
                # get_chunks(chunk_output)
                # print bio_parse(event_output)

                # save the result to the database
                tweets.update({'_id': tweet['_id']}, {
                    '$set': {
                        'twitter_nlp': {
                            'ner': bio_parse(ner_output),
                            'chunks': get_chunks(chunk_output),
                            'events': bio_parse(event_output)
                        }
                    }
                })
                print 'tweets ', tweet['_id'], 'updated!!'
                # signal the queue this task is completed
                self.queue.task_done()

            except Exception as e:
                print "An error has occured:", e
                self.queue.task_done()
コード例 #2
0
    def run(self):
        while True:
            try:
                # get a tweet to be segged
                tweet_id, tweet, index = self.queue.get()

                # print self.getName()
                # new segmentation
                print tweet
                seg = TweetSeg(tweet)
                segments = seg.tweet_segmentation()
                print segments

                # update the database with the segmentation information
                print 'updating the tweets: ', tweet_id, 'NO %d' % index
                lock.acquire()
                tweets.update({'_id': tweet_id},
                              {'$set': {
                                  'segments': segments
                              }})
                lock.release()
                print 'update complete!'

                # signal the queue the task is done
                self.queue.task_done()
            except Exception as e:
                print e
                self.queue.task_done()
コード例 #3
0
ファイル: twitter_chucnker.py プロジェクト: KeithYue/Twevent
    def run(self):
        while True:
            try:
                tweet, index = self.queue.get()
                tweet_text = tweet['text']

                lock.acquire()
                ner_output, chunk_output, event_output = get_ner(tweet_text, index)
                print 'The day', get_day(tweet['created_at'])
                lock.release()
                # print ner_output, chunk_output, event_output
                print bio_parse(ner_output)
                # get_chunks(chunk_output)
                # print bio_parse(event_output)

                # save the result to the database
                tweets.update(
                        {
                            '_id': tweet['_id']
                            },
                        {
                            '$set': {
                                'twitter_nlp':{
                                    'ner':bio_parse(ner_output),
                                    'chunks':get_chunks(chunk_output),
                                    'events':bio_parse(event_output)
                                    }
                                }
                            }
                        )
                print 'tweets ', tweet['_id'], 'updated!!'
                # signal the queue this task is completed
                self.queue.task_done()

            except Exception as e:
                print "An error has occured:", e
                self.queue.task_done()