コード例 #1
0
    def goThroughCandidateDB(self):
        """Classify recent candidate events and sync the classified collection.

        Every document in the candidate collection whose ``created_time`` is
        at or after "now minus two hours" is wrapped in an ``Event``, scored
        with ``self.clf`` using features built from the corpus of the event's
        region, and then reconciled with the classified collection:

        * score above the threshold -> ``addEvent`` is called, whether or not
          an event with the same ID is already stored there;
        * score at or below the threshold and the event is already stored
          -> it is deleted by ID;
        * score at or below the threshold and not stored -> nothing happens.

        Reads ``self.candidate_db``, ``self.candidate_collection``,
        ``self.classified_event_db``, ``self.classified_event_collection``,
        ``self.all_corpus`` and ``self.clf``. Returns ``None``.
        """
        merge_window_seconds = 60 * 60 * 2  # consider past 2 hours for merge
        event_threshold = 0.5

        candidates = EventInterface(self.candidate_db, self.candidate_collection)
        classified = EventInterface(self.classified_event_db,
                                    self.classified_event_collection)

        # NOTE(review): the bound is sent as a *string*, so this presumably
        # relies on 'created_time' being stored as a string timestamp as
        # well -- confirm against the collection schema.
        low_bound = str(int(getCurrentStampUTC()) - merge_window_seconds)
        condition = {'created_time': {'$gte': low_bound}}

        for index, document in enumerate(
                candidates.getAllDocuments(condition=condition)):
            # Lazy logging arguments: same message, formatted by the logger.
            logging.warning("Classifying %d-th candidate event...", index)

            event = Event(document)
            region = Region(event.getRegion())
            corpus = self.all_corpus[region.getKey()]
            features = BaseFeatureProduction(event, corpus).extractFeatures()
            prob = self.clf.classify(features)

            event_id = event.getID()
            already_classified = classified.getEventByID(event_id) is not None

            # Single-argument print(...) behaves identically under the
            # Python 2 print statement and the Python 3 print function.
            if prob > event_threshold:
                if already_classified:
                    print('already in front end collection, merge it')
                else:
                    print('new events find in collection but not in front end , add it')
                classified.addEvent(event)
            elif already_classified:
                print('after merge it becomes none event, delete it')
                classified.deleteEventByID(event_id)