def generateData2():
    """Print ARFF training data for the unbalanced true/false event set.

    Builds one corpus per region, emits the ARFF header via
    BaseFeatureProduction, then prints a feature row for every event
    (true events first, then false events), each paired with the corpus
    of its own region.
    """
    corpora = buildAllCorpus(time_interval_length=14, debug=True)
    positive_events, negative_events = loadUnbalancedData()
    BaseFeatureProduction.GenerateArffFileHeader()
    for ev in positive_events + negative_events:
        region_key = Region(ev['region']).getKey()
        BaseFeatureProduction(ev, corpora[region_key], None).printFeatures()
def testWithTweet(): cnt = 0 corpus_all = buildAllCorpus(element_type='tweets', debug=False) ei = EventInterface() ei.setDB('citybeat_experiment') ei.setCollection('twitter_candidate_events') cur = ei.getAllDocuments() print TwitterFeature.GenerateArffFileHeader() for event in cur: region = Region(event['region']) event = TwitterFeature(event, corpus=corpus_all[region.getKey()]) if event.getActualValue() < 8: print '< 8' continue cnt += 1 print event.extractFeatures() print cnt, cur.count()
def _getEventWordCorpus(self, event):
    """Return the word-level corpus matching the event's region key."""
    region_key = Region(Event(event).toDict()["region"]).getKey()
    return self._corpus_dicts_word[region_key]
def _getEventCharCorpus(self, event):
    """Return the character-level corpus matching the event's region key."""
    region_key = Region(Event(event).toDict()["region"]).getKey()
    return self._corpus_dicts_char[region_key]