Ejemplo n.º 1
0
def generateData2():
    """Run feature printing over every event returned by loadUnbalancedData.

    Steps, in order:
      1. Build the corpus mapping with ``buildAllCorpus`` (14 as
         ``time_interval_length``, ``debug=True``).
      2. Load the two event lists with ``loadUnbalancedData``.
      3. Call ``BaseFeatureProduction.GenerateArffFileHeader()`` (its
         return value is not used here).
      4. For each event -- the first list, then the second -- look up the
         corpus by the key of the event's region and call
         ``printFeatures()`` on a ``BaseFeatureProduction`` built from it.
    """
    corpus_by_region = buildAllCorpus(time_interval_length=14, debug=True)
    positives, negatives = loadUnbalancedData()
    BaseFeatureProduction.GenerateArffFileHeader()

    events = positives + negatives
    for event in events:
        # The corpus mapping is indexed by the region's key.
        region_key = Region(event['region']).getKey()
        producer = BaseFeatureProduction(
            event, corpus_by_region[region_key], None)
        producer.printFeatures()
Ejemplo n.º 2
0
def testWithTweet():
    """Print features for the events stored in 'twitter_candidate_events'.

    Builds the corpus mapping with ``buildAllCorpus(element_type='tweets',
    debug=False)``, reads every document of the
    'twitter_candidate_events' collection in the 'citybeat_experiment'
    database through ``EventInterface``, and prints:

      * the value returned by ``TwitterFeature.GenerateArffFileHeader()``;
      * for each event whose ``getActualValue()`` is at least 8, the value
        returned by ``extractFeatures()``; events below 8 print '< 8' and
        are skipped;
      * finally, the number of events kept followed by ``cur.count()``.

    All output uses single-argument ``print(...)`` calls, which produce
    the same text under the Python 2 print statement and the Python 3
    print function.
    """
    corpus_all = buildAllCorpus(element_type='tweets', debug=False)
    ei = EventInterface()
    ei.setDB('citybeat_experiment')
    ei.setCollection('twitter_candidate_events')
    cur = ei.getAllDocuments()
    print(TwitterFeature.GenerateArffFileHeader())

    cnt = 0
    for event in cur:
        region = Region(event['region'])
        # Keep the raw document and its feature wrapper under separate
        # names instead of rebinding the loop variable.
        feature = TwitterFeature(event, corpus=corpus_all[region.getKey()])
        if feature.getActualValue() < 8:
            print('< 8')
            continue
        cnt += 1
        print(feature.extractFeatures())

    # '%s %s' reproduces the space-separated output of `print a, b`.
    print('%s %s' % (cnt, cur.count()))
Ejemplo n.º 3
0
 def _getEventWordCorpus(self, event):
     """Return the entry of ``self._corpus_dicts_word`` for the event's region.

     The event is wrapped in ``Event`` and converted with ``toDict()``;
     its "region" value is turned into a ``Region`` whose key indexes
     the mapping.
     """
     event_dict = Event(event).toDict()
     region_key = Region(event_dict["region"]).getKey()
     return self._corpus_dicts_word[region_key]
Ejemplo n.º 4
0
 def _getEventCharCorpus(self, event):
     """Return the entry of ``self._corpus_dicts_char`` for the event's region.

     The event is wrapped in ``Event`` and converted with ``toDict()``;
     its "region" value is turned into a ``Region`` whose key indexes
     the mapping.
     """
     event_dict = Event(event).toDict()
     region_key = Region(event_dict["region"]).getKey()
     return self._corpus_dicts_char[region_key]