Beispiel #1
0
def loadNextWeekData():
    """Load the labeled next-week candidate events.

    Reads ``labeled_data_cf/label_next_week.txt``, where every non-blank
    line has the form ``<event id>,<label>``, fetches each referenced
    document from the ``next_week_candidate_event_25by25_merged``
    collection of the ``citybeat`` database and stores the label on the
    document under the ``'label'`` key.

    Documents for which ``Event(...).getActualValue()`` is below 8, or
    whose label is 0, are skipped.

    Returns:
        A ``(true_events, false_events)`` tuple of lists: the documents
        labeled 1, and the remaining documents with any other label.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('next_week_candidate_event_25by25_merged')

    true_events = []
    false_events = []

    # The with-block closes the file even when a lookup below raises.
    with open('labeled_data_cf/label_next_week.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            event_id = str(fields[0])
            label = int(fields[1])

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            if Event(event).getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            else:
                false_events.append(event)

    return true_events, false_events
def loadNextWeekData():
    """Load the labeled next-week candidate events.

    Reads ``labeled_data_cf/label_next_week.txt``, where every non-blank
    line has the form ``<event id>,<label>``, fetches each referenced
    document from the ``next_week_candidate_event_25by25_merged``
    collection of the ``citybeat`` database and stores the label on the
    document under the ``'label'`` key.

    Documents for which ``Event(...).getActualValue()`` is below 8, or
    whose label is 0, are skipped.

    Returns:
        A ``(true_events, false_events)`` tuple of lists: the documents
        labeled 1, and the remaining documents with any other label.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('next_week_candidate_event_25by25_merged')

    true_events = []
    false_events = []

    # The with-block closes the file even when a lookup below raises.
    with open('labeled_data_cf/label_next_week.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            event_id = str(fields[0])
            label = int(fields[1])

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            if Event(event).getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            else:
                false_events.append(event)

    return true_events, false_events
Beispiel #3
0
def loadUnbalancedData(_182):
    """Load labeled candidate events, applying manual label overrides.

    Base labels come from ``labeled_data_cf/data2.txt``. Only lines with
    exactly three whitespace-separated fields are used: a label, a
    confidence value, and a string whose last ``/``-separated segment is
    the event id. ``not_sure`` lines are skipped, ``yes`` maps to 1 and
    any other label maps to -1.

    Overrides come from a file of ``<event id>,<label>`` lines and
    replace the base label of the matching event.

    Args:
        _182: When truthy, overrides are read from
            ``labeled_data_cf/182_positive.txt``; otherwise from
            ``labeled_data_cf/181_positive.txt``.

    Returns:
        A ``(true_events, false_events)`` tuple of lists of documents
        from the ``candidate_event_25by25_merged`` collection.
        ``true_events`` holds documents whose final label is 1;
        ``false_events`` holds documents whose final label is -1 and
        whose confidence equals 1. Documents for which
        ``Event(...).getActualValue()`` is below 8, or whose final label
        is 0, are dropped, as is everything else.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    true_events = []
    false_events = []

    if _182:
        override_path = 'labeled_data_cf/182_positive.txt'
    else:
        override_path = 'labeled_data_cf/181_positive.txt'

    # Manually corrected labels, keyed by event id.
    modified_events = {}
    with open(override_path, 'r') as override_file:
        for line in override_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            modified_events[str(fields[0])] = int(fields[1])

    # The with-block closes the file even when a lookup below raises.
    with open('labeled_data_cf/data2.txt', 'r') as data_file:
        for line in data_file:
            fields = line.strip().split()
            if len(fields) != 3:
                # Also covers blank lines, which split into zero fields.
                continue
            label = fields[0].lower()
            confidence = float(fields[1])
            event_id = str(fields[2].split('/')[-1])
            if label == 'not_sure':
                continue
            label = 1 if label == 'yes' else -1

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            if event_id in modified_events:
                event['label'] = modified_events[event_id]

            if Event(event).getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            elif event['label'] == -1 and confidence == 1:
                # Only fully confident negatives are kept.
                false_events.append(event)

    return true_events, false_events
Beispiel #4
0
def loadUnbalancedData(_182):
    """Load labeled candidate events, applying manual label overrides.

    Base labels come from ``labeled_data_cf/data2.txt``. Only lines with
    exactly three whitespace-separated fields are used: a label, a
    confidence value, and a string whose last ``/``-separated segment is
    the event id. ``not_sure`` lines are skipped, ``yes`` maps to 1 and
    any other label maps to -1.

    Overrides come from a file of ``<event id>,<label>`` lines and
    replace the base label of the matching event.

    Args:
        _182: When truthy, overrides are read from
            ``labeled_data_cf/182_positive.txt``; otherwise from
            ``labeled_data_cf/181_positive.txt``.

    Returns:
        A ``(true_events, false_events)`` tuple of lists of documents
        from the ``candidate_event_25by25_merged`` collection.
        ``true_events`` holds documents whose final label is 1;
        ``false_events`` holds documents whose final label is -1 and
        whose confidence equals 1. Documents for which
        ``Event(...).getActualValue()`` is below 8, or whose final label
        is 0, are dropped, as is everything else.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    true_events = []
    false_events = []

    if _182:
        override_path = 'labeled_data_cf/182_positive.txt'
    else:
        override_path = 'labeled_data_cf/181_positive.txt'

    # Manually corrected labels, keyed by event id.
    modified_events = {}
    with open(override_path, 'r') as override_file:
        for line in override_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            modified_events[str(fields[0])] = int(fields[1])

    # The with-block closes the file even when a lookup below raises.
    with open('labeled_data_cf/data2.txt', 'r') as data_file:
        for line in data_file:
            fields = line.strip().split()
            if len(fields) != 3:
                # Also covers blank lines, which split into zero fields.
                continue
            label = fields[0].lower()
            confidence = float(fields[1])
            event_id = str(fields[2].split('/')[-1])
            if label == 'not_sure':
                continue
            label = 1 if label == 'yes' else -1

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            if event_id in modified_events:
                event['label'] = modified_events[event_id]

            if Event(event).getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            elif event['label'] == -1 and confidence == 1:
                # Only fully confident negatives are kept.
                false_events.append(event)

    return true_events, false_events
def testWithPhoto():
    """Print the features extracted from one candidate event (photos).

    Builds the photo corpora in debug mode, takes the first one yielded
    by ``items()``, fetches a document from the
    ``candidate_event_25by25_merged`` collection and prints the result of
    ``BaseFeatureProduction.extractFeatures()`` for it.
    """
    all_corpora = buildAllCorpus(element_type='photos', debug=True)
    # Stop after the first pair: only a single corpus is needed.
    for _first_key, corpus in all_corpora.items():
        break

    interface = EventInterface()
    interface.setDB('citybeat')
    interface.setCollection('candidate_event_25by25_merged')

    document = interface.getDocument()
    feature_producer = BaseFeatureProduction(document, corpus=corpus)
    print(feature_producer.extractFeatures())
Beispiel #6
0
def insertEvents():
    """Copy four hard-coded events into the ``online_candidate`` collection.

    Each listed id is looked up in ``candidate_event_25by25_merged`` and
    the returned document is passed to ``addEvent`` on the
    ``online_candidate`` interface. Both collections live in the
    ``citybeat`` database.
    """
    source = EventInterface()
    source.setDB('citybeat')
    source.setCollection('candidate_event_25by25_merged')

    target = EventInterface()
    target.setDB('citybeat')
    target.setCollection('online_candidate')

    event_ids = (
        '51148288c2a3754cfe668edd',
        '51147952c2a3754cfe6684ee',
        '51148a7ec2a3754cfe669977',
        '51147967c2a3754cfe668503',
    )

    for event_id in event_ids:
        document = source.getDocument({'_id': ObjectId(event_id)})
        target.addEvent(document)
Beispiel #7
0
def getAllActualEvents():
    """Return the candidate events whose final label is 1.

    Base labels come from ``labeled_data_cf/data2.txt``. Only lines with
    exactly three whitespace-separated fields are used: a label, a
    confidence value, and a string whose last ``/``-separated segment is
    the event id. ``not_sure`` lines are skipped, ``yes`` maps to 1 and
    any other label maps to -1. Labels listed in
    ``labeled_data_cf/181_positive.txt`` (``<event id>,<label>`` lines)
    override the base label of the matching event.

    Returns:
        A list of documents from the ``candidate_event_25by25_merged``
        collection whose final label is 1 and for which
        ``Event(...).getActualValue()`` is at least 8.
    """
    ei = EventInterface()
    ei.setDB("citybeat")
    ei.setCollection("candidate_event_25by25_merged")

    true_events = []

    # Manually corrected labels, keyed by event id.
    modified_events = {}
    with open("labeled_data_cf/181_positive.txt", "r") as override_file:
        for line in override_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(",")
            modified_events[str(fields[0])] = int(fields[1])

    # The with-block closes the file even when a lookup below raises.
    with open("labeled_data_cf/data2.txt", "r") as data_file:
        for line in data_file:
            fields = line.strip().split()
            if len(fields) != 3:
                # Also covers blank lines, which split into zero fields.
                continue
            label = fields[0].lower()
            # The value is unused here; it is still parsed so that a
            # non-numeric confidence column raises ValueError as before.
            float(fields[1])
            event_id = str(fields[2].split("/")[-1])
            if label == "not_sure":
                continue
            label = 1 if label == "yes" else -1

            event = ei.getDocument({"_id": ObjectId(event_id)})
            event["label"] = label
            if event_id in modified_events:
                event["label"] = modified_events[event_id]

            if Event(event).getActualValue() < 8 or event["label"] == 0:
                continue
            if event["label"] == 1:
                true_events.append(event)

    return true_events
Beispiel #8
0
def insertEvents():
    """Copy four hard-coded events into the ``online_candidate`` collection.

    Each listed id is looked up in ``candidate_event_25by25_merged`` and
    the returned document is passed to ``addEvent`` on the
    ``online_candidate`` interface. Both collections live in the
    ``citybeat`` database.
    """
    reader = EventInterface()
    reader.setDB('citybeat')
    reader.setCollection('candidate_event_25by25_merged')

    writer = EventInterface()
    writer.setDB('citybeat')
    writer.setCollection('online_candidate')

    for hex_id in ('51148288c2a3754cfe668edd',
                   '51147952c2a3754cfe6684ee',
                   '51148a7ec2a3754cfe669977',
                   '51147967c2a3754cfe668503'):
        writer.addEvent(reader.getDocument({'_id': ObjectId(hex_id)}))
Beispiel #9
0
def testWithTweet():
    """Print the features of one candidate event built from tweets.

    Builds the tweet corpora in debug mode and takes the first one
    yielded by ``items()``. A document fetched from the
    ``candidate_event_25by25_merged`` collection is printed, its
    ``"photos"`` entry is replaced by a ``"tweets"`` list of up to 30
    documents from ``TweetInterface``, and the result of
    ``BaseFeature.printFeatures()`` is printed.
    """
    from corpus import buildAllCorpus

    all_corpora = buildAllCorpus(element_type="tweets", debug=True)
    # Stop after the first pair: only a single corpus is needed.
    for _first_key, corpus in all_corpora.items():
        break

    event_interface = EventInterface()
    event_interface.setDB("citybeat")
    event_interface.setCollection("candidate_event_25by25_merged")
    document = event_interface.getDocument()
    print(document)

    tweet_interface = TweetInterface()
    tweets = list(tweet_interface.getAllDocuments(limit=30))

    # Swap the photo payload for the sampled tweets.
    del document["photos"]
    document["tweets"] = tweets

    feature = BaseFeature(document, corpus=corpus)
    print(feature.printFeatures())
Beispiel #10
0
def generateTrueLabelFile():
    """Print ``<event id>,<label>`` lines for the labeled events.

    Reads ``labeled_data_cf/181_positive.txt`` (``<event id>,<label>``
    lines; a later line for the same id replaces an earlier one) and
    fetches every referenced document from the
    ``candidate_event_25by25_merged`` collection of the ``citybeat``
    database. Documents for which ``Event(...).getActualValue()`` is
    below 8 are skipped.

    Despite the name, nothing is written to a file: the lines go to
    standard output, grouped as label 1 first, then label -1, then every
    other label.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    labels_by_id = {}
    # The with-block closes the file even when parsing a line raises.
    with open('labeled_data_cf/181_positive.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            labels_by_id[str(fields[0])] = int(fields[1])

    true_events = []
    false_events = []
    unknown_events = []

    for event_id, label in labels_by_id.items():
        event = ei.getDocument({'_id': ObjectId(event_id)})
        event['label'] = label
        if Event(event).getActualValue() < 8:
            continue
        if event['label'] == -1:
            false_events.append(event)
        elif event['label'] == 1:
            true_events.append(event)
        else:
            unknown_events.append(event)

    for event in true_events + false_events + unknown_events:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(str(event['_id']) + ',' + str(event['label']))
Beispiel #11
0
def generateTrueLabelFile():
    """Print ``<event id>,<label>`` lines for the labeled events.

    Reads ``labeled_data_cf/181_positive.txt`` (``<event id>,<label>``
    lines; a later line for the same id replaces an earlier one) and
    fetches every referenced document from the
    ``candidate_event_25by25_merged`` collection of the ``citybeat``
    database. Documents for which ``Event(...).getActualValue()`` is
    below 8 are skipped.

    Despite the name, nothing is written to a file: the lines go to
    standard output, grouped as label 1 first, then label -1, then every
    other label.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    labels_by_id = {}
    # The with-block closes the file even when parsing a line raises.
    with open('labeled_data_cf/181_positive.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on fields[1].
                continue
            fields = line.split(',')
            labels_by_id[str(fields[0])] = int(fields[1])

    true_events = []
    false_events = []
    unknown_events = []

    for event_id, label in labels_by_id.items():
        event = ei.getDocument({'_id': ObjectId(event_id)})
        event['label'] = label
        if Event(event).getActualValue() < 8:
            continue
        if event['label'] == -1:
            false_events.append(event)
        elif event['label'] == 1:
            true_events.append(event)
        else:
            unknown_events.append(event)

    for event in true_events + false_events + unknown_events:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(str(event['_id']) + ',' + str(event['label']))