def loadNextWeekData():
    """Load the labeled next-week candidate events, split by label.

    Reads ``labeled_data_cf/label_next_week.txt``, which holds one
    ``<event id>,<label>`` pair per line, fetches each event document from
    the ``next_week_candidate_event_25by25_merged`` collection of the
    ``citybeat`` database and stores the label on the document under the
    ``'label'`` key.

    Events whose actual value is below 8, or whose label is 0, are dropped.

    Returns:
        A ``(true_events, false_events)`` tuple of lists of event documents.
        Documents labeled 1 go to the first list; every other remaining
        document goes to the second.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('next_week_candidate_event_25by25_merged')

    true_events = []
    false_events = []

    # ``with`` guarantees the label file is closed even when a lookup or a
    # parse below raises.
    with open('labeled_data_cf/label_next_week.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing with an IndexError.
                continue
            fields = line.split(',')
            event_id = str(fields[0])
            label = int(fields[1])

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            e = Event(event)
            if e.getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            else:
                false_events.append(event)

    return true_events, false_events
Esempio n. 2
0
def loadNextWeekData():
    """Return the labeled next-week events as ``(true_events, false_events)``.

    Each non-blank line of ``labeled_data_cf/label_next_week.txt`` is
    parsed as ``<event id>,<label>``. The matching document is fetched from
    the ``next_week_candidate_event_25by25_merged`` collection of the
    ``citybeat`` database and its ``'label'`` key is set to the parsed
    label.

    A document is skipped when its actual value is below 8 or its label
    is 0. Of the rest, label 1 goes into ``true_events`` and anything else
    into ``false_events``.

    Returns:
        tuple: ``(true_events, false_events)``, two lists of event documents.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('next_week_candidate_event_25by25_merged')

    true_events = []
    false_events = []

    # The context manager closes the file on every exit path, including
    # exceptions raised while parsing or querying.
    with open('labeled_data_cf/label_next_week.txt', 'r') as label_file:
        for raw_line in label_file:
            if not raw_line.strip():
                # A blank line carries no label; skip it rather than crash
                # on the missing second field.
                continue
            parts = raw_line.split(',')
            event_id = str(parts[0])
            label = int(parts[1])

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = label
            e = Event(event)
            if e.getActualValue() < 8 or event['label'] == 0:
                continue

            if event['label'] == 1:
                true_events.append(event)
            else:
                false_events.append(event)

    return true_events, false_events
Esempio n. 3
0
def loadUnbalancedData(_182):
    """Load the labeled candidate events without balancing the classes.

    Labels come from ``labeled_data_cf/data2.txt``. Every usable line has
    exactly three whitespace-separated fields: a label (``yes``,
    ``not_sure`` or anything else, compared case-insensitively), a numeric
    confidence, and a path whose last ``/``-separated component is the
    event id. ``yes`` maps to 1, ``not_sure`` lines are skipped and any
    other label maps to -1.

    A second file of ``<event id>,<label>`` lines overrides the label of
    the events it lists.

    Events whose actual value is below 8, or whose final label is 0, are
    dropped. Negative events are kept only when their confidence is
    exactly 1.

    Args:
        _182: When truthy the overrides are read from
            ``labeled_data_cf/182_positive.txt``, otherwise from
            ``labeled_data_cf/181_positive.txt``.

    Returns:
        A ``(true_events, false_events)`` tuple of lists of event documents
        from the ``candidate_event_25by25_merged`` collection.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    if _182:
        overrides_path = 'labeled_data_cf/182_positive.txt'
    else:
        overrides_path = 'labeled_data_cf/181_positive.txt'

    # event id -> corrected label
    modified_events = {}
    with open(overrides_path, 'r') as overrides_file:
        for line in overrides_file:
            if not line.strip():
                # Blank lines carry no override.
                continue
            fields = line.split(',')
            modified_events[str(fields[0])] = int(fields[1])

    true_events = []
    false_events = []

    # ``with`` closes the data file even if a lookup below raises.
    with open('labeled_data_cf/data2.txt', 'r') as data_file:
        for line in data_file:
            fields = line.strip().split()
            if len(fields) != 3:
                # Also covers blank lines, which split into zero fields.
                continue
            label = fields[0].lower()
            confidence = float(fields[1])
            event_id = str(fields[2].split('/')[-1])
            if label == 'not_sure':
                continue

            event = ei.getDocument({'_id': ObjectId(event_id)})
            event['label'] = 1 if label == 'yes' else -1
            # ``in`` replaces the deprecated ``dict.has_key``.
            if event_id in modified_events:
                event['label'] = modified_events[event_id]

            e = Event(event)
            if e.getActualValue() < 8 or event['label'] == 0:
                continue
            if event['label'] == 1:
                true_events.append(event)
            elif event['label'] == -1 and confidence == 1:
                # Only fully confident negatives are kept.
                false_events.append(event)

    return true_events, false_events
Esempio n. 4
0
def loadUnbalancedData(_182):
    """Return ``(true_events, false_events)`` for the labeled candidates.

    The labels are read from ``labeled_data_cf/data2.txt``. A line is used
    only if it splits on whitespace into exactly three fields: label,
    confidence and a path ending in the event id (the text after the last
    ``/``). The lower-cased label ``yes`` becomes 1, ``not_sure`` causes
    the line to be skipped, and every other label becomes -1.

    Labels listed in the overrides file (``<event id>,<label>`` per line)
    replace the label derived from ``data2.txt``.

    After that, an event is dropped when its actual value is below 8 or
    its label is 0. Events labeled 1 are collected as true events; events
    labeled -1 are collected as false events only if their confidence
    equals 1.

    Args:
        _182: Selects the overrides file: ``182_positive.txt`` when truthy,
            ``181_positive.txt`` otherwise (both under
            ``labeled_data_cf/``).

    Returns:
        tuple: ``(true_events, false_events)``, two lists of event documents.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    if _182:
        overrides_path = 'labeled_data_cf/182_positive.txt'
    else:
        overrides_path = 'labeled_data_cf/181_positive.txt'

    # Maps event id -> label that supersedes the one in data2.txt.
    modified_events = {}
    with open(overrides_path, 'r') as overrides_file:
        for raw_line in overrides_file:
            if not raw_line.strip():
                # Nothing to parse on a blank line.
                continue
            parts = raw_line.split(',')
            modified_events[str(parts[0])] = int(parts[1])

    true_events = []
    false_events = []

    # Context manager: the file is closed on every exit path.
    with open('labeled_data_cf/data2.txt', 'r') as data_file:
        for raw_line in data_file:
            parts = raw_line.strip().split()
            if len(parts) != 3:
                # Blank and malformed lines are ignored alike.
                continue
            label = parts[0].lower()
            confidence = float(parts[1])
            event_id = str(parts[2].split('/')[-1])
            if label == 'not_sure':
                continue

            event = ei.getDocument({'_id': ObjectId(event_id)})
            if label == 'yes':
                event['label'] = 1
            else:
                event['label'] = -1
            # Membership test instead of the deprecated ``has_key``.
            if event_id in modified_events:
                event['label'] = modified_events[event_id]

            e = Event(event)
            if e.getActualValue() < 8 or event['label'] == 0:
                continue

            if event['label'] == 1:
                true_events.append(event)
            elif event['label'] == -1 and confidence == 1:
                # Negatives need a confidence of exactly 1 to be kept.
                false_events.append(event)

    return true_events, false_events
Esempio n. 5
0
def getAllActualEvents():
    """Return every candidate event whose final label is 1.

    Labels are read from ``labeled_data_cf/data2.txt``. A line is used only
    if it has exactly three whitespace-separated fields: label, confidence
    and a path whose last ``/``-separated component is the event id. The
    lower-cased label ``yes`` maps to 1, ``not_sure`` lines are skipped and
    any other label maps to -1. Labels listed in
    ``labeled_data_cf/181_positive.txt`` (``<event id>,<label>`` per line)
    override the ones from ``data2.txt``.

    Events whose actual value is below 8 are dropped.

    Returns:
        A list of event documents from the ``candidate_event_25by25_merged``
        collection of the ``citybeat`` database, each with its ``'label'``
        key set to 1.
    """
    ei = EventInterface()
    ei.setDB("citybeat")
    ei.setCollection("candidate_event_25by25_merged")

    # event id -> corrected label
    modified_events = {}
    with open("labeled_data_cf/181_positive.txt", "r") as overrides_file:
        for line in overrides_file:
            if not line.strip():
                # Blank lines carry no override.
                continue
            fields = line.split(",")
            modified_events[str(fields[0])] = int(fields[1])

    true_events = []

    # ``with`` closes the data file even if a lookup below raises.
    with open("labeled_data_cf/data2.txt", "r") as data_file:
        for line in data_file:
            fields = line.strip().split()
            if len(fields) != 3:
                # Also covers blank lines, which split into zero fields.
                continue
            label = fields[0].lower()
            # The confidence is not used for filtering here; it is still
            # parsed so a non-numeric value raises ValueError as before.
            float(fields[1])
            event_id = str(fields[2].split("/")[-1])
            if label == "not_sure":
                continue

            event = ei.getDocument({"_id": ObjectId(event_id)})
            event["label"] = 1 if label == "yes" else -1
            # ``in`` replaces the deprecated ``dict.has_key``.
            if event_id in modified_events:
                event["label"] = modified_events[event_id]

            e = Event(event)
            if e.getActualValue() < 8 or event["label"] == 0:
                continue
            if event["label"] == 1:
                true_events.append(event)

    return true_events
Esempio n. 6
0
def getBaselineEvents():
    """Print the ids of up to 50 randomly chosen baseline candidate events.

    All documents of the ``baseline_candidate_events`` collection in the
    ``citybeat`` database are read. Events whose actual value is below 8 or
    whose z-score is below 3 are discarded. The remainder is shuffled and
    the ``'_id'`` of the first 50 events is printed, one per line.

    Returns:
        None. The ids are written to standard output.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('baseline_candidate_events')

    event_list = []
    for event in ei.getAllDocuments():
        e = Event(event)
        if e.getActualValue() < 8 or e.getZscore() < 3:
            continue
        event_list.append(event)

    random.shuffle(event_list)

    # Slicing caps the sample at 50 without raising IndexError when fewer
    # than 50 events survive the filter.
    for event in event_list[:50]:
        # Single-argument call form prints the same on Python 2 and 3.
        print(event['_id'])
Esempio n. 7
0
def generateTrueLabelFile():
    """Print ``<event id>,<label>`` lines for the labeled events, by label.

    Reads ``labeled_data_cf/181_positive.txt`` (one ``<event id>,<label>``
    pair per line; if an id is repeated, its last label wins), fetches each
    event from the ``candidate_event_25by25_merged`` collection of the
    ``citybeat`` database and drops the events whose actual value is
    below 8.

    The surviving events are printed to standard output in three groups:
    label 1 first, then label -1, then every other label.

    Returns:
        None. Despite the name, nothing is written to a file by this
        function itself; the lines go to standard output.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    # event id -> label
    events = {}
    # ``with`` closes the label file even if parsing raises.
    with open('labeled_data_cf/181_positive.txt', 'r') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines instead of failing with IndexError.
                continue
            fields = line.split(',')
            events[str(fields[0])] = int(fields[1])

    true_events = []
    false_events = []
    unknown_events = []

    for event_id, label in events.items():
        event = ei.getDocument({'_id': ObjectId(event_id)})
        event['label'] = label
        e = Event(event)
        if e.getActualValue() < 8:
            continue
        if event['label'] == -1:
            false_events.append(event)
        elif event['label'] == 1:
            true_events.append(event)
        else:
            unknown_events.append(event)

    for event in true_events + false_events + unknown_events:
        # Single-argument call form prints the same on Python 2 and 3.
        print(str(event['_id']) + ',' + str(event['label']))
Esempio n. 8
0
def generateTrueLabelFile():
    """Write the labeled events to stdout as ``<event id>,<label>`` lines.

    The id/label pairs come from ``labeled_data_cf/181_positive.txt``, one
    comma-separated pair per line. They are collected in a dict first, so
    a repeated id keeps only its last label. Each event is then looked up
    in the ``candidate_event_25by25_merged`` collection of the ``citybeat``
    database, and events with an actual value below 8 are left out.

    Output order: all events labeled 1, then all labeled -1, then the
    rest.

    Returns:
        None. The function only prints; it does not create a file itself.
    """
    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection('candidate_event_25by25_merged')

    # Maps event id -> label.
    events = {}
    # The context manager closes the file on every exit path.
    with open('labeled_data_cf/181_positive.txt', 'r') as label_file:
        for raw_line in label_file:
            if not raw_line.strip():
                # A blank line has no second field to parse; skip it.
                continue
            parts = raw_line.split(',')
            events[str(parts[0])] = int(parts[1])

    true_events = []
    false_events = []
    unknown_events = []

    for event_id, label in events.items():
        event = ei.getDocument({'_id': ObjectId(event_id)})
        event['label'] = label
        e = Event(event)
        if e.getActualValue() < 8:
            continue

        if event['label'] == -1:
            false_events.append(event)
        elif event['label'] == 1:
            true_events.append(event)
        else:
            unknown_events.append(event)

    for event in true_events + false_events + unknown_events:
        # Parenthesised single argument: same output on Python 2 and 3.
        print(str(event['_id']) + ',' + str(event['label']))
from event_interface import EventInterface
from event_feature import EventFeature
from photo_interface import PhotoInterface
from photo import Photo
from region import Region
from event import Event
from caption_parser import CaptionParser
from stopwords import Stopwords

import operator
import string
import types
import random
import math

# Script: strip duplicate photos from every event in the
# 'candidate_event_25by25_merged' collection of the 'AmazonMT' database and
# write back each event for which removeDuplicatePhotos() reported a
# positive result.
ei = EventInterface()
ei.setDB('AmazonMT')
ei.setCollection('candidate_event_25by25_merged')

events = ei.getAllDocuments()

# Never updated below; kept so the module's global names stay unchanged.
duplicates = 0
for event in events:
    e = Event(event)
    # NOTE(review): presumably the number of photos removed -- confirm
    # against Event.removeDuplicatePhotos.
    flag = e.removeDuplicatePhotos()
    if flag > 0:
        # Formatted into one string so the line prints identically under
        # Python 2 and Python 3 ("<photo number> <actual value>").
        print('%s %s' % (e.getPhotoNumber(), e.getActualValue()))
        ei.updateDocument(e)
Esempio n. 10
0
from event_interface import EventInterface
from event_feature import EventFeature
from photo_interface import PhotoInterface
from photo import Photo
from region import Region
from event import Event
from caption_parser import CaptionParser
from stopwords import Stopwords

import operator
import string
import types
import random
import math

# Script: for every event document in the 'candidate_event_25by25_merged'
# collection of the 'AmazonMT' database, remove duplicate photos and save
# the event again when removeDuplicatePhotos() returns a positive value.
ei = EventInterface()
ei.setDB('AmazonMT')
ei.setCollection('candidate_event_25by25_merged')

events = ei.getAllDocuments()

# Not modified anywhere below; retained to keep the module globals as-is.
duplicates = 0
for event in events:
    e = Event(event)
    # NOTE(review): looks like a count of removed photos -- verify in
    # Event.removeDuplicatePhotos.
    flag = e.removeDuplicatePhotos()
    if flag > 0:
        # One pre-formatted string keeps the output
        # "<photo number> <actual value>" on both Python 2 and Python 3.
        print('%s %s' % (e.getPhotoNumber(), e.getActualValue()))
        ei.updateDocument(e)