Esempio n. 1
0
	def addEvent(self, raw_event):
		"""Store an event, merging it into an overlapping stored event if possible.

		Call this method instead of saveDocument.

		raw_event is either a dict (JSON-like document) or an object exposing
		toJSON(), such as an Event instance.

		Stored events with the same 'region' are visited in descending
		'created_time' order. The first one whose photo time span lies within
		InstagramConfig.merge_time_interval of the new event's span receives
		the new event through Event.mergeWith, and the method returns without
		creating a new document. When no stored event qualifies (or the new
		event has no photos, hence no time span), the event is saved as a new
		document.

		Returns None.
		"""
		# isinstance rather than an exact type comparison, so that dict
		# subclasses are treated as raw documents instead of failing on
		# a missing toJSON().
		if isinstance(raw_event, dict):
			new_event = raw_event
		else:
			new_event = raw_event.toJSON()

		# Round-trip through Event so sortPhotos() is applied before the
		# first/last photo timestamps are read below.
		new_event = Event(new_event)
		new_event.sortPhotos()
		new_event = new_event.toJSON()

		# before adding, find if any event can be merged
		# NOTE(review): this compares the whole 'region' sub-document. If
		# getAllDocuments forwards the condition to MongoDB unchanged, such a
		# match is sensitive to field order -- confirm this is intended.
		condition = {'region': new_event['region']}
		old_events = self.getAllDocuments(condition).sort('created_time', -1)

		new_photos = new_event['photos']
		if new_photos:
			# Loop invariants, computed once.
			# Assumes photos are ordered newest first, as the end/begin
			# naming implies -- TODO confirm against Event.sortPhotos.
			end_time1 = int(new_photos[0]['created_time'])
			begin_time1 = int(new_photos[-1]['created_time'])
			time_interval = InstagramConfig.merge_time_interval

			for old_event in old_events:
				old_photos = old_event['photos']
				if not old_photos:
					# A stored event without photos has no time span to compare.
					continue
				end_time2 = int(old_photos[0]['created_time'])
				begin_time2 = int(old_photos[-1]['created_time'])
				if end_time1 + time_interval >= begin_time2 and end_time2 + time_interval >= begin_time1:
					# The two spans are within time_interval of each other: merge.
					merged_event = Event(old_event)
					# Presumably the number of photos taken over from new_event,
					# judging by the message below -- verify in Event.mergeWith.
					merged = merged_event.mergeWith(new_event)
					if merged >= 0:
						print('%d out of %d photos are merged into an old event' % (merged, len(new_photos)))
					if merged > 0:
						# Only write back when the stored event actually changed.
						self.updateDocument(merged_event)
					# Stop at the first overlapping candidate; no new document is created.
					return

		# cannot merge
		print('create a new event')
		super(EventInterface, self).saveDocument(new_event)
Esempio n. 2
0
 def __init__(self, vectorizer = None, db='AmazonMT', collection='candidate_event_25by25_merged'):
     """Load the stored events and fit a tf-idf vectorizer on their captions.

     Every document of the given db/collection is wrapped in an Event,
     reduced with selectOnePhotoForOneUser() and kept in JSON form in
     self.events. The captions returned by self._getAllCaptions() are then
     used to fit the vectorizer. (Per the original note, the class is meant
     to pick the photos in an event's 'photos' field that are
     representative of the cluster.)

     vectorizer -- optional replacement for the default TfidfVectorizer, so
                   that custom tf-idf parameters can be used; see
                   http://scikit-learn.org/dev/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
     db         -- database name passed to EventInterface.setDB
     collection -- collection name passed to EventInterface.setCollection
     """
     self.ei = interface = EventInterface()
     interface.setDB(db)
     interface.setCollection(collection)

     self.events = []
     for document in interface.getAllDocuments():
         wrapped = Event(document)
         wrapped.selectOnePhotoForOneUser()
         self.events.append(wrapped.toJSON())
     self._captions = self._getAllCaptions()

     if vectorizer is None:
         # Default: character 4-grams within word boundaries.
         # NOTE(review): scikit-learn documents stop_words as applying only
         # when analyzer == 'word', so it likely has no effect here -- confirm.
         default_params = dict(
             max_df=0.05,
             min_df=1,
             strip_accents='ascii',
             smooth_idf=True,
             preprocessor=self._preProcessor,
             sublinear_tf=True,
             norm='l2',
             analyzer='char_wb',
             ngram_range=(4, 4),
             stop_words='english',
         )
         vectorizer = TfidfVectorizer(**default_params)
     self.vectorizer = vectorizer
     # The returned matrix is discarded; only the fitted vectorizer is kept.
     self.vectorizer.fit_transform(self._captions)
Esempio n. 3
0
    def addEvent(self, raw_event):
        """Store an event, merging it into an overlapping stored event if possible.

        Call this method instead of saveDocument.

        raw_event is either a dict (JSON-like document) or an object exposing
        toJSON(), such as an Event instance.

        Stored events whose region bounds equal the new event's are visited in
        descending 'created_time' order. The first one whose photo time span
        lies within InstagramConfig.merge_time_interval of the new event's
        span receives the new event through Event.mergeWith, and the method
        returns without creating a new document. When no stored event
        qualifies (or the new event has no photos, hence no time span), the
        event is saved as a new document.

        Returns None.
        """
        # isinstance rather than an exact type comparison, so that dict
        # subclasses are treated as raw documents instead of failing on
        # a missing toJSON().
        if isinstance(raw_event, dict):
            new_event = raw_event
        else:
            new_event = raw_event.toJSON()

        # Round-trip through Event so sortPhotos() is applied before the
        # first/last photo timestamps are read below.
        new_event = Event(new_event)
        new_event.sortPhotos()
        new_event = new_event.toJSON()

        # before adding, find if any event can be merged
        region = new_event['region']
        # Each bound is matched separately through a dotted key instead of
        # comparing the whole 'region' sub-document.
        condition = {
            'region.min_lat': region['min_lat'],
            'region.min_lng': region['min_lng'],
            'region.max_lat': region['max_lat'],
            'region.max_lng': region['max_lng'],
        }
        old_events = self.getAllDocuments(condition).sort('created_time', -1)

        new_photos = new_event['photos']
        if new_photos:
            # Loop invariants, computed once.
            # Assumes photos are ordered newest first, as the end/begin
            # naming implies -- TODO confirm against Event.sortPhotos.
            end_time1 = int(new_photos[0]['created_time'])
            begin_time1 = int(new_photos[-1]['created_time'])
            time_interval = InstagramConfig.merge_time_interval

            for old_event in old_events:
                old_photos = old_event['photos']
                if not old_photos:
                    # A stored event without photos has no time span to compare.
                    continue
                end_time2 = int(old_photos[0]['created_time'])
                begin_time2 = int(old_photos[-1]['created_time'])
                if end_time1 + time_interval >= begin_time2 and end_time2 + time_interval >= begin_time1:
                    # The two spans are within time_interval of each other: merge.
                    merged_event = Event(old_event)
                    # Presumably the number of photos taken over from new_event,
                    # judging by the message below -- verify in Event.mergeWith.
                    merged = merged_event.mergeWith(new_event)
                    if merged >= 0:
                        print('%d out of %d photos are merged into an old event' % (
                            merged, len(new_photos)))
                    if merged > 0:
                        # Only write back when the stored event actually changed.
                        self.updateDocument(merged_event)
                    # Stop at the first overlapping candidate; no new document is created.
                    return

        # cannot merge
        print('create a new event')
        super(EventInterface, self).saveDocument(new_event)
Esempio n. 4
0
	def addEvent(self, raw_event):
		"""Store an event, merging it into an overlapping stored event if possible.

		Call this method instead of saveDocument.

		raw_event is either a dict (JSON-like document) or an object exposing
		toJSON(), such as an Event instance.

		Stored events whose region bounds equal the new event's are visited in
		descending 'created_time' order. The first one whose photo time span
		lies within InstagramConfig.merge_time_interval of the new event's
		span receives the new event through Event.mergeWith, and the method
		returns without creating a new document. When no stored event
		qualifies (or the new event has no photos, hence no time span), the
		event is saved as a new document.

		Returns None.
		"""
		# isinstance rather than an exact type comparison, so that dict
		# subclasses are treated as raw documents instead of failing on
		# a missing toJSON().
		if isinstance(raw_event, dict):
			new_event = raw_event
		else:
			new_event = raw_event.toJSON()

		# Round-trip through Event so sortPhotos() is applied before the
		# first/last photo timestamps are read below.
		new_event = Event(new_event)
		new_event.sortPhotos()
		new_event = new_event.toJSON()

		# before adding, find if any event can be merged
		region = new_event['region']
		# Each bound is matched separately through a dotted key instead of
		# comparing the whole 'region' sub-document.
		condition = {
			'region.min_lat': region['min_lat'],
			'region.min_lng': region['min_lng'],
			'region.max_lat': region['max_lat'],
			'region.max_lng': region['max_lng'],
		}
		old_events = self.getAllDocuments(condition).sort('created_time', -1)

		new_photos = new_event['photos']
		if new_photos:
			# Loop invariants, computed once.
			# Assumes photos are ordered newest first, as the end/begin
			# naming implies -- TODO confirm against Event.sortPhotos.
			end_time1 = int(new_photos[0]['created_time'])
			begin_time1 = int(new_photos[-1]['created_time'])
			time_interval = InstagramConfig.merge_time_interval

			for old_event in old_events:
				old_photos = old_event['photos']
				if not old_photos:
					# A stored event without photos has no time span to compare.
					continue
				end_time2 = int(old_photos[0]['created_time'])
				begin_time2 = int(old_photos[-1]['created_time'])
				if end_time1 + time_interval >= begin_time2 and end_time2 + time_interval >= begin_time1:
					# The two spans are within time_interval of each other: merge.
					merged_event = Event(old_event)
					# Presumably the number of photos taken over from new_event,
					# judging by the message below -- verify in Event.mergeWith.
					merged = merged_event.mergeWith(new_event)
					if merged >= 0:
						print('%d out of %d photos are merged into an old event' % (merged, len(new_photos)))
					if merged > 0:
						# Only write back when the stored event actually changed.
						self.updateDocument(merged_event)
					# Stop at the first overlapping candidate; no new document is created.
					return

		# cannot merge
		print('create a new event')
		super(EventInterface, self).saveDocument(new_event)
Esempio n. 5
0
    def __init__(self, vectorizer=None, db="AmazonMT", collection="candidate_event_25by25_merged"):
        """Load the stored events and fit a tf-idf vectorizer on their captions.

        Every document of the given db/collection is wrapped in an Event,
        reduced with selectOnePhotoForOneUser() and kept in JSON form in
        self.events. The captions returned by self._getAllCaptions() are then
        used to fit the vectorizer. (Per the original note, the class is meant
        to pick the photos in an event's 'photos' field that are
        representative of the cluster.)

        vectorizer -- optional replacement for the default TfidfVectorizer, so
                      that custom tf-idf parameters can be used; see
                      http://scikit-learn.org/dev/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
        db         -- database name passed to EventInterface.setDB
        collection -- collection name passed to EventInterface.setCollection
        """
        self.ei = interface = EventInterface()
        interface.setDB(db)
        interface.setCollection(collection)

        self.events = []
        for document in interface.getAllDocuments():
            wrapped = Event(document)
            wrapped.selectOnePhotoForOneUser()
            self.events.append(wrapped.toJSON())
        self._captions = self._getAllCaptions()

        if vectorizer is None:
            # Default: character 4-grams within word boundaries.
            # NOTE(review): scikit-learn documents stop_words as applying only
            # when analyzer == "word", so it likely has no effect here -- confirm.
            default_params = dict(
                max_df=0.05,
                min_df=1,
                strip_accents="ascii",
                smooth_idf=True,
                preprocessor=self._preProcessor,
                sublinear_tf=True,
                norm="l2",
                analyzer="char_wb",
                ngram_range=(4, 4),
                stop_words="english",
            )
            vectorizer = TfidfVectorizer(**default_params)
        self.vectorizer = vectorizer
        # The returned matrix is discarded; only the fitted vectorizer is kept.
        self.vectorizer.fit_transform(self._captions)
Esempio n. 6
0
    def addEvent(self, raw_event):
        """Store an event, merging it into an overlapping stored event if possible.

        Call this method instead of saveDocument.

        raw_event is either a dict (JSON-like document) or an object exposing
        toJSON(), such as an Event instance.

        Stored events with the same 'region' are visited in descending
        'created_time' order. The first one whose photo time span lies within
        InstagramConfig.merge_time_interval of the new event's span receives
        the new event through Event.mergeWith, and the method returns without
        creating a new document. When no stored event qualifies (or the new
        event has no photos, hence no time span), the event is saved as a new
        document.

        Returns None.
        """
        # isinstance rather than an exact type comparison, so that dict
        # subclasses are treated as raw documents instead of failing on
        # a missing toJSON().
        if isinstance(raw_event, dict):
            new_event = raw_event
        else:
            new_event = raw_event.toJSON()

        # Round-trip through Event so sortPhotos() is applied before the
        # first/last photo timestamps are read below.
        new_event = Event(new_event)
        new_event.sortPhotos()
        new_event = new_event.toJSON()

        # before adding, find if any event can be merged
        # NOTE(review): this compares the whole 'region' sub-document. If
        # getAllDocuments forwards the condition to MongoDB unchanged, such a
        # match is sensitive to field order -- confirm this is intended.
        condition = {'region': new_event['region']}
        old_events = self.getAllDocuments(condition).sort('created_time', -1)

        new_photos = new_event['photos']
        if new_photos:
            # Loop invariants, computed once.
            # Assumes photos are ordered newest first, as the end/begin
            # naming implies -- TODO confirm against Event.sortPhotos.
            end_time1 = int(new_photos[0]['created_time'])
            begin_time1 = int(new_photos[-1]['created_time'])
            time_interval = InstagramConfig.merge_time_interval

            for old_event in old_events:
                old_photos = old_event['photos']
                if not old_photos:
                    # A stored event without photos has no time span to compare.
                    continue
                end_time2 = int(old_photos[0]['created_time'])
                begin_time2 = int(old_photos[-1]['created_time'])
                if end_time1 + time_interval >= begin_time2 and end_time2 + time_interval >= begin_time1:
                    # The two spans are within time_interval of each other: merge.
                    merged_event = Event(old_event)
                    # Presumably the number of photos taken over from new_event,
                    # judging by the message below -- verify in Event.mergeWith.
                    merged = merged_event.mergeWith(new_event)
                    if merged >= 0:
                        print('%d out of %d photos are merged into an old event' % (
                            merged, len(new_photos)))
                    if merged > 0:
                        # Only write back when the stored event actually changed.
                        self.updateDocument(merged_event)
                    # Stop at the first overlapping candidate; no new document is created.
                    return

        # cannot merge
        print('create a new event')
        super(EventInterface, self).saveDocument(new_event)