def getHistoricFeatures(self, entropy_para):
        # Compute features capturing the difference between the current event
        # and background (historic) knowledge for the same region.
        #
        # entropy_para: granularity parameter forwarded to _divideAndCount.
        # Returns [photo-distance feature, topic KL divergence,
        #          entropy KL divergence]; a fixed fallback is returned when
        # no historic photos exist for the region.

        end_time = self.getLatestPhotoTime()
        begin_time = self.getEarliestPhotoTime()

        pi = PhotoInterface()

        photos = []
        # dt would widen the query window symmetrically; currently disabled.
        dt = 0
        # 14 days of history are used as the training background data.
        for day in xrange(1, 15):
            et = end_time - day * 24 * 3600 + dt / 2
            bt = begin_time - day * 24 * 3600 - dt / 2
            # rangeQuery returns photos sorted from most recent to earliest,
            # so `photos` preserves that per-day ordering.
            photos.extend(pi.rangeQuery(self._event['region'], [str(bt), str(et)]))

        # Sample at most as many historic photos as the current event has
        # (slicing clamps automatically when fewer are available).
        random.shuffle(photos)
        photos = photos[:len(self._event['photos'])]

        if not photos:
            # TODO: refine this fallback for regions with no history.
            return [1, 10, 10]

        # Fake a historic event from the sampled background photos.
        historic_event = Event()
        historic_event.setPhotos(photos)
        historic_event.setRegion(self._event['region'])
        historic_event.setActualValue(historic_event._getActualValueByCounting())
        historic_event = BaseFeature(historic_event)

        # Divergence between the (smoothed) entropy distributions.
        pro1 = self._divideAndCount(entropy_para)
        pro2 = historic_event._divideAndCount(entropy_para)
        entropy_divergence = KLDivergence.averageKLDivergence(pro1, pro2)

        # Divergence between the top-word distributions.
        topic_divergence = self.computeWordKLDivergenceWith(historic_event)

        return [historic_event.getPhotoDisFeatures()[3], topic_divergence,
                entropy_divergence]
# ----- Example 2: alternate version of getHistoricFeatures -----
	def getHistoricFeatures(self, entropy_para):
		# Compute features capturing the difference between the current event
		# and background (historic) knowledge for the same region.
		#
		# entropy_para: granularity parameter forwarded to _divideAndCount.
		# Returns [photo-distance feature, topic KL divergence,
		#          entropy KL divergence]; a fixed fallback is returned when
		# no historic photos exist for the region.

		end_time = self.getLatestPhotoTime()
		begin_time = self.getEarliestPhotoTime()

		pi = PhotoInterface()
		pi.setDB('citybeat')
		pi.setCollection('photos')

		photos = []
		# dt would widen the query window symmetrically; currently disabled.
		dt = 0
		# 14 days of history are used as the training background data.
		for day in xrange(1, 15):
			et = end_time - day * 24 * 3600 + dt / 2
			bt = begin_time - day * 24 * 3600 - dt / 2
			# rangeQuery returns photos sorted from most recent to earliest,
			# so `photos` preserves that per-day ordering.
			photos.extend(pi.rangeQuery(self._event['region'], [str(bt), str(et)]))

		# Sample at most as many historic photos as the current event has
		# (slicing clamps automatically when fewer are available).
		random.shuffle(photos)
		photos = photos[:len(self._event['photos'])]

		if not photos:
			# Guard against building an Event from an empty sample;
			# same fallback as the sibling implementation. TODO: refine.
			return [1, 10, 10]

		# Fake a historic event from the sampled background photos.
		historic_event = Event()
		historic_event.setPhotos(photos)
		historic_event.setRegion(self._event['region'])
		historic_event.setActualValue(historic_event._getActualValueByCounting())
		historic_event = EventFeature(historic_event)

		# Divergence between the (smoothed) entropy distributions.
		pro1 = self._divideAndCount(entropy_para)
		pro2 = historic_event._divideAndCount(entropy_para)
		entropy_divergence = KLDivergence.averageKLDivergence(pro1, pro2)

		# Divergence between the top-word distributions.
		topic_divergence = self.computeWordKLDivergenceWith(historic_event)

		return [historic_event.getPhotoDisFeatures()[3], topic_divergence,
		        entropy_divergence]
# ----- Example 3: alternate version of getHistoricFeatures -----
    def getHistoricFeatures(self, entropy_para):
        # Compute features capturing the difference between the current event
        # and background (historic) knowledge for the same region.
        #
        # entropy_para: granularity parameter forwarded to _divideAndCount.
        # Returns [avg photo distance, topic KL divergence, entropy KL
        #          divergence, avg caption length, people/photo ratio].

        end_time = self.getLatestPhotoTime()
        begin_time = self.getEarliestPhotoTime()

        pi = PhotoInterface()
        pi.setDB("citybeat")
        pi.setCollection("photos")

        photos = []
        # Widen each daily query window by one hour (split across both ends).
        dt = 3600
        # 14 days of history are used as the training background data.
        for day in xrange(1, 15):
            et = end_time - day * 24 * 3600 + dt / 2
            bt = begin_time - day * 24 * 3600 - dt / 2
            # rangeQuery returns photos sorted from most recent to earliest,
            # so `photos` preserves that per-day ordering.
            photos.extend(pi.rangeQuery(self._event["region"], [str(bt), str(et)]))

        # Fake a historic event from the background photos.
        event = Event()
        event.setPhotos(photos)
        event.setRegion(self._event["region"])
        event.setActualValue(event.getActualValueByCounting())
        event = EventFeature(event)

        # Divergence between the (smoothed) entropy distributions.
        pro1 = self._divideAndCount(entropy_para)
        pro2 = event._divideAndCount(entropy_para)
        entropy_divergence = KLDivergence.averageKLDivergence(pro1, pro2)

        # Divergence between the top-word distributions: assign each distinct
        # word a dense index, then build aligned frequency vectors.
        event_topword_list = self._getTopWords(-1, True)
        historic_topword_list = event._getTopWords(-1, True)

        ind = {}
        for word, freq in event_topword_list + historic_topword_list:
            if word not in ind:  # has_key is deprecated / removed in Py3
                ind[word] = len(ind)
        n_ind = len(ind)

        freq1 = [0] * n_ind
        freq2 = [0] * n_ind

        for word, freq in event_topword_list:
            freq1[ind[word]] = freq
        for word, freq in historic_topword_list:
            freq2[ind[word]] = freq

        topic_divergence = KLDivergence.averageKLDivergence(freq1, freq2)

        return [
            event.getAvgPhotoDis(),
            topic_divergence,
            entropy_divergence,
            event.getAvgCaptionLen(),
            event.getRatioOfPeopleToPhoto(),
        ]
# ----- Example 4: alternate version of getHistoricFeatures -----
    def getHistoricFeatures(self, entropy_para):
        # Compute features capturing the difference between the current event
        # and background (historic) knowledge for the same region.
        #
        # entropy_para: granularity parameter forwarded to _divideAndCount.
        # Returns [avg photo distance, topic KL divergence, entropy KL
        #          divergence, avg caption length, people/photo ratio].

        end_time = self.getLatestPhotoTime()
        begin_time = self.getEarliestPhotoTime()

        pi = PhotoInterface()
        pi.setDB('citybeat')
        pi.setCollection('photos')

        photos = []
        # Widen each daily query window by one hour (split across both ends).
        dt = 3600
        # 14 days of history are used as the training background data.
        for day in xrange(1, 15):
            et = end_time - day * 24 * 3600 + dt / 2
            bt = begin_time - day * 24 * 3600 - dt / 2
            # rangeQuery returns photos sorted from most recent to earliest,
            # so `photos` preserves that per-day ordering.
            photos.extend(pi.rangeQuery(self._event['region'],
                                        [str(bt), str(et)]))

        # Fake a historic event from the background photos.
        event = Event()
        event.setPhotos(photos)
        event.setRegion(self._event['region'])
        event.setActualValue(event.getActualValueByCounting())
        event = EventFeature(event)

        # Divergence between the (smoothed) entropy distributions.
        pro1 = self._divideAndCount(entropy_para)
        pro2 = event._divideAndCount(entropy_para)
        entropy_divergence = KLDivergence.averageKLDivergence(pro1, pro2)

        # Divergence between the top-word distributions: assign each distinct
        # word a dense index, then build aligned frequency vectors.
        event_topword_list = self._getTopWords(-1, True)
        historic_topword_list = event._getTopWords(-1, True)

        ind = {}
        for word, freq in event_topword_list + historic_topword_list:
            if word not in ind:  # has_key is deprecated / removed in Py3
                ind[word] = len(ind)
        n_ind = len(ind)

        freq1 = [0] * n_ind
        freq2 = [0] * n_ind

        for word, freq in event_topword_list:
            freq1[ind[word]] = freq
        for word, freq in historic_topword_list:
            freq2[ind[word]] = freq

        topic_divergence = KLDivergence.averageKLDivergence(freq1, freq2)

        return [
            event.getAvgPhotoDis(),
            topic_divergence,
            entropy_divergence,
            event.getAvgCaptionLen(),
            event.getRatioOfPeopleToPhoto()
        ]