コード例 #1
0
	def getWordList(self, event):
		"""Collect the caption words of every photo in ``event``.

		Each entry of ``event['photos']`` is wrapped in ``Photo`` and its
		caption is handed to a single ``CaptionParser(True)``.  The value of
		``getTopWords(-1, False)`` is returned unchanged; per the original
		note this is a list of ``(word, freq)`` pairs.

		NOTE(review): -1 presumably means "no limit" and the meaning of the
		second argument is not visible here -- confirm in CaptionParser.
		"""
		parser = CaptionParser(True)
		for raw_photo in event['photos']:
			parser.insertCaption(Photo(raw_photo).getCaption())
		return parser.getTopWords(-1, False)
コード例 #2
0
ファイル: event_feature.py プロジェクト: oeddyo/CityBeat
 def _getTopWords(self, k, stopword_removal=False):
     """Return ``getTopWords(k)`` over the captions of this event's photos.

     A ``CaptionParser`` is created with ``stopword_removal`` forwarded as a
     keyword argument.  Every photo in ``self._event["photos"]`` is wrapped
     in ``Photo``; photos whose caption is ``None`` are skipped, all other
     captions are inserted into the parser.
     """
     parser = CaptionParser(stopword_removal=stopword_removal)
     for raw_photo in self._event["photos"]:
         text = Photo(raw_photo).getCaption()
         if text is None:
             # Nothing to count for a photo without a caption.
             continue
         parser.insertCaption(text)
     return parser.getTopWords(k)
コード例 #3
0
 def _getTopWords(self, k, stopword_removal=False):
     """Feed this event's photo captions to a parser and return its top words.

     ``stopword_removal`` is passed straight through to ``CaptionParser``.
     Captions that are ``None`` are not inserted.  The result is whatever
     ``CaptionParser.getTopWords(k)`` returns.
     """
     parser = CaptionParser(stopword_removal=stopword_removal)
     for entry in self._event['photos']:
         caption = Photo(entry).getCaption()
         if caption is not None:
             parser.insertCaption(caption)
     return parser.getTopWords(k)
コード例 #4
0
ファイル: event_feature.py プロジェクト: daifanxiang/CityBeat
	def _getTopWords(self, k, stopword_removal=False):
		"""Get the top words of this event by counting caption word frequency.

		All non-``None`` captions of the photos in ``self._event['photos']``
		are inserted into a ``CaptionParser`` built with the given
		``stopword_removal`` flag; the parser's ``getTopWords(k)`` result is
		returned as-is.
		"""
		parser = CaptionParser(stopword_removal=stopword_removal)
		for raw_photo in self._event['photos']:
			caption = Photo(raw_photo).getCaption()
			if caption is None:
				# Photos without a caption contribute no words.
				continue
			parser.insertCaption(caption)
		return parser.getTopWords(k)
コード例 #5
0
		def PhotoDistanceByCaption(photo1, photo2):
			"""Compare two photos by the words of their captions.

			Each photo is wrapped in ``Photo`` and its caption is inserted
			into its own ``CaptionParser(True)``.  The ``getTopWords(-1)``
			result of each parser is turned into a ``{word: freq}`` dict and
			the two dicts are passed to ``kldiv``, whose result is returned.

			Returns ``None`` when either word list is empty, because the
			photos cannot be compared in that case.
			"""
			photos = [Photo(photo1), Photo(photo2)]
			captions = [photo.getCaption() for photo in photos]

			# One parser per caption, so each word list belongs to one photo.
			parsers = []
			for caption in captions:
				parser = CaptionParser(True)
				parser.insertCaption(caption)
				parsers.append(parser)

			word_lists = [parser.getTopWords(-1) for parser in parsers]
			if any(len(words) == 0 for words in word_lists):
				# unable to compare
				return None

			freq_by_word1, freq_by_word2 = [
				{word: freq for word, freq in words} for words in word_lists
			]
			return kldiv(freq_by_word1, freq_by_word2)
コード例 #6
0
ファイル: build_corpus.py プロジェクト: daifanxiang/CityBeat
from photo_interface import PhotoInterface
from caption_parser import CaptionParser
from mongodb_interface import MongoDBInterface
from photo import Photo

import random


if __name__ == '__main__':
	mi = MongoDBInterface()
	mi.setDB('test_caption')
	mi.setCollection('captions')
	
	cp = CaptionParser(True)
	
	i = 0
	captions = mi.getAllDocuments()
	for caption in captions:
		i += 1
		if i % 1000 == 0:
#			print cp.getTopWords(200)
			print i
			print len(cp._)
		cp.insertCaption(caption['caption'])

	for word, value in cp.getTopWords(300):
		print '\''+word+'\',',
	print
コード例 #7
0
from photo_interface import PhotoInterface
from caption_parser import CaptionParser
from mongodb_interface import MongoDBInterface
from photo import Photo

import random

if __name__ == '__main__':
    mi = MongoDBInterface()
    mi.setDB('test_caption')
    mi.setCollection('captions')

    cp = CaptionParser(True)

    i = 0
    captions = mi.getAllDocuments()
    for caption in captions:
        i += 1
        if i % 1000 == 0:
            #			print cp.getTopWords(200)
            print i
            print len(cp._)
        cp.insertCaption(caption['caption'])

    for word, value in cp.getTopWords(300):
        print '\'' + word + '\',',
    print