def task_gen_top_tag_via_number(query, K, root):
    """Rank the tags of a query's photos by occurrence count and save the top K.

    Photo ids are looked up with ``PhotoDao.getClassPhotoIds(query, query)``
    and the photos loaded with ``PhotoDao.getPhotos``. Every entry of every
    ``photo.tags`` counts as one occurrence. The K most frequent tags
    (whitespace-stripped) are written to ``<root>/data/tags/<query>.txt``
    through ``FileIO.write_strings_to_file``.

    Args:
        query: Query/class name; also used as the output file stem.
        K: Maximum number of tags to keep.
        root: Project root passed to ``DBHelper.init`` and used as the
            prefix of the output path.

    Returns:
        The list of top tags, most frequent first. Tags with equal counts
        keep the order in which they were first encountered.
    """
    from collections import Counter

    from database import DBHelper
    from file_io import FileIO
    from photo_dao import PhotoDao

    db_helper = DBHelper()
    db_helper.init(root)
    photo_dao = PhotoDao(db_helper)

    photo_ids = photo_dao.getClassPhotoIds(query, query)
    photos = photo_dao.getPhotos(query, photo_ids)

    # Counter starts every tag at its true count (first sighting == 1).
    # A generator is passed on purpose so that tags are always counted
    # element by element, whatever container type ``photo.tags`` is.
    tag_counts = Counter(tag for photo in photos for tag in photo.tags)

    # most_common() sorts by descending count and is stable for ties.
    top_word = [tag.strip() for tag, _ in tag_counts.most_common()[:K]]

    output_path = root + '/data/tags/%s.txt' % query
    FileIO().write_strings_to_file(top_word, output_path)
    return top_word
def task_gen_top_tag_via_user(photos, query, K, root, do_skip):
    """Rank tags by how many owner entries carry them and save the top K.

    If ``do_skip`` is true and ``<root>/data/tags/<query>.txt`` already
    exists, the stripped lines of that file are returned and nothing is
    recomputed or rewritten.

    Otherwise each tag is scored with the number of ``photo.ownerId`` entries
    collected for it (one per photo/tag occurrence), the K best tags are
    written to ``<root>/data/tags/<query>.txt``, the same tags with their
    scores (``"<tag> <score>"``) to ``<root>/data/tags/<query>_score.txt``,
    and ``save_tag_to_mat`` is called with the final tag list.

    Args:
        photos: Iterable of photo objects exposing ``tags`` and ``ownerId``.
        query: Query/class name; used as the output file stem.
        K: Maximum number of tags to keep.
        root: Project root used as the prefix of the output paths.
        do_skip: When true, reuse a previously generated tag file.

    Returns:
        The list of top tags, best score first. Tags with equal scores keep
        the order in which they were first encountered.
    """
    import os

    tags_path = root + '/data/tags/%s.txt' % query

    if do_skip and os.path.exists(tags_path):
        print('top tags already generated.\n')
        # Context manager so the handle is closed before returning.
        with open(tags_path, 'r') as fin:
            return [line.strip() for line in fin]

    print('begin gen top tag via user\n')

    # Owner ids are kept in a list rather than a comma-joined string:
    # splitting a string that starts with a separator yields an extra empty
    # field, which would inflate every score by one.
    # NOTE(review): an owner is counted once per photo carrying the tag, not
    # once overall -- confirm whether distinct owners were intended.
    tag_owner_ids = {}
    for photo in photos:
        for tag in photo.tags:
            tag_owner_ids.setdefault(tag, []).append(photo.ownerId)

    tags_score = [(tag, len(owner_ids))
                  for tag, owner_ids in tag_owner_ids.items()]

    # sorted() is stable, so ties keep first-seen order.
    top_word_freq = sorted(tags_score, key=lambda item: -item[1])
    K = min(len(top_word_freq), K)
    top_word_freq = top_word_freq[0:K]
    top_word = [tag.strip() for tag, _ in top_word_freq]

    print('done gen top tag via user, saving output ...\n')

    from file_io import FileIO
    file_io = FileIO()

    # Plain tag list.
    file_io.write_strings_to_file(top_word, tags_path)

    # Tag list with scores.
    top_word_str = [tag.strip() + ' ' + str(score)
                    for tag, score in top_word_freq]
    output_path = root + '/data/tags/%s_score.txt' % query
    file_io.write_strings_to_file(top_word_str, output_path)

    save_tag_to_mat(top_word, K, query, root)
    return top_word
def task_gen_photo_imagepath(root, query):
    """Write the image paths of a query's photos to a text file.

    Photo ids are fetched with ``PhotoDao.getClassPhotoIds`` (the call is
    bracketed by ``tic()``/``toc()``, presumably timing helpers), turned into
    image paths by ``get_photo_imagepath`` and written to
    ``<db_helper.datasetDir>/<query>_imagepath.txt`` through
    ``FileIO.write_strings_to_file``.

    Args:
        root: Project root passed to ``DBHelper.init`` and
            ``get_photo_imagepath``.
        query: Query/class name; also used as the output file stem.
    """
    print('Get photo ids.')
    helper = DBHelper()
    helper.init(root)
    dao = PhotoDao(helper)

    tic()
    class_key = ''.join([query])
    ids = dao.getClassPhotoIds(query, class_key)
    toc()

    print('Get photo path.')
    paths = get_photo_imagepath(root, query, ids)

    target = '/'.join([helper.datasetDir, query + '_imagepath.txt'])
    writer = FileIO()
    writer.write_strings_to_file(paths, target)