def task_gen_top_tag_via_number(query, K, root):
    """Write the K most frequent tags of a query's photos to disk and return them.

    Loads the photos of class ``query`` through ``PhotoDao``, counts how often
    each tag occurs across those photos and keeps the ``K`` most frequent
    ones.  Tags with equal counts keep the order in which they were first
    seen.  The selected tags are written with
    ``FileIO.write_strings_to_file`` to ``<root>/data/tags/<query>.txt`` and
    are also returned.

    Args:
        query: Class/query name; used for the photo lookup and to name the
            output file.
        K: Maximum number of tags to keep.  ``None`` keeps every tag; zero or
            a negative value keeps none.
        root: Project root; passed to ``DBHelper.init`` and used as the
            prefix of the output path.

    Returns:
        A list of tag strings, most frequent first, each with surrounding
        whitespace stripped.
    """
    from collections import Counter

    from photo_dao import PhotoDao
    from database import DBHelper
    from file_io import FileIO

    db_helper = DBHelper()
    db_helper.init(root)
    photo_dao = PhotoDao(db_helper)
    photo_ids = photo_dao.getClassPhotoIds(query, query)
    photos = photo_dao.getPhotos(query, photo_ids)

    # Count every occurrence (the first one included) of each raw tag.
    tag_counts = Counter(tag for photo in photos for tag in photo.tags)

    # most_common() sorts by descending count and is stable for ties, so the
    # ranking matches a stable sort on the negated count.
    top_word = [tag.strip() for tag, _count in tag_counts.most_common(K)]

    output_path = root + ('/data/tags/%s.txt' % query)
    file_io = FileIO()
    file_io.write_strings_to_file(top_word, output_path)
    return top_word
def task_gen_index_by_tag(query, root, top_tag_num, top_tags, photo_ids, output_root):
    """Save, for every top tag, the ids of the photos that carry that tag.

    Loads the photos identified by ``photo_ids`` through ``PhotoDao``, groups
    their ids by tag (only tags listed in ``top_tags`` are considered) and
    stores each non-empty group with
    ``WebPageResultDao.save_photo_ids('tag_images/<tag>', '1', ids)``.
    A failure while saving one tag is reported on stdout and does not stop
    the remaining tags from being processed.

    ``WebPageResultDao`` is not imported here; it has to be available at
    module scope.

    Args:
        query: Class/query name; passed to ``PhotoDao.getPhotos``.
        root: Project root; passed to ``DBHelper.init``.
        top_tag_num: Unused.  Kept so existing callers keep working; the
            index is sized from ``top_tags`` itself, so a ``top_tags`` longer
            than ``top_tag_num`` no longer raises ``IndexError``.
        top_tags: Tags to index, in the order they should be saved.
        photo_ids: Ids of the photos to scan.
        output_root: Output directory; passed to ``WebPageResultDao.init``.

    Returns:
        None.
    """
    from photo_dao import PhotoDao
    from database import DBHelper

    db_helper = DBHelper()
    db_helper.init(root)
    photo_dao = PhotoDao(db_helper)
    photos = photo_dao.getPhotos(query, photo_ids)

    # Materialise once: the tags are iterated twice below.
    top_tags = list(top_tags)

    # One id list per top tag.  The dict doubles as an O(1) membership test,
    # replacing a linear scan of top_tags for every tag of every photo.
    ids_by_tag = {}
    for tag in top_tags:
        ids_by_tag.setdefault(tag, [])

    for photo in photos:
        photo_id = photo.photoId.strip()
        if not photo_id:
            # Blank ids are never written out.
            continue
        for tag in photo.tags:
            bucket = ids_by_tag.get(tag)
            if bucket is not None:
                bucket.append(photo_id)

    web_dao = WebPageResultDao()
    web_dao.init(output_root)
    for tag in top_tags:
        tag_image_ids = ids_by_tag[tag]
        if not tag_image_ids:
            # Tags that match no photo produce no output.
            continue
        try:
            web_dao.save_photo_ids('tag_images/%s' % tag, '1', tag_image_ids)
        except Exception as exc:
            # Best effort: report the failing tag and its cause, then go on.
            # Exception (not a bare except) lets Ctrl-C and SystemExit through.
            print('error in generating %s: %s' % (tag, exc))
# NOTE(review): the next four statements use `self`, `folder_name`,
# `result_id` and `photoIds`, none of which is defined at this point.  They
# look like the tail of a method whose `def` line and enclosing class are not
# visible in this part of the file -- presumably the `save_photo_ids` method
# that task_gen_index_by_tag calls with three arguments.  TODO: confirm and
# restore the missing header; as written these statements cannot run.
from file_io import FileIO
file_path = self._get_file_name(folder_name, result_id)
io = FileIO()
io.write_strings_to_file(photoIds, file_path)

# Script entry point: runs the tag pipeline for a single hard-coded query
# against a hard-coded project root.
if __name__ == '__main__':
    top_tag_num = 6000
    query = 'love'
    root = '/nas02/home/h/o/hongtao/Iconic'
    from photo_dao import PhotoDao
    from database import DBHelper
    db_helper = DBHelper()
    db_helper.init(root)
    photo_dao = PhotoDao(db_helper)
    photo_ids = photo_dao.getClassPhotoIds(query, ''.join([query]))
    # Keeps the elements at indices 1..99 only: the first id is skipped and at
    # most 99 ids remain.  NOTE(review): possibly meant to be [0:100] --
    # confirm whether dropping index 0 is intentional.
    photo_ids = photo_ids[1:100]
    photos = photo_dao.getPhotos(query, photo_ids)
    # task_gen_top_tag_via_user and task_build_tag_features are not defined in
    # the visible part of the file; they must be provided elsewhere.
    top_word = task_gen_top_tag_via_user(photos, query, top_tag_num, root)
    task_build_tag_features(top_word, query, photos, 0, root)
    import os
    # The per-tag photo id lists are saved under <root>/output/<query>/web.
    output_root = os.path.join(root, 'output/%s/web' % query)
    task_gen_index_by_tag(query, root, top_tag_num, top_word, photo_ids, output_root)