def main(category_id):
    """Run ``inclusionRelation`` for the given category.

    Calls ``file_utils.createDirs(['internal'])`` first (presumably to make
    sure the output directory exists -- confirm in ``file_utils``), then hands
    the candidate category set for ``category_id`` to ``inclusionRelation``.
    """
    # Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
    # so the process-wide default encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    file_utils.createDirs(['internal'])
    inclusionRelation(category_id, common.getCandidateCategory(category_id))
def main(category_id):
    """Run the ``clean`` step for ``category_id``.

    Loads the jieba user dictionary, calls ``file_utils.createDirs`` for
    ``wiki_search``, then passes the result of ``readJosn`` together with the
    candidate category set to ``clean``.
    """
    # Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
    # so the process-wide default encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    jieba.load_userdict("../../../data/jieba_userdict.txt")
    file_utils.createDirs(['wiki_search'])

    # The candidate set is fetched before readJosn runs, as in the original
    # statement order.
    candidates = common.getCandidateCategory(category_id)
    clean(category_id, readJosn(category_id), candidates)
def main(category_id):
    """Entry point that feeds one category through ``clean``.

    Steps, in order: switch the default encoding to UTF-8, load the jieba
    user dictionary, call ``file_utils.createDirs(['wiki_search'])``, fetch
    the candidate category set, read the crawl dictionary via ``readJosn``,
    and pass all of it to ``clean``.
    """
    # Python 2 idiom: setdefaultencoding is only callable after reload(sys).
    reload(sys)
    sys.setdefaultencoding('utf-8')

    jieba.load_userdict("../../../data/jieba_userdict.txt")
    file_utils.createDirs(['wiki_search'])

    candidate_categories = common.getCandidateCategory(category_id)
    crawled = readJosn(category_id)
    clean(category_id, crawled, candidate_categories)
def main(category_path):
    """Run ``combineFeature`` for the category named by ``category_path``.

    Args:
        category_path: underscore-separated string whose first field is the
            integer category id (e.g. ``"<id>_<name>"``).

    Raises:
        ValueError: if the first field of ``category_path`` is not an integer.

    The earlier version also decoded the last path field into
    ``query_category`` / ``main_category_list``, but neither value was ever
    used, so that dead code has been dropped.
    """
    # Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
    # so the process-wide default encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Only the leading field (the numeric id) is needed here; the full path
    # string is forwarded to combineFeature unchanged.
    category_id = int(category_path.split('_')[0])

    file_utils.createDirs(['combine_feature'])
    category_set = common.getCandidateCategory(category_id)
    combineFeature(category_id, category_path, category_set)
def main(category_path):
    """Run ``extractFeatureFromWikiCategory`` for one category path.

    ``category_path`` is split on ``'_'``: the first field is parsed as the
    integer category id and, when there are at least two fields, the last one
    is decoded from UTF-8 and used as the single main category. With only one
    field the main category is the empty string.
    """
    # Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
    # so the process-wide default encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    jieba.load_userdict(data_path+"jieba_userdict.txt")
    file_utils.createDirs(['wikipedia'])

    parts = category_path.split('_')
    category_id = int(parts[0])
    query_category = parts[-1].decode('utf-8') if len(parts) >= 2 else ""
    main_category_list = [query_category]

    candidates = common.getCandidateCategory(category_id)
    extractFeatureFromWikiCategory(
        category_id, category_path, main_category_list, candidates)
def main(category_path):
    """Entry point for the wiki-category feature extraction of one category.

    The category id is the integer in the first ``'_'``-separated field of
    ``category_path``. The main category list holds exactly one item: the
    UTF-8-decoded last field when the path has two or more fields, otherwise
    an empty string. Everything is then forwarded to
    ``extractFeatureFromWikiCategory``.
    """
    # Python 2 idiom: setdefaultencoding is only callable after reload(sys).
    reload(sys)
    sys.setdefaultencoding('utf-8')

    jieba.load_userdict(data_path + "jieba_userdict.txt")
    file_utils.createDirs(['wikipedia'])

    path_fields = category_path.split('_')
    category_id = int(path_fields[0])

    if len(path_fields) < 2:
        # No sub-category in the path.
        query_category = ""
    else:
        query_category = path_fields[-1].decode('utf-8')

    category_set = common.getCandidateCategory(category_id)
    extractFeatureFromWikiCategory(
        category_id, category_path, [query_category], category_set)
def main(category_path):
    """Load the preparation data and run ``tf`` for one category path.

    Args:
        category_path: underscore-separated string whose first field is the
            integer category id; when there are two or more fields the last
            one is decoded from UTF-8 and passed on as ``query_category``
            (otherwise ``""`` is passed).

    Raises:
        ValueError: if the first field of ``category_path`` is not an integer.
        IOError: if either pickle file under ``data_path`` cannot be opened.
    """
    # Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
    # so the process-wide default encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    print('-loading preparation file')
    jieba.load_userdict(data_path+"jieba_userdict.txt")

    # Open the pickles in a with-block so the handles are closed
    # deterministically instead of being left to the garbage collector.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # these .dict files must only ever come from a trusted source.
    with open(data_path+'app_tag.dict', 'rb') as tag_file:
        app_tag_dict = pickle.load(tag_file)
    with open(data_path+'app_category.dict', 'rb') as category_file:
        app_category_dict = pickle.load(category_file)

    file_utils.createDirs(['tag_tf', 'title_tf'])

    category_path_list = category_path.split('_')
    category_id = int(category_path_list[0])
    query_category = ""
    if len(category_path_list) >= 2:
        query_category = category_path_list[-1].decode('utf-8')

    category_set = common.getCandidateCategory(category_id)
    tf(category_id, category_path, query_category, category_set,
       app_category_dict, app_tag_dict)
def main(category_path):
    """Load the tag/category dictionaries and run ``tf`` for a category path.

    Args:
        category_path: ``'_'``-separated string. The first field is parsed as
            the integer category id. If the path has at least two fields, the
            last one (decoded from UTF-8) becomes ``query_category``;
            otherwise ``query_category`` is the empty string.

    Raises:
        ValueError: if the first field of ``category_path`` is not an integer.
        IOError: if either pickle file under ``data_path`` cannot be opened.
    """
    # Python 2 idiom: setdefaultencoding is only callable after reload(sys).
    reload(sys)
    sys.setdefaultencoding('utf-8')

    print('-loading preparation file')
    jieba.load_userdict(data_path + "jieba_userdict.txt")

    # Context managers close each file as soon as it has been unpickled;
    # the previous pickle.load(open(...)) form never closed them explicitly.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # these .dict files must only ever come from a trusted source.
    with open(data_path + 'app_tag.dict', 'rb') as tag_file:
        app_tag_dict = pickle.load(tag_file)
    with open(data_path + 'app_category.dict', 'rb') as category_file:
        app_category_dict = pickle.load(category_file)

    file_utils.createDirs(['tag_tf', 'title_tf'])

    category_path_list = category_path.split('_')
    category_id = int(category_path_list[0])
    query_category = ""
    if len(category_path_list) >= 2:
        query_category = category_path_list[-1].decode('utf-8')

    category_set = common.getCandidateCategory(category_id)
    tf(category_id, category_path, query_category, category_set,
       app_category_dict, app_tag_dict)