def tag_id_rank_list_by_txt(txt):
    """Score candidate tag ids for ``txt`` and keep those named in the text.

    The text is lower-cased, then every ``(word, rank)`` pair produced by
    ``tf_idf_seg_txt`` is looked up in ``db_tag_bayes``.  A hit is decoded as
    an array of unsigned ints and consumed two values at a time as
    ``(tag_id, bayes)``; ``bayes * rank`` is accumulated per tag id.

    Returns a list of ``(tag_id, score)`` tuples ordered by descending score,
    restricted to tag ids present in ``ID2NAME`` with at least one entry of
    ``ID2NAME[tag_id]`` matching the lower-cased text.
    """
    txt = txt.lower()

    def _name_in_txt(name):
        # When sp_txt() yields pieces, any single piece found in the text is
        # a match; when it yields nothing, the whole name is tested instead.
        pieces = list(sp_txt(name))
        if not pieces:
            return name in txt
        return any(str(piece) in txt for piece in pieces)

    scores = defaultdict(int)
    for word, weight in tf_idf_seg_txt(txt):
        packed = db_tag_bayes.get(word)
        if not packed:
            continue
        pairs = array('I')
        pairs.fromstring(packed)
        for tag_id, bayes in chunkiter(pairs, 2):
            scores[tag_id] += bayes * weight

    ranked = sorted(scores.iteritems(), key=itemgetter(1), reverse=True)
    return [
        (tag_id, score)
        for tag_id, score in ranked
        if tag_id in ID2NAME
        and any(_name_in_txt(name) for name in ID2NAME[tag_id])
    ]
def tag_id_rank_list_by_txt(txt):
    """Return ``(tag_id, score)`` pairs for tags that are named in ``txt``.

    Steps:
      1. Lower-case ``txt``.
      2. For each ``(word, rank)`` from ``tf_idf_seg_txt``, fetch the word's
         entry from ``db_tag_bayes``, load it into an unsigned-int array and
         walk it in chunks of two as ``(tag_id, bayes)``, adding
         ``bayes * rank`` to that tag id's running total.
      3. Visit the totals from highest to lowest and keep a tag id only when
         it is a key of ``ID2NAME`` and one of its ``ID2NAME`` entries matches
         the lower-cased text.

    The result preserves the highest-score-first ordering.
    """
    txt = txt.lower()

    totals = defaultdict(int)
    for word, rank in tf_idf_seg_txt(txt):
        raw = db_tag_bayes.get(word)
        if raw:
            decoded = array("I")
            decoded.fromstring(raw)
            for tag_id, bayes in chunkiter(decoded, 2):
                totals[tag_id] += bayes * rank

    ordered = sorted(totals.iteritems(), key=itemgetter(1), reverse=True)

    result = []
    for tag_id, total in ordered:
        if tag_id not in ID2NAME:
            continue
        for name in ID2NAME[tag_id]:
            parts = list(sp_txt(name))
            if parts:
                # A name that sp_txt() splits matches if any part is present.
                matched = any(str(part) in txt for part in parts)
            else:
                # Nothing to split: fall back to testing the name as a whole.
                matched = name in txt
            if matched:
                # One matching name is enough; stop scanning this tag's names.
                result.append((tag_id, total))
                break
    return result
def loads_id_score(id_score):
    """Decode ``id_score`` into a list of two-value chunks.

    ``id_score`` is loaded into an unsigned-int array (typecode ``'I'``) and
    the array is then grouped two values at a time by ``chunkiter``; the
    grouped items are returned as a list.
    """
    decoded = array('I')
    decoded.fromstring(id_score)
    pairs = chunkiter(decoded, 2)
    return list(pairs)