def extract_im_feature(filename,content_indexs,feature_indexs,dic_path,svm_model,delete,str_splitTag,tc_splitTag):
    '''Walk a delimited text file and invoke the SVM predictor once per line.

    Each line of `filename` is stripped and split on `tc_splitTag`; the fields
    selected by `content_indexs` are then concatenated, every one of them
    prefixed with `str_splitTag`.

    Parameters:
        filename: path of the text file to read.
        content_indexs: iterable of field positions to concatenate per line.
        feature_indexs, dic_path, delete: accepted for interface
            compatibility; not used by the code in this function.
        svm_model: value handed to tms_svm.load_model (presumably the path of
            a saved model -- confirm against callers).
        str_splitTag: string placed in front of every selected field.
        tc_splitTag: separator used to split a line into fields.

    Returns:
        None. The prediction results are not stored or returned.

    Raises:
        IndexError: if an index in `content_indexs` is beyond the number of
            fields on a line.
    '''
    m = tms_svm.load_model(svm_model)
    # The context manager guarantees the input file is closed, including when
    # the prediction call below raises.
    with open(filename, 'r') as f:
        for line in f:
            text = line.strip().split(tc_splitTag)
            text_temp = "".join(str_splitTag + text[i] for i in content_indexs)
            # NOTE(review): predict() is called without arguments, so neither
            # the loaded model `m` nor `text_temp` reaches it. The intended
            # arguments cannot be determined from this function -- confirm
            # against the tms_svm.predict signature before relying on it.
            p_lab, p_acc, p_sc = tms_svm.predict()
def load_tms_model(config_file):
    '''Load the dictionary, global weights, local function, SVM model and
    word-segmentation setting named in a model configuration file.

    The file is read line by line; each line is split on ":" and the text
    before the first colon (stripped) selects what to load:

        DicName   -- dictionary file, read with fileutil.read_dic_ex
        ModelName -- SVM model file; its type is detected and registered with
                     tms_svm before the model is loaded
        LocalFun  -- name passed to measure.local_f
        WordSeg   -- numeric value converted with int(float(...))

    Only the text between the first and second colon is used as the value.
    DicName and ModelName values are joined onto the directory that contains
    `config_file`. Lines with any other key are ignored.

    Parameters:
        config_file: path of the model configuration file.

    Returns:
        The tuple (local_fun, dic, global_weight, model, seg).

    Raises:
        UnboundLocalError: if any of the four keys is absent from the file,
            because the corresponding result is never assigned.
        IndexError: if a line consists of a recognised key with no colon.
    '''
    model_main_path = os.path.dirname(config_file)
    # Context manager so the configuration file is closed even when one of the
    # loaders raises part-way through.
    with open(config_file, 'r') as f:
        for line in f:
            text = line.split(":")
            key = text[0].strip()
            if key == "DicName":
                dic_path = os.path.join(model_main_path, text[1].strip())
                dic, global_weight = fileutil.read_dic_ex(dic_path, dtype=str)
            elif key == "ModelName":
                model_path = os.path.join(model_main_path, text[1].strip())
                # The SVM type must be set before the model is loaded.
                tms_svm.set_svm_type(tms_svm.detect_svm_type(model_path))
                model = tms_svm.load_model(model_path)
            elif key == "LocalFun":
                local_fun = measure.local_f(text[1].strip())
            elif key == "WordSeg":
                # float() first so values written as "1.0" are accepted.
                seg = int(float(text[1]))
    return local_fun, dic, global_weight, model, seg
def save_train_for_lsa(test_path,model_save_path,lsa_train_save_path):
    '''Predict a data set with the initial classifier and save it in LSA
    format.

    The problem file is read with tms_svm.read_problem, scored with the model
    loaded from `model_save_path`, and written out one sample per line as

        label<TAB>score<TAB>key:value<TAB>key:value<TAB>...<TAB>

    where `label` is the sample label converted to int, `score` is the first
    element of that sample's prediction score entry, and the key:value pairs
    are the sample's features ordered by ascending key. Every line ends with
    a tab followed by a newline.

    Parameters:
        test_path: passed to tms_svm.read_problem to obtain labels and
            feature dicts.
        model_save_path: passed to tms_svm.load_model.
        lsa_train_save_path: path of the output file; it is overwritten.

    Returns:
        None.
    '''
    y, x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab, p_acc, p_sc = tms_svm.predict(y, x, m)
    # The context manager closes the output file even if a row fails to
    # serialise, which a trailing close() call would not.
    with open(lsa_train_save_path, 'w') as out:
        for i, label in enumerate(y):
            fields = [str(int(label)), str(p_sc[i][0])]
            ordered = sorted(x[i].items(), key=lambda item: item[0])
            for feat_key, feat_val in ordered:
                fields.append(str(feat_key) + ":" + str(feat_val))
            # Each field is followed by a tab, including the last one.
            out.write("\t".join(fields) + "\t\n")
def save_train_for_lsa(test_path, model_save_path, lsa_train_save_path):
    '''Score a data set with the initial classifier and write it out in the
    LSA layout: label, score, then features.

    Labels and feature dicts come from tms_svm.read_problem(test_path); the
    scores come from tms_svm.predict using the model loaded from
    `model_save_path`. One line is written per sample:

        int(label) TAB score TAB key:value TAB key:value TAB ... NEWLINE

    `score` is element 0 of the sample's score entry, and features are
    emitted in ascending key order. A tab follows every field, so each line
    ends with a tab before the newline.

    Parameters:
        test_path: input handed to tms_svm.read_problem.
        model_save_path: input handed to tms_svm.load_model.
        lsa_train_save_path: path of the file to create or overwrite.

    Returns:
        None.
    '''
    y, x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab, p_acc, p_sc = tms_svm.predict(y, x, m)
    # Opened via `with` so the handle is released on errors as well as on
    # normal completion.
    with open(lsa_train_save_path, 'w') as f:
        for i, label in enumerate(y):
            f.write(str(int(label)) + "\t" + str(p_sc[i][0]) + "\t")
            features = x[i]
            # Sort on the feature key only, for a stable column order.
            for pair in sorted(features.items(), key=lambda kv: kv[0]):
                f.write(str(pair[0]) + ":" + str(pair[1]) + "\t")
            f.write("\n")