def extract_im_feature(filename,content_indexs,feature_indexs,dic_path,svm_model,delete,str_splitTag,tc_splitTag):
    '''Scan ``filename`` line by line and invoke the SVM predictor per content field.

    Each line is stripped and split on ``tc_splitTag``; the fields selected by
    ``content_indexs`` are concatenated, each one prefixed with
    ``str_splitTag``.

    NOTE(review): this function looks unfinished. ``feature_indexs``,
    ``dic_path`` and ``delete`` are never read, the concatenated text and the
    prediction results are discarded, and nothing is returned.

    Args:
        filename: path of the text file to read.
        content_indexs: iterable of field positions to take from every line.
        feature_indexs: unused.
        dic_path: unused.
        svm_model: path handed to ``tms_svm.load_model``.
        delete: unused.
        str_splitTag: separator put in front of every selected field.
        tc_splitTag: separator used to split a line into fields.

    Raises:
        IndexError: if a line has fewer fields than an entry of
            ``content_indexs`` requires.
    '''
    # The loaded model is not used below; the call is kept so that a missing
    # or unreadable model file is still reported up front.
    m = tms_svm.load_model(svm_model)
    # open() inside a with-block closes the handle even when a line fails to
    # parse; the old file() builtin leaked it and does not exist on Python 3.
    with open(filename, 'r') as f:
        for line in f:
            text = line.strip().split(tc_splitTag)
            text_temp = ""
            for i in content_indexs:
                text_temp += str_splitTag + text[i]
                # NOTE(review): predict() is called without arguments, once per
                # selected field. Elsewhere in this file it is called as
                # predict(y, x, m) -- confirm the intended arguments.
                p_lab, p_acc, p_sc = tms_svm.predict()
def load_tms_model(config_file):
    '''Load the dictionary, global factors, local factor and SVM model named
    in a model configuration file.

    The configuration file holds ``Key: value`` lines. File names are resolved
    relative to the directory that contains ``config_file``. Recognised keys:

    * ``DicName``   -- dictionary file, read with ``fileutil.read_dic_ex``.
    * ``ModelName`` -- SVM model file; its type is detected and registered
      with ``tms_svm`` before the model is loaded.
    * ``LocalFun``  -- name passed to ``measure.local_f``.
    * ``WordSeg``   -- numeric value, converted with ``int(float(...))``.

    Lines with any other key are ignored. If a key occurs more than once the
    last occurrence wins.

    Args:
        config_file: path of the model configuration file.

    Returns:
        Tuple ``(local_fun, dic, global_weight, model, seg)``.

    Raises:
        UnboundLocalError: if one of the four keys above is missing, because
            the corresponding result is then never assigned.
        IndexError: if a recognised key appears on a line without a ``:``.
    '''
    model_main_path = os.path.dirname(config_file)
    # The with-block guarantees the config file is closed, also on errors.
    with open(config_file, 'r') as f:
        for line in f:
            # Split only on the first colon so a value that itself contains
            # ':' is kept whole instead of being cut at its first colon.
            text = line.split(":", 1)
            key = text[0].strip()
            if key == "DicName":
                dic_path = os.path.join(model_main_path, text[1].strip())
                dic, global_weight = fileutil.read_dic_ex(dic_path, dtype=str)
            elif key == "ModelName":
                model_path = os.path.join(model_main_path, text[1].strip())
                # The SVM type has to be set before the model is loaded.
                tms_svm.set_svm_type(tms_svm.detect_svm_type(model_path))
                model = tms_svm.load_model(model_path)
            elif key == "LocalFun":
                local_fun = measure.local_f(text[1].strip())
            elif key == "WordSeg":
                # Goes through float() so values such as "1.0" are accepted.
                seg = int(float(text[1]))
    return local_fun, dic, global_weight, model, seg
# Example no. 3
# 0
def save_train_for_lsa(test_path,model_save_path,lsa_train_save_path):
    '''Predict a data set with the initial classifier and save it in LSA format.

    The problem file ``test_path`` is read with ``tms_svm.read_problem``, the
    model at ``model_save_path`` is loaded, and every sample is predicted.
    One line per sample is written to ``lsa_train_save_path``::

        label <TAB> score <TAB> key:value <TAB> key:value <TAB> ... <NEWLINE>

    ``label`` is the sample's label truncated to ``int``, ``score`` is the
    first entry of that sample's score row returned by ``tms_svm.predict``,
    and the ``key:value`` pairs are the sample's features in ascending key
    order. Every pair, including the last one, is followed by a tab.

    Args:
        test_path: path of the problem file to predict.
        model_save_path: path of the trained SVM model.
        lsa_train_save_path: path of the output file (overwritten).
    '''
    y,x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab,p_acc,p_sc = tms_svm.predict(y,x,m)
    # The with-block closes the output file even if a row fails to format;
    # the previous explicit close() was skipped whenever the loop raised.
    with open(lsa_train_save_path,'w') as f:
        for i, label in enumerate(y):
            # Each x[i] is a mapping of feature key -> value; sort by key so
            # the feature columns are written in ascending order.
            sorted_x = sorted(x[i].items(), key=lambda item: item[0])
            features = "".join(str(k)+":"+str(v)+"\t" for k, v in sorted_x)
            f.write(str(int(label))+"\t"+str(p_sc[i][0])+"\t"+features+"\n")
def save_train_for_lsa(test_path, model_save_path, lsa_train_save_path):
    '''Run the initial classifier over a data set and dump it in LSA format.

    Reads the problem file ``test_path`` via ``tms_svm.read_problem``, loads
    the model stored at ``model_save_path``, predicts every sample and writes
    one tab-separated line per sample to ``lsa_train_save_path``:

        ``label``, ``score``, then one ``key:value`` field per feature.

    * ``label`` is the sample label converted with ``int``.
    * ``score`` is element ``[0]`` of the sample's row in the score list
      returned by ``tms_svm.predict``.
    * Features are emitted in ascending key order; each field (the last one
      included) is terminated by a tab, and the line ends with a newline.

    Args:
        test_path: path of the problem file to predict.
        model_save_path: path of the trained SVM model.
        lsa_train_save_path: destination file; existing content is replaced.
    '''
    y, x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab, p_acc, p_sc = tms_svm.predict(y, x, m)
    # Use a context manager so the output handle is released on every exit
    # path; a bare close() at the end is skipped when the loop raises.
    with open(lsa_train_save_path, 'w') as out:
        for idx, label in enumerate(y):
            sample = x[idx]  # mapping of feature key -> value
            fields = [str(int(label)), str(p_sc[idx][0])]
            for feat_key in sorted(sample):
                fields.append(str(feat_key) + ":" + str(sample[feat_key]))
            # A tab follows every field, so the line ends with "\t\n".
            out.write("\t".join(fields) + "\t\n")