Ejemplo n.º 1
0
# Settings handed to ctm_lsa() by the menu below.
# `threhold` (sic): per the original note, the initial-score threshold used to
# pick the top-n documents for the local SVD. The misspelled name is kept
# because the ctm_lsa() call below refers to it.
threhold = 1.0
# Passed to ctm_lsa() and embedded in every LSA output file name
# (presumably the LSA rank -- TODO confirm against ctm_lsa).
k = 500
# Base output path first; the training-sample file is the same path + ".train".
title_content_lsa_save_path = save_main_path + "lsa_title_content_" + str(k)
title_content_lsa_train_save_path = title_content_lsa_save_path + ".train"

# Step 5: option string and model output path given to ctm_train_model()
# when training on the LSA-generated samples.
title_content_lsa_svm_param = '-c 0.5 -g 0.5'
title_content_lsa_svm_model_save_path = (
    save_main_path + "LSA_title_content" + str(k) + ".model"
)
# Step 6: file passed to add_sample_to_model() (menu option 6).
extra_filename = save_main_path + ".extra"



# Interactive menu for the title+content LSA training pipeline.
#
# Menu options (translation of the prompt text):
#   1 build SVM training samples      2 train the SVM model
#   3 generate initial class scores   4 build the LSA model
#   5 train a model on the LSA output 6 add previously misjudged samples
#   7 (advertised, but no handler exists in this script)
#   0 quit
#
# The loop keeps asking until 0 is entered. Non-numeric input and menu numbers
# without a handler are reported and the user is asked again, instead of
# crashing with ValueError or being silently ignored.
print("欢迎使用C社区帖子监控,LSA模型训练系统")
# Single copy of the prompt so the first and subsequent prompts cannot drift.
menu_prompt = "1为构造SVM训练的样本; 2为训练模型;3为生成初始分类得分;4为构造LSA模型;5为训练LSA生成的模型;6为向原模型中增加原先误判的样本;7为向LSA模型中增加原先误判样本。0为退出模型"
choice = None
while choice != 0:
    try:
        choice = int(raw_input(menu_prompt))
    except ValueError:
        # Previously a typo at the prompt aborted the whole session.
        print("请输入数字选项")
        continue
    if choice == 1:
        cons_train_sample_for_cla(filename, title_content_indexs, title_content_dic_path, title_content_glo_aff_path, title_content_sample_save_path, delete, str_splitTag)
    elif choice == 2:
        # Result stays in the module-level name `m`, as before.
        m = ctm_train_model(title_content_sample_save_path, title_content_svm_param, title_content_svm_model_save_path)
    elif choice == 3:
        save_train_for_lsa(title_content_test_path, title_content_svm_model_save_path, title_content_for_lsa_train_save_path)
    elif choice == 4:
        ctm_lsa(title_content_M, threhold, k, title_content_for_lsa_train_save_path, title_content_lsa_train_save_path, title_content_lsa_save_path)
    elif choice == 5:
        ctm_train_model(title_content_lsa_train_save_path, title_content_lsa_svm_param, title_content_lsa_svm_model_save_path)
    elif choice == 6:
        add_sample_to_model(extra_filename, title_content_indexs, title_content_dic_path, title_content_glo_aff_path, title_content_sample_save_path, delete, str_splitTag)
    elif choice != 0:
        # Covers option 7 (listed in the prompt, never implemented) and any
        # other number; tell the user rather than doing nothing.
        print("选项 %d 无效或尚未实现" % choice)
Ejemplo n.º 2
0
# Step 5: option string and model output path given to ctm_train_model()
# when training on the LSA-generated samples (menu option 5).
lsa_svm_param = '-c 2.0 -g 1.0'
lsa_svm_model_save_path = (
    save_main_path + "LSA_title_content" + str(k) + ".model"
)
# Step 6: file passed to add_sample_to_model() (menu option 6).
extra_filename = save_main_path + ".extra"

# Step 7: no settings are defined for this step here.


# Interactive menu for the chat-fraud LSA training pipeline.
#
# Menu options (translation of the prompt text):
#   0 build everything automatically  1 build SVM training samples
#   2 train the SVM model             3 generate initial class scores
#   4 build the LSA model             5 train a model on the LSA output
#   6 add previously misjudged samples to the original model
#   7 (advertised, but no handler exists in this script)
#  -1 quit
#
# The first prompt used to describe option 6 as "score content with the
# original model" and listed option 7 twice, while the handler for 6 actually
# calls add_sample_to_model(). Both prompts now share the one text that
# matches the handlers. Non-numeric input and menu numbers without a handler
# are reported and the user is asked again, instead of crashing with
# ValueError or being silently ignored.
print("欢迎使用旺旺聊天欺诈监控系统,LSA模型训练系统")
menu_prompt = "0为自动生成模型,1为构造SVM训练的样本; 2为训练模型;3为生成初始分类得分;4为构造LSA模型;5为训练LSA生成的模型;6为向原模型中增加原先误判的样本;7为向LSA模型中增加原先误判样本。-1为退出模型"
choice = None
while choice != -1:
    try:
        choice = int(raw_input(menu_prompt))
    except ValueError:
        # Previously a typo at the prompt aborted the whole session.
        print("请输入数字选项")
        continue
    if choice == 0:
        ctm_train(filename, indexs, save_main_path, stopword_filename)
    elif choice == 1:
        cons_train_sample_for_cla(filename, indexs, dic_path, sample_save_path, delete, str_splitTag)
    elif choice == 2:
        # Result stays in the module-level name `m`, as before.
        m = ctm_train_model(sample_save_path, svm_param, svm_model_save_path)
    elif choice == 3:
        save_train_for_lsa(test_path, svm_model_save_path, for_lsa_train_save_path)
    elif choice == 4:
        # M is the number of entries returned by read_dic() for the dictionary file.
        M = len(read_dic(dic_path))
        ctm_lsa(M, threhold, k, for_lsa_train_save_path, lsa_train_save_path, lsa_save_path)
    elif choice == 5:
        ctm_train_model(lsa_train_save_path, lsa_svm_param, lsa_svm_model_save_path)
    elif choice == 6:
        add_sample_to_model(extra_filename, indexs, dic_path, sample_save_path, delete, str_splitTag)
    elif choice != -1:
        # Covers option 7 (listed in the prompt, never implemented) and any
        # other number; tell the user rather than doing nothing.
        print("选项 %d 无效或尚未实现" % choice)