# Settings consumed by menu option 4 (ctm_lsa).
# threhold indicates the initial score. The original trailing note
# "top n documents for local SVD" presumably describes k -- TODO confirm
# against ctm_lsa.
threhold = 1.0
k = 500
title_content_lsa_train_save_path = (
    save_main_path + "lsa_title_content_" + str(k) + ".train"
)
title_content_lsa_save_path = save_main_path + "lsa_title_content_" + str(k)

# Step 5: SVM parameters and model path used when training on the LSA output
# (menu option 5).
title_content_lsa_svm_param = '-c 0.5 -g 0.5'
title_content_lsa_svm_model_save_path = (
    save_main_path + "LSA_title_content" + str(k) + ".model"
)

# Step 6: file holding the extra samples passed to add_sample_to_model
# (menu option 6).
extra_filename = save_main_path + ".extra"

# Parenthesised so the line parses under both Python 2 and Python 3; with a
# single string argument the printed output is identical.
print("欢迎使用C社区帖子监控,LSA模型训练系统")

# One prompt shared by the first read and every re-read inside the loop.
# NOTE(review): the prompt advertises option 7, but no branch below handles
# it; entering 7 (or any other unknown number) simply re-prompts.
title_content_menu_prompt = "1为构造SVM训练的样本; 2为训练模型;3为生成初始分类得分;4为构造LSA模型;5为训练LSA生成的模型;6为向原模型中增加原先误判的样本;7为向LSA模型中增加原先误判样本。0为退出模型"

choice = int(raw_input(title_content_menu_prompt))
while choice != 0:
    # The options are mutually exclusive, so dispatch with elif.
    if choice == 1:
        cons_train_sample_for_cla(
            filename,
            title_content_indexs,
            title_content_dic_path,
            title_content_glo_aff_path,
            title_content_sample_save_path,
            delete,
            str_splitTag,
        )
    elif choice == 2:
        m = ctm_train_model(
            title_content_sample_save_path,
            title_content_svm_param,
            title_content_svm_model_save_path,
        )
    elif choice == 3:
        save_train_for_lsa(
            title_content_test_path,
            title_content_svm_model_save_path,
            title_content_for_lsa_train_save_path,
        )
    elif choice == 4:
        ctm_lsa(
            title_content_M,
            threhold,
            k,
            title_content_for_lsa_train_save_path,
            title_content_lsa_train_save_path,
            title_content_lsa_save_path,
        )
    elif choice == 5:
        ctm_train_model(
            title_content_lsa_train_save_path,
            title_content_lsa_svm_param,
            title_content_lsa_svm_model_save_path,
        )
    elif choice == 6:
        add_sample_to_model(
            extra_filename,
            title_content_indexs,
            title_content_dic_path,
            title_content_glo_aff_path,
            title_content_sample_save_path,
            delete,
            str_splitTag,
        )
    # Ask again after every action; 0 leaves the loop.
    choice = int(raw_input(title_content_menu_prompt))
# Step 5: SVM parameters and model path used when training on the LSA output
# (menu option 5).
lsa_svm_param = '-c 2.0 -g 1.0'
lsa_svm_model_save_path = save_main_path + "LSA_title_content" + str(k) + ".model"

# Step 6: file holding the extra samples passed to add_sample_to_model
# (menu option 6).
extra_filename = save_main_path + ".extra"

# Step 7: no settings are defined for this step in this section.

# Parenthesised so the line parses under both Python 2 and Python 3; with a
# single string argument the printed output is identical.
print("欢迎使用旺旺聊天欺诈监控系统,LSA模型训练系统")

# One prompt shared by the first read and every re-read inside the loop.
# The original first prompt listed option 7 twice and gave option 6 a
# description that does not match the dispatch below (6 calls
# add_sample_to_model); the in-loop wording matches the code, so it is used
# for both reads.
# NOTE(review): the prompt advertises option 7, but no branch below handles
# it; entering 7 (or any other unknown number) simply re-prompts.
menu_prompt = "0为自动生成模型,1为构造SVM训练的样本; 2为训练模型;3为生成初始分类得分;4为构造LSA模型;5为训练LSA生成的模型;6为向原模型中增加原先误判的样本;7为向LSA模型中增加原先误判样本。-1为退出模型"

choice = int(raw_input(menu_prompt))
while choice != -1:
    # The options are mutually exclusive, so dispatch with elif.
    if choice == 0:
        ctm_train(filename, indexs, save_main_path, stopword_filename)
    elif choice == 1:
        cons_train_sample_for_cla(
            filename, indexs, dic_path, sample_save_path, delete, str_splitTag
        )
    elif choice == 2:
        m = ctm_train_model(sample_save_path, svm_param, svm_model_save_path)
    elif choice == 3:
        save_train_for_lsa(test_path, svm_model_save_path, for_lsa_train_save_path)
    elif choice == 4:
        # The first argument to ctm_lsa is the number of entries that
        # read_dic returns for the dictionary file.
        M = len(read_dic(dic_path))
        ctm_lsa(
            M,
            threhold,
            k,
            for_lsa_train_save_path,
            lsa_train_save_path,
            lsa_save_path,
        )
    elif choice == 5:
        ctm_train_model(lsa_train_save_path, lsa_svm_param, lsa_svm_model_save_path)
    elif choice == 6:
        add_sample_to_model(
            extra_filename, indexs, dic_path, sample_save_path, delete, str_splitTag
        )
    # Ask again after every action; -1 leaves the loop.
    choice = int(raw_input(menu_prompt))