def grid_search(y,x,fw,c_range,g_range):
    '''Try every (c, g) combination and return the preferred pair.

    The ranges hold exponents of 2; the original note lists
        c_begin, c_end, c_step = -5, 15, 2
        g_begin, g_end, g_step = 3, -15, -2
    Each combination is handed to tms_svm.train with "-v 5"; the list of
    combinations and every result are logged to fw.

    Returns the tuple (best_c, best_g).
    '''
    best_rate = 0.0
    best_c = 2**-5
    best_g = 2**-15
    # If rate improves by no more than esp and c is larger than best_c,
    # best_c is not updated.
    esp = 0.001*100
    job = calculate_jobs(permute_sequence(c_range),permute_sequence(g_range))

    # Log the full list of combinations before any training starts.
    fw.write("需要试验的组合有%s个:\n"%(len(job)))
    for (c_exp,g_exp) in job:
        fw.write("%s\t%s\n"%(2**c_exp,2**g_exp))
    fw.write("下面是各种组合得到的交叉验证的效果:\n")

    for (c_exp,g_exp) in job:
        c = 2**c_exp
        g = 2**g_exp

        # -c / -g are only passed when the matching range has more than two entries.
        param = "-v 5 "
        if len(c_range)>2:
            param = param+" -c "+str(c)
        if len(g_range)>2:
            param = param+" -g "+str(g)
        rate = tms_svm.train(y,x,param)

        # The clauses are tested in their original order so evaluation
        # short-circuits exactly as before.
        if c < best_c and rate > best_rate-esp:
            accept = True
        elif c > best_c and rate-esp > best_rate:
            accept = True
        elif c == best_c and rate > best_rate:
            accept = True
        else:
            accept = abs(rate-best_rate)<esp and g==best_g and c<best_c
        if accept:
            best_rate = rate
            best_c = c
            best_g = g

        report = (c,g,rate,best_c,best_g,best_rate)
        stamp = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
        fw.write(stamp+"\t")
        fw.write("%s\t%s\t%s\t(best c = %s,g = %s,rate =%s)\n" %report)
        fw.flush()
        # Parenthesised single-argument print: same output under Python 2.
        print("%s %s %s (best c = %s,g = %s,rate =%s)\n" %report)

    fw.write("(best c = %s,g = %s,rate =%s)\n" %(best_c,best_g,best_rate))
    return best_c,best_g
def reducer(self, key, values):
    '''Train with the parameters in key on the samples in values.

    key is whitespace-separated text: the first token is passed as -c and,
    when present, the remainder is passed as -g.
    values yields sample lines of the form "label ind:val ind:val ...".
    The value returned by tms_svm.train is written out under the same key.

    Raises ValueError when key contains no parameter token.
    '''
    tokens = key.split(None, 1)
    # A blank key used to leave svm_param unassigned, which only surfaced as
    # an UnboundLocalError after every value had been parsed. Fail early.
    if not tokens:
        raise ValueError("reducer key carries no SVM parameters: %r" % (key,))

    # Set the SVM training parameters.
    svm_param = " -v 5 -c " + str(tokens[0])
    if len(tokens) >= 2:
        svm_param += " -g " + str(tokens[1])

    # Collect the training samples.
    prob_y = []
    prob_x = []
    for value in values:
        parts = value.split(None, 1)
        if len(parts) == 1:
            # A label with no features is an empty feature vector.
            parts.append('')
        label, features = parts
        xi = {}
        for item in features.split():
            ind, val = item.split(":")
            xi[int(ind)] = float(val)
        prob_y.append(float(label))
        prob_x.append(xi)

    # Train with the parameters and samples gathered above.
    # svm_type is not defined in this function; it is read from an outer scope.
    tms_svm.set_svm_type(svm_type)
    ratio = tms_svm.train(prob_y, prob_x, svm_param)
    self.write_output(key, str(ratio))
def reducer(self, key, values):
    '''Train with the parameters in key on the samples in values.

    key is whitespace-separated text: the first token is passed as -c and,
    when present, the remainder is passed as -g.
    values yields sample lines of the form "label ind:val ind:val ...".
    The value returned by tms_svm.train is written out under the same key.

    Raises ValueError when key contains no parameter token.
    '''
    line = key.split(None,1)
    # A blank key used to leave svm_param unassigned, which only surfaced as
    # an UnboundLocalError after every value had been parsed. Fail early.
    if not line:
        raise ValueError("reducer key carries no SVM parameters: %r" % (key,))

    # Set the SVM training parameters.
    svm_param = " -v 5 -c "+str(line[0])
    if len(line)>=2:
        svm_param += " -g "+str(line[1])

    # Collect the training samples.
    prob_y = []
    prob_x = []
    for value in values:
        fields = value.split(None,1)
        if len(fields)==1:
            # A label with no features is an empty feature vector.
            fields.append('')
        label, features = fields
        xi = {}
        for e in features.split():
            ind, val = e.split(":")
            xi[int(ind)] = float(val)
        prob_y.append(float(label))
        prob_x.append(xi)

    # Train with the parameters and samples gathered above.
    # svm_type is not defined in this function; it is read from an outer scope.
    tms_svm.set_svm_type(svm_type)
    ratio = tms_svm.train(prob_y,prob_x,svm_param)
    self.write_output(key, str(ratio))
def ctm_train_model(sample_save_path,svm_type,param,model_save_path):
    '''Train a model from a sample file and score it on those same samples.

    Reads the problem from sample_save_path, trains it with param, saves the
    model to model_save_path and prints the metric triple returned by
    tms_svm.predict on the training data.

    Returns the trained model.
    '''
    tms_svm.set_svm_type(svm_type)
    y, x = tms_svm.read_problem(sample_save_path)
    model = tms_svm.train(y, x, param)
    tms_svm.save_model(model_save_path, model)

    # More than two distinct labels selects the Micro/Macro/ACC report;
    # otherwise the f_score/recall/presion report is printed.
    multi_class = len(set(y)) > 2

    _pred_labels, scores, _pred_values = tms_svm.predict(y, x, model)
    first, second, third = scores
    if multi_class:
        template = "(Micro=%g, Macro=%g, ACC=%g)"
    else:
        template = "(f_score=%g,recall=%g,presion=%g)"
    # Parenthesised single-argument print: same output under Python 2.
    print(template % (first, second, third))
    return model
def grid_search(y, x, fw, c_range, g_range):
    '''Evaluate the (c, g) grid and return the chosen (best_c, best_g).

    The ranges hold exponents of 2; the original note lists
        c_begin, c_end, c_step = -5, 15, 2
        g_begin, g_end, g_step = 3, -15, -2
    Every combination from calculate_jobs is trained through tms_svm.train
    with "-v 5". The list of combinations, each result and the final choice
    are written to fw.
    '''
    # If rate improves by no more than esp and c is larger than best_c,
    # best_c is not updated.
    esp = 0.001 * 100

    def _replaces_best(c, g, rate, best_c, best_g, best_rate):
        # Clauses are checked in the original order, one early return each.
        if c < best_c and rate > best_rate - esp:
            return True
        if c > best_c and rate - esp > best_rate:
            return True
        if c == best_c and rate > best_rate:
            return True
        return abs(rate - best_rate) < esp and g == best_g and c < best_c

    best_rate, best_c, best_g = 0.0, 2**-5, 2**-15
    job = calculate_jobs(permute_sequence(c_range),
                         permute_sequence(g_range))

    # Log every combination that is going to be tried.
    fw.write("需要试验的组合有%s个:\n" % (len(job)))
    for (c_pow, g_pow) in job:
        fw.write(str(2**c_pow) + "\t" + str(2**g_pow) + "\n")
    fw.write("下面是各种组合得到的交叉验证的效果:\n")

    for (c_pow, g_pow) in job:
        c, g = 2**c_pow, 2**g_pow

        # -c / -g are only passed when the matching range has more than two entries.
        options = ["-v 5 "]
        if len(c_range) > 2:
            options.append(" -c " + str(c))
        if len(g_range) > 2:
            options.append(" -g " + str(g))
        rate = tms_svm.train(y, x, "".join(options))

        if _replaces_best(c, g, rate, best_c, best_g, best_rate):
            best_rate = rate
            best_c, best_g = c, g

        now = time.localtime(time.time())
        fw.write(time.strftime('%Y-%m-%d %H:%M:%S', now) + "\t")
        fw.write("%s\t%s\t%s\t(best c = %s,g = %s,rate =%s)\n" %
                 (c, g, rate, best_c, best_g, best_rate))
        fw.flush()
        # Parenthesised single-argument print: same output under Python 2.
        print("%s %s %s (best c = %s,g = %s,rate =%s)\n" %
              (c, g, rate, best_c, best_g, best_rate))

    fw.write("(best c = %s,g = %s,rate =%s)\n" % (best_c, best_g, best_rate))
    return best_c, best_g