예제 #1
0
def grid_search(y,x,fw,c_range,g_range):
    '''Try every (c, g) combination and return the preferred pair.

    The jobs come from ``calculate_jobs(permute_sequence(c_range),
    permute_sequence(g_range))``; each job is an exponent pair ``(c1, g1)``
    and the values actually tried are ``c = 2**c1`` and ``g = 2**g1``.
    Example ranges over which c and g vary:

        c_begin, c_end, c_step = -5,  15, 2
        g_begin, g_end, g_step =  3, -15, -2

    Args:
        y: sample labels, handed unchanged to ``tms_svm.train``.
        x: sample features, handed unchanged to ``tms_svm.train``.
        fw: writable file-like log.  It receives the list of combinations,
            one timestamped line per trial, and the final best result.
        c_range: range description for c; ``-c`` is only put into the
            training options when it has more than two items.
        g_range: range description for g; same rule for ``-g``.

    Returns:
        The tuple ``(best_c, best_g)``.
    '''
    best_rate,best_c,best_g = 0.0,2**-5,2**-15
    # If the rate does not improve by more than esp and c is larger than
    # best_c, best_c is not updated (a smaller c is preferred within esp).
    esp = 0.001*100
    job = calculate_jobs(permute_sequence(c_range),permute_sequence(g_range))

    fw.write("需要试验的组合有%s个:\n"%(len(job)))
    for (c1,g1) in job:
        fw.write(str(2**c1)+"\t"+str(2**g1)+"\n")
    fw.write("下面是各种组合得到的交叉验证的效果:\n")

    # These checks do not depend on the job, so evaluate them once.
    pass_c = len(c_range)>2
    pass_g = len(g_range)>2

    for (c1,g1) in job:
        c,g = 2**c1,2**g1
        param = "-v 5 "
        if pass_c:
            param += " -c "+str(c)
        if pass_g:
            param += " -g "+str(g)
        # NOTE(review): the result is treated as a score where higher is
        # better; presumably a cross-validation rate -- confirm in tms_svm.
        rate = tms_svm.train(y,x,param)

        if ((c < best_c and rate > best_rate-esp)
                or (c > best_c and rate-esp > best_rate)
                or (c == best_c and rate > best_rate)
                or (abs(rate-best_rate) < esp and g == best_g and c < best_c)):
            best_rate = rate
            best_c,best_g = c,g

        # strftime without a time tuple formats the current local time.
        fw.write(time.strftime('%Y-%m-%d %H:%M:%S')+"\t")
        fw.write("%s\t%s\t%s\t(best c = %s,g = %s,rate =%s)\n" %(c,g,rate,best_c,best_g,best_rate))
        fw.flush()
        # Parenthesised single-argument print: same output as the Python 2
        # print statement, and also valid on Python 3.
        print("%s %s %s (best c = %s,g = %s,rate =%s)\n" %(c,g,rate,best_c,best_g,best_rate))

    fw.write("(best c = %s,g = %s,rate =%s)\n" %(best_c,best_g,best_rate))
    return best_c,best_g
    def reducer(self, key, values):
        '''Train on all samples grouped under one parameter key.

        Args:
            key: whitespace-separated parameters: the ``-c`` value, optionally
                followed by the ``-g`` value.
            values: iterable of sample lines, each ``"label idx:val idx:val"``
                (the feature part may be missing).

        Raises:
            ValueError: if ``key`` is empty or whitespace only, or if a sample
                line cannot be parsed.

        The result of ``tms_svm.train`` is emitted with
        ``self.write_output(key, str(result))``.
        '''
        params = key.split(None, 1)
        if not params:
            # Without this check svm_param would stay unbound and the failure
            # would only surface after every sample had been parsed.
            raise ValueError("reducer key %r contains no SVM parameters" % (key,))

        # Build the SVM training options.
        # NOTE(review): presumably LIBSVM-style flags -- confirm in tms_svm.
        svm_param = " -v 5 -c " + str(params[0])
        if len(params) == 2:
            svm_param += " -g " + str(params[1])

        # Collect the training samples.
        prob_y = []
        prob_x = []
        for value in values:
            fields = value.split(None, 1)
            if len(fields) == 1:
                # A label with no features: treat the feature part as empty.
                fields.append('')
            label, features = fields
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y.append(float(label))
            prob_x.append(xi)

        # Train with the parameters and samples gathered above.
        # svm_type is not defined in this method; it must exist in an
        # enclosing (module) scope.
        tms_svm.set_svm_type(svm_type)
        ratio = tms_svm.train(prob_y, prob_x, svm_param)
        self.write_output(key, str(ratio))
    def reducer(self, key, values):
        '''Train on all samples grouped under one parameter key.

        Args:
            key: whitespace-separated parameters: the ``-c`` value, optionally
                followed by the ``-g`` value.
            values: iterable of sample lines, each ``"label idx:val idx:val"``
                (the feature part may be missing).

        Raises:
            ValueError: if ``key`` is empty or whitespace only, or if a sample
                line cannot be parsed.

        The result of ``tms_svm.train`` is emitted with
        ``self.write_output(key, str(result))``.
        '''
        params = key.split(None,1)
        if not params:
            # Without this check svm_param would stay unbound and the failure
            # would only surface after every sample had been parsed.
            raise ValueError("reducer key %r contains no SVM parameters" % (key,))

        # Build the SVM training options.
        # NOTE(review): presumably LIBSVM-style flags -- confirm in tms_svm.
        svm_param = " -v 5 -c "+str(params[0])
        if len(params)==2:
            svm_param += " -g "+str(params[1])

        # Collect the training samples.
        prob_y=[]
        prob_x=[]
        for value in values:
            fields = value.split(None,1)
            if len(fields)==1:
                # A label with no features: treat the feature part as empty.
                fields.append('')
            label, features = fields
            xi={}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y.append(float(label))
            prob_x.append(xi)

        # Train with the parameters and samples gathered above.
        # svm_type is not defined in this method; it must exist in an
        # enclosing (module) scope.
        tms_svm.set_svm_type(svm_type)
        ratio = tms_svm.train(prob_y,prob_x,svm_param)
        self.write_output( key, str(ratio))
def ctm_train_model(sample_save_path,svm_type,param,model_save_path):
    '''Train a model, save it, and print its results on the training samples.

    Args:
        sample_save_path: path of the sample file read by
            ``tms_svm.read_problem``.
        svm_type: value passed to ``tms_svm.set_svm_type`` before training.
        param: training parameters passed to ``tms_svm.train``.
        model_save_path: where ``tms_svm.save_model`` stores the model.

    Returns:
        The trained model object returned by ``tms_svm.train``.
    '''
    tms_svm.set_svm_type(svm_type)
    y,x = tms_svm.read_problem(sample_save_path)
    m = tms_svm.train(y,x,param)
    tms_svm.save_model(model_save_path,m)

    # The number of distinct labels decides which metric triple is unpacked
    # from the prediction on the training samples themselves.
    distinct_labels = set(y)
    if len(distinct_labels)>2:
        pred_labels, (Micro, Macro, ACC), pred_values = tms_svm.predict(y,x,m)
        # Parenthesised single-argument print: same output as the Python 2
        # print statement, and also valid on Python 3.
        print("(Micro=%g, Macro=%g, ACC=%g)"%(Micro, Macro, ACC))
    else:
        pred_labels, (f_score,recall,presion), pred_values=tms_svm.predict(y,x,m)
        print("(f_score=%g,recall=%g,presion=%g)"%(f_score,recall,presion))
    return m
예제 #5
0
def grid_search(y, x, fw, c_range, g_range):
    '''Try every (c, g) combination and return the preferred pair.

    The jobs come from ``calculate_jobs(permute_sequence(c_range),
    permute_sequence(g_range))``; each job is an exponent pair ``(c1, g1)``
    and the values actually tried are ``c = 2**c1`` and ``g = 2**g1``.
    Example ranges over which c and g vary:

        c_begin, c_end, c_step = -5,  15, 2
        g_begin, g_end, g_step =  3, -15, -2

    Args:
        y: sample labels, handed unchanged to ``tms_svm.train``.
        x: sample features, handed unchanged to ``tms_svm.train``.
        fw: writable file-like log.  It receives the list of combinations,
            one timestamped line per trial, and the final best result.
        c_range: range description for c; ``-c`` is only put into the
            training options when it has more than two items.
        g_range: range description for g; same rule for ``-g``.

    Returns:
        The tuple ``(best_c, best_g)``.
    '''
    best_rate, best_c, best_g = 0.0, 2**-5, 2**-15
    # If the rate does not improve by more than esp and c is larger than
    # best_c, best_c is not updated (a smaller c is preferred within esp).
    esp = 0.001 * 100
    job = calculate_jobs(permute_sequence(c_range), permute_sequence(g_range))

    fw.write("需要试验的组合有%s个:\n" % (len(job)))
    for (c1, g1) in job:
        fw.write(str(2**c1) + "\t" + str(2**g1) + "\n")
    fw.write("下面是各种组合得到的交叉验证的效果:\n")

    # These checks do not depend on the job, so evaluate them once.
    pass_c = len(c_range) > 2
    pass_g = len(g_range) > 2

    for (c1, g1) in job:
        c, g = 2**c1, 2**g1
        param = "-v 5 "
        if pass_c:
            param += " -c " + str(c)
        if pass_g:
            param += " -g " + str(g)
        # NOTE(review): the result is treated as a score where higher is
        # better; presumably a cross-validation rate -- confirm in tms_svm.
        rate = tms_svm.train(y, x, param)

        if ((c < best_c and rate > best_rate - esp)
                or (c > best_c and rate - esp > best_rate)
                or (c == best_c and rate > best_rate)
                or (abs(rate - best_rate) < esp and g == best_g
                    and c < best_c)):
            best_rate = rate
            best_c, best_g = c, g

        # strftime without a time tuple formats the current local time.
        fw.write(time.strftime('%Y-%m-%d %H:%M:%S') + "\t")
        fw.write("%s\t%s\t%s\t(best c = %s,g = %s,rate =%s)\n" %
                 (c, g, rate, best_c, best_g, best_rate))
        fw.flush()
        # Parenthesised single-argument print: same output as the Python 2
        # print statement, and also valid on Python 3.
        print("%s %s %s (best c = %s,g = %s,rate =%s)\n" %
              (c, g, rate, best_c, best_g, best_rate))

    fw.write("(best c = %s,g = %s,rate =%s)\n" % (best_c, best_g, best_rate))
    return best_c, best_g