def run(seed):
    """Train and evaluate one maxent classifier on a re-sampled 7-class split.

    The first 72 documents of every class form the test set.  Classes 0..5
    are then padded by random duplication until each holds 2204 documents,
    and everything after the first 72 documents of each class becomes the
    training set.

    NOTE(review): a later ``run(trains, tests, filename, seed)`` is defined
    in this file; if both live at module level, that definition replaces
    this one.  The other call sites also pass a filename to
    ``maxent.me_classify`` and four arguments to ``maxent.createPRF`` --
    confirm the shorter forms used here are still supported.

    Args:
        seed: forwarded unchanged to ``maxent.createPRF``.

    Returns:
        The ``(acc, G_mean)`` pair produced by ``maxent.createPRF``.
    """
    num_classes = 7          # classes read from createDomain()
    test_per_class = 72      # leading documents of each class held out for testing
    resampled_classes = 6    # only classes 0..5 are padded; class 6 is left as is
    resample_target = 2204   # total size each padded class is grown to

    # assumes createDomain() returns at least seven mutable lists -- TODO confirm
    domain = createDomain()
    for k in range(num_classes):
        print('len(e' + str(k) + '):' + str(len(domain[k])))

    # The test set is taken before re-sampling, so it holds originals only.
    tests = []
    for k in range(num_classes):
        tests.extend(domain[k][:test_per_class])

    # Re-sampling: duplicate random non-test documents in place.  The pool is
    # re-sliced on every draw, so it also contains earlier duplicates.
    for j in range(resampled_classes):
        docs = domain[j]
        for _ in range(resample_target - len(docs)):
            docs.append(random.choice(docs[test_per_class:]))

    trains = []
    for k in range(num_classes):
        trains.extend(domain[k][test_per_class:])

    print('len(trains):' + str(len(trains)))
    print('len(tests):' + str(len(tests)))

    maxent.me_classify(trains, tests)
    acc, G_mean = maxent.createPRF('result.txt', seed)
    return acc, G_mean
def main(seed):
    """Run a 5-member maxent ensemble and score its combined prediction.

    The first 72 documents of every class form a fixed test set.  Class 2 is
    padded by random duplication until it has 360 non-test documents.  Five
    rounds then each obtain a training set from ``getTrains(domain)`` and
    call the four-argument ``run``, collecting per-document class scores.
    The scores are summed across rounds, the highest-scoring class becomes
    the predicted label, and the result is scored by ``maxent.createPRF``.

    Args:
        seed: forwarded unchanged to ``run`` and ``maxent.createPRF``.

    Returns:
        The ``(acc, g_mean)`` pair produced by ``maxent.createPRF``.
    """
    num_classes = 7              # classes read from createDomain()
    test_per_class = 72          # leading documents of each class held out for testing
    minority_class = 2           # the only class that is re-sampled
    minority_train_target = 360  # non-test documents the re-sampled class is grown to
    co_num = 5                   # number of ensemble rounds

    # assumes createDomain() returns at least seven mutable lists -- TODO confirm
    domain = createDomain()
    for k in range(num_classes):
        print('len(e' + str(k) + '):' + str(len(domain[k])))

    # The test set is taken before re-sampling, so it holds originals only.
    tests = []
    for k in range(num_classes):
        tests.extend(domain[k][:test_per_class])

    # Re-sampling: duplicate random non-test documents in place.  The pool is
    # re-sliced on every draw, so it also contains earlier duplicates.
    minority = domain[minority_class]
    shortfall = minority_train_target - len(minority[test_per_class:])
    for _ in range(shortfall):
        minority.append(random.choice(minority[test_per_class:]))

    pred_prob_list = []
    real_label = None  # overwritten each round; the last round's value is used
    for round_no in range(1, co_num + 1):
        trains = getTrains(domain)
        pred_prob, real_label = run(
            trains, tests, 'result_%d.txt' % round_no, seed)
        pred_prob_list.append(pred_prob)
    print(len(pred_prob_list))

    # co_pred_prob[i][c] is the score of class c for test document i, summed
    # over all rounds in the order they ran.
    co_pred_prob = [
        [sum(member[i][label] for member in pred_prob_list)
         for label in range(num_classes)]
        for i in range(len(tests))
    ]
    print(len(co_pred_prob))

    # Highest summed score wins; list.index gives ties to the lowest class.
    co_max_prob_label = [scores.index(max(scores)) for scores in co_pred_prob]
    print(co_max_prob_label)

    acc, g_mean = maxent.createPRF(
        co_max_prob_label, real_label, seed, 'co_resort_maxent_prf.txt')
    # One pre-formatted argument prints the same text on Python 2 and 3.
    print('%s %s' % (acc, g_mean))
    return acc, g_mean
def run(trains, tests, filename, seed):
    """Classify ``tests`` with a maxent model and return its predictions.

    Calls ``maxent.me_classify`` with ``filename``, reads the predictions
    back through ``maxent.getPredProb(filename)`` and scores them with
    ``maxent.createPRF`` using the fixed report name ``'per_crf.txt'``.
    Presumably ``me_classify`` writes its output to ``filename`` -- confirm
    in ``maxent``.

    Args:
        trains: training data, passed straight to ``maxent.me_classify``.
        tests: test data, passed straight to ``maxent.me_classify``.
        filename: name handed to both ``me_classify`` and ``getPredProb``.
        seed: forwarded unchanged to ``maxent.createPRF``.

    Returns:
        ``(pred_prob, real_label)`` as returned by ``maxent.getPredProb``.
    """
    maxent.me_classify(trains, tests, filename)

    prediction = maxent.getPredProb(filename)
    pred_prob, pred_label, real_label = prediction

    # The per-model scores are not used here; the call is kept for whatever
    # it does inside maxent, and the two-value unpacking is kept as written.
    _acc, _gmean = maxent.createPRF(pred_label, real_label, seed, 'per_crf.txt')

    return pred_prob, real_label