def evaluate_siboru(initialW, n_epoch=10, using_gpu=True):
    # Assumes the globals target, DimentionN, NewpreW, NewpreW_zeroadded and
    # NewtopdocveccategoryMat_zeroadded, plus numpy (np), chainer, defaultdict and
    # StratifiedKFold, are defined/imported elsewhere in the script.
    skf = StratifiedKFold(target, n_folds=5, shuffle=False)
    y_all = target.astype(np.int32)
    k2_newlist = []
    all_except_indexes = []
    for train, test in skf:
        # Split every per-label document-vector matrix into train/test rows.
        topdocveccategoryMat_train = {}
        topdocveccategoryMat_test = {}
        for labelnum in range(DimentionN):
            topdocveccategoryMat_train[labelnum] = NewtopdocveccategoryMat_zeroadded[labelnum][train]
            topdocveccategoryMat_test[labelnum] = NewtopdocveccategoryMat_zeroadded[labelnum][test]
        y_train, y_test = y_all[train], y_all[test]
        k2_kyu = yahoo_data_preprocess_func.caluculatemodel_gpu2(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            topdocveccategoryMat_train, NewpreW_zeroadded, initialW, y_train,
            n_epoch=n_epoch, batchsize=100, using_gpu=using_gpu,
            binary=True, print_True=False)
        (vmats0, y_pred_II_algo_all), modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2_kyu
        # Collect the training samples that the cross-validated model misclassifies;
        # these are treated as likely mislabeled and excluded below.
        skf_sub = StratifiedKFold(y_train, n_folds=5, shuffle=False)
        except_indexes = []
        for i, (train_sub, test_sub) in enumerate(skf_sub):
            test_indexes = train[test_sub]
            test_target = y_trueall[i * len(test_indexes):(i + 1) * len(test_indexes)]
            if using_gpu == False:
                pred = y_pred_II_algo_all[i * len(test_indexes):(i + 1) * len(test_indexes)].argmax(axis=1)
            else:
                pred = chainer.cuda.to_cpu(
                    y_pred_II_algo_all[i * len(test_indexes):(i + 1) * len(test_indexes)].argmax(axis=1))
            # Accuracy on this sub-fold.
            print 1 - len(test_indexes[test_target != pred]) / float(len(test_indexes))
            except_indexes += list(test_indexes[test_target != pred])
        all_except_indexes.append(except_indexes)
        # Keep only the correctly classified training samples, then balance the two
        # groups (indexes below / at or above 5000) by undersampling to the smaller side.
        use_indexes = np.array(list(set(train) - set(except_indexes)))
        use_indexes_minus = use_indexes[use_indexes < 5000]
        use_indexes_plus = use_indexes[use_indexes >= 5000]
        length = min(len(use_indexes_minus), len(use_indexes_plus))
        CategoryMat_zeroadded_plus_minus_sampled = defaultdict(int)
        y_train_new = np.array([0] * length + [1] * length).astype(np.int32)
        for index in NewpreW:
            CategoryMat_zeroadded_plus_minus_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][
                list(use_indexes_minus[0:length]) + list(use_indexes_plus[0:length])]
        # Retrain on the filtered, balanced training set and evaluate on the held-out fold.
        k2_new = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            CategoryMat_zeroadded_plus_minus_sampled, topdocveccategoryMat_test,
            NewpreW_zeroadded, initialW, y_train_new, y_test,
            n_epoch=50, batchsize=100,
            using_gpu=True, binary=True, print_True=False)
            #using_gpu = False, binary = True)
        k2_newlist.append(k2_new)
    return k2_newlist, all_except_indexes
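# A minimal, self-contained sketch (not the project's pipeline) of the idea used in
# evaluate_siboru above: drop training samples whose out-of-fold prediction is wrong
# (treated as likely mislabeled), then undersample so both classes are the same size.
# LogisticRegression and filter_and_balance are illustrative stand-ins; the real code
# trains via yahoo_data_preprocess_func.caluculatemodel_gpu2. Uses the older
# sklearn.cross_validation API that this script already relies on.
import numpy as np
from sklearn.cross_validation import StratifiedKFold
from sklearn.linear_model import LogisticRegression

def filter_and_balance(X, y, n_folds=5):
    skf = StratifiedKFold(y, n_folds=n_folds, shuffle=False)
    wrong = []
    for train_idx, test_idx in skf:
        clf = LogisticRegression()
        clf.fit(X[train_idx], y[train_idx])
        pred = clf.predict(X[test_idx])
        wrong += list(test_idx[pred != y[test_idx]])      # out-of-fold mistakes
    keep = np.array(sorted(set(range(len(y))) - set(wrong)))
    keep_neg = keep[y[keep] == 0]
    keep_pos = keep[y[keep] == 1]
    length = min(len(keep_neg), len(keep_pos))            # undersample to the smaller class
    balanced = np.r_[keep_neg[:length], keep_pos[:length]]
    return X[balanced], y[balanced]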
# Sanity check: total count of the feature word u"急減" ("sharp drop") over the
# first 3000 documents of category 274.
count = 0
for i in range(3000):
    count += topdocveccategoryMat[274].values()[i][u"急減"]

initialW = np.r_[np.random.randn(3, len(NewpreW_zeroadded)) * scale]
k2list = []
for i in range(5):
    #initialW = np.r_[np.random.random((3, len(NewpreW_zeroadded))) * scale * 2 - scale]
    #initialW = np.r_[np.random.randn(3, len(NewpreW_zeroadded)) * scale]
    initialW = np.r_[-scale * np.random.random((1, len(NewpreW_zeroadded))),
                     scale * np.random.random((1, len(NewpreW_zeroadded)))]
    #initialW = np.r_[-scale * np.ones((1, len(NewpreW_zeroadded))),
    #                 scale * np.ones((1, len(NewpreW_zeroadded)))]
    k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
        yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
        NewtopdocveccategoryMat_zeroadded, NewpreW_zeroadded,
        initialW, target_balanced,
        n_epoch=50, batchsize=100, test_batchsize=50,
        using_gpu=True, binary=True, kf_value=10, print_True=False)
    k2list.append(k2)

# Map every vocabulary word to its learned hidden-layer weight (word polarity).
fileid = 0
for i, model in enumerate(k2[1]):
    word_polarity_dic = {}
    for index in NewpreW_namelist_dic:
        batch_dict = dict(zip(NewpreW_namelist_dic[index],
                              model.predictor.l_hidden.W.data[index]))
        word_polarity_dic.update(batch_dict)
    if ((i == 0) & (fileid == 0)):
        dict_namelist = [word.encode("utf-8") for word in word_polarity_dic.keys()]
        dict_polarity_eval = np.array(word_polarity_dic.values()) / 10
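# A hedged sketch of the word-polarity extraction above, factored into a reusable
# helper. build_word_polarity is an illustrative name; namelist_dic and W stand in
# for NewpreW_namelist_dic and model.predictor.l_hidden.W.data.
def build_word_polarity(namelist_dic, W):
    # Pair each word string with the learned weight in its row of W.
    word_polarity = {}
    for index in namelist_dic:
        word_polarity.update(dict(zip(namelist_dic[index], W[index])))
    return word_polarity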
# Pad the missing entries with zeros.
#NewpreW_zeroadded, NewtopdocveccategoryMat_zeroadded, max_numwords_in_class = yahoo_data_preprocess_func.Create_NewpreW_zero_add(NewpreW, NewtopdocveccategoryMat)
# Exclude samples that look mislabeled according to the binary classifier.
#k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
#    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
#    NewtopdocveccategoryMat_zeroadded, NewpreW_zeroadded, target,
#    n_epoch=20, batchsize=10, using_gpu=True)
k2_list = []
scale = 0.01
initialW = np.r_[-scale * np.random.random((1, len(NewpreW_zeroadded))),
                 scale * np.random.random((1, len(NewpreW_zeroadded)))]
#initialW = np.r_[np.random.random((2, len(NewpreW_zeroadded))) * scale * 2 - scale]
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    NewtopdocveccategoryMat_zeroadded, NewpreW_zeroadded,
    initialW, target,
    n_epoch=40, batchsize=100, using_gpu=False)
(vmats0, y_pred_II_algo_all), modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2
print confusion_matrix(y_trueall, y_pridictall)
#print (y_trueall, y_pridictall)

# For several confidence thresholds, print the precision and support of each class
# when only predictions above the threshold are kept.
for thresh_value in [0.5, 0.55, 0.6, 0.7, 0.8]:
    print thresh_value
    print len(np.array(y_trueall)[y_pred_II_algo_all.T[0] > thresh_value][np.array(y_trueall)[y_pred_II_algo_all.T[0] > thresh_value] == 0]) / float(len(y_pred_II_algo_all.T[0][y_pred_II_algo_all.T[0] > thresh_value])),
    print len(y_pred_II_algo_all.T[0][y_pred_II_algo_all.T[0] > thresh_value])
    print len(np.array(y_trueall)[y_pred_II_algo_all.T[1] > thresh_value][np.array(y_trueall)[y_pred_II_algo_all.T[1] > thresh_value] == 1]) / float(len(y_pred_II_algo_all.T[1][y_pred_II_algo_all.T[1] > thresh_value])),
    print len(y_pred_II_algo_all.T[1][y_pred_II_algo_all.T[1] > thresh_value])

k2_list_1 = []
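# A hedged sketch, not part of yahoo_data_preprocess_func: the threshold sweep above
# factored into a small helper. precision_at is an illustrative name; proba is the
# (n_samples, n_classes) probability/score array (here y_pred_II_algo_all).
import numpy as np

def precision_at(y_true, proba, cls, thresh):
    y_true = np.asarray(y_true)
    mask = proba[:, cls] > thresh               # predictions confidently assigned to cls
    if not mask.any():
        return float("nan"), 0
    # Precision among the kept predictions, and how many were kept (support).
    return np.mean(y_true[mask] == cls), int(mask.sum())

# Example: precision_at(y_trueall, y_pred_II_algo_all, cls=0, thresh=0.7)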
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu_1,
    topdocveccategoryMat3, preW, target4,
    n_epoch=10, batchsize=10, using_gpu=False, binary=True)
"""
k = yahoo_data_preprocess_func.caluculatemodel_gpu(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3, preW, target4,
    n_epoch=10, batchsize=100, using_gpu=False, binary=True)

inithialW_3_haba = 0.1
initialW = np.r_[-inithialW_3_haba * np.random.random((1, len(preW))),
                 inithialW_3_haba * np.random.random((1, len(preW)))]
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3, preW,
    initialW, target4,
    n_epoch=10, batchsize=10, using_gpu=False, binary=True)

#k[1][0].predictor.l_hidden.W.data
#for model in k3[1]:
#for model in k[1]:
for model in k2[1]:
    # Fraction of hidden-layer weights whose sign matches the assumed polarity
    # ordering: the first half of each class's word slots positive, the second half negative.
    total = (np.sum(model.predictor.l_hidden.W.data.T[0:number_of_word_in_class/2] > 0)
             + np.sum(model.predictor.l_hidden.W.data.T[number_of_word_in_class/2:] < 0))
    print float(total) / (DimentionN * number_of_word_in_class)
    # Same check using only the first and last third of the word slots.
    total = (np.sum(model.predictor.l_hidden.W.data.T[0:number_of_word_in_class/3] > 0)
             + np.sum(model.predictor.l_hidden.W.data.T[-number_of_word_in_class/3:] < 0))
    print float(total) / (DimentionN * number_of_word_in_class * 2 / 3)

k2[1][0].predictor.l_hidden.W.data
for model in k2[1]: