コード例 #1
0
def evaluate_siboru(initialW, n_epoch = 10, using_gpu = True):
    """Cross-validate a two-stage "filter, then retrain" procedure.

    For each of 5 stratified outer folds over the module-level ``target``:

    1. ``caluculatemodel_gpu2`` is run on the outer-training rows.  Its
       per-row predictions are compared with its returned true labels, and
       every outer-training row whose prediction disagrees is recorded.
    2. Those rows are dropped, the remaining rows are balanced between the
       two index groups, and ``caluculatemodel_gpu2_train_test`` is trained
       on the balanced rows and evaluated on the outer-test rows.

    Args:
        initialW: Initial weight matrix forwarded to both training calls.
        n_epoch: Number of epochs for the first (filtering) training call.
            The retraining call always uses 50 epochs.
        using_gpu: Forwarded to both training calls.  When true, predictions
            are copied to the host with ``chainer.cuda.to_cpu`` before they
            are compared with the labels.

    Returns:
        A tuple ``(k2_newlist, all_except_indexes)``: one
        ``caluculatemodel_gpu2_train_test`` result per outer fold, and one
        list of excluded row indexes per outer fold.
    """
    skf = StratifiedKFold(target, n_folds=5, shuffle=False)
    y_all = target.astype(np.int32)
    k2_newlist = []
    all_except_indexes = []
    for train, test in skf:
        # Per-category matrices restricted to the outer-train / outer-test rows.
        topdocveccategoryMat_train = {}
        topdocveccategoryMat_test = {}
        for labelnum in range(DimentionN):
            category_mat = NewtopdocveccategoryMat_zeroadded[labelnum]
            topdocveccategoryMat_train[labelnum] = category_mat[train]
            topdocveccategoryMat_test[labelnum] = category_mat[test]
        y_train, y_test = y_all[train], y_all[test]

        k2_kyu = yahoo_data_preprocess_func.caluculatemodel_gpu2(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            topdocveccategoryMat_train,
            NewpreW_zeroadded, initialW,
            y_train,
            n_epoch=n_epoch, batchsize=100,
            using_gpu=using_gpu, binary=True,
            print_True=False)
        # Only the prediction matrix and the true labels are needed here.
        (_, y_pred_II_algo_all), _, _, y_trueall, _, _, _ = k2_kyu

        # NOTE(review): assumes caluculatemodel_gpu2 splits y_train with the
        # same 5-fold StratifiedKFold and returns its per-fold outputs
        # concatenated in fold order -- confirm against that helper.
        skf_sub = StratifiedKFold(y_train, n_folds=5, shuffle=False)
        except_indexes = []
        offset = 0  # running start position; fold sizes may differ
        for _, test_sub in skf_sub:
            test_indexes = train[test_sub]
            stop = offset + len(test_indexes)
            test_target = y_trueall[offset:stop]
            pred = y_pred_II_algo_all[offset:stop].argmax(axis=1)
            if using_gpu:
                pred = chainer.cuda.to_cpu(pred)
            offset = stop
            wrong_indexes = test_indexes[test_target != pred]
            # Accuracy of this inner fold.
            print(1 - len(wrong_indexes) / float(len(test_indexes)))
            except_indexes += list(wrong_indexes)
        all_except_indexes.append(except_indexes)

        # Sorted set difference, so the truncation to `length` below picks a
        # deterministic subset.
        use_indexes = np.setdiff1d(train, except_indexes)
        # NOTE(review): rows with index < 5000 are paired with label 0 and the
        # rest with label 1 (see y_train_new) -- confirm this matches the
        # layout of `target`.
        use_indexes_minus = use_indexes[use_indexes < 5000]
        use_indexes_plus = use_indexes[use_indexes >= 5000]
        length = min(len(use_indexes_minus), len(use_indexes_plus))
        sampled_rows = list(use_indexes_minus[0:length]) + list(use_indexes_plus[0:length])
        y_train_new = np.array([0] * length + [1] * length).astype(np.int32)
        CategoryMat_zeroadded_plus_minus_sampled = defaultdict(int)
        for index in NewpreW:
            CategoryMat_zeroadded_plus_minus_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][sampled_rows]

        k2_new = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            CategoryMat_zeroadded_plus_minus_sampled, topdocveccategoryMat_test,
            NewpreW_zeroadded, initialW,
            y_train_new, y_test,
            n_epoch=50, batchsize=100,
            # Honour the caller's choice instead of always forcing the GPU.
            using_gpu=using_gpu, binary=True,
            print_True=False)
        k2_newlist.append(k2_new)
    return k2_newlist, all_except_indexes
コード例 #2
0
# Sum the u"急減" entry over the first 3000 values of topdocveccategoryMat[274].
# The values are materialised once up front: calling .values() inside the loop
# rebuilt the whole list on every iteration under Python 2.
_entries_274 = list(topdocveccategoryMat[274].values())
count = 0
for i in range(3000):
    count += _entries_274[i][u"急減"]

# NOTE: this first draw is overwritten inside the loop below before it is
# used; it still advances the NumPy random stream.
initialW = np.random.randn(3, len(NewpreW_zeroadded)) * scale

# Train five times, each time from a freshly drawn two-row initial weight
# matrix, and keep every result.
k2list = []
for i in range(5):
    row_shape = (1, len(NewpreW_zeroadded))
    # Row 0 is -scale * U[0, 1) and row 1 is +scale * U[0, 1).
    # Initialisations left disabled in the original: uniform in
    # [-scale, scale) over 3 rows, gaussian * scale over 3 rows, and
    # constant rows of -scale / +scale.
    negative_row = -scale * np.random.random(row_shape)
    positive_row = scale * np.random.random(row_shape)
    initialW = np.vstack((negative_row, positive_row))
    k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
        yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
        NewtopdocveccategoryMat_zeroadded,
        NewpreW_zeroadded,
        initialW,
        target_balanced,
        n_epoch=50,
        batchsize=100,
        test_batchsize=50,
        using_gpu=True,
        binary=True,
        kf_value=10,
        print_True=False)
    k2list.append(k2)


fileid = 0
# For every model of the last training result, map each name in
# NewpreW_namelist_dic[index] to the matching entry of the hidden-layer
# weight row W.data[index].
for i, model in enumerate(k2[1]):
    hidden_weights = model.predictor.l_hidden.W.data
    word_polarity_dic = {}
    for index in NewpreW_namelist_dic:
        word_polarity_dic.update(
            zip(NewpreW_namelist_dic[index], hidden_weights[index]))
    if i == 0 and fileid == 0:
        # Keys and values are read from the same unmodified dict, so the two
        # sequences line up element by element.
        dict_namelist = [word.encode("utf-8") for word in word_polarity_dic]
        dict_polarity_eval = np.array(list(word_polarity_dic.values())) / 10
コード例 #3
0
# Pad the missing entries with zeros
#NewpreW_zeroadded, NewtopdocveccategoryMat_zeroadded, max_numwords_in_class = yahoo_data_preprocess_func.Create_NewpreW_zero_add(NewpreW, NewtopdocveccategoryMat)
# Exclude samples that appear to be mislabeled for binary classification

#k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
	#yahoo_data_preprocess_func.IIalgorithm_simple_gpu, 
	#NewtopdocveccategoryMat_zeroadded,NewpreW_zeroadded,target,
	#n_epoch = 20,batchsize = 10,using_gpu = True)

k2_list = []
scale = 0.01

# Two-row initial weights: row 0 is -scale * U[0, 1), row 1 is +scale * U[0, 1).
# (A uniform [-scale, scale) initialisation was left disabled in the original.)
row_shape = (1, len(NewpreW_zeroadded))
negative_row = -scale * np.random.random(row_shape)
positive_row = scale * np.random.random(row_shape)
initialW = np.vstack((negative_row, positive_row))

# Train on the CPU with the full `target` labels.
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    NewtopdocveccategoryMat_zeroadded,
    NewpreW_zeroadded,
    initialW,
    target,
    n_epoch=40,
    batchsize=100,
    using_gpu=False)

(vmats0, y_pred_II_algo_all), modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2
print(confusion_matrix(y_trueall, y_pridictall))

# For each threshold and each label column, look only at the samples whose
# score in that column exceeds the threshold, and print the fraction of them
# whose true label equals the column index, followed by how many there were.
y_true_array = np.array(y_trueall)
for thresh_value in [0.5, 0.55, 0.6, 0.7, 0.8]:
    print(thresh_value)
    for label in (0, 1):
        selected = y_true_array[y_pred_II_algo_all.T[label] > thresh_value]
        n_selected = len(selected)
        n_correct = len(selected[selected == label])
        # No score above the threshold: report nan instead of dividing by zero.
        precision = n_correct / float(n_selected) if n_selected else float("nan")
        print("%s %s" % (precision, n_selected))


k2_list_1 = []
コード例 #4
0
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu_1, 
	topdocveccategoryMat3,preW,target4,
	n_epoch = 10,batchsize = 10,using_gpu = False,binary = True)
"""

# Baseline run: caluculatemodel_gpu is called without an explicit initial
# weight matrix.
k = yahoo_data_preprocess_func.caluculatemodel_gpu(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3,
    preW,
    target4,
    n_epoch=10,
    batchsize=100,
    using_gpu=False,
    binary=True)

# Two-row initial weights: row 0 is -haba * U[0, 1), row 1 is +haba * U[0, 1).
inithialW_3_haba = 0.1
row_shape = (1, len(preW))
negative_row = -inithialW_3_haba * np.random.random(row_shape)
positive_row = inithialW_3_haba * np.random.random(row_shape)
initialW = np.vstack((negative_row, positive_row))

# Same data, but starting from the explicit initial weights above.
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3,
    preW,
    initialW,
    target4,
    n_epoch=10,
    batchsize=10,
    using_gpu=False,
    binary=True)

#k[1][0].predictor.l_hidden.W.data
#for model in k3[1]:
#for model in k[1]:
# For every trained model, report how often the sign of a hidden-layer weight
# agrees with its position: positive in the leading rows of W.data.T, negative
# in the trailing rows.
# NOTE(review): presumably the rows of W.data.T are ordered so that the leading
# words are the positive ones -- confirm against how preW is built.
for model in k2[1]:
    weights_t = model.predictor.l_hidden.W.data.T
    half = number_of_word_in_class // 2
    third = number_of_word_in_class // 3

    # First half expected positive, second half expected negative.
    total = np.sum(weights_t[0:half] > 0) + np.sum(weights_t[half:] < 0)
    print(float(total) / (DimentionN * number_of_word_in_class))

    # Same check on the outer thirds only.  Both slices now have exactly
    # `third` rows; the old `-n/3` floored toward negative infinity and took
    # one extra trailing row whenever n was not a multiple of 3.
    head = weights_t[0:third]
    tail = weights_t[len(weights_t) - third:]
    total = np.sum(head > 0) + np.sum(tail < 0)
    # Divide by the number of weights actually inspected.
    print(float(total) / (head.size + tail.size))


# Bare expression kept from interactive use; it has no effect in a script.
k2[1][0].predictor.l_hidden.W.data
for model in k2[1]: