def test():
    from PreProcess.createDataset import createDataSet
    from os import path
    from FeatureSelection import afterFeatureSelection2
    from PreProcess.minmax2 import minmaxscaler
    from PreProcess.createDataset import featureAndLabel
    import matplotlib.pyplot as plt

    # filePath = path.abspath(path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP', r'D1', r'KC1.arff'))
    filePath = r'/home/chyq/Document/MyProject/DataSet/MDP/D1/KC1.arff'
    data, trainsetWithLabel, testsetWithLabel, relation, attribute = createDataSet(filePath, 5)
    # featureSet = bagging.bagIt(trainset)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainsetWithLabel)
    test_feature, test_label = featureAndLabel(testsetWithLabel)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature, lower=-1)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    # from sklearn import preprocessing
    # trainset = preprocessing.scale(train_feature)
    # testset = preprocessing.scale(test_feature)

    # x = list(trainset[:, 12])
    # y = list(trainset[:, 16])
    # plt.scatter(x, y)
    plt.hist(trainset[:, 13])
    plt.show()
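# A minimal sketch of the min-max scaling the test() above relies on. This is an
# assumption about PreProcess.minmax2.minmaxscaler, inferred from its call sites
# (fit per-feature min/max on the training set, map into [lower, upper], and reuse
# the training min/max for the test set); it is not the project's implementation.
import numpy as np

def minmax_sketch(x, lower=0.0, upper=1.0, x_feature_min=None, x_feature_max=None):
    x = np.asarray(x, dtype=float)
    if x_feature_min is None or x_feature_max is None:
        # Fitting mode: compute per-feature min/max and return them for reuse.
        x_min, x_max = x.min(axis=0), x.max(axis=0)
        span = np.where(x_max - x_min == 0, 1.0, x_max - x_min)
        return lower + (x - x_min) * (upper - lower) / span, x_min, x_max
    # Transform-only mode: reuse the training min/max (e.g. for the test set).
    span = np.where(x_feature_max - x_feature_min == 0, 1.0, x_feature_max - x_feature_min)
    return lower + (x - x_feature_min) * (upper - lower) / span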
def test():
    from PreProcess.createDataset import createDataSet
    from os import path
    from FeatureSelection import FeatureSelectionProcess
    from PreProcess.minmax2 import minmaxscaler
    from PreProcess.createDataset import featureAndLabel
    from Fsvmcil import create_weight
    import arff
    import numpy as np

    filePath = path.abspath(
        path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP',
                  r'D2', r'PC5.arff'))
    data, trainsetWithLabel, testsetWithLabel, relation, attribute = createDataSet(filePath, 10)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainsetWithLabel)
    test_feature, test_label = featureAndLabel(testsetWithLabel)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    # Bagging + correlation feature selection; drop the trailing label column.
    featured_trainset, featured_attribute = FeatureSelectionProcess.selectedSet(
        trainset, train_label, attribute, trainset)
    featured_trainset = np.array(featured_trainset)
    featured_trainset = featured_trainset[:, :-1]

    import LibsvmFormat
    x, y = LibsvmFormat.formatlib(featured_trainset, train_label)
    xtest, ytest = LibsvmFormat.formatlib(testset, test_label)

    # W = create_weight(featured_trainset, train_label)
    # geneticFeature is expected to be defined or imported at module level.
    best_para = geneticFeature(x, y, xtest, ytest, featured_trainset, train_label)
    print best_para
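# A minimal sketch of the dense-to-libsvm conversion that LibsvmFormat.formatlib is
# assumed to perform, inferred from how x/y and xtest/ytest are fed to the libsvm
# bindings in baggingAlgo(): x becomes a list of {feature_index: value} dicts with
# 1-based indices and y a plain list of labels. Not the project's implementation.
def formatlib_sketch(features, labels):
    x = [{j + 1: float(v) for j, v in enumerate(row)} for row in features]
    y = [float(l) for l in labels]
    return x, y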
def test():
    from PreProcess.createDataset import createDataSet
    from os import path
    from FeatureSelection import afterFeatureSelection2
    from PreProcess.minmax2 import minmaxscaler
    from PreProcess.createDataset import featureAndLabel

    filePath = path.abspath(
        path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP',
                  r'D1', r'KC1.arff'))
    data, trainsetWithLabel, testsetWithLabel, relation, attribute = createDataSet(filePath, 5)
    # featureSet = bagging.bagIt(trainset)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainsetWithLabel)
    test_feature, test_label = featureAndLabel(testsetWithLabel)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    # Indices of redundant features according to Spearman correlation;
    # spearman() is expected to be defined or imported at module level.
    toDelete = spearman(trainset)
    print toDelete
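# A minimal sketch of a Spearman-correlation redundancy filter, as an assumption
# about what spearman(trainset) returns: for every feature pair whose |rho| exceeds
# a threshold, the later column index is marked for deletion. The threshold value
# is hypothetical.
from scipy.stats import spearmanr

def spearman_sketch(features, threshold=0.9):
    rho, _ = spearmanr(features)            # column-wise Spearman correlation matrix
    to_delete = set()
    n = rho.shape[0]
    for i in range(n):
        for j in range(i + 1, n):
            if j not in to_delete and abs(rho[i, j]) >= threshold:
                to_delete.add(j)
    return sorted(to_delete)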
def test():
    from PreProcess.createDataset import createDataSet
    from os import path
    from FeatureSelection import afterFeatureSelection
    from PreProcess.minmax2 import minmaxscaler
    from PreProcess.createDataset import featureAndLabel

    filePath = path.abspath(
        path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP',
                  r'D1', r'KC1.arff'))
    data, trainset, testset, relation, attribute = createDataSet(filePath, 5)
    # featureSet = bagging.bagIt(trainset)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainset)
    test_feature, test_label = featureAndLabel(testset)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    featured_trainset, attribute = afterFeatureSelection.selectedSet(
        trainset, train_label, attribute)

    from Evaluation.eva import evaluationRes

    # SVM on the feature-selected training set;
    # originSVM is expected to be defined or imported at module level.
    predicted_label_f = originSVM(featured_trainset, train_label, testset, test_label)
    # print predicted_label_f
    # print testset
    a, b = evaluationRes(predicted_label_f, test_label)
    print a, b

    print "--------------------------------------------"

    # SVM on the full (non-selected) training set, for comparison.
    predicted_label_nf = originSVM(trainset, train_label, testset, test_label)
    c, d = evaluationRes(predicted_label_nf, test_label)
    print c, d
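# A minimal sketch of an unweighted SVM baseline with the same shape as originSVM
# (train on dense features/labels, return predicted labels for the test set). This
# is an assumption about originSVM's behavior and is written with scikit-learn
# rather than the project's libsvm bindings.
from sklearn.svm import SVC

def origin_svm_sketch(train_feature, train_label, test_feature, test_label=None):
    clf = SVC(kernel='rbf')                  # default RBF kernel, no instance weights
    clf.fit(train_feature, train_label)
    return list(clf.predict(test_feature))   # same role as predicted_label_f above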
def test():
    from PreProcess.createDataset import createDataSet
    from os import path
    from FeatureSelection import FeatureSelectionProcess
    from PreProcess.minmax2 import minmaxscaler
    from PreProcess.createDataset import featureAndLabel
    import numpy as np
    import arff

    filePath = path.abspath(
        path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP',
                  r'D1', r'KC1.arff'))
    data, trainsetWithLabel, testsetWithLabel, relation, attribute = createDataSet(filePath, 5)
    # featureSet = bagging.bagIt(trainset)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainsetWithLabel)
    test_feature, test_label = featureAndLabel(testsetWithLabel)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    # from sklearn import preprocessing
    # trainset = preprocessing.scale(train_feature)
    # testset = preprocessing.scale(test_feature)

    ############################
    # 1. Write the original ARFF files: the full training set and the test set.
    origin_train = np.c_[trainset, train_label]
    testData = np.c_[testset, test_label]

    arff_obj = {'relation': relation, 'attributes': attribute, 'data': origin_train}
    tr0 = arff.dumps(arff_obj)
    with open('/home/chyq/Document/MyProject/DataSet/MDP/my/my_kc1_origin.arff', 'w') as f1:
        f1.write(tr0)

    arff_obj = {'relation': relation, 'attributes': attribute, 'data': testData}
    te = arff.dumps(arff_obj)
    with open('/home/chyq/Document/MyProject/DataSet/MDP/my/my_kc1_test.arff', 'w') as f2:
        f2.write(te)

    #############################
    # 2. Generate the Bagging + Correlation feature subset. Note that the returned
    #    attribute list has already been filtered, so its size matches featured_trainset.
    featured_trainset, featured_attribute = FeatureSelectionProcess.selectedSet(
        trainset, train_label, attribute, trainset)
    print "bagging+corr", len(featured_attribute)

    arff_obj = {'relation': relation, 'attributes': featured_attribute, 'data': featured_trainset}
    # Write to1.
    to1 = arff.dumps(arff_obj)
    with open('/home/chyq/Document/MyProject/DataSet/MDP/my/my_kc1_featured.arff', 'w') as f:
        f.write(to1)

    #############################
    # 3. Find the indices of redundant (highly correlated) features and collect them
    #    in toDelete. In practice, whether feature selection was applied beforehand
    #    makes almost no difference to the correlation computation.
    from FeatureSelection import correlation
    toDelete = correlation.corr(trainset)
    Corr_attribute = []
    for i in range(len(attribute)):
        if i not in toDelete:
            Corr_attribute.append(attribute[i])
    print "corr len", len(Corr_attribute)

    t = np.c_[trainset, train_label]
    noCorr_data = np.delete(t, toDelete, axis=1)
    arff_obj = {'relation': relation, 'attributes': Corr_attribute, 'data': noCorr_data}
    # arff_obj = {'relation': relation, 'attributes': attribute, 'data': featured_trainset}
    to2 = arff.dumps(arff_obj)
    with open('/home/chyq/Document/MyProject/DataSet/MDP/my/my_kc1_corr.arff', 'w') as f:
        f.write(to2)

    ####################
    # 4. Information-gain based feature selection.
    from FeatureSelection import afterFeatureSelection3
    featured_trainset, featured_attribute = afterFeatureSelection3.selectedSet(
        trainset, train_label, attribute, train_feature)
    arff_obj = {'relation': relation, 'attributes': featured_attribute, 'data': featured_trainset}
    print "info_gain", len(featured_attribute)
    # Write to3.
    to3 = arff.dumps(arff_obj)
    with open('/home/chyq/Document/MyProject/DataSet/MDP/my/my_kc1_info.arff', 'w') as f:
        f.write(to3)

    ######
    # 5. Write the instance weights to a file so they can be read back later.
    from Algo import Fsvmcil
    Fsvmcil.create_weight(trainset, train_label)
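# The four ARFF dumps in test() above repeat the same dump-and-write pattern; a
# small helper like this (hypothetical, not part of the project) would reduce each
# step to a single call.
import arff

def write_arff_sketch(out_path, relation, attributes, data):
    arff_obj = {'relation': relation, 'attributes': attributes, 'data': data}
    with open(out_path, 'w') as f:
        f.write(arff.dumps(arff_obj))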
def baggingAlgo():
    # Relies on module-level imports for path, createDataSet, featureAndLabel,
    # minmaxscaler, FeatureSelectionProcess, np, the weighting helpers
    # (create_weightknn, create_weight, wf), the genetic searches, and the
    # libsvm bindings (svm_problem, svm_parameter, svm_train, svm_predict).
    filePath = path.abspath(
        path.join(path.dirname(__file__), path.pardir, r'DataSet', r'MDP',
                  r'D2', r'PC5.arff'))
    data, trainsetWithLabel, testsetWithLabel, relation, attribute = createDataSet(filePath, 10)

    # Split the training and test sets into features and labels.
    train_feature, train_label = featureAndLabel(trainsetWithLabel)
    test_feature, test_label = featureAndLabel(testsetWithLabel)

    # Normalize; the scaled results go into trainset and testset.
    trainset, x_min, x_max = minmaxscaler(train_feature)
    testset = minmaxscaler(test_feature, x_feature_min=x_min, x_feature_max=x_max)

    featured_trainset, featured_attribute = FeatureSelectionProcess.selectedSet(
        trainset, train_label, attribute, trainset)
    featured_trainset = np.array(featured_trainset)
    featured_trainset = featured_trainset[:, :-1]

    import LibsvmFormat
    x, y = LibsvmFormat.formatlib(featured_trainset, train_label)
    xtest, ytest = LibsvmFormat.formatlib(testset, test_label)

    # W = []
    # Search for the best (C, gamma) for each of the three weighting schemes.
    W_Knn = create_weightknn(featured_trainset, train_label, 10)
    best_para_knn = genetic(x, y, xtest, ytest, W_Knn)
    W_fsvmcil = create_weight(featured_trainset, train_label)
    best_para_fsvmcvil = genetic(x, y, xtest, ytest, W_fsvmcil)
    best_para_feature = geneticFeature(x, y, xtest, ytest)
    print best_para_knn, best_para_fsvmcvil, best_para_feature

    #############
    # Train one weighted SVM per weighting scheme with its best parameters.
    prob = svm_problem(W_Knn, y, x)
    p = '-c {0} -g {1}'.format(best_para_knn[1][0], best_para_knn[1][1])
    para = svm_parameter(p)
    model1 = svm_train(prob, para)
    p_label1, p_acc, p_val = svm_predict(ytest, xtest, model1)

    prob = svm_problem(W_fsvmcil, y, x)
    p = '-c {0} -g {1}'.format(best_para_fsvmcvil[1][0], best_para_fsvmcvil[1][1])
    para = svm_parameter(p)
    model2 = svm_train(prob, para)
    p_label2, p_acc, p_val = svm_predict(ytest, xtest, model2)

    W_featureW = wf.create_weight(featured_trainset)
    prob = svm_problem(W_featureW, y, x)
    p = '-c {0} -g {1}'.format(best_para_feature[1][0], best_para_feature[1][1])
    para = svm_parameter(p)
    model3 = svm_train(prob, para)
    p_label3, p_acc, p_val = svm_predict(ytest, xtest, model3)

    # Majority vote over the three classifiers' predictions.
    result = []
    for i in range(len(p_label1)):
        judge = 0
        if p_label1[i] == 1.0:
            judge += 1
        if p_label2[i] == 1.0:
            judge += 1
        if p_label3[i] == 1.0:
            judge += 1
        if judge >= 2:
            result.append(1.0)
        else:
            result.append(-1.0)
    return result
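# The hand-rolled vote at the end of baggingAlgo() is a plain majority vote over the
# three weighted SVMs; a compact equivalent, assuming the same +1.0 / -1.0 label
# convention, is sketched below.
def majority_vote_sketch(label_lists):
    result = []
    for votes in zip(*label_lists):
        positives = sum(1 for v in votes if v == 1.0)
        result.append(1.0 if positives * 2 > len(votes) else -1.0)
    return result

# Usage inside baggingAlgo() would be:
# result = majority_vote_sketch([p_label1, p_label2, p_label3])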