# NOTE(review): the commented statements below are the orphaned tail of a
# `run_dir` method whose header and enclosing loop are outside this view.
# They arrived commented out (the collapsed line started with `#`) and are kept
# that way, one statement per line; the original indentation is lost, so they
# must be re-indented inside that method before being re-enabled.
# else:  # `name` is a folder
# path1 = path_original + "\\" + name  # update the original-dataset path
# os.mkdir(path_saveNew + "\\" + name)  # create a folder matching the original dataset folder to hold the sampling results
# path2 = path_saveNew + "\\" + name  # the save path now points at the newly created folder
# for i in range(m):  # inside that folder, create one folder per sampling round
# os.mkdir(path2+"\\for_"+str(i+1))
# self.run_dir(path1, path2)  # call the sampling method again (recursive call)

# Script entry point: ask for the number of sampling rounds, resample every
# dataset under `path_original` into `path_saveNew`, then hand the output
# folder to `toArff.run_dir` together with the "_arff" target folder.
if __name__ == '__main__':
    # Not used by the visible script; kept because other code may rely on the
    # module-level name or on the constructor having run - TODO confirm.
    based = Basic()

    # Disabled single-file experiment, preserved as found:
    # sm = Smote()
    # dataSet = based.loadSample('C:\\Users\Administrator\Desktop\Original_dataset - 副本\EasyEnsemble\\abalone_0_7.csv')
    # # print(dataSet[0])
    # X, y = based.Split(dataSet)
    # print(type(X))
    # print(type(y))
    # sm.My_smote(X, y, 'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv')
    # cca=CCA()
    # cca.My_cca('C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_cca_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\del_cca_abalone_0_7.csv')

    # `m` stays a module-level name: the `run_dir` tail above uses `range(m)`
    # without `m` being passed in the visible code - presumably a global; confirm.
    m = int(input("请输入循环次数:"))

    # Backslashes are doubled so the literals no longer rely on invalid escape
    # sequences (`\P`, `\O`, `\R`, `\C`); the resulting strings are unchanged.
    path_original = 'E:\\Papers_dataset\\OriginalDataSet'
    path_saveNew = 'E:\\Papers_dataset\\ResempledDataSet\\CCA_all_1'

    dic = Director()
    dic.run_dir(path_original, path_saveNew)
    toArff.run_dir(path_saveNew, 'E:\\Papers_dataset\\ResempledDataSet\\CCA_all_1_arff')
# NOTE(review): the commented statements below are the orphaned tail of a
# `run_dir` method whose header and enclosing loop are outside this view.
# They use `self` and `name`, which do not exist at module level, so they are
# kept as comments, one statement per line; the original indentation is lost
# and they must be re-indented inside that method before being re-enabled.
# path1 = path_original + "\\" + name  # update the original-dataset path
# os.mkdir(path_saveNew + "\\" + name)  # create a folder matching the original dataset folder to hold the sampling results
# path2 = path_saveNew + "\\" + name  # the save path now points at the newly created folder
# for i in range(m):  # inside that folder, create one folder per sampling round
# os.mkdir(path2 + "\\for_" + str(i + 1))
# self.run_dir(path1, path2)  # call the sampling method again (recursive call)

# Script entry point: ask for the number of sampling rounds, under-sample every
# dataset under the original folder into `path_saveNew`, then hand the output
# folder to `toArff.run_dir` together with the "_arff" target folder.
if __name__ == '__main__':
    # `m` stays a module-level name: the `run_dir` tail above uses `range(m)`
    # without `m` being passed in the visible code - presumably a global; confirm.
    m = int(input("请输入采样次数:"))

    # The misspelt names (`path_originial`, `ramdomUnder`) are kept unchanged
    # because they are module-level and may be referenced outside this view.
    path_originial = "E:\\Papers_dataset\\OriginalDataSet"  # folder holding the original data files
    path_saveNew = "E:\\Papers_dataset\\ResempledDataSet\\RUS"  # folder receiving the resampled files

    ramdomUnder = RamUnder()
    # Not used by the visible script; presumably read as a global by the
    # sampling code - TODO confirm before removing.
    rus = RandomUnderSampler()

    # Only the source folder and the output folder need to be passed in.
    ramdomUnder.run_dir(path_originial, path_saveNew)
    toArff.run_dir(path_saveNew, "E:\\Papers_dataset\\ResempledDataSet\\RUS_arff")

    # Disabled file-parsing experiment, preserved as found. Its indentation
    # was lost and the snippet is cut off at the end of this view, so the
    # statements are simply listed in order.
    # file = open("C:\\Users\\Administrator\\Desktop\\Original_dataset20171121\\SMOTE+TonekLink\\Flag_0_white.csv", 'r')
    # '''Read the file contents; readlines returns a list'''
    # contain = file.readlines()
    # count = len(contain)  # the file has `count` lines
    # '''Create a count x (len(contain[0].split(',')) - 1) matrix, where len(contain[0].split(',')) - 1 is the number of sample attributes'''
    # features = np.zeros((count, len(re.split(r'[ ,;:\t]+', contain[0])) - 1))
    # labels = []
    # index = 0
    # for line in contain:  # read the data file line by line
    # line = line.strip()  # strip leading and trailing whitespace from the line
    # listFormLine = re.split(r'[ ,;:\t]+', line)  # split the line on the separator characters
    # '''Put the first len(listFormLine) - 1 columns of listFormLine into the matrix'''
    # features[index:] = listFormLine[0:len(listFormLine) - 1]
    # labels.append(listFormLine[-1])  # the last column is the class label
# NOTE(review): the commented statements below are the orphaned tail of a
# `run_dir` method whose header and enclosing loop are outside this view.
# They arrived commented out (the collapsed line started with `#`) and are kept
# that way, one statement per line; the original indentation is lost, so they
# must be re-indented inside that method before being re-enabled.
# else:  # `name` is a folder
# path1 = path_original + "\\" + name  # update the original-dataset path
# os.mkdir(path_saveNew + "\\" + name)  # create a folder matching the original dataset folder to hold the sampling results
# path2 = path_saveNew + "\\" + name  # the save path now points at the newly created folder
# for i in range(m):  # inside that folder, create one folder per sampling round
# os.mkdir(path2+"\\for_"+str(i+1))
# self.run_dir(path1, path2)  # call the sampling method again (recursive call)

# Script entry point: ask for the number of sampling rounds, resample every
# dataset under `path_original` into `path_saveNew`, then hand the output
# folder to `toArff.run_dir` together with the "_arff" target folder.
if __name__ == '__main__':
    # Not used by the visible script; kept because other code may rely on the
    # module-level name or on the constructor having run - TODO confirm.
    based = Basic()

    # Disabled single-file experiment, preserved as found:
    # sm = Smote()
    # dataSet = based.loadSample('C:\\Users\Administrator\Desktop\Original_dataset - 副本\EasyEnsemble\\abalone_0_7.csv')
    # # print(dataSet[0])
    # X, y = based.Split(dataSet)
    # print(type(X))
    # print(type(y))
    # sm.My_smote(X, y, 'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv')
    # cca=CCA()
    # cca.My_cca('C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_cca_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\del_cca_abalone_0_7.csv')

    # `m` stays a module-level name: the `run_dir` tail above uses `range(m)`
    # without `m` being passed in the visible code - presumably a global; confirm.
    m = int(input("请输入循环次数:"))

    path_original = 'E:\\Papers_dataset\\OriginalDataSet'
    path_saveNew = 'E:\\Papers_dataset\\ResempledDataSet\\SMOTE_CCA_maj'

    dic = Director()
    dic.run_dir(path_original, path_saveNew)
    toArff.run_dir(path_saveNew, 'E:\\Papers_dataset\\ResempledDataSet\\SMOTE_CCA_maj_arff')
# NOTE(review): the commented statements below are the orphaned tail of a
# `run_dir` method whose header and enclosing loop are outside this view. The
# first one is only the closing part of a call whose opening is not visible,
# so it cannot be parsed on its own. All of them are kept as comments, one
# statement per line; the original indentation is lost, so they must be
# re-joined and re-indented inside that method before being re-enabled.
# fcca, name)  # used to store the deleted information
# else:  # `name` is a folder
# path1 = path_original + "\\" + name  # update the original-dataset path
# os.mkdir(path_saveNew + "\\" + name)  # create a folder matching the original dataset folder to hold the sampling results
# path2 = path_saveNew + "\\" + name  # the save path now points at the newly created folder
# for i in range(m):  # inside that folder, create one folder per sampling round
# os.mkdir(path2 + "\\for_" + str(i + 1))
# self.run_dir(path1, path2)  # call the sampling method again (recursive call)

# Script entry point: ask for the number of sampling rounds, resample every
# dataset under `path_original` into `path_saveNew`, then hand the output
# folder to `toArff.run_dir` together with the "_arff" target folder.
if __name__ == '__main__':
    # Not used by the visible script; kept because other code may rely on the
    # module-level name or on the constructor having run - TODO confirm.
    based = Basic()

    # Disabled single-file experiment, preserved as found:
    # sm = Smote()
    # dataSet = based.loadSample('C:\\Users\Administrator\Desktop\Original_dataset - 副本\EasyEnsemble\\abalone_0_7.csv')
    # # print(dataSet[0])
    # X, y = based.Split(dataSet)
    # print(type(X))
    # print(type(y))
    # sm.My_smote(X, y, 'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv')
    # cca=CCA()
    # cca.My_cca('C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_SMOTE_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\re_cca_abalone_0_7.csv',
    #            'C:\\Users\Administrator\Desktop\\test_dic\EasyEnsemble\\del_cca_abalone_0_7.csv')

    # `m` stays a module-level name: the `run_dir` tail above uses `range(m)`
    # without `m` being passed in the visible code - presumably a global; confirm.
    m = int(input("请输入循环次数:"))

    # Backslashes are doubled so the literals no longer rely on invalid escape
    # sequences (`\P`, `\O`, `\A`, `\D`, `\C`); the resulting strings are unchanged.
    path_original = 'E:\\Papers_dataset\\OriginalDataSet'
    path_saveNew = 'C:\\Users\\Administrator\\Desktop\\CCA_all'

    dic = Director()
    dic.run_dir(path_original, path_saveNew)
    toArff.run_dir(path_saveNew, 'C:\\Users\\Administrator\\Desktop\\CCA_all_arff')