def loadAllData(self, path_name):
    # Merge the 'traindata' and 'testdata' subsets into a single array pair
    positive_data_images, positive_data_labels = load_dataset(path_name, 'traindata')
    negative_data_images, negative_data_labels = load_dataset(path_name, 'testdata')
    images = np.concatenate((positive_data_images, negative_data_images), axis=0)
    labels = np.concatenate((positive_data_labels, negative_data_labels), axis=0)
    return images, labels
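# The load_dataset helper called above is not part of this snippet. Below is
# a minimal, hypothetical sketch of its two-argument form as used by
# loadAllData. It assumes images live under <path_name>/<subset>/<class_name>/
# and that IMAGE_SIZE is defined elsewhere, as in the load() methods further
# down; the directory layout and OpenCV-based loading are assumptions, not
# the original implementation.
import os

import cv2
import numpy as np

def load_dataset(path_name, subset):
    images, labels = [], []
    subset_dir = os.path.join(path_name, subset)
    # One subfolder per class; the folder's position in sorted order is its label
    for label, class_name in enumerate(sorted(os.listdir(subset_dir))):
        class_dir = os.path.join(subset_dir, class_name)
        for file_name in os.listdir(class_dir):
            image = cv2.imread(os.path.join(class_dir, file_name))
            if image is None:
                continue  # skip unreadable files
            images.append(cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE)))
            labels.append(label)
    return np.array(images), np.array(labels)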
import random

from keras import backend as K
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=5):
    # Load data and preprocess it
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # Reshape to the dimension ordering the Keras backend expects
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # Use one-hot encoding to vectorize the labels
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert the images to float format so they can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Normalize the pixel values into the 0~1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
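# Note: K.image_dim_ordering() and np_utils.to_categorical are Keras 1.x-era
# APIs. On Keras 2.x the backend check is spelled K.image_data_format() and
# returns 'channels_first' or 'channels_last' instead of 'th'/'tf', and
# to_categorical lives in keras.utils. A small sketch of the modern
# equivalent of the reshape branch above (make_input_shape is a hypothetical
# helper name, not part of the original code):
from keras import backend as K

def make_input_shape(img_rows, img_cols, img_channels):
    # Keras 2.x replacement for the image_dim_ordering() == 'th' test
    if K.image_data_format() == 'channels_first':
        return (img_channels, img_rows, img_cols)
    return (img_rows, img_cols, img_channels)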
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=5):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # If the current dimension ordering is 'th', image data is laid out as
    # channels, rows, cols; otherwise as rows, cols, channels. This block
    # reshapes the datasets to the ordering the Keras library expects.
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # Print the sizes of the training, validation, and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # Our model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes; after
    # the conversion each label becomes an nb_classes-dimensional vector
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert pixel data to float for normalization
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Normalize each pixel value into the 0~1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name, self.username)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # If the dimension ordering is 'th', images are laid out as
    # channels, rows, cols; otherwise as rows, cols, channels
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # Print the sample counts
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # The model uses categorical_crossentropy as its loss function, so
    # one-hot encode the labels into nb_classes-dimensional vectors
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert pixel data to float for normalization
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Scale pixel values into [0, 1]
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
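# The load() variants above are clearly methods of a dataset class that is
# not included in this snippet. A hypothetical sketch of that surrounding
# class, with the constructor and attribute names inferred from the methods
# (path_name, username, input_shape, and the six image/label attributes):
class Dataset:
    def __init__(self, path_name, username=None):
        self.path_name = path_name   # root directory of the image data
        self.username = username     # used by the last load() variant
        self.input_shape = None      # set by load()
        self.train_images = self.train_labels = None
        self.valid_images = self.valid_labels = None
        self.test_images = self.test_labels = None

    # load() as defined above would sit here as a method

# Typical usage (paths and names are placeholders):
# dataset = Dataset('./data', username='alice')
# dataset.load(nb_classes=2)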
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.model_selection import KFold

import loadData  # project-local helper module

def mainProgram(dataset):
    # Load data
    X, Y = loadData.load_dataset(dataset)
    fold = 10
    kf = KFold(n_splits=fold)
    current_fold = 0
    acc = 0
    optimized_acc = 0
    # Clusters accumulated across folds
    X_train_clusters = []
    Y_train_clusters = []
    for train, test in kf.split(X):
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
        X_train = preprocessing.normalize(X_train)
        X_test = preprocessing.normalize(X_test)
        sil = []
        # Dissimilarity is not defined for a single cluster, so the minimum
        # number of clusters is 2
        K = range(2, 20)
        for k in K:
            kmeans = KMeans(n_clusters=k).fit(X_train)
            labels = kmeans.labels_
            sil.append(silhouette_score(X_train, labels, metric='euclidean'))
        # Pick the k with the highest silhouette score
        optimum_K = K[sil.index(max(sil))]
        plt.plot(K, sil, 'bx-')
        plt.xlabel('k')
        plt.ylabel('Silhouette Score')
        plt.title('Silhouette Dissimilarity Scores for various k')
        plt.axvline(x=optimum_K, color='r', linestyle='--')
        fig1 = plt.gcf()
        plt.show()
        plt.draw()
        fig1.savefig(dataset + '.png', format='png', bbox_inches='tight', dpi=300)
        plt.close()
        kmeans = KMeans(n_clusters=optimum_K, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(X_train)
        count = len(np.unique(Y_train))  # number of distinct classes (overwritten below)
        for j in range(optimum_K):
            X_train_temp = X_train[kmeans.labels_ == j]
            Y_train_temp = Y_train[kmeans.labels_ == j]
            v = len(np.unique(Y_train_temp))
            # Keep only clusters that contain more than one class
            if v > 1:
                X_train_clusters.append(X_train_temp)
                Y_train_clusters.append(Y_train_temp)
        # Hold out a fifth of the test fold as a validation set for
        # ensemble optimization
        count = int(len(X_test) / 5)
        valX = X_test[0:count]
        valy = Y_test[0:count]
        X_test = X_test[count:]
        Y_test = Y_test[count:]
        ensemble = trainClassifiers(X_train_clusters, Y_train_clusters)
        acc += decisionFusion(ensemble, X_test, Y_test)
        optimized_ensemble = optimizeEnsemble(ensemble, valX, valy)
        optimized_acc += decisionFusion(optimized_ensemble, X_test, Y_test)
        current_fold += 1
    print("Non_optimized and Optimized Accuracy for " + dataset + " is: " +
          str(acc / current_fold) + " and " + str(optimized_acc / current_fold))
    # Return both mean accuracies (the original returned the non-optimized
    # value twice, contradicting the print statement above)
    return ((acc / current_fold), (optimized_acc / current_fold))
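# trainClassifiers, optimizeEnsemble, and decisionFusion are referenced above
# but not defined in this snippet. As one plausible reading, decisionFusion
# fuses the ensemble's predictions by majority (hard) voting and returns the
# resulting accuracy. This sketch assumes the ensemble is a list of fitted
# scikit-learn-style classifiers; it is not the original implementation.
import numpy as np

def decisionFusion(ensemble, X_test, Y_test):
    # One row of predictions per ensemble member: shape (n_members, n_samples)
    predictions = np.array([clf.predict(X_test) for clf in ensemble])
    fused = []
    for column in predictions.T:  # one column per test sample
        values, counts = np.unique(column, return_counts=True)
        fused.append(values[np.argmax(counts)])  # majority vote
    return np.mean(np.array(fused) == Y_test)    # fused accuracy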