def initialize_cluster(self, loader, init="k-means++"):
    trainX = []
    trainY = []
    for batch_idx, (X, Y) in enumerate(loader):
        trainX.append(self.encodeBatch(X.float()).cpu())
        trainY.append(Y.cpu())
    trainX = torch.cat(tuple(trainX), 0).numpy()
    trainY = torch.cat(tuple(trainY), 0).numpy()

    n_components = self.n_centroids
    km = KMeans(n_clusters=n_components, init=init).fit(trainX)
    y_pred = km.predict(trainX)
    print("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred),
                                    normalized_mutual_info_score(trainY, y_pred)))
    write_log("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred),
                                        normalized_mutual_info_score(trainY, y_pred)),
              self.log_dir)
    u_p = km.cluster_centers_
    return u_p, y_pred
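# Hedged sketch: `acc` above comes from a metrics module that is not shown here.
# A common implementation, assumed rather than taken from this repo, is the
# unsupervised clustering accuracy used in the DEC/DCN papers: match predicted
# cluster ids to ground-truth labels with the Hungarian algorithm, then score.
# The name `cluster_acc_sketch` and its signature are illustrative only.
def cluster_acc_sketch(y_true, y_pred):
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)          # contingency matrix
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row, col = linear_sum_assignment(-w)          # maximize total matched count
    return w[row, col].sum() / y_pred.size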
def fit(self, loader, lr=0.001, batch_size=128, num_epochs=10):
    n_components = self.n_centroids
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()

    print("=====Initialize Cluster Centers=======")
    write_log("=====Initialize Cluster Centers=======", self.log_dir)
    centers, assignments = self.initialize_cluster(loader)

    print("=====Stacked Denoising Autoencoding layer=======")
    write_log("=====Stacked Denoising Autoencoding layer=======", self.log_dir)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    # n_batches = int(math.ceil(num_train / batch_size))
    count = 100 * np.ones(n_components, dtype=int)  # per-center counts for mini-batch k-means

    for epoch in range(num_epochs):
        # train 1 epoch
        train_loss = 0.0
        train_recon_loss = 0.0
        train_cluster_loss = 0.0
        num_train = loader.dataset.__len__()
        for batch_idx, (inputs, labels) in enumerate(loader):
            inputs = inputs.view(inputs.size(0), -1).float()
            # Cluster assignments replace the ground-truth labels; this indexing
            # assumes the loader iterates the dataset in a fixed order.
            labels = assignments[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num_train)]
            centers_batch_tensor = torch.from_numpy(centers[labels])
            if use_cuda:
                inputs = inputs.cuda()
                centers_batch_tensor = centers_batch_tensor.cuda()
            optimizer.zero_grad()
            inputs = Variable(inputs)
            centers_batch_tensor = Variable(centers_batch_tensor)
            z, outputs = self.forward(inputs)

            loss, recon_loss, cluster_loss = self.loss_function(outputs, inputs, z, centers_batch_tensor)
            train_loss += loss.data * len(inputs)
            train_recon_loss += recon_loss.data * len(inputs)
            train_cluster_loss += cluster_loss.data * len(inputs)
            loss.backward()
            optimizer.step()

            # Perform mini-batch KM: update the centers and re-assign this batch
            temp_idx, centers, count = batch_km(z.data.cpu().numpy(), centers, count)
            assignments[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num_train)] = temp_idx

        print("#Epoch %3d: Loss: %.3f, Recon Loss: %.3f, Cluster Loss: %.3f" % (
            epoch + 1, train_loss / num_train, train_recon_loss / num_train, train_cluster_loss / num_train))
        write_log("#Epoch %3d: Loss: %.3f, Recon Loss: %.3f, Cluster Loss: %.3f" % (
            epoch + 1, train_loss / num_train, train_recon_loss / num_train, train_cluster_loss / num_train),
            self.log_dir)
        if self.writer is not None:
            self.writer.add_scalars('dcn', {'loss': train_loss / num_train}, epoch + 1)
        # if (epoch+1) % 10 == 0:
        centers, assignments = self.initialize_cluster(loader, centers)
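# Hedged sketch: `batch_km` is imported from a helper module that is not shown.
# The version below is an assumption based on the mini-batch k-means update used
# in DCN-style training: assign each embedded point to its nearest center and
# move that center toward the point with step size 1/count. The repo's actual
# helper may differ in details.
def batch_km_sketch(data, centers, count):
    import numpy as np
    idx = np.zeros(data.shape[0], dtype=np.int64)
    for i in range(data.shape[0]):
        dist = np.sum((centers - data[i]) ** 2, axis=1)   # squared distance to every center
        c = int(np.argmin(dist))
        count[c] += 1
        eta = 1.0 / count[c]                              # per-center learning rate
        centers[c] = (1.0 - eta) * centers[c] + eta * data[i]
        idx[i] = c
    return idx, centers, count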
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='VAE MNIST Example')
    parser.add_argument('--sdae_lr', type=float, default=0.1, metavar='N',
                        help='learning rate for SDAE pretraining (default: 0.1)')
    parser.add_argument('--dcn_lr', type=float, default=0.01, metavar='N',
                        help='learning rate for DCN training (default: 0.01)')
    args = parser.parse_args()

    log_dir = 'logs/dec-' + datasetname
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    for i in range(1, repeat + 1):
        sdae_savepath = ("model/sdae-run-" + datasetname + "-%d.pt" % i)
        if not os.path.exists(sdae_savepath):
            print("Experiment #%d" % i)
            write_log("Experiment #%d" % i, log_dir)
            train_loader = None
            test_loader = None
            if datasetname == 'mnist':
                train_loader = torch.utils.data.DataLoader(
                    MNIST('./dataset/mnist', train=True, download=True),
                    batch_size=batch_size, shuffle=True, num_workers=0)
                # test_loader = torch.utils.data.DataLoader(
                #     MNIST('./dataset/mnist', train=False),
                #     batch_size=batch_size, shuffle=False, num_workers=0)
            elif datasetname == 'cifar':
                transform = transforms.Compose(
                    [transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
                # download=True would fetch the dataset from the official source
                trainset = datasets.CIFAR10(
                    root='./dataset/cifar', train=True, download=False,
                    transform=transform)
                train_loader = torch.utils.data.DataLoader(
                    trainset, batch_size=batch_size, shuffle=True, num_workers=0)
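# Hedged sketch: `write_log` is used throughout this repo but defined elsewhere.
# A minimal stand-in (assumption; the file name and format are illustrative)
# simply appends the message to a text file inside the given log directory.
def write_log_sketch(msg, logpath):
    import os
    with open(os.path.join(logpath, "log.txt"), "a") as f:
        f.write(str(msg) + "\n")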
def fit(self, trainloader, lr=0.001, batch_size=128, num_epochs=10, corrupt=0.3,
        loss_type="mse"):
    """
    trainloader: yields (x, _) batches of FloatTensor data
    """
    use_cuda = torch.cuda.is_available()
    print("cuda:", use_cuda)
    if use_cuda:
        self.cuda()

    print("=====Denoising Autoencoding layer=======")
    write_log("=====Denoising Autoencoding layer=======", self.log_dir)
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9)
    if loss_type == "mse":
        criterion = MSELoss()
    elif loss_type == "cross-entropy":
        criterion = BCELoss()

    # validate
    # total_loss = 0.0
    # total_num = 0
    # for batch_idx, (inputs, _) in enumerate(validloader):
    #     inputs = inputs.view(inputs.size(0), -1).float()
    #     if use_cuda:
    #         inputs = inputs.cuda()
    #     inputs = Variable(inputs)
    #     hidden = self.encode(inputs)
    #     if loss_type == "cross-entropy":
    #         outputs = self.decode(hidden, binary=True)
    #     else:
    #         outputs = self.decode(hidden)
    #     valid_recon_loss = criterion(outputs, inputs)
    #     total_loss += valid_recon_loss.data * len(inputs)
    #     total_num += inputs.size()[0]
    # valid_loss = total_loss / total_num
    # print("#Epoch 0: Valid Reconstruct Loss: %.4f" % (valid_loss))
    # write_log("#Epoch 0: Valid Reconstruct Loss: %.4f" % (valid_loss))

    self.train()
    for epoch in range(num_epochs):
        print("dae epoch:", epoch)
        tic = timer()  # timing
        # train 1 epoch
        train_loss = 0.0
        adjust_learning_rate(lr, optimizer, epoch)
        for batch_idx, (inputs, _) in enumerate(trainloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            # corrupt the inputs, then reconstruct the clean ones
            inputs_corr = masking_noise(inputs, corrupt)
            if use_cuda:
                inputs = inputs.cuda()
                inputs_corr = inputs_corr.cuda()
            optimizer.zero_grad()
            inputs = Variable(inputs)
            inputs_corr = Variable(inputs_corr)

            hidden = self.encode(inputs_corr)
            if loss_type == "cross-entropy":
                outputs = self.decode(hidden, binary=True)
            else:
                outputs = self.decode(hidden)

            recon_loss = criterion(outputs, inputs)
            train_loss += recon_loss.data * len(inputs)
            recon_loss.backward()
            optimizer.step()
        toc = timer()
        print("cost:", toc - tic)

        # validation removed
        # valid_loss = 0.0
        # for batch_idx, (inputs, _) in enumerate(validloader):
        #     inputs = inputs.view(inputs.size(0), -1).float()
        #     if use_cuda:
        #         inputs = inputs.cuda()
        #     inputs = Variable(inputs)
        #     hidden = self.encode(inputs, train=False)
        #     if loss_type == "cross-entropy":
        #         outputs = self.decode(hidden, binary=True)
        #     else:
        #         outputs = self.decode(hidden)
        #     valid_recon_loss = criterion(outputs, inputs)
        #     valid_loss += valid_recon_loss.data * len(inputs)
        # print("#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f" % (
        #     epoch+1, train_loss / len(trainloader.dataset), valid_loss / len(validloader.dataset)))
        # write_log("#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f" % (
        #     epoch+1, train_loss / len(trainloader.dataset), valid_loss / len(validloader.dataset)))
        print("#Epoch %3d: Reconstruct Loss: %.4f" % (epoch + 1, train_loss / len(trainloader.dataset)))
        write_log("#Epoch %3d: Reconstruct Loss: %.4f" % (epoch + 1, train_loss / len(trainloader.dataset)),
                  self.log_dir)
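# Hedged sketch: `masking_noise` is a utility defined outside this file. A
# typical denoising-autoencoder corruption (assumed, not necessarily the
# author's exact version) zeroes out a random fraction of the input entries.
def masking_noise_sketch(x, frac):
    import torch
    noisy = x.clone()
    noisy[torch.rand_like(noisy) < frac] = 0.0   # drop roughly `frac` of the entries
    return noisy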
def fit(self, dataloader, lr=0.001, batch_size=256, num_epochs=10, update_interval=1, tol=1e-3):
    '''dataloader: yields (x, y) batches of tensor data'''
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    print("=====Training DEC=======")
    write_log("=====Training DEC=======", self.log_dir)
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9)

    print("Initializing cluster centers with kmeans.")
    write_log("Initializing cluster centers with kmeans.", self.log_dir)
    kmeans = KMeans(self.n_clusters, n_init=20)
    # The original code ran the forward pass on the full tensor X; here the
    # embeddings are computed batch by batch, so X and Y are replaced by a DataLoader.
    data = []
    y = []
    for batch_idx, (inputs, yi) in enumerate(dataloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        inputs = inputs.cuda()
        datai, _ = self.forward(inputs)
        data.append(datai.data.cpu())
        y.append(yi.data.cpu())
        del inputs
        torch.cuda.empty_cache()
    data = torch.cat(tuple(data), 0)
    y = torch.cat(tuple(y), 0)

    y_pred = kmeans.fit_predict(data)
    y_pred_last = y_pred
    self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
    if y is not None:
        y = y.cpu().numpy()
        print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
        write_log("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)),
                  self.log_dir)
    del data, y
    torch.cuda.empty_cache()

    self.train()
    # num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
    for epoch in range(num_epochs):
        tic = timer()
        if epoch % update_interval == 0:
            # update the target distribution p, computing q batch by batch
            data = []
            y = []
            num = dataloader.dataset.__len__()
            for batch_idx, (xbatch, yi) in enumerate(dataloader):
                xbatch = xbatch.float().cuda()
                datai, _ = self.forward(xbatch)
                data.append(datai.data.cpu())
                y.append(yi.data.cpu())
                del xbatch, datai
                torch.cuda.empty_cache()
            data = torch.cat(tuple(data), 0)
            y = torch.cat(tuple(y), 0).numpy()

            # soft assignment via the Student's t kernel, as in DEC
            q = 1.0 / (1.0 + torch.sum((data.unsqueeze(1) - self.mu.data.cpu()) ** 2, dim=2) / self.alpha)
            q = q ** ((self.alpha + 1.0) / 2.0)
            q = q / torch.sum(q, dim=1, keepdim=True)
            p = self.target_distribution(q).data
            del data
            torch.cuda.empty_cache()

            # evaluate the clustering performance
            y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
            if y is not None:
                print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
                write_log("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)),
                          logpath=self.log_dir)
                if self.writer is not None:
                    self.writer.add_scalars('dec', {
                        'acc': acc(y, y_pred),
                        'nmi': normalized_mutual_info_score(y, y_pred)
                    }, epoch)

            # check stop criterion: stop once the fraction of changed assignments drops below tol
            delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
            y_pred_last = y_pred
            if epoch > 0 and delta_label < tol:
                print('delta_label ', delta_label, '< tol ', tol)
                # write_log('delta_label ' + str(delta_label) + ' < tol ' + str(tol))
                print("Reach tolerance threshold. Stopping training.")
                # write_log("Reach tolerance threshold. Stopping training.")
                break

        # train 1 epoch
        train_loss = 0.0
        for batch_idx, (xbatch, _) in enumerate(dataloader):
            # Note: slicing p this way assumes the dataloader preserves sample order.
            pbatch = p[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            xbatch = xbatch.float().cuda()
            pbatch = pbatch.cuda()
            optimizer.zero_grad()
            inputs = Variable(xbatch)
            target = Variable(pbatch)

            z, qbatch = self.forward(inputs)
            loss = self.loss_function(target, qbatch)
            train_loss += loss.data * len(inputs)
            loss.backward()
            optimizer.step()
            del xbatch, qbatch, inputs, target, loss
            torch.cuda.empty_cache()
        toc = timer()
        print("cost:", toc - tic)

        print("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num))
        write_log("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num), self.log_dir)
        if self.writer is not None:
            self.writer.add_scalars('dec', {'loss': train_loss / num}, epoch + 1)
        torch.cuda.empty_cache()
def pretrain(self, trainloader, lr=0.001, batch_size=128, num_epochs=10, corrupt=0.2,
             loss_type="cross-entropy"):
    trloader = trainloader
    # valoader = validloader
    daeLayers = []
    for l in range(1, len(self.layers)):
        infeatures = self.layers[l - 1]
        outfeatures = self.layers[l]
        if l != len(self.layers) - 1:
            dae = DenoisingAutoencoder(infeatures, outfeatures, activation=self.activation,
                                       dropout=corrupt, log_dir=self.log_dir)
        else:
            # the last (bottleneck) layer is trained without activation or dropout
            dae = DenoisingAutoencoder(infeatures, outfeatures, activation="none",
                                       dropout=0, log_dir=self.log_dir)
        print(dae)
        write_log(dae, self.log_dir)
        if l == 1:
            dae.fit(trloader, lr=lr, batch_size=batch_size, num_epochs=num_epochs,
                    corrupt=corrupt, loss_type=loss_type)
        else:
            if self.activation == "sigmoid":
                dae.fit(trloader, lr=lr, batch_size=batch_size, num_epochs=num_epochs,
                        corrupt=corrupt, loss_type="cross-entropy")
            else:
                dae.fit(trloader, lr=lr, batch_size=batch_size, num_epochs=num_epochs,
                        corrupt=corrupt, loss_type="mse")
        # feed this layer's codes to the next layer as its training data
        data_x = dae.encodeBatch(trloader)
        # valid_x = dae.encodeBatch(valoader)
        trainset = Dataset(data_x, data_x)
        trloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                               shuffle=True, num_workers=0)
        # validset = Dataset(valid_x, valid_x)
        # valoader = torch.utils.data.DataLoader(
        #     validset, batch_size=batch_size, shuffle=False, num_workers=4)
        daeLayers.append(dae)
        # del trainset, trloader, validset, valoader

    self.copyParam(daeLayers)
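# Hedged sketch: the `Dataset(data_x, data_x)` wrapper used above comes from the
# repo's utilities. A plausible equivalent (assumption; the real class may add
# transforms or dtype handling) is a thin tensor-pair dataset like this one.
import torch.utils.data

class PairDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, X, Y):
        self.X, self.Y = X, Y
    def __len__(self):
        return self.X.size(0)
    def __getitem__(self, i):
        return self.X[i], self.Y[i]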