def test_basic(self):
    """
    Sanity-check ``cluster_accuracy`` on small hand-written label arrays.
    """
    # Case 1: labels 1 and 2 are swapped in the prediction; the expected
    # accuracy is 1.0 both with and without the explicit third argument.
    swapped_true = np.array([1, 2, 1, 2, 0, 0], dtype=np.int64)
    swapped_pred = np.array([2, 1, 2, 1, 0, 0], dtype=np.int64)
    for extra_args in ((), (3,)):
        score = cluster_accuracy(swapped_true, swapped_pred, *extra_args)[1]
        self.assertAlmostEqual(score, 1.0)
    swapped_mapping = cluster_accuracy(swapped_true, swapped_pred)[0]
    self.assertDictEqual(swapped_mapping, {0: 0, 1: 2, 2: 1})

    # Case 2: a single true label against six distinct predicted labels;
    # the expected accuracy is one sixth.
    single_true = np.array([1, 1, 1, 1, 1, 1], dtype=np.int64)
    spread_pred = np.array([0, 1, 2, 3, 4, 5], dtype=np.int64)
    one_sixth = 1.0 / 6.0
    for extra_args in ((), (6,)):
        score = cluster_accuracy(single_true, spread_pred, *extra_args)[1]
        self.assertAlmostEqual(score, one_sixth)

    # Case 3: four labels, checked through the returned mapping only.
    permuted_true = np.array([1, 3, 1, 3, 0, 2], dtype=np.int64)
    permuted_pred = np.array([2, 1, 2, 1, 3, 0], dtype=np.int64)
    permuted_mapping = cluster_accuracy(permuted_true, permuted_pred)[0]
    self.assertDictEqual(permuted_mapping, {2: 1, 1: 3, 3: 0, 0: 2})
def main(data_dir, cuda, batch_size, pretrain_epochs, finetune_epochs, testing_mode):
    """
    Run the full pipeline on the CachedMNIST datasets: pretrain and fine-tune a
    stacked denoising autoencoder, train DEC on top of its encoder, print the
    final clustering accuracy and (outside testing mode) save a confusion heatmap.

    :param data_dir: forwarded to CachedMNIST as its first argument
    :param cuda: kept for interface compatibility; the device is chosen from
        torch.cuda.is_available() and every stage, including the DEC stage,
        follows that detected device
    :param batch_size: batch size for the autoencoder pretraining and training stages
    :param pretrain_epochs: number of epochs passed to ae.pretrain
    :param finetune_epochs: number of epochs passed to ae.train
    :param testing_mode: forwarded to CachedMNIST; when true the confusion
        heatmap is not produced
    :return: None
    """
    writer = SummaryWriter()  # create the TensorBoard object

    # callback function to call during training, uses writer from the scope
    def training_callback(epoch, lr, loss, validation_loss):
        writer.add_scalars('data/autoencoder', {
            'lr': lr,
            'loss': loss,
            'validation_loss': validation_loss,
        }, epoch)

    try:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # The model is placed on `device`, so the DEC helpers must be told to move
        # batches to that same device; passing the raw `cuda` flag could disagree
        # with where the model actually lives.
        use_cuda = device == 'cuda'

        ds_train = CachedMNIST(data_dir, is_train=True, device=device,
                               testing_mode=testing_mode)  # training dataset
        ds_val = CachedMNIST(data_dir, is_train=False, device=device,
                             testing_mode=testing_mode)  # evaluation dataset
        autoencoder = StackedDenoisingAutoEncoder([28 * 28, 500, 500, 2000, 10], final_activation=None)
        autoencoder = autoencoder.to(device)

        print('Pretraining stage.')
        ae.pretrain(
            ds_train,
            autoencoder,
            device=device,
            validation=ds_val,
            epochs=pretrain_epochs,
            batch_size=batch_size,
            silent=True,
            optimizer=lambda model: SGD(model.parameters(), lr=0.1, momentum=0.9),
            scheduler=lambda x: StepLR(x, 20000, gamma=0.1),
            corruption=0.2)

        print('Training stage.')
        ae_optimizer = SGD(params=autoencoder.parameters(), lr=0.1, momentum=0.9)
        ae.train(
            ds_train,
            autoencoder,
            device=device,
            validation=ds_val,
            epochs=finetune_epochs,
            batch_size=batch_size,
            silent=True,
            optimizer=ae_optimizer,
            scheduler=StepLR(ae_optimizer, 20000, gamma=0.1),
            corruption=0.2,
            update_callback=training_callback)

        print('DEC stage.')
        model = DEC(cluster_number=10, embedding_dimension=28 * 28, hidden_dimension=10,
                    encoder=autoencoder.encoder)
        model = model.to(device)
        dec_optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
        train(
            dataset=ds_train,
            model=model,
            epochs=20000,
            batch_size=256,
            silent=True,
            optimizer=dec_optimizer,
            stopping_delta=0.000001,
            cuda=use_cuda)
        predicted, actual = predict(ds_train, model, 1024, silent=True, return_actual=True,
                                    cuda=use_cuda)
        actual = actual.cpu().numpy()
        predicted = predicted.cpu().numpy()
        reassignment, accuracy = cluster_accuracy(actual, predicted)
        print('Final DEC accuracy: %s' % accuracy)

        if not testing_mode:
            predicted_reassigned = [reassignment[item] for item in predicted]  # TODO numpify
            confusion = confusion_matrix(actual, predicted_reassigned)
            # normalise each row by its total
            normalised_confusion = confusion.astype('float') / confusion.sum(axis=1)[:, np.newaxis]
            confusion_id = uuid.uuid4().hex
            sns.heatmap(normalised_confusion).get_figure().savefig(
                'confusion_%s.png' % confusion_id)
            print('Writing out confusion diagram with UUID: %s' % confusion_id)
    finally:
        # always release the TensorBoard writer, even if a stage fails
        writer.close()
def train(
        dataset: torch.utils.data.Dataset,
        model: torch.nn.Module,
        epochs: int,
        batch_size: int,
        optimizer: torch.optim.Optimizer,
        stopping_delta: Optional[float] = None,
        cuda: bool = True,
        sampler: Optional[torch.utils.data.sampler.Sampler] = None,
        silent: bool = True,
        update_freq: Optional[int] = 10,
        evaluate_batch_size: int = 1024,
        update_callback: Optional[Callable[[float, float, Optional[float]], None]] = None,
        epoch_callback: Optional[Callable[[int, torch.nn.Module], None]] = None) -> None:
    """
    Train the DEC model given a dataset, a model instance and various configuration parameters.

    The cluster centres are first initialised with KMeans on the encoder output for the
    whole dataset, then the model is trained with a KL-divergence loss against the target
    distribution derived from its own output.

    :param dataset: instance of Dataset to use for training
    :param model: instance of DEC model to train
    :param epochs: number of training epochs
    :param batch_size: size of the batch to train with
    :param optimizer: instance of optimizer to use
    :param stopping_delta: label delta as a proportion to use for stopping; NOTE: the early
        stopping check is currently disabled in this implementation, so this value is not
        applied and all epochs are run
    :param cuda: whether to use CUDA, defaults to True
    :param sampler: optional sampler to use in the DataLoader, defaults to None; when given,
        the training loader does not shuffle
    :param silent: set to True to prevent printing out summary statistics, defaults to True
    :param update_freq: frequency in batches with which update_callback is invoked,
        None disables, default 10
    :param evaluate_batch_size: batch size for evaluation stage, default 1024
    :param update_callback: optional function of accuracy, loss and label delta, default None
    :param epoch_callback: optional function of epoch and model, default None
    :return: None
    """
    static_dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        pin_memory=False,
        sampler=sampler,
        shuffle=False,
    )
    # DataLoader rejects shuffle=True combined with a custom sampler, so only
    # shuffle when the caller has not supplied one.
    train_dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        shuffle=sampler is None,
    )
    data_iterator = tqdm(
        static_dataloader,
        leave=True,
        unit='batch',
        postfix={
            'epo': -1,
            'acc': '%.4f' % 0.0,
            'lss': '%.8f' % 0.0,
            'dlb': '%.4f' % -1,
        },
        disable=silent,
    )
    kmeans = KMeans(n_clusters=model.cluster_number, n_init=20)
    model.train()
    features = []
    actual = []
    # form initial cluster centres; no gradients are needed for this pass
    with torch.no_grad():
        for batch in data_iterator:
            if isinstance(batch, (tuple, list)) and len(batch) == 2:
                batch, value = batch  # if we have a prediction label, separate it to actual
                actual.append(value)
            if cuda:
                batch = batch.cuda(non_blocking=True)
            features.append(model.encoder(batch).detach().cpu())
    actual = torch.cat(actual).long()
    predicted = kmeans.fit_predict(torch.cat(features).numpy())
    predicted_previous = torch.tensor(np.copy(predicted), dtype=torch.long)
    _, accuracy = cluster_accuracy(predicted, actual.cpu().numpy())
    cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)
    # Copy the KMeans centres into the existing parameter in place. Rebinding the
    # attribute to a fresh Parameter would leave the caller's optimizer pointing at
    # the old tensor, so the centres would never be updated during training.
    with torch.no_grad():
        model.assignment.cluster_centers.copy_(cluster_centers)
    # reduction='sum' is the current spelling of the deprecated size_average=False
    loss_function = nn.KLDivLoss(reduction='sum')
    delta_label = None
    for epoch in range(epochs):
        data_iterator = tqdm(
            train_dataloader,
            leave=True,
            unit='batch',
            postfix={
                'epo': epoch,
                'acc': '%.4f' % (accuracy or 0.0),
                'lss': '%.8f' % 0.0,
                'dlb': '%.4f' % (delta_label or 0.0),
            },
            disable=silent,
        )
        model.train()
        for index, batch in enumerate(data_iterator):
            if isinstance(batch, (tuple, list)) and len(batch) == 2:
                batch, _ = batch  # if we have a prediction label, strip it away
            if cuda:
                batch = batch.cuda(non_blocking=True)
            output = model(batch)
            target = target_distribution(output).detach()
            # summed KL divergence averaged over the batch dimension
            loss = loss_function(output.log(), target) / output.shape[0]
            loss_value = float(loss.item())
            data_iterator.set_postfix(
                epo=epoch,
                acc='%.4f' % (accuracy or 0.0),
                lss='%.8f' % loss_value,
                dlb='%.4f' % (delta_label or 0.0),
            )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step(closure=None)
            if update_freq is not None and index % update_freq == 0:
                if update_callback is not None:
                    update_callback(accuracy, loss_value, delta_label)
        predicted, actual = predict(
            dataset, model, evaluate_batch_size, silent=True, return_actual=True, cuda=cuda)
        # proportion of samples whose cluster assignment changed since the last evaluation
        delta_label = float((predicted != predicted_previous).float().sum().item()) \
            / predicted_previous.shape[0]
        # NOTE: early stopping on stopping_delta is intentionally left disabled here,
        # matching the existing behaviour of this function.
        predicted_previous = predicted
        _, accuracy = cluster_accuracy(predicted.cpu().numpy(), actual.cpu().numpy())
        data_iterator.set_postfix(
            epo=epoch,
            acc='%.4f' % (accuracy or 0.0),
            lss='%.8f' % 0.0,
            dlb='%.4f' % (delta_label or 0.0),
        )
        if epoch_callback is not None:
            epoch_callback(epoch, model)
def train(dataset: torch.utils.data.Dataset,
          wdec: torch.nn.Module,
          epochs: int,
          batch_size: int,
          optimizer: torch.optim.Optimizer,
          reinitKMeans: bool = True,
          scheduler=None,
          positive_ratio: float = 0.6,
          stopping_delta: Optional[float] = None,
          collate_fn=default_collate,
          cuda: bool = True,
          sampler: Optional[torch.utils.data.sampler.Sampler] = None,
          silent: bool = False,
          update_freq: Optional[int] = 10,
          evaluate_batch_size: int = 1024,
          update_callback: Optional[Callable[[float, float, Optional[float]], None]] = None,
          epoch_callback: Optional[Callable[[int, torch.nn.Module], None]] = None,
          start_time: Optional[float] = None,
          ) -> None:
    """
    Train the DEC model given a dataset, a model instance and various configuration parameters.

    The model is moved to the CPU before this function returns.

    :param dataset: instance of Dataset to use for training; each collated batch must be a
        tuple or list of six items whose first three are the data, the labels and the indices
    :param wdec: instance of DEC model to train
    :param epochs: number of training epochs
    :param batch_size: size of the batch to train with
    :param optimizer: instance of optimizer to use
    :param reinitKMeans: if true, the clusters are initialised with KMeans before training;
        otherwise the current assignments are obtained with predict
    :param scheduler: instance of lr_scheduler to use; stepped after every batch
    :param positive_ratio: currently unused by this function
    :param stopping_delta: label delta as a proportion to use for stopping, None to disable, default None
    :param collate_fn: function to merge a list of samples into mini-batch
    :param cuda: whether to use CUDA, defaults to True
    :param sampler: optional sampler to use in the DataLoader, defaults to None; when given,
        the training loader does not shuffle
    :param silent: set to True to prevent printing out summary statistics, defaults to False
    :param update_freq: frequency in batches with which update_callback is invoked,
        None disables, default 10
    :param evaluate_batch_size: batch size for evaluation stage, default 1024
    :param update_callback: optional function of accuracy, loss and label delta, default None
    :param epoch_callback: optional function of epoch and model, default None
    :param start_time: optional starting time of training process, used only to print
        elapsed-time markers, default None
    :return: None
    :raises ValueError: if a training batch is not a tuple or list of six items
    """
    # DataLoader rejects shuffle=True combined with a custom sampler, so only
    # shuffle when the caller has not supplied one.
    train_dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        collate_fn=collate_fn,
        sampler=sampler,
        shuffle=sampler is None,
    )
    wdec.train()
    test_dataset(dataset)
    if reinitKMeans:
        # get all data needed for KMeans.
        if start_time is not None:
            print('\nLinearizing data')
            print(f'@ {time.time() - start_time}\n')
        features, actual, idxs, boxs, videos, frames = DataSetExtract(dataset, wdec)
        # KMeans.
        if start_time is not None:
            print('\nPerforming KMeans')
            print(f'@ {time.time() - start_time}\n')
        predicted, kmeans = SSKMeans(
            wdec,
            features,
            actual,
            idxs,
            boxs,
            videos,
            frames,
        )
        # Computing the positive ratio scores and the positive ratio clusters
        cpr = PositiveRatioClusters(
            predicted,
            actual,
            wdec.assignment.cluster_number,
        )
        predicted_previous = torch.tensor(np.copy(predicted), dtype=torch.long)
        _, accuracy = cluster_accuracy(predicted, actual.cpu().numpy())
        cluster_centers = torch.tensor(
            kmeans.cluster_centers_,
            dtype=torch.float,
            requires_grad=True,
        )
        # pair every sample index with its predicted cluster, one row per sample
        predicted_idxed = torch.cat(
            [idxs.reshape(-1, 1), torch.tensor(predicted).reshape(-1, 1).long()],
            dim=-1,
        )
        del features, actual, idxs, boxs, videos, frames
        if cuda:
            wdec.cuda()
            cluster_centers = cluster_centers.cuda(non_blocking=True)
        with torch.no_grad():
            # initialise the cluster centers in place so the optimizer keeps
            # referring to the same parameter tensor
            wdec.state_dict()['assignment.cluster_centers'].copy_(cluster_centers)
            wdec.assignment.cluster_predicted = predicted_idxed.clone()
            wdec.assignment.cluster_positive_ratio = cpr.clone()
    else:
        if cuda:
            # this function leaves the model on the CPU when it returns, so a resumed
            # run must move it back before batches are sent to the GPU
            wdec.cuda()
        predicted, actual = predict(
            dataset,
            wdec,
            batch_size=evaluate_batch_size,
            collate_fn=collate_fn,
            silent=True,
            return_actual=True,
            cuda=cuda,
        )
        predicted_previous = torch.tensor(np.copy(predicted), dtype=torch.long)
        _, accuracy = cluster_accuracy(predicted.cpu().numpy(), actual.cpu().numpy())
    if start_time is not None:
        print('\ntrainint DEC')
        print(f'@ {time.time() - start_time}\n')
    # reduction='sum' is the current spelling of the deprecated size_average=False
    loss_function = nn.KLDivLoss(reduction='sum')
    delta_label = None
    for epoch in range(epochs):
        data_iterator = tqdm(
            train_dataloader,
            leave=True,
            unit='batch',
            postfix={
                'epo': epoch,
                'acc': '%.4f' % (accuracy or 0.0),
                'lss': '%.8f' % 0.0,
                'dlb': '%.4f' % (delta_label or 0.0),
            },
            disable=silent,
        )
        wdec.train()
        for index, batch in enumerate(data_iterator):
            if not (isinstance(batch, (tuple, list)) and len(batch) == 6):
                # the forward pass needs the labels and indices carried by the batch
                raise ValueError(
                    'Expected each batch to be a tuple or list of 6 items '
                    '(data, actual, idxs and three further fields), got %s.'
                    % type(batch).__name__
                )
            batch, actual, idxs, _, _, _ = batch
            if cuda:
                batch = batch.cuda(non_blocking=True)
                actual = actual.cuda()
                idxs = idxs.cuda()
            output = wdec(batch, actual, idxs)
            target = target_distribution(output).detach()
            # summed KL divergence averaged over the batch dimension
            loss = loss_function(output.log(), target) / output.shape[0]
            loss_value = float(loss.item())
            data_iterator.set_postfix(
                epo=epoch,
                acc='%.4f' % (accuracy or 0.0),
                lss='%.8f' % loss_value,
                dlb='%.4f' % (delta_label or 0.0),
            )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step(closure=None)
            if scheduler is not None:
                scheduler.step()
            if update_freq is not None and index % update_freq == 0:
                if update_callback is not None:
                    update_callback(accuracy, loss_value, delta_label)
        predicted, actual = predict(
            dataset,
            wdec,
            batch_size=evaluate_batch_size,
            collate_fn=collate_fn,
            silent=True,
            return_actual=True,
            cuda=cuda,
        )
        # proportion of samples whose cluster assignment changed since the last evaluation
        delta_label = float((predicted != predicted_previous).float().sum().item()) \
            / predicted_previous.shape[0]
        if stopping_delta is not None and delta_label < stopping_delta:
            print('Early stopping as label delta "%1.5f" less than "%1.5f".' % (delta_label, stopping_delta))
            break
        predicted_previous = predicted
        _, accuracy = cluster_accuracy(predicted.cpu().numpy(), actual.cpu().numpy())
        data_iterator.set_postfix(
            epo=epoch,
            acc='%.4f' % (accuracy or 0.0),
            lss='%.8f' % 0.0,
            dlb='%.4f' % (delta_label or 0.0),
        )
        if epoch_callback is not None:
            epoch_callback(epoch, wdec)
    wdec.cpu()
def main(cuda, batch_size, pretrain_epochs, finetune_epochs, testing_mode):
    """
    Run the full pipeline on the CachedMNIST datasets: pretrain and fine-tune a
    stacked denoising autoencoder, train DEC on top of its encoder, print the
    final clustering accuracy and (outside testing mode) save a confusion heatmap.

    :param cuda: whether to use CUDA; forwarded to the datasets and every training stage
    :param batch_size: batch size for the autoencoder pretraining and training stages
    :param pretrain_epochs: number of epochs passed to ae.pretrain
    :param finetune_epochs: number of epochs passed to ae.train
    :param testing_mode: forwarded to CachedMNIST; when true the confusion
        heatmap is not produced
    :return: None
    """
    writer = SummaryWriter()  # create the TensorBoard object

    # callback function to call during training, uses writer from the scope
    def training_callback(epoch, lr, loss, validation_loss):
        writer.add_scalars(
            "data/autoencoder",
            {
                "lr": lr,
                "loss": loss,
                "validation_loss": validation_loss,
            },
            epoch,
        )

    try:
        ds_train = CachedMNIST(train=True, cuda=cuda, testing_mode=testing_mode)  # training dataset
        ds_val = CachedMNIST(train=False, cuda=cuda, testing_mode=testing_mode)  # evaluation dataset
        autoencoder = StackedDenoisingAutoEncoder([28 * 28, 500, 500, 2000, 10], final_activation=None)
        if cuda:
            autoencoder.cuda()

        print("Pretraining stage.")
        ae.pretrain(
            ds_train,
            autoencoder,
            cuda=cuda,
            validation=ds_val,
            epochs=pretrain_epochs,
            batch_size=batch_size,
            optimizer=lambda model: SGD(model.parameters(), lr=0.1, momentum=0.9),
            scheduler=lambda x: StepLR(x, 100, gamma=0.1),
            corruption=0.2,
        )

        print("Training stage.")
        ae_optimizer = SGD(params=autoencoder.parameters(), lr=0.1, momentum=0.9)
        ae.train(
            ds_train,
            autoencoder,
            cuda=cuda,
            validation=ds_val,
            epochs=finetune_epochs,
            batch_size=batch_size,
            optimizer=ae_optimizer,
            scheduler=StepLR(ae_optimizer, 100, gamma=0.1),
            corruption=0.2,
            update_callback=training_callback,
        )

        print("DEC stage.")
        model = DEC(cluster_number=10, hidden_dimension=10, encoder=autoencoder.encoder)
        if cuda:
            model.cuda()
        dec_optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
        train(
            dataset=ds_train,
            model=model,
            epochs=100,
            batch_size=256,
            optimizer=dec_optimizer,
            stopping_delta=0.000001,
            cuda=cuda,
        )
        predicted, actual = predict(ds_train, model, 1024, silent=True, return_actual=True, cuda=cuda)
        actual = actual.cpu().numpy()
        predicted = predicted.cpu().numpy()
        reassignment, accuracy = cluster_accuracy(actual, predicted)
        print("Final DEC accuracy: %s" % accuracy)

        if not testing_mode:
            predicted_reassigned = [reassignment[item] for item in predicted]  # TODO numpify
            confusion = confusion_matrix(actual, predicted_reassigned)
            # normalise each row by its total
            normalised_confusion = confusion.astype("float") / confusion.sum(axis=1)[:, np.newaxis]
            confusion_id = uuid.uuid4().hex
            sns.heatmap(normalised_confusion).get_figure().savefig(
                "confusion_%s.png" % confusion_id)
            print("Writing out confusion diagram with UUID: %s" % confusion_id)
    finally:
        # always release the TensorBoard writer, even if a stage fails
        writer.close()