def on_validation_epoch_end(self, trainer, pl_module):
    # Run a 1-NN evaluation on frozen encoder features every `self.period` epochs.
    if trainer.running_sanity_check:
        return
    if trainer.current_epoch % self.period != 0:
        return

    # Bound method used as the feature extractor.
    encoder = pl_module.get_representations

    # Extract training-set features (no augmentation, no gradients).
    train_features = []
    train_target = []
    with torch.no_grad():
        for batch, target in trainer.datamodule.train_dataloader(transform=None):
            input_, seq_len = batch
            train_features.append(encoder(input_.to(pl_module.device), seq_len).cpu())
            train_target.append(torch.Tensor(target))
    train_features = torch.cat(train_features, 0)
    train_target = torch.cat(train_target, 0)

    # Extract validation-set features.
    val_features = []
    val_target = []
    with torch.no_grad():
        for batch, target in trainer.datamodule.val_dataloader(transform=None):
            input_, seq_len = batch
            val_features.append(encoder(input_.to(pl_module.device), seq_len).cpu())
            val_target.append(torch.Tensor(target))
    val_features = torch.cat(val_features, 0)
    val_target = torch.cat(val_target, 0)

    # 1-NN accuracy on the frozen features, logged against the current epoch.
    knn_acc = modules.knn(train_features.numpy(), val_features.numpy(),
                          train_target.numpy(), val_target.numpy(), nn=1)
    trainer.logger.log_metrics({'knn_acc': knn_acc}, step=trainer.current_epoch)
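# --- Illustrative sketch (not from the original code) ---------------------
# The hook above is presumably a method on a pl.Callback subclass. A minimal
# sketch of how such a callback might be declared and registered with the
# Trainer; the class name `KNNEvalCallback` and its constructor are
# assumptions, not part of the original code.
import pytorch_lightning as pl

class KNNEvalCallback(pl.Callback):
    def __init__(self, period=5):
        super().__init__()
        self.period = period  # run the KNN evaluation every `period` epochs

    # ... on_validation_epoch_end as defined above ...

# trainer = pl.Trainer(gpus=1, callbacks=[KNNEvalCallback(period=5)])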
def lincls(args, model):
    # Extract dataset features with the frozen encoder.
    dataset_type = vars(datasets)[args.dataset]
    data_args = args.__dict__
    data_args.update({'train_transforms': None, 'val_transforms': None})
    datamodule = dataset_type(**data_args)
    train_loader = datamodule.train_dataloader(shuffle=False, drop_last=False)
    val_loader = datamodule.val_dataloader()

    encoder = model.get_encoder().cuda()
    encoder.eval()

    train_features = []
    train_target = []
    with torch.no_grad():
        for batch, target in train_loader:
            input_, seq_len = batch
            train_features.append(encoder(input_.cuda(), seq_len).cpu())
            train_target.append(torch.Tensor(target))
    train_features = torch.cat(train_features, 0)
    train_target = torch.cat(train_target, 0)

    val_features = []
    val_target = []
    with torch.no_grad():
        for batch, target in val_loader:
            input_, seq_len = batch
            val_features.append(encoder(input_.cuda(), seq_len).cpu())
            val_target.append(torch.Tensor(target))
    val_features = torch.cat(val_features, 0)
    val_target = torch.cat(val_target, 0)

    # Train a linear classifier on the cached features.
    batch_size = 512 if 'NTU' in args.dataset else 128
    print(datamodule.num_classes)
    datamodule = datasets.FeatureDataModule(train_features, train_target,
                                            val_features, val_target,
                                            num_workers=4, batch_size=batch_size)

    model_checkpoint = pl.callbacks.ModelCheckpoint(
        filepath=os.path.join(args.exp_dir, 'lincls'),
        save_top_k=1, mode='max', monitor='val_acc1_agg', period=1)
    trainer = pl.Trainer(max_epochs=70, weights_summary=None, gpus=1,
                         checkpoint_callback=model_checkpoint,
                         progress_bar_refresh_rate=0)
    model = modules.LinearClassifierMod(input_dim=args.hidden_dim,
                                        n_label=datamodule.num_classes,
                                        learning_rate=2, momentum=0.9,
                                        weight_decay=0, epochs=70,
                                        lr_decay_rate=0.01)
    trainer.fit(model, datamodule)

    # Recover the best checkpoint from the experiment directory
    # (instead of trainer.checkpoint_callback.best_model_path).
    ckpts = list(filter(lambda x: 'lincls' in x and '.ckpt' in x,
                        os.listdir(args.exp_dir)))
    best_ckpt = ckpts[-1] if len(ckpts) == 1 else ckpts[-2]
    best_ckpt = os.path.join(args.exp_dir, best_ckpt)
    print(best_ckpt)
    best_model = modules.LinearClassifierMod.load_from_checkpoint(
        checkpoint_path=best_ckpt)

    result_dict = {}
    lincls_result = trainer.test(model=best_model, datamodule=datamodule)[0]
    result_dict.update(lincls_result)

    # 1-NN accuracy on the same cached features.
    knn_acc = modules.knn(train_features.numpy(), val_features.numpy(),
                          train_target.numpy(), val_target.numpy(), nn=1)
    result_dict['knn_acc'] = knn_acc
    return result_dict
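# --- Illustrative sketch (not from the original code) ---------------------
# `modules.knn` is called above with pre-extracted feature matrices and an
# `nn` neighbour count, and its return value is reported as an accuracy.
# A minimal compatible implementation, assuming it is a plain k-NN classifier
# evaluated on the validation features (sklearn-based sketch):
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def knn(train_features, val_features, train_target, val_target, nn=1):
    clf = KNeighborsClassifier(n_neighbors=nn)
    clf.fit(train_features, train_target)
    pred = clf.predict(val_features)
    return float(np.mean(pred == val_target))  # top-1 accuracy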
# Repeated stratified cross-validation over KNN hyperparameters.
result = []
for t in range(T):
    for tr_index, v_index in skf.split(X, y):
        # Training split.
        X_tr = X[tr_index, :]
        y_tr = y[tr_index]
        # Validation split.
        X_val = X[v_index, :]
        y_val = y[v_index]
        # Standardize using training statistics only.
        (X_tr_norm, X_val_norm) = utils.normalizing(X_tr, X_val)
        # KNN on all features: sweep every (weighting, k) pair.
        result_L = []
        for w in weights:
            for n in N_K:
                result_l = modules.knn(n, w, X_tr_norm, y_tr, X_val_norm, y_val)
                result_L.append(result_l)
        result.append(result_L)

# Columns 2 and 3 of each result tuple are the train and validation errors.
mean_Etrain = np.mean(np.array(result)[:, :, 2], axis=0)
var_Etrain = np.var(np.array(result)[:, :, 2], axis=0)
mean_Eval = np.mean(np.array(result)[:, :, 3], axis=0)
var_Eval = np.var(np.array(result)[:, :, 3], axis=0)

# The first len(N_K) == 9 entries are the 'uniform' sweep, the next 9 'distance'.
best_n_uniform = N_K[np.argmin(mean_Eval[0:9])]
best_n_distance = N_K[np.argmin(mean_Eval[9:18])]
print("KNN_uniform best k: " + str(best_n_uniform) + " "
      + str(mean_Etrain[np.argmin(mean_Eval[0:9])]) + " "
      + str(mean_Eval[np.argmin(mean_Eval[0:9])]))
print("KNN_distance best k: " + str(best_n_distance) + " "
      + str(mean_Etrain[9 + np.argmin(mean_Eval[9:18])]) + " "
      + str(mean_Eval[9 + np.argmin(mean_Eval[9:18])]))
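# --- Illustrative sketch (not from the original code) ---------------------
# Note that `modules.knn` here has a different signature from the one used in
# `lincls`: it takes (k, weighting, X_tr, y_tr, X_val, y_val) and, judging by
# the [:, :, 2] / [:, :, 3] indexing above and the 4-way unpacking in the KNN
# baseline further below, returns a 4-tuple of (train predictions, val
# predictions, train error, val error). A sketch under that assumption:
from sklearn.neighbors import KNeighborsClassifier

def knn(n, w, X_tr, y_tr, X_val, y_val):
    clf = KNeighborsClassifier(n_neighbors=n, weights=w)
    clf.fit(X_tr, y_tr)
    y_tr_pred = clf.predict(X_tr)
    y_val_pred = clf.predict(X_val)
    error_train = 1.0 - clf.score(X_tr, y_tr)   # misclassification rate
    error_val = 1.0 - clf.score(X_val, y_val)
    return (y_tr_pred, y_val_pred, error_train, error_val)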
def lincls(args, model):
    # Extract dataset features with the frozen encoder.
    dataset_type = vars(datasets)[args.dataset]
    data_args = args.__dict__
    data_args.update({'train_transforms': None, 'val_transforms': None})
    datamodule = dataset_type(**data_args)
    train_loader = datamodule.train_dataloader(shuffle=False, drop_last=False)
    val_loader = datamodule.val_dataloader()

    encoder = model.get_encoder().cuda()
    encoder.eval()

    train_features = []
    train_target = []
    with torch.no_grad():
        for batch, target in train_loader:
            input_, seq_len = batch
            train_features.append(encoder(input_.cuda(), seq_len).cpu())
            train_target.append(torch.Tensor(target))
    train_features = torch.cat(train_features, 0)
    train_target = torch.cat(train_target, 0)

    val_features = []
    val_target = []
    with torch.no_grad():
        for batch, target in val_loader:
            input_, seq_len = batch
            val_features.append(encoder(input_.cuda(), seq_len).cpu())
            val_target.append(torch.Tensor(target))
    val_features = torch.cat(val_features, 0)
    val_target = torch.cat(val_target, 0)

    # Train a linear classifier on the cached features.
    batch_size = 512 if 'NTU' in args.dataset else 128
    datamodule = datasets.FeatureDataModule(train_features, train_target,
                                            val_features, val_target,
                                            num_workers=4, batch_size=batch_size)

    model_checkpoint = pl.callbacks.ModelCheckpoint(save_top_k=1, mode='max',
                                                    monitor='val_acc1_agg', period=1)
    trainer = pl.Trainer(max_epochs=70, weights_summary=None, gpus=1,
                         checkpoint_callback=model_checkpoint,
                         progress_bar_refresh_rate=0)
    model = modules.LinearClassifierMod(input_dim=args.hidden_dim,
                                        n_label=datamodule.num_classes,
                                        learning_rate=2, momentum=0.9,
                                        weight_decay=0, epochs=70,
                                        lr_decay_rate=0.01)
    trainer.fit(model, datamodule)

    best_ckpt = trainer.checkpoint_callback.best_model_path
    best_model = modules.LinearClassifierMod.load_from_checkpoint(
        checkpoint_path=best_ckpt)

    result_dict = {}
    lincls_result = trainer.test(model=best_model, datamodule=datamodule)[0]
    result_dict.update(lincls_result)

    # KNN accuracy on the same cached features: 9 neighbours for NTU, else 1.
    nn_ = 9 if 'NTU' in args.dataset else 1
    knn_acc = modules.knn(train_features.numpy(), val_features.numpy(),
                          train_target.numpy(), val_target.numpy(), nn=nn_)
    result_dict['knn_acc'] = knn_acc
    return result_dict
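# --- Illustrative sketch (not from the original code) ---------------------
# `datasets.FeatureDataModule` is referenced but not defined in this section.
# A minimal sketch, assuming it simply wraps the cached feature tensors in
# TensorDatasets and exposes `num_classes`; every detail below is an
# assumption inferred from how `lincls` uses it:
import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, TensorDataset

class FeatureDataModule(pl.LightningDataModule):
    def __init__(self, train_features, train_target, val_features, val_target,
                 num_workers=4, batch_size=128):
        super().__init__()
        self.train_set = TensorDataset(train_features, train_target.long())
        self.val_set = TensorDataset(val_features, val_target.long())
        self.num_workers = num_workers
        self.batch_size = batch_size
        # Assumes labels are 0-indexed class ids.
        self.num_classes = int(train_target.max().item()) + 1

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size,
                          num_workers=self.num_workers)

    def test_dataloader(self):
        return self.val_dataloader()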
plt.plot(np.log10(L), etrain, 'g', label="train")
plt.plot(np.log10(L), etest, 'b', label="test")
plt.legend()
plt.show()

''' ------------------------------------------------ nonlinear regression '''
# Normalize the pretraining data for the nonlinear models.
(Dpt_tr_norm, Dpt_test_norm) = utils.normalizing(Dpt_tr, Dpt_test)

# Baseline KNN: weights in {'uniform', 'distance'}, k = 1..9.
for k in range(1, 10):
    (y_train_pred, y_test_pred, error_train, error_test) = modules.knn(
        k, 'distance', Dpt_tr_norm, ypt_tr, Dpt_test_norm, ypt_test)
    print("----")
    print(k)
    print(error_train, error_test)

''' using ABM model (tree-based) '''
# CART: sweep min_impurity_decrease over [0.001, 0.1, 1, 10, 100, 1000] and plot
# to find the region that does not overfit; then try similar leaf counts and
# choose the simplest model within +/- 1 std of the best test error.
min_impurity_decrease = 10
max_leaf_nodes = 90
result = modules.cart(max_leaf_nodes, min_impurity_decrease,
                      Dpt_tr_norm, ypt_tr, Dpt_test_norm, ypt_test)
print("-----")
print(max_leaf_nodes)
print(result[0])
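# --- Illustrative sketch (not from the original code) ---------------------
# `modules.cart` is not shown. Since this section is about nonlinear
# regression, a plausible sketch using scikit-learn's DecisionTreeRegressor;
# the returned tuple layout (test error first, so that `result[0]` above
# prints it) is an assumption:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

def cart(max_leaf_nodes, min_impurity_decrease, X_tr, y_tr, X_test, y_test):
    tree = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes,
                                 min_impurity_decrease=min_impurity_decrease)
    tree.fit(X_tr, y_tr)
    error_test = mean_squared_error(y_test, tree.predict(X_test))
    error_train = mean_squared_error(y_tr, tree.predict(X_tr))
    return (error_test, error_train)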