def preprocess_pipeline(args):
    # [STEP 0] load the .mat files (sample-level)
    if not os.path.exists(args.path_raw):
        sys.stdout = Logger(os.path.join(args.path_raw, "log_raw.txt"))
        print(paint("[STEP 0] Loading the .mat files..."))
        load_mat(path_data=args.path_data,
                 path_raw=args.path_raw,
                 class_map=args.class_map)
    else:
        print(paint("[STEP 0] Files already loaded!"))

    # [STEP 1] partition the datasets (segment-level)
    w, s = args.window, args.stride
    if not os.path.exists(args.path_processed):
        sys.stdout = Logger(
            os.path.join(args.path_processed, f"log_{w}_{s}.txt"))
        print(
            paint(
                f"[STEP 1] Partitioning the dataset (window,stride) = ({w},{s})..."
            ))
        partition(
            path_raw=args.path_raw,
            path_processed=args.path_processed,
            window=w,
            stride=s,
            class_map=args.class_map,
        )
    else:
        print(
            paint(
                f"[STEP 1] Dataset already partitioned (window,stride) = ({w},{s})!"
            ))
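# Usage sketch (hypothetical values): preprocess_pipeline only reads the
# attributes accessed above, so any namespace object can stand in for the
# argparse result; the paths and (window, stride) below are placeholders.
def _demo_preprocess_pipeline():
    from types import SimpleNamespace

    demo_args = SimpleNamespace(
        path_data="./data/dataset.mat",          # hypothetical .mat location
        path_raw="./data/raw",
        path_processed="./data/processed_24_12",
        window=24,
        stride=12,
        class_map=[f"class_{i}" for i in range(7)],  # len selects load_mat branch
    )
    preprocess_pipeline(demo_args)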
def get_info(self, n_samples=3):
    print(paint(f"[-] Information on {self.prefix} dataset:"))
    print("\t data: ", self.data.shape, self.data.dtype, type(self.data))
    print("\t target: ", self.target.shape, self.target.dtype,
          type(self.target))

    target_idx = [
        np.where(self.target == label)[0] for label in set(self.target)
    ]
    target_idx_samples = np.array([
        np.random.choice(idx, n_samples, replace=False) for idx in target_idx
    ]).flatten()

    for i, random_idx in enumerate(target_idx_samples):
        data, target, index = self.__getitem__(random_idx)
        if i == 0:
            print(
                paint(
                    f"[-] Information on segment #{random_idx}/{self.len}:"))
            print("\t data: ", data.shape, data.dtype, type(data))
            print("\t target: ", target.shape, target.dtype, type(target))
            print("\t index: ", index, index.shape, index.dtype, type(index))
        path_save = os.path.join(self.path_processed, "segments")
        plot_segment(
            data,
            target,
            index=index,
            prefix=self.prefix,
            path_save=path_save,
            num_class=len(target_idx),
        )
def main():
    # get experiment arguments
    args, _, config_model = get_args()
    args.experiment = "test_models"
    config_model["experiment"] = "test_models"

    # [STEP 1] create synthetic HAR batch
    data_synthetic = torch.randn(
        (args.batch_size, args.window, args.input_dim)).cuda()

    # [STEP 2] create HAR models
    if torch.cuda.is_available():
        model = create(args.model, config_model).cuda()
        torch.backends.cudnn.benchmark = True

    get_info_params(model)
    get_info_layers(model)
    model.apply(init_weights_orthogonal)
    model.eval()

    with torch.no_grad():
        print(paint("[*] Performing a forward pass with a synthetic batch..."))
        z, logits = model(data_synthetic)
        print(f"\t input: {data_synthetic.shape} {data_synthetic.dtype}")
        print(f"\t z: {z.shape} {z.dtype}")
        print(f"\t logits: {logits.shape} {logits.dtype}")
def partition(path_raw, path_processed, window, stride, class_map):
    # read raw datasets (sample-level)
    print(f"[*] Reading raw files from {path_raw}")
    dataset_train = np.load(os.path.join(path_raw, "train.npz"))
    x_train, y_train = dataset_train["x"], dataset_train["y"]
    dataset_val = np.load(os.path.join(path_raw, "val.npz"))
    x_val, y_val = dataset_val["x"], dataset_val["y"]
    dataset_test = np.load(os.path.join(path_raw, "test.npz"))
    x_test, y_test = dataset_test["x"], dataset_test["y"]

    # apply sliding window over raw samples and generate segments
    data_train, target_train = sliding_window(x_train, y_train, window, stride)
    data_val, target_val = sliding_window(x_val, y_val, window, stride)
    data_test, target_test = sliding_window(x_test, y_test, window, stride)
    data_test_sample_wise, target_test_sample_wise = sliding_window(
        x_test, y_test, window, 1)

    # show processed datasets info (segment-level)
    print("[-] Train data : {} {}, target {} {}".format(
        data_train.shape, data_train.dtype, target_train.shape,
        target_train.dtype))
    print("[-] Valid data : {} {}, target {} {}".format(
        data_val.shape, data_val.dtype, target_val.shape, target_val.dtype))
    print("[-] Test data : {} {}, target {} {}".format(
        data_test.shape, data_test.dtype, target_test.shape,
        target_test.dtype))
    print("[-] Test data sample-wise : {} {}, target sample-wise {} {}".format(
        data_test_sample_wise.shape,
        data_test_sample_wise.dtype,
        target_test_sample_wise.shape,
        target_test_sample_wise.dtype,
    ))

    # plot processed target distributions (segment-level)
    plot_pie(target_train, "train", path_processed, class_map)
    plot_pie(target_val, "val", path_processed, class_map)
    plot_pie(target_test, "test", path_processed, class_map)
    plot_pie(target_test_sample_wise, "test_sample_wise", path_processed,
             class_map)

    # save processed datasets (segment-level)
    np.savez_compressed(os.path.join(path_processed, "train.npz"),
                        data=data_train,
                        target=target_train)
    np.savez_compressed(os.path.join(path_processed, "val.npz"),
                        data=data_val,
                        target=target_val)
    np.savez_compressed(os.path.join(path_processed, "test.npz"),
                        data=data_test,
                        target=target_test)
    np.savez_compressed(
        os.path.join(path_processed, "test_sample_wise.npz"),
        data=data_test_sample_wise,
        target=target_test_sample_wise,
    )

    print("[+] Processed segment datasets successfully saved!")
    print(paint("--" * 50, "blue"))
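# For intuition, a minimal standalone sketch of the sliding-window step.
# Assumption: the repo's sliding_window returns one (window, channels) segment
# per stride plus a single label per segment (here: the segment's last sample);
# e.g. x of shape (1000, 77) with window=24, stride=12 yields (82, 24, 77).
def _demo_sliding_window(x, y, window, stride):
    starts = range(0, len(x) - window + 1, stride)
    data = np.stack([x[s:s + window] for s in starts])      # (n_seg, window, channels)
    target = np.array([y[s + window - 1] for s in starts])  # (n_seg,)
    return data, target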
def get_info_params(model):
    """
    Display a summary of trainable vs. total network parameter counts.

    :param model: network of interest (torch.nn.Module)
    """
    num_trainable = sum(p.numel() for p in model.parameters()
                        if p.requires_grad)
    num_total = sum(p.numel() for p in model.parameters())
    print(
        paint(f"[-] {num_trainable}/{num_total} trainable parameters", "blue"))
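# Quick standalone check of the trainable/total split on a toy model (assumes
# torch.nn is imported as nn, as in the training code):
def _demo_param_count():
    toy = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
    for p in toy[0].parameters():  # freeze the first Linear (144 params)
        p.requires_grad = False
    get_info_params(toy)           # -> "[-] 68/212 trainable parameters"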
def main():
    # get experiment arguments
    args, config_dataset, config_model = get_args()

    # [STEP 0 and 1] load the .mat files (sample-level) and partition the
    # datasets (segment-level)
    preprocess_pipeline(args)

    if args.train_mode:
        # [STEP 2] create HAR datasets
        dataset = SensorDataset(**config_dataset, prefix="train")
        dataset_val = SensorDataset(**config_dataset, prefix="val")

        # [STEP 3] create HAR models
        if torch.cuda.is_available():
            model = create(args.model, config_model).cuda()
            torch.backends.cudnn.benchmark = True
        sys.stdout = Logger(
            os.path.join(model.path_logs, f"log_main_{args.experiment}.txt"))

        # show args
        print("##" * 50)
        print(paint(f"Experiment: {model.experiment}", "blue"))
        print(
            paint(
                f"[-] Using {torch.cuda.device_count()} GPU: {torch.cuda.is_available()}"
            ))
        print(args)
        get_info_params(model)
        get_info_layers(model)
        print("##" * 50)

        # [STEP 4] train HAR models
        model_train(model, dataset, dataset_val, args)

    # [STEP 5] evaluate HAR models
    dataset_test = SensorDataset(**config_dataset, prefix="test")
    if not args.train_mode:
        config_model["experiment"] = "inference"
        model = create(args.model, config_model).cuda()
    model_eval(model, dataset_test, args)
def model_eval(model, dataset_test, args):
    print(paint("[STEP 5] Running HAR evaluation loop ..."))

    loader_test = DataLoader(dataset_test,
                             args.batch_size,
                             False,
                             pin_memory=True)

    criterion = nn.CrossEntropyLoss(reduction="mean").cuda()

    print("[-] Loading checkpoint ...")
    if args.train_mode:
        path_checkpoint = os.path.join(model.path_checkpoints,
                                       "checkpoint_best.pth")
    else:
        path_checkpoint = f"./weights/checkpoint_{args.dataset}.pth"

    checkpoint = torch.load(path_checkpoint)
    model.load_state_dict(checkpoint["model_state_dict"])
    criterion.load_state_dict(checkpoint["criterion_state_dict"])

    start_time = time.time()

    loss_test, acc_test, fm_test, fw_test = eval_one_epoch(model,
                                                           loader_test,
                                                           criterion,
                                                           -1,
                                                           logger=None,
                                                           args=args)
    print(
        paint(
            f"[-] Test loss: {loss_test:.2f}"
            f"\tacc: {acc_test:.2f}(%)\tfm: {fm_test:.2f}(%)\tfw: {fw_test:.2f}(%)"
        ))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print(paint(f"[STEP 5] Finished HAR evaluation loop (h:m:s): {elapsed}"))
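# Hedged note: fm/fw are assumed to be macro- and weighted-averaged F1 scores
# reported in percent, a common HAR convention. A standalone equivalent using
# scikit-learn would be:
def _demo_f1_metrics(y_true, y_pred):
    from sklearn.metrics import f1_score
    fm = 100.0 * f1_score(y_true, y_pred, average="macro")
    fw = 100.0 * f1_score(y_true, y_pred, average="weighted")
    return fm, fw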
def __init__(
    self,
    model,
    dataset,
    input_dim,
    hidden_dim,
    filter_num,
    filter_size,
    enc_num_layers,
    enc_is_bidirectional,
    dropout,
    dropout_rnn,
    dropout_cls,
    activation,
    sa_div,
    num_class,
    train_mode,
    experiment,
):
    super(AttendDiscriminate, self).__init__()

    self.experiment = f"train_{experiment}" if train_mode else experiment
    self.model = model
    self.dataset = dataset
    self.hidden_dim = hidden_dim

    print(paint(f"[STEP 3] Creating {self.model} HAR model ..."))

    self.fe = FeatureExtractor(
        input_dim,
        hidden_dim,
        filter_num,
        filter_size,
        enc_num_layers,
        enc_is_bidirectional,
        dropout,
        dropout_rnn,
        activation,
        sa_div,
    )

    self.dropout = nn.Dropout(dropout_cls)
    self.classifier = Classifier(hidden_dim, num_class)
    self.register_buffer("centers",
                         (torch.randn(num_class, self.hidden_dim).cuda()))

    # do not create log directories if we are only testing the models module
    if experiment != "test_models":
        if train_mode:
            makedir(self.path_checkpoints)
            makedir(self.path_logs)
            makedir(self.path_visuals)
def get_weights(self):
    target = self.target
    target_count = np.array(
        [np.sum(target == label) for label in set(target)])
    weight_target = 1.0 / target_count
    weight_samples = np.array([weight_target[t] for t in target])
    weight_samples = torch.from_numpy(weight_samples)
    weight_samples = weight_samples.double()

    if self.verbose:
        print(paint("[-] Target sampling weights:"))
        print(weight_target)

    return weight_samples
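# Tiny numeric check of the inverse-frequency weighting (standalone sketch):
# targets [0, 0, 0, 1] give class counts [3, 1], per-class weights [1/3, 1.0],
# and per-sample weights [1/3, 1/3, 1/3, 1.0], so WeightedRandomSampler draws
# the minority class roughly 3x more often per sample.
def _demo_target_weights():
    target = np.array([0, 0, 0, 1])
    count = np.array([np.sum(target == c) for c in sorted(set(target))])
    weight_target = 1.0 / count
    print(weight_target[target])  # -> [0.333... 0.333... 0.333... 1.0]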
def __init__(
    self,
    dataset,
    window,
    stride,
    stride_test,
    path_processed,
    prefix,
    transform=None,
    verbose=False,
):
    self.dataset = dataset
    self.window = window
    self.stride = stride
    self.prefix = prefix
    self.transform = transform
    self.path_processed = path_processed
    self.verbose = verbose

    if prefix == "test" and stride_test == 1:
        self.path_dataset = os.path.join(path_processed,
                                         "test_sample_wise.npz")
    else:
        self.path_dataset = os.path.join(path_processed,
                                         "{}.npz".format(prefix))

    dataset = np.load(self.path_dataset)
    self.data = dataset["data"]
    self.target = dataset["target"]
    self.len = self.data.shape[0]
    assert self.data.shape[0] == self.target.shape[0]

    print(
        paint(
            f"[STEP 2] Creating {self.dataset} {self.prefix} HAR dataset of size {self.len} ..."
        ))

    if self.verbose:
        self.get_info()
        self.get_distribution()

    if prefix == "train":
        self.weight_samples = self.get_weights()
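# Instantiation sketch (hypothetical names/paths/sizes): mirrors how main()
# builds the datasets from config_dataset; stride_test=1 on the "test" prefix
# picks up the sample-wise file written by partition().
def _demo_sensor_dataset():
    return SensorDataset(
        dataset="opportunity",                   # hypothetical dataset name
        window=24,
        stride=12,
        stride_test=1,                           # -> loads test_sample_wise.npz
        path_processed="./data/processed_24_12",
        prefix="test")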
def load_mat(path_data, path_raw, class_map):
    # load .mat files
    print(f"[*] Reading data files from {path_data}")
    contents = sio.loadmat(path_data)

    if len(class_map) == 18:
        # opportunity dataset
        x_train = contents["trainingData"].astype(np.float32).T
        y_train = contents["trainingLabels"].reshape(-1).astype(np.int64) - 1
        x_val = contents["valData"].astype(np.float32).T
        y_val = contents["valLabels"].reshape(-1).astype(np.int64) - 1
        x_test = contents["testingData"].astype(np.float32).T
        y_test = contents["testingLabels"].reshape(-1).astype(np.int64) - 1

        # normalizing
        mean_train = np.mean(x_train, axis=0)
        std_train = np.std(x_train, axis=0)
        x_train = (x_train - mean_train) / std_train
        x_val = (x_val - mean_train) / std_train
        x_test = (x_test - mean_train) / std_train

    elif len(class_map) == 7:
        # hospital dataset
        x_train = contents["X_train"].astype(np.float32)
        y_train = contents["y_train"].reshape(-1).astype(np.int64)
        x_val = contents["X_valid"].astype(np.float32)
        y_val = contents["y_valid"].reshape(-1).astype(np.int64)
        x_test = contents["X_test"].astype(np.float32)
        y_test = contents["y_test"].reshape(-1).astype(np.int64)

        # normalizing
        mean_train = np.mean(x_train, axis=0)
        std_train = np.std(x_train, axis=0)
        x_train = (x_train - mean_train) / std_train
        x_val = (x_val - mean_train) / std_train
        x_test = (x_test - mean_train) / std_train

    else:
        # all other datasets
        x_train = contents["X_train"].astype(np.float32)
        y_train = contents["y_train"].reshape(-1).astype(np.int64)
        x_val = contents["X_valid"].astype(np.float32)
        y_val = contents["y_valid"].reshape(-1).astype(np.int64)
        x_test = contents["X_test"].astype(np.float32)
        y_test = contents["y_test"].reshape(-1).astype(np.int64)

    # show raw datasets info (sample-level)
    print("[-] Train data : {} {}, target {} {}".format(
        x_train.shape, x_train.dtype, y_train.shape, y_train.dtype))
    print("[-] Valid data : {} {}, target {} {}".format(
        x_val.shape, x_val.dtype, y_val.shape, y_val.dtype))
    print("[-] Test data : {} {}, target {} {}".format(
        x_test.shape, x_test.dtype, y_test.shape, y_test.dtype))

    # plot raw target distributions (sample-level)
    plot_pie(y_train, "train", path_raw, class_map)
    plot_pie(y_val, "val", path_raw, class_map)
    plot_pie(y_test, "test", path_raw, class_map)

    # save raw datasets (sample-level)
    np.savez_compressed(os.path.join(path_raw, "train.npz"),
                        x=x_train,
                        y=y_train)
    np.savez_compressed(os.path.join(path_raw, "val.npz"), x=x_val, y=y_val)
    np.savez_compressed(os.path.join(path_raw, "test.npz"),
                        x=x_test,
                        y=y_test)

    print("[+] Raw sample datasets successfully saved!")
    print(paint("--" * 50, "blue"))
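# Standalone sketch of the normalization used above: val/test are standardized
# with the *train* mean/std so that no statistics leak from held-out splits.
def _demo_zscore(x_train, x_other):
    mean_train = x_train.mean(axis=0)
    std_train = x_train.std(axis=0)
    return ((x_train - mean_train) / std_train,
            (x_other - mean_train) / std_train)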
def model_train(model, dataset, dataset_val, args):
    print(paint("[STEP 4] Running HAR training loop ..."))

    logger = SummaryWriter(log_dir=os.path.join(model.path_logs, "train"))
    logger_val = SummaryWriter(log_dir=os.path.join(model.path_logs, "val"))

    if args.weighted_sampler:
        print(paint("[-] Using weighted sampler (balanced batch)..."))
        sampler = WeightedRandomSampler(dataset.weight_samples,
                                        len(dataset.weight_samples))
        loader = DataLoader(dataset,
                            args.batch_size,
                            sampler=sampler,
                            pin_memory=True)
    else:
        loader = DataLoader(dataset, args.batch_size, True, pin_memory=True)
    loader_val = DataLoader(dataset_val,
                            args.batch_size,
                            False,
                            pin_memory=True)

    criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
    params = filter(lambda p: p.requires_grad, model.parameters())

    if args.optimizer == "Adam":
        optimizer = optim.Adam(params, lr=args.lr)
    elif args.optimizer == "RMSprop":
        optimizer = optim.RMSprop(params, lr=args.lr)

    if args.lr_step > 0:
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.lr_step,
                                              gamma=args.lr_decay)

    if args.init_weights == "orthogonal":
        print(paint("[-] Initializing weights (orthogonal)..."))
        model.apply(init_weights_orthogonal)

    metric_best = 0.0
    start_time = time.time()

    for epoch in range(args.epochs):
        print("--" * 50)
        print("[-] Learning rate: ", optimizer.param_groups[0]["lr"])

        train_one_epoch(model, loader, criterion, optimizer, epoch, args)
        loss, acc, fm, fw = eval_one_epoch(model, loader, criterion, epoch,
                                           logger, args)
        loss_val, acc_val, fm_val, fw_val = eval_one_epoch(
            model, loader_val, criterion, epoch, logger_val, args)

        print(
            paint(
                f"[-] Epoch {epoch}/{args.epochs}"
                f"\tTrain loss: {loss:.2f} \tacc: {acc:.2f}(%)\tfm: {fm:.2f}(%)\tfw: {fw:.2f}(%)"
            ))
        print(
            paint(
                f"[-] Epoch {epoch}/{args.epochs}"
                f"\tVal loss: {loss_val:.2f} \tacc: {acc_val:.2f}(%)\tfm: {fm_val:.2f}(%)\tfw: {fw_val:.2f}(%)"
            ))

        checkpoint = {
            "model_state_dict": model.state_dict(),
            "optim_state_dict": optimizer.state_dict(),
            "criterion_state_dict": criterion.state_dict(),
            "random_rnd_state": random.getstate(),
            "numpy_rnd_state": np.random.get_state(),
            "torch_rnd_state": torch.get_rng_state(),
        }

        metric = fm_val
        if metric >= metric_best:
            print(
                paint(f"[*] Saving checkpoint... ({metric_best}->{metric})",
                      "blue"))
            metric_best = metric
            torch.save(
                checkpoint,
                os.path.join(model.path_checkpoints, "checkpoint_best.pth"))

        if epoch % 5 == 0:
            torch.save(
                checkpoint,
                os.path.join(model.path_checkpoints,
                             f"checkpoint_{epoch}.pth"),
            )

        if args.lr_step > 0:
            scheduler.step()

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print(paint(f"[STEP 4] Finished HAR training loop (h:m:s): {elapsed}"))
    print(paint("--" * 50, "blue"))
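# Hedged sketch of restoring one of the checkpoints written above (the key
# names match the dict saved in model_train; recreating model, optimizer, and
# criterion the same way as in training is assumed):
def _demo_resume(model, optimizer, criterion, path_checkpoint):
    ckpt = torch.load(path_checkpoint)
    model.load_state_dict(ckpt["model_state_dict"])
    optimizer.load_state_dict(ckpt["optim_state_dict"])
    criterion.load_state_dict(ckpt["criterion_state_dict"])
    random.setstate(ckpt["random_rnd_state"])     # restore RNG streams so that
    np.random.set_state(ckpt["numpy_rnd_state"])  # sampling and augmentation
    torch.set_rng_state(ckpt["torch_rnd_state"])  # continue deterministically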