def load_target_trials(dataset_path):
    """Return the target signals of the held-out validation trials.

    Reads the recording at *dataset_path* (dataset key 'D'), splits the
    trial indices 80/20 without shuffling (so the validation split is the
    deterministic last 20% of trials) and returns element ``[1]`` (the
    target) of each validation trial.
    """
    all_trials, _ = read_dataset(dataset_path, 'D', False)
    indices = list(range(len(all_trials)))
    # shuffle=False keeps the split deterministic; random_state is inert here
    _, held_out = train_test_split(indices,
                                   test_size=0.2,
                                   shuffle=False,
                                   random_state=RANDOM_SEED)
    return [all_trials[i][1] for i in held_out]
def main(configs, dataset_dir, subject, log_dir, task):
    """Run a trained RNN over the validation trials of every recording of
    *subject* for *task* and dump per-window predictions to an HDF5 file.

    Locates the weights written by an earlier training run under
    ``log_dir/<TASK>/TRAIN/<subject>/<MODEL>``, rebuilds the model, and for
    each validation trial writes a ``(time_steps, offsets)`` prediction
    matrix (one HDF5 dataset per trial).
    """
    with open(configs, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # and unsafe on untrusted files -- consider yaml.safe_load.
        merge_configs(yaml.load(f))
    # set the random state (numpy, torch and stdlib random) for reproducibility
    np.random.seed(cfg.TRAINING.RANDOM_SEED)
    torch.manual_seed(cfg.TRAINING.RANDOM_SEED)
    random.seed(cfg.TRAINING.RANDOM_SEED)
    train_path = os.path.join(log_dir, task.upper(), 'TRAIN', subject,
                              cfg.TRAINING.MODEL.upper())
    assert os.path.exists(
        train_path), f"Can't detect training folder: {train_path}"
    # log_dir is rebound to the same path as train_path
    log_dir = os.path.join(log_dir, task.upper(), 'TRAIN', subject,
                           cfg.TRAINING.MODEL.upper())
    # map the task name to the matching .mat recordings of this subject
    if task == 'xpos':
        datasets = glob(dataset_dir + '*' + subject + '_*_xpos.mat')
    elif task == 'xvel':
        datasets = glob(dataset_dir + '*' + subject + '_*_xvel.mat')
    elif task == 'abspos':
        datasets = glob(dataset_dir + '*' + subject + '_*_absPos.mat')
    elif task == 'absvel':
        datasets = glob(dataset_dir + '*' + subject + '_*_absVel.mat')
    else:
        raise KeyError
    assert len(datasets) > 0, 'no datasets for subject %s found!' % subject
    for dataset_path in datasets:
        # file names look like <prefix>_<year>_<sub>_<day>_<task>.mat
        year, sub, day = osp.basename(dataset_path).split('_')[1:-1]
        trials, in_channels = read_dataset(dataset_path, 'D', False)
        trials_idx = list(range(len(trials)))
        # shuffle=False: deterministic split (last 20% of trials);
        # random_state has no effect when shuffle is disabled
        _, valid_split = train_test_split(
            trials_idx,
            test_size=0.2,
            shuffle=False,
            random_state=cfg.TRAINING.RANDOM_SEED)
        valid_trials = [trials[valid_idx] for valid_idx in valid_split]
        rec_name = '_'.join([year, day])
        weights_path = os.path.join(train_path, rec_name, 'weights_final.pt')
        assert os.path.exists(
            weights_path), 'No weights are detected for this recording!'
        model = RNNs(in_channels=in_channels)
        model.load_state_dict(torch.load(weights_path))
        model.cuda()
        model.eval()
        # TODO: hardcoded path. save predictions near the saved weights
        with h5py.File(
                f'/home/fattouhm/notebooks/{year}_{sub}_{day}_{task}_predictions_{WINDOW_SIZE}.h5',
                'w') as hf:
            for trial_idx, trial in enumerate(valid_trials):
                inputs, targets = trial
                # assumes inputs is (channels, time) -- TODO confirm
                time_steps = inputs.shape[1]
                # number of sliding-window start positions in this trial
                offsets = time_steps - WINDOW_SIZE + 1
                # predictions[t, o] = prediction for time step t from the
                # window starting at offset o; NaN where the window does not
                # cover t
                predictions = np.empty((time_steps, offsets),
                                       dtype=np.float32)
                predictions[:] = np.nan
                dataset = create_dataset_loader(inputs, targets)
                for batch_offset, (X, _) in enumerate(dataset):
                    with torch.no_grad():
                        X = X.cuda()
                        output = model(X)
                        output = output.detach().squeeze(-1).cpu().numpy()
                    for sample_idx, sample_pred in enumerate(output):
                        # NOTE(review): the hardcoded 32 assumes the loader's
                        # batch size is 32 -- confirm against
                        # create_dataset_loader
                        offset_idx = batch_offset * 32 + sample_idx
                        # write the whole window prediction down its column
                        predictions[offset_idx:offset_idx + WINDOW_SIZE,
                                    offset_idx] = sample_pred
                hf.create_dataset(f'trial{trial_idx:0>2d}', data=predictions)
    print('Done!')
TASK = 'xpos' # TASK = 'xvel' # recording = f'data4DNN_14_PR3_day1_{xpos}.mat' # recording = f'data4DNN_16_PR3_day1_{TASK}.mat' # recording = f'data4DNN_15_PR4_day1_{TASK}.mat' # recording = f'data4DNN_11_FR1_day1_{TASK}.mat' # recording = f'data4DNN_11_FR3_day2_{TASK}.mat' # recording = f'data4DNN_11_FR2_day2_{TASK}.mat' recording = f'data4DNN_17_PR10_day1_{TASK}.mat' dataset_path = os.path.join(dataset_home, recording) trials, in_channels = read_dataset(dataset_path, 'D', False) ecog_dataset = torch.utils.data.ConcatDataset([ ECoGDatast(X, y, window=cfg.TRAINING.CROP_LEN, stride=cfg.EVAL.INPUT_STRIDE, x2y_ratio=cfg.TRAINING.INPUT_SAMPLING_RATE / cfg.TRAINING.OUTPUT_SAMPLING_RATE, input_shape='ct', dummy_idx=cfg.TRAINING.DUMMY_IDX) for (X, y) in trials ]) # %% # %%
def main(command, configs, dataset_dir, subject, log_dir, task):
    """Gradient/perturbation-based connectivity analysis for one subject.

    Depending on *command*, computes per-recording saliency of the trained
    model's last output with respect to:
      * 'io'           -- the raw input samples,
      * 'freq'         -- the FFT amplitudes of the whole crop,
      * 'spectrogram'  -- windowed FFT amplitudes (time-resolved),
      * 'pert'         -- correlation between random amplitude perturbations
                          and the resulting output change.
    Results are saved as CSV next to the recording's training logs.
    """
    with open(configs, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # and unsafe on untrusted files -- consider yaml.safe_load.
        merge_configs(yaml.load(f))
    # FIXME: PERTURBATION based connectivity analysis is buggy at the moment
    # set the random state (numpy, torch and stdlib random)
    np.random.seed(cfg.TRAINING.RANDOM_SEED)
    th.manual_seed(cfg.TRAINING.RANDOM_SEED)
    random.seed(cfg.TRAINING.RANDOM_SEED)
    train_path = os.path.join(log_dir, task.upper(), 'TRAIN', subject,
                              cfg.TRAINING.MODEL.upper())
    assert os.path.exists(
        train_path), f"Can't detect training folder: {train_path}"
    # log_dir is rebound to the same path as train_path
    log_dir = os.path.join(log_dir, task.upper(), 'TRAIN', subject,
                           cfg.TRAINING.MODEL.upper())
    # map the task name to the matching .mat recordings of this subject
    if task == 'xpos':
        datasets = glob(dataset_dir + '*' + subject + '_*_xpos.mat')
    elif task == 'xvel':
        datasets = glob(dataset_dir + '*' + subject + '_*_xvel.mat')
    elif task == 'abspos':
        datasets = glob(dataset_dir + '*' + subject + '_*_absPos.mat')
    elif task == 'absvel':
        datasets = glob(dataset_dir + '*' + subject + '_*_absVel.mat')
    elif task == 'xacc':
        datasets = glob(dataset_dir + 'ALL_*' + subject + '_*_xacc.mat')
    elif task == 'absacc':
        datasets = glob(dataset_dir + 'ALL_*' + subject + '_*_absAcc.mat')
    else:
        raise KeyError
    assert len(datasets) > 0, 'no datasets for subject %s found!' % subject
    # index of the singleton ("dummy") axis in the input tensors
    dummy_idx = 1 if cfg.TRAINING.DUMMY_IDX == 'f' else 3
    # assert cfg.TRAINING.NUM_CLASSES == 1
    for dataset_path in datasets:
        rec_day_name = os.path.basename(dataset_path).split('.')[0].split('_')
        rec_name = '_'.join([rec_day_name[1], rec_day_name[3]])
        dataset_name = cfg.EVAL.DATASET
        trials, in_channels = read_dataset(
            dataset_path, dataset_name, mha_only=cfg.TRAINING.MHA_CHANNELS_ONLY)
        # TODO: split dataset to training and evaluation sets. Add additional
        # option to set split size
        data_loader = create_eval_loader(trials)
        num_classes = 1
        weights_path = os.path.join(train_path, rec_name, 'weights_final.pt')
        # NOTE(review): message below is not an f-string, so {weights_path}
        # is printed literally -- probably a missing 'f' prefix.
        assert os.path.exists(
            weights_path
        ), 'No weights are detected for this recording! in {weights_path}'
        model, _, _, _, _ = create_model(in_channels, num_classes, CUDA)
        if CUDA:
            model.cuda()
        model.load_state_dict(th.load(weights_path))
        # TODO: check if the weights are loaded properly. check the corr of
        # validation set for example.
        # cudnn RNN backward can only be called in training mode
        model.train()
        # keep dropout layers in eval mode even though the model is in train
        # mode (see eval_dropouts)
        eval_dropouts(model)
        # mean_autocorr_x = np.zeros(cfg.TRAINING.CROP_LEN, dtype=np.float32)
        # mean_autocorr_y = np.zeros(cfg.TRAINING.CROP_LEN, dtype=np.float32)
        grads = []
        # per-command setup: output file tag and command-specific state
        if 'io' in command:
            output_name = 'input'
            # grads = np.zeros(cfg.TRAINING.CROP_LEN, dtype=np.float32)
        elif 'freq' in command:
            output_name = 'amps'
            fft_freq = np.fft.rfftfreq(cfg.TRAINING.CROP_LEN,
                                       1 / cfg.TRAINING.INPUT_SAMPLING_RATE)
            num_freq_bins = len(fft_freq)
            # grads = np.zeros(cfg.TRAINING.CROP_LEN // 2 + 1, dtype=np.float32)
        elif 'pert' in command:
            output_name = 'pert'
            rng = np.random.RandomState(cfg.TRAINING.RANDOM_SEED)
            perturbation_list = []
            output_diff_list = []
        elif 'spectrogram' in command:
            output_name = 'spectrogram'
            window_size = 250
            # overlap = 125
            overlap = 245
            # hop size between consecutive analysis windows
            unique = window_size - overlap
            han = th.tensor(np.hanning(window_size),
                            requires_grad=False,
                            dtype=th.float)
            # we have now 3 dimensions
            # 1. batch (average over samples)
            # 2. fft amps
            # 3. time (4000 / 250) (concatanate grads w.r.t. amps along the
            #    x axis)
            fft_freq = np.fft.rfftfreq(window_size,
                                       1 / cfg.TRAINING.INPUT_SAMPLING_RATE)
            num_freq_bins = len(fft_freq)
            time_bins = list(
                range(0, cfg.TRAINING.CROP_LEN - window_size, unique))
            # grads = np.zeros((len(time_bins), int(num_freq_bins)), dtype=np.float32)
            grads = []
        else:
            raise RuntimeError('command not understood!')
        for X, Y in data_loader:
            # autocorr_x = np.zeros(cfg.TRAINING.CROP_LEN, dtype=np.float32)
            # for c in range(X.shape[2]):
            #     autocorr_x += np.correlate(X[0, 0, c, :], X[0, 0, c, :], 'full')[cfg.TRAINING.CROP_LEN-1:]
            # mean_autocorr_x += autocorr_x / X.shape[2]
            # mean_autocorr_y += np.correlate(Y.squeeze(), Y.squeeze(), 'full')[cfg.TRAINING.CROP_LEN-1:]
            if 'freq' in command:
                # grads w.r.t. frequency amp
                amps_th, iffted = fb_fft(X.squeeze(dummy_idx),
                                         cfg.TRAINING.CROP_LEN)
                model.zero_grad()
                output = model(iffted.unsqueeze(dummy_idx))
                # This is Robin's approach
                # output.mean().backward()
                # this is my approach: backprop only from the last time step
                # of the first output class, one grad per batch sample
                output[:, -1, 0].backward(
                    th.ones(iffted.shape[0], device=iffted.device))
                # grads.append(th.mean(th.abs(amps_th.grad), dim=1).cpu().numpy())
                grads.append(amps_th.grad.cpu().numpy())
                assert grads[-1].ndim == 3
                assert grads[-1].shape[-1] == num_freq_bins
            elif 'spectrogram' in command:
                # time-resolved grads w.r.t frequency amp: FFT one Hann
                # window at a time, splice the (differentiable) inverse back
                # between the untouched before/after segments
                window_grads = []
                for i in time_bins:
                    window = X[:, :, :, i:i + window_size] * han
                    amps_th, iffted = fb_fft(window, window_size)
                    rest_after = th.tensor(X[:, :, :, i + window_size:],
                                           requires_grad=False,
                                           dtype=th.float,
                                           device='cuda')
                    if i > 0:
                        rest_before = th.tensor(X[:, :, :, :i],
                                                requires_grad=False,
                                                dtype=th.float,
                                                device='cuda')
                        input_tensor = th.cat(
                            (rest_before, iffted, rest_after), dim=3)
                    else:
                        input_tensor = th.cat((iffted, rest_after), dim=3)
                    model.zero_grad()
                    output = model(input_tensor)
                    output[:, -1, 0].backward(
                        th.ones(input_tensor.shape[0],
                                device=input_tensor.device))
                    # mean |grad| over channels -> (batch, 1, freq) per window
                    window_grads.append(
                        np.expand_dims(th.mean(th.abs(
                            amps_th.grad.squeeze(dummy_idx)),
                                               dim=1).cpu().numpy(),
                                       axis=1))
                # grads += np.vstack(window_grads)
                # stack windows along axis 1 -> (batch, time_bins, freq)
                grads.append(np.hstack(window_grads))
                assert grads[-1].shape[0] == input_tensor.shape[0]
                assert grads[-1].shape[1] == len(time_bins)
                assert grads[-1].shape[-1] == num_freq_bins
            elif 'io' in command:
                # grads w.r.t. input
                input_tensor = th.tensor(X,
                                         requires_grad=True,
                                         dtype=th.float,
                                         device='cuda')
                model.zero_grad()
                output = model(input_tensor)
                output[:, -1, 0].backward(
                    th.ones(input_tensor.shape[0],
                            device=input_tensor.device))
                # channels dimension
                grads.append(
                    input_tensor.grad.squeeze(dummy_idx).cpu().numpy())
                assert grads[-1].ndim == 3
                assert grads[-1].shape[-1] == cfg.TRAINING.CROP_LEN
            elif 'pert' in command:
                # grads w.r.t. input
                # find the model output given the input before perturbation
                with th.no_grad():
                    input_tensor = th.tensor(X, dtype=th.float, device='cuda')
                    model.zero_grad()
                    output_before_pert = model(
                        input_tensor).detach().cpu().numpy()[0, -1, 0]
                # perturb the input signal and find the output
                for _ in range(1000):
                    amps_th, iffted, pert_values = fb_fft_with_perturbation(
                        X,
                        amp_perturbation_additive,
                        cfg.TRAINING.CROP_LEN,
                        rng=rng)
                    output_after_pert = model(
                        iffted).detach().cpu().numpy()[0, -1, 0]
                    # append perturbations and output diff from all pert.
                    # iterations and mini-batches
                    output_diff_list.append(output_after_pert -
                                            output_before_pert)
                    perturbation_list.append(
                        np.expand_dims(pert_values.squeeze(), 2))
            else:
                raise RuntimeError('command not understood!')
        if 'pert' not in command:
            # grads /= len(data_loader) * cfg.TRAINING.BATCH_SIZE
            # collapse the per-batch list into one array over all samples
            grads_array = np.vstack(grads)
            assert grads_array.shape[0] == len(data_loader.dataset)
            # np.savetxt(f'grads_{cfg.TRAINING.MODEL}_{task}_{subject}_{rec_name}.csv', grads_array, delimiter=',')
            # np.save(f'grads_{cfg.TRAINING.MODEL}_{task}_{subject}_{rec_name}_NEW2.csv', grads_array)
            # reduce |grads| over samples and channels with the module-level
            # stat_fn (e.g. mean/median -- see STAT)
            grads = stat_fn(np.abs(grads_array), axis=(0, 1))
            if 'spectrogram' in command:
                assert grads.ndim == 2
                assert grads.shape[0] == len(time_bins)
                assert grads.shape[1] == num_freq_bins
            elif 'freq' in command:
                assert grads.ndim == 1
                assert grads.shape[0] == num_freq_bins
            else:
                assert grads.ndim == 1
                assert grads.shape[0] == cfg.TRAINING.CROP_LEN
        else:
            # correlate each (channel, freq) perturbation series with the
            # observed output differences, then average |corr| over samples
            output_diff = np.array(output_diff_list)
            perturbations = np.dstack(perturbation_list)
            grads = np.mean(np.abs(
                np.array([[
                    corr(output_diff.reshape(1, -1), pert_fb.reshape(1, -1))
                    for pert_fb in perturbation
                ] for perturbation in perturbations])),
                            axis=0).squeeze()
        # mean_autocorr_x /= len(data_loader) * cfg.TRAINING.BATCH_SIZE
        # mean_autocorr_y /= len(data_loader) * cfg.TRAINING.BATCH_SIZE
        np.savetxt(os.path.join(log_dir, rec_name,
                                f"connectivity_{output_name}_{STAT}.csv"),
                   grads,
                   delimiter=',')
        # np.savetxt(os.path.join(log_dir, rec_name, f"connectivity_{output_name}_{STAT}_new2.csv"), grads, delimiter=',')
        # np.savetxt(os.path.join(log_dir, rec_name, 'autocorr_x.csv'), mean_autocorr_x, delimiter=',')
        # np.savetxt(os.path.join(log_dir, rec_name, 'autocorr_y.csv'), mean_autocorr_y, delimiter=',')
    print('Done!')
def main(mode, configs, dataset_dir, subject, log_dir, n_splits, task):
    """Driver for cross-validation, training or evaluation of one subject.

    For every recording of *subject* matching *task*, sets up a per-recording
    log directory, loads the trials and dispatches to ``run_cv``,
    ``run_training`` or ``run_eval`` depending on *mode*
    ('cv' | 'train' | 'eval').
    """
    with open(configs, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # and unsafe on untrusted files -- consider yaml.safe_load.
        merge_configs(yaml.load(f))
    # strip shell quoting that may survive argument parsing
    subject = subject.strip('\'\"')
    # set the random state (numpy, torch and stdlib random)
    np.random.seed(cfg.TRAINING.RANDOM_SEED)
    torch.manual_seed(cfg.TRAINING.RANDOM_SEED)
    random.seed(cfg.TRAINING.RANDOM_SEED)
    model_name = cfg.TRAINING.MODEL.upper()
    if 'RNN' in model_name:
        # consider RNNs #layers in log dir (e.g. '3L_RNN')
        model_name = f'{cfg.RNNS.RNN.NUM_LAYERS}L_{model_name}'
    if mode == 'eval':
        # evaluation needs the existing training folder for the weights
        train_path = parse_log_dir(log_dir, 'train', cfg.TRAINING.SPLIT,
                                   subject, task, model_name)
        assert os.path.exists(
            train_path), f"Can't detect training folder: {train_path}"
        log_dir = parse_log_dir(log_dir, mode, cfg.EVAL.SPLIT, subject, task,
                                model_name)
    else:
        log_dir = parse_log_dir(log_dir, mode, cfg.TRAINING.SPLIT, subject,
                                task, model_name)
    # TODO: get all recordings with the same task from all subjects
    rec_paths = get_rec_paths(dataset_dir, task, subject)
    assert len(rec_paths) > 0, 'no datasets for subject %s found!' % subject
    rec_names = []  # full rec_names including subject code
    if task == 'multi':
        # for 'multi', each rec_paths entry is a sequence; name after the
        # first path
        for rec_path in [rec_path[0] for rec_path in rec_paths]:
            rec_names.append(rec_name_from_path(rec_path))
    else:
        for rec_path in rec_paths:
            rec_names.append(rec_name_from_path(rec_path))
    print('found the following datasets:')
    for rec_path in rec_paths:
        print(rec_path)
    for rec_path, rec_name in zip(rec_paths, rec_names):
        print(f"Working on {rec_path}")
        rec_dir = osp.join(log_dir, rec_name)
        if not osp.exists(rec_dir):
            os.makedirs(rec_dir)
        # setup logging file handler
        setup_logging_handler(root, rec_dir)
        logger.info('Called with configs:')
        logger.info(json.dumps(cfg, indent=2))
        # export the resolved configs as JSON next to the run outputs
        with open(osp.join(rec_dir, 'configs.json'), 'w+') as fp:
            json.dump(cfg, fp, indent=2)
        msg = str('Working on dataset %s:' %
                  rec_name if task == 'multi' else rec_path)
        logger.info(msg + '\n' + '=' * len(msg))
        # cv/train always use the default dataset key; eval uses the
        # configured one
        if mode == 'cv' or mode == 'train':
            dataset_name = 'D'
        else:
            dataset_name = cfg.EVAL.DATASET
        # TODO: refactor and simplify
        if task == 'multi':
            trials, in_channels = read_multi_datasets(
                rec_path, dataset_name,
                mha_only=cfg.TRAINING.MHA_CHANNELS_ONLY)
            # one output class per dataset in the multi-task case
            num_classes = len(rec_path)
        else:
            trials, in_channels = read_dataset(
                rec_path, dataset_name,
                mha_only=cfg.TRAINING.MHA_CHANNELS_ONLY)
            num_classes = 1
        # TODO: move the logs and check to the read_dataset
        logger.info(f'{len(trials)} trials found')
        logger.info(f'Number of input channels: {in_channels}')
        if in_channels < 1:
            # nothing to train/evaluate on -- abort the whole run
            logger.warning(f'Zero valid channels found!!!!!!')
            print(f'Zero valid channels found!!!!!!')
            return
        if mode == 'cv':
            run_cv(n_splits, task, rec_name, rec_dir, trials, in_channels,
                   num_classes)
        elif mode == 'train':
            run_training(task, rec_name, rec_dir, trials, in_channels,
                         num_classes)
        # eval
        else:
            run_eval(task, train_path, rec_name, rec_dir, trials,
                     in_channels, num_classes)
        print(f"Finished working on {rec_path}")
    logger.info('Done!')
    print("Done!")