from typing import List, Tuple

import numpy as np
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import DataLoader
from tqdm import tqdm


def test_get_model_device():
    assert get_module_device(nn.Linear(5, 5)) == torch.device('cpu')
    assert get_module_device(nn.Module()) == torch.device('cpu')

    # TODO: didn't find a way to simulate a GPU,
    # so let's at least add a conditional test...
    if torch.cuda.is_available():
        assert get_module_device(nn.Linear(5, 5).to('cuda')) == torch.device('cuda:0')
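
# A minimal sketch of the helper under test, assuming it resolves a module's
# device from its first parameter and defaults to CPU for parameter-less
# modules (consistent with the nn.Module() assertion above). The actual
# implementation elsewhere in the codebase may differ.
def get_module_device(module: nn.Module) -> torch.device:
    param = next(module.parameters(), None)
    return torch.device('cpu') if param is None else param.device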
def compute_activations_entropy(model: nn.Sequential, dataloader: DataLoader,
                                num_bins: int = 100):
    """Estimates the entropy of the input and of each layer's activations across the dataset."""
    num_layers = len(model)
    # One buffer for the input plus one per layer output.
    activations = [torch.Tensor([]) for _ in range(num_layers + 1)]
    device = get_module_device(model)

    for x, _ in dataloader:
        for i, act in enumerate(get_activations_for_sequential(model, x.to(device))):
            # Detach and move to CPU so concatenation with the CPU buffers
            # also works when the model lives on a GPU.
            activations[i] = torch.cat([activations[i], act.detach().cpu()])

    entropies = [compute_entropy(acts, num_bins) for acts in activations]

    return entropies
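
# Hedged sketches of the two helpers used above -- these bodies are
# assumptions about the surrounding codebase, not confirmed implementations.
def get_activations_for_sequential(model: nn.Sequential, x: torch.Tensor):
    # Yields the input followed by each layer's output, matching the
    # `num_layers + 1` buffers allocated in compute_activations_entropy.
    yield x
    for layer in model:
        x = layer(x)
        yield x


def compute_entropy(values: torch.Tensor, num_bins: int = 100) -> float:
    # Histogram-based entropy estimate over the flattened values.
    counts = torch.histc(values.flatten(), bins=num_bins)
    probs = counts / counts.sum()
    probs = probs[probs > 0]  # drop empty bins to avoid log(0)
    return float(-(probs * probs.log()).sum())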
def validate(model: nn.Module, dataloader: DataLoader, criterion: nn.Module) -> Tuple[float, float]:
    """Returns the mean loss and the accuracy of the model over the dataloader."""
    model.eval()
    guessed = np.array([])
    losses = np.array([])
    device = get_module_device(model)

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            preds = model(X)
            loss = criterion(preds, y)
            losses = np.hstack([losses, loss.cpu().numpy()])
            guessed = np.hstack([guessed, (preds.argmax(dim=1) == y).long().cpu().numpy()])

    # With a mean-reducing criterion this averages per-batch means, which is
    # exact only when all batches have the same size.
    return losses.mean(), guessed.mean()
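
# Illustrative smoke test for validate(); the toy model, dataset, and sizes
# are arbitrary assumptions, not part of the original code.
def test_validate_smoke():
    toy_model = nn.Linear(10, 3)
    toy_data = [(torch.randn(10), torch.tensor(0)) for _ in range(32)]
    toy_loader = DataLoader(toy_data, batch_size=8)
    loss, acc = validate(toy_model, toy_loader, nn.CrossEntropyLoss())
    assert 0.0 <= acc <= 1.0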
def extract_features(imgs: List[np.ndarray], embedder: nn.Module,
                     batch_size: int = 64, verbose: bool = True) -> List[np.ndarray]:
    """Embeds each image with `embedder` and returns the features as numpy arrays."""
    dataloader = DataLoader(imgs, batch_size=batch_size, num_workers=4)
    device = get_module_device(embedder)
    result = []

    embedder.eval()  # disable dropout/batch-norm updates during inference
    with torch.no_grad():
        batches = tqdm(dataloader, desc='[Extracting features]') if verbose else dataloader

        for x in batches:
            feats = embedder(x.to(device)).cpu().numpy()
            result.extend(feats)

    return result
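
# Illustrative smoke test for extract_features(); the shapes and the toy
# embedder are assumptions for demonstration only. Note that the hardcoded
# num_workers=4 above requires a fork/spawn-safe environment.
def test_extract_features_smoke():
    imgs = [np.random.rand(3, 8, 8).astype(np.float32) for _ in range(16)]
    embedder = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 4))
    feats = extract_features(imgs, embedder, batch_size=4, verbose=False)
    assert len(feats) == 16 and feats[0].shape == (4,)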
def compute_grad(model: nn.Module, criterion: nn.Module, dataloader: DataLoader,
                 output_mask: np.ndarray, elementwise_grad_norm: str) -> Tensor:
    """
    Computes an elementwise norm of the loss gradient, accumulated across the dataset.

    :param model: model whose parameter gradients are accumulated
    :param criterion: loss applied to the pruned logits
    :param dataloader: dataset to iterate over
    :param output_mask: mask selecting which output logits to keep
    :param elementwise_grad_norm: either 'square' or 'abs'
    :return: flat tensor of accumulated gradient norms, averaged over samples
    """
    num_samples = 0
    num_params = sum(p.numel() for p in model.parameters())
    device = get_module_device(model)
    grad = torch.zeros(num_params).to(device)

    for x, y in dataloader:
        x = torch.from_numpy(np.array(x)).to(device)
        y = torch.tensor(y).to(device)
        logits = model(x)
        pruned_logits = prune_logits(logits, output_mask)
        loss = criterion(pruned_logits, y)
        model.zero_grad()
        loss.backward()
        curr_grad = torch.cat([get_grad(p).view(-1) for p in model.parameters()])

        if elementwise_grad_norm == 'square':
            curr_grad = curr_grad.pow(2)
        elif elementwise_grad_norm == 'abs':
            curr_grad = curr_grad.abs()
        else:
            raise NotImplementedError(f'Unknown elementwise grad norm: {elementwise_grad_norm}')

        grad += curr_grad
        num_samples += len(x)

    return grad / num_samples
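
# Hedged sketches of the helpers referenced in compute_grad(). Both names
# come from the code above, but these bodies are plausible guesses, not the
# repo's actual implementations.
def get_grad(p: nn.Parameter) -> Tensor:
    # Treat parameters that received no gradient as having a zero gradient.
    return p.grad if p.grad is not None else torch.zeros_like(p)


def prune_logits(logits: Tensor, output_mask: np.ndarray) -> Tensor:
    # One plausible interpretation: keep the class indices intact and push the
    # masked-out logits to -inf so they vanish from the softmax.
    mask = torch.from_numpy(output_mask).bool().to(logits.device)
    return logits.masked_fill(~mask, float('-inf'))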