import torch


def saliency_map(model_fn, x, label, classifier_type):
    # Gradient of the classifier output with respect to the input.
    x = x.clone().detach().requires_grad_(True)
    if classifier_type == 'softmax':
        outputs = model_fn(x)
        # differentiate the maximum logit of each sample w.r.t. the input
        outputs_max, _ = torch.max(outputs, dim=1)
        grad, = torch.autograd.grad(outputs_max.sum(), [x])
    elif classifier_type == 'sigmoid':
        outputs = model_fn(x)
        grad, = torch.autograd.grad(outputs.sum(), [x])
    return grad.cpu().detach().numpy()

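# Minimal usage sketch for saliency_map; `net` and the random batch below are
# purely illustrative assumptions (any torch.nn.Module producing logits works).
net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10))
batch = torch.randn(4, 3, 32, 32)
sal = saliency_map(net, batch, label=None, classifier_type='softmax')
print(sal.shape)  # one gradient map per input: (4, 3, 32, 32)
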
def check_dropout_forward(batchsize, feature_dimension, seq_length, use_tanh):
    x_cpu_data = np.random.normal(
        0, 1, size=(batchsize, feature_dimension, seq_length * 3)).astype(
            np.float32) * 10
    x_gpu_data = cuda.to_gpu(x_cpu_data, gpu_device)

    with chainer.using_config("train", True):
        # reference output computed on the CPU with the autograd path
        layer = SRU(feature_dimension, feature_dimension,
                    use_tanh=use_tanh, dropout=0.5)
        mask_x = layer.generate_dropout_mask(x_cpu_data)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., :seq_length], layer.W, layer.B, None,
            layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., seq_length:seq_length * 2], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., seq_length * 2:], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)

        # output from the CUDA implementation
        layer.to_gpu(gpu_device)
        output, cell, last_cell = layer(x_gpu_data[..., :seq_length], None,
                                        cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(
            x_gpu_data[..., seq_length:seq_length * 2], last_cell,
            cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu_data[..., seq_length * 2:],
                                        last_cell, cuda.to_gpu(mask_x))

        # CPU reference and CUDA output should agree up to the threshold
        threshold = 1e-5
        assert (xp.mean(abs(output_true.data - cuda.to_cpu(output.data)))
                <= threshold), xp.mean(
                    abs(output_true.data - cuda.to_cpu(output.data)))
        assert (xp.mean(abs(cell_true.data - cuda.to_cpu(cell.data)))
                <= threshold), xp.mean(
                    abs(cell_true.data - cuda.to_cpu(cell.data)))
        assert (xp.mean(abs(last_cell_true.data - cuda.to_cpu(last_cell.data)))
                <= threshold), xp.mean(
                    abs(last_cell_true.data - cuda.to_cpu(last_cell.data)))

def select_action(self, state):
    # Turn the Q-values into a probability distribution and sample an action.
    # The higher this temperature parameter, the more likely we are to choose
    # the highest-Q action.
    temperature = 7
    with torch.no_grad():
        # softmax over the Q-values of the current state
        probabilities = torch.nn.functional.softmax(
            self.model(state) * temperature, dim=1)
    # random draw from the distribution
    action = probabilities.multinomial(num_samples=1)
    # returns 0, 1 or 2, in accordance with action2rotation
    return action.data[0, 0]

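# Standalone sketch of the temperature-scaled softmax used above, with
# hand-picked Q-values (the owning agent class and self.model are not shown).
q_values = torch.tensor([[1.0, 2.0, 3.0]])
probabilities = torch.nn.functional.softmax(q_values * 7, dim=1)
action = probabilities.multinomial(num_samples=1)
print(probabilities)        # heavily skewed toward the highest Q-value
print(action[0, 0].item())  # almost always 2
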
def test_tanh():
    x = torch.randn((3, 2), requires_grad=True)
    y = torch.tanh(x)
    # gradient of sum(tanh(x)) with respect to x
    print(torch.autograd.grad(y.sum(), x))
    print()

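# Sketch of the same check against the analytic derivative
# d/dx tanh(x) = 1 - tanh(x)**2 (an addition, not part of the original test).
def test_tanh_analytic():
    x = torch.randn((3, 2), requires_grad=True)
    y = torch.tanh(x)
    grad, = torch.autograd.grad(y.sum(), x)
    assert torch.allclose(grad, 1 - y ** 2)
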
""" Custom dataset """ pass def __getitem__(self, index): # this function should return one data for a given index pass def __len__(self): pass custom_dataset = CustomDataset() torch.utils.data.DataLoader(dataset=custom_dataset, batch_size=100, shuffle=True, num_workers=2) # using pre-trained model # download and use pre-trained model resnet = torchvision.models.resnet18(pretrained=True) # finetuning ==> removing top layer sub_model = torch.nn.Sequential(*list(resnet.children()[:-1])) # for test images = torch.autograd(Variable(torch.randn(10, 3, 256, 256))) print(resnet(images).size()) print(sub_model(images).size()) # save and load model torch.save(sub_model, 'model.pkl') model = torch.load('model.pkl')
def check_dropout_backward(batchsize, feature_dimension, seq_length, use_tanh):
    x_cpu_data = np.random.normal(
        0, 1, size=(batchsize, feature_dimension, seq_length * 3)).astype(
            np.float32) * 10
    x_gpu_data = cuda.to_gpu(x_cpu_data, gpu_device)
    x_cpu = chainer.Variable(x_cpu_data)
    x_gpu = chainer.Variable(x_gpu_data)

    with chainer.using_config("train", True):
        # reference gradients computed on the CPU with the autograd path
        layer = SRU(feature_dimension, use_tanh=use_tanh, dropout=0.5)
        mask_x = layer.generate_dropout_mask(x_cpu_data)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., :seq_length], layer.W, layer.B, None,
            layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., seq_length:seq_length * 2], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., seq_length * 2:], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)

        layer.cleargrads()
        functions.sum(output_true).backward()
        b_grad_true = layer.B.grad.copy()
        w_grad_true = layer.W.grad.copy()
        x_grad_true = x_cpu.grad.copy()

        # gradients from the CUDA implementation
        layer.to_gpu(gpu_device)
        output, cell, last_cell = layer(x_gpu[..., :seq_length], None,
                                        cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu[..., seq_length:seq_length * 2],
                                        last_cell, cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu[..., seq_length * 2:],
                                        last_cell, cuda.to_gpu(mask_x))

        layer.cleargrads()
        functions.sum(output).backward()

        # CPU reference and CUDA gradients should agree up to the threshold
        threshold = 1e-3
        assert (xp.mean(abs(b_grad_true - cuda.to_cpu(layer.B.grad)))
                <= threshold), xp.mean(
                    abs(b_grad_true - cuda.to_cpu(layer.B.grad)))
        assert (xp.mean(abs(w_grad_true - cuda.to_cpu(layer.W.grad)))
                <= threshold), xp.mean(
                    abs(w_grad_true - cuda.to_cpu(layer.W.grad)))
        assert (xp.mean(abs(x_grad_true - cuda.to_cpu(x_gpu.grad)))
                <= threshold), xp.mean(
                    abs(x_grad_true - cuda.to_cpu(x_gpu.grad)))

import random


def sample(self, batch_size):
    # take some random samples from memory and regroup them by field
    samples = zip(*random.sample(self.memory, batch_size))
    # concatenate each field into a single batched tensor
    return map(lambda x: torch.cat(x, 0), samples)

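# Plausible reconstruction of the replay memory that owns sample() above; the
# push/capacity details are assumptions, included only so the sampling step is
# runnable end to end.
class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, event):
        # store a transition, dropping the oldest one once the buffer is full
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        # same logic as the sample() function above
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: torch.cat(x, 0), samples)


memory = ReplayMemory(capacity=100)
for _ in range(10):
    memory.push((torch.randn(1, 5), torch.randn(1, 5),
                 torch.LongTensor([[0]]), torch.Tensor([[1.0]])))
states, next_states, actions, rewards = memory.sample(batch_size=4)
print(states.size(), actions.size())  # (4, 5) and (4, 1)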