# Assumed module-level imports for this excerpt (not shown in the source):
# numpy as np, torch, PIL's Image, a rotate() helper (likely scipy.ndimage.rotate),
# and the project's utils module.

def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        task dictionary with keys x_train, y_train, x_val, y_val,
        all drawn from the task of the given index
    """
    assert index < self.__len__()
    task_def = self.task_defs[index]
    x_train, y_train, x_val, y_val = [], [], [], []
    # one random rotation per class, shared by the train and val instances of that class
    rotations = np.random.choice([0., 90., 180., 270.],
                                 size=(task_def.num_cls,), replace=True)

    # training instances
    for idx, file_name in enumerate(task_def.train_ids):
        image = Image.open(file_name, mode='r').convert('L')
        image = image.resize((28, 28), resample=Image.LANCZOS)
        image = np.array(image, dtype=np.float32)
        image = rotate(image, rotations[idx // task_def.num_inst])
        x_train.append(image.reshape(1, 28, 28) / 255.0)
        y_train.append(task_def.train_labels[idx])

    # validation instances
    for idx, file_name in enumerate(task_def.val_ids):
        image = Image.open(file_name, mode='r').convert('L')
        image = image.resize((28, 28), resample=Image.LANCZOS)
        image = np.array(image, dtype=np.float32)
        image = rotate(image, rotations[idx // task_def.num_inst])
        x_val.append(image.reshape(1, 28, 28) / 255.0)
        y_val.append(task_def.val_labels[idx])

    # base transforms
    x_train = utils.to_device(np.array(x_train), self.use_gpu)
    y_train = utils.to_device(np.array(y_train), self.use_gpu)
    x_val = utils.to_device(np.array(x_val), self.use_gpu)
    y_val = utils.to_device(np.array(y_val), self.use_gpu)
    if self.transform:
        x_train = self.transform(x_train)
        x_val = self.transform(x_val)
    if self.target_transform:
        y_train = self.target_transform(y_train)
        y_val = self.target_transform(y_val)

    # cross entropy expects targets to be of type LongTensor
    task = dict(task_def=task_def,
                x_train=x_train, y_train=y_train.long(),
                x_val=x_val, y_val=y_val.long())
    return task
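
# A minimal, self-contained sketch (an assumption, not from the source) of the
# per-class rotation indexing used above: instances are laid out class-by-class,
# so integer division by num_inst maps each instance index to its class's
# rotation, mirroring rotations[idx // task_def.num_inst].
import numpy as np

num_cls, num_inst = 5, 3                        # e.g. 5-way, 3 instances per class
rotations = np.random.choice([0., 90., 180., 270.], size=(num_cls,), replace=True)
for idx in range(num_cls * num_inst):
    cls = idx // num_inst                       # consecutive instances share a class
    assert rotations[cls] in (0., 90., 180., 270.)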
def matrix_evaluator(self, task, lam, regu_coef=1.0, lam_damping=10.0, x=None, y=None):
    """
    Constructor function that can be given to a CG optimizer.
    Works for both type(lam) == float and type(lam) == np.ndarray.
    """
    if type(lam) == np.ndarray:
        lam = utils.to_device(lam, self.use_gpu)

    def evaluator(v):
        # damped, regularized curvature-vector product:
        # A v = (1 + regu_coef) * v + H v / (lam + lam_damping)
        hvp = self.hessian_vector_product(task, v, x=x, y=y)
        Av = (1.0 + regu_coef) * v + hvp / (lam + lam_damping)
        return Av

    return evaluator
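
# A self-contained sketch (an assumption, not from the source) of the
# evaluator-closure pattern: conjugate gradient only needs a matrix-vector
# product, so a closure like the one returned above lets the solver work with
# the damped curvature matrix without ever materializing the Hessian.
# toy_cg_solve is a hypothetical stand-in for whatever CG routine the project uses.
import torch

def toy_cg_solve(f_Ax, b, iters=10):
    # textbook conjugate gradient for a symmetric positive-definite operator
    x = torch.zeros_like(b)
    r = b.clone()
    p = r.clone()
    rs = r @ r
    for _ in range(iters):
        Ap = f_Ax(p)
        alpha = rs / (p @ Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = r @ r
        if rs_new < 1e-12:                      # converged; avoid 0/0 in beta
            break
        p = r + (rs_new / rs) * p
        rs = rs_new
    return x

A = torch.tensor([[3., 1.], [1., 2.]])          # SPD stand-in for the damped Hessian
b = torch.tensor([1., -1.])
x = toy_cg_solve(lambda v: A @ v, b)
print(torch.allclose(A @ x, b, atol=1e-5))      # True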
def hessian_vector_product(self, task, vector, params=None, x=None, y=None):
    """
    Performs hessian vector product on the train set in task with the provided vector
    """
    if x is not None and y is not None:
        xt, yt = x, y
    else:
        xt, yt = task['x_train'], task['y_train']
    if params is not None:
        self.set_params(params)
    tloss = self.get_loss(xt, yt)
    # double-backprop (Pearlmutter) trick: differentiating grad(L)^T v gives H v
    grad_ft = torch.autograd.grad(tloss, self.model.parameters(), create_graph=True)
    flat_grad = torch.cat([g.contiguous().view(-1) for g in grad_ft])
    vec = utils.to_device(vector, self.use_gpu)
    h = torch.sum(flat_grad * vec)
    hvp = torch.autograd.grad(h, self.model.parameters())
    hvp_flat = torch.cat([g.contiguous().view(-1) for g in hvp])
    return hvp_flat
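
# A self-contained check (an assumption, not from the source) of the identity
# used above: d/dtheta (grad(L)^T v) = H v, so an HVP never needs the full
# Hessian. Verified here against torch.autograd.functional.hessian on a tiny loss.
import torch

theta = torch.randn(3, requires_grad=True)

def loss_fn(t):
    return (t ** 2).sum() + t.prod()            # nontrivial off-diagonal curvature

loss = loss_fn(theta)
g = torch.autograd.grad(loss, theta, create_graph=True)[0]
v = torch.randn(3)
hvp = torch.autograd.grad(g @ v, theta)[0]

H = torch.autograd.functional.hessian(loss_fn, theta)
print(torch.allclose(hvp, H @ v, atol=1e-5))    # True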
def outer_step_with_grad(self, grad, flat_grad=False):
    """
    Given the gradient, step with the outer optimizer using the gradient.
    Assumes the gradient is a tuple/list of size compatible with model.parameters().
    If flat_grad, then the gradient is a flattened vector.
    """
    # count parameters whose .grad field has not been initialized yet
    check = 0
    for p in self.model.parameters():
        check = check + 1 if p.grad is None else check
    if check > 0:
        # initialize the grad fields properly
        dummy_loss = self.regularization_loss(self.get_params())
        dummy_loss.backward()  # this would initialize required variables
    if flat_grad:
        # slice the flat vector per parameter and reshape into each .grad
        offset = 0
        grad = utils.to_device(grad, self.use_gpu)
        for p in self.model.parameters():
            this_grad = grad[offset:offset + p.nelement()].view(p.size())
            p.grad.copy_(this_grad)
            offset += p.nelement()
    else:
        for i, p in enumerate(self.model.parameters()):
            p.grad = grad[i]
    self.outer_opt.step()
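
# A runnable mini-demo (an assumption, not from the source) of the flat-gradient
# unpacking above: a single flat vector is sliced per parameter, reshaped to the
# parameter's shape, and applied through a standard optimizer step. The random
# vector here merely stands in for a meta-gradient (e.g. a CG solution).
import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)

n = sum(p.nelement() for p in model.parameters())
flat = torch.randn(n)                           # stand-in for the flat outer gradient

offset = 0
for p in model.parameters():
    p.grad = flat[offset:offset + p.nelement()].view(p.size()).clone()
    offset += p.nelement()
opt.step()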
def generate_tasks(self, num_tasks=None):
    num_tasks = self.num_tasks if num_tasks is None else num_tasks
    generated_tasks = []
    for _ in range(num_tasks):
        amp = np.random.uniform(low=self.amp_range[0], high=self.amp_range[1])
        phase = np.random.uniform(low=self.phase_range[0], high=self.phase_range[1])
        x_train = np.random.uniform(low=self.input_range[0], high=self.input_range[1],
                                    size=self.ntrain).reshape(-1, 1)
        y_train = self.sine_function(x_train, amp, phase).reshape(-1, 1)
        x_val = np.random.uniform(low=self.input_range[0], high=self.input_range[1],
                                  size=self.nval).reshape(-1, 1)
        y_val = self.sine_function(x_val, amp, phase).reshape(-1, 1)
        x_all = np.concatenate([x_train, x_val])
        y_all = np.concatenate([y_train, y_val])
        # move every array to the device, optionally casting to half precision
        arrays = dict(x_train=x_train, y_train=y_train,
                      x_val=x_val, y_val=y_val,
                      x_all=x_all, y_all=y_all)
        task = dict(amp=amp, phase=phase)
        for key, arr in arrays.items():
            tensor = utils.to_device(arr, self.use_gpu)
            task[key] = tensor.half() if self.float16 else tensor
        generated_tasks.append(task)
    return generated_tasks
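
# sine_function is not shown in this excerpt. A common form in sinusoid
# regression benchmarks (an assumption, not necessarily this project's
# definition) is an amplitude-scaled sine with a phase shift:
import numpy as np

def sine_function_sketch(x, amp, phase):
    # hypothetical stand-in for self.sine_function
    return amp * np.sin(x + phase)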
def predict(self, x, return_numpy=False):
    # call the module directly (rather than .forward) so registered hooks fire
    yhat = self.model(utils.to_device(x, self.use_gpu))
    if return_numpy:
        yhat = utils.to_numpy(yhat)
    return yhat