import numpy as np
import torch
import torch.nn.functional as F

import curves
import utils

curve = getattr(curves, args.curve)
curve_model = curves.CurveNet(
    num_classes,
    curve,
    architecture.curve,
    args.num_bends,
    architecture_kwargs=architecture.kwargs,
)
curve_model.cuda()
checkpoint = torch.load(args.ckpt)
curve_model.load_state_dict(checkpoint['model_state'])

criterion = F.cross_entropy
regularizer = utils.l2_regularizer(args.wd)


def get_xy(point, origin, vector_x, vector_y):
    # Project a flattened weight vector onto the 2-D plane spanned by
    # vector_x and vector_y, with `origin` as the plane's origin.
    return np.array(
        [np.dot(point - origin, vector_x), np.dot(point - origin, vector_y)])


# Collect the flattened weights of each bend of the curve. The parameters are
# laid out bend-major, so the stride slice picks out one bend at a time.
w = list()
curve_parameters = list(curve_model.net.parameters())
for i in range(args.num_bends):
    w.append(np.concatenate([
        p.data.cpu().numpy().ravel()
        for p in curve_parameters[i::args.num_bends]
    ]))
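# The basis vectors passed to get_xy are not constructed in this excerpt. A
# minimal sketch of one standard construction, assuming the plane is spanned
# by three bend vectors in `w` and orthonormalized via Gram-Schmidt (an
# assumption about the omitted code, not a verified part of it):
u = w[2] - w[0]
dx = np.linalg.norm(u)
u /= dx

v = w[1] - w[0]
v -= np.dot(u, v) * u          # remove the component along u
dy = np.linalg.norm(v)
v /= dy

# Each bend then gets 2-D coordinates in this plane:
bend_coordinates = np.stack([get_xy(p, w[0], u, v) for p in w])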
def evaluate_one_epoch(self, start_time):
    val_loss = 0.0
    tn = 0
    fn = 0
    fp = 0
    tp = 0
    self.val_times.append([])
    self.val_loss.append([])
    self.val_acc.append([])
    self.val_precision.append([])
    self.val_recall.append([])
    self.val_F1.append([])

    self.model.eval()
    with torch.no_grad():
        for i, (a, bh_pos, def_pos, y) in enumerate(self.val_loader):
            a = a.to(self.device)
            bh_pos = bh_pos.to(self.device)
            def_pos = def_pos.to(self.device)
            y = y.to(self.device)

            outputs = self.model(a, bh_pos, def_pos)

            # Compute loss. Both the Full* and Low* model variants currently
            # use the same penalty: the basketball spatial regularizer plus an
            # L2 term. (Plain L1/L2 and the generic spatial regularizer were
            # tried and left disabled.)
            loss = self.loss_fn(outputs, y.float())
            if self.params.get('reg_coeff'):
                reg = self.params['reg_coeff'] * (
                    utils.bball_spatial_regularizer(
                        self.model, self.K_B, self.K_C, self.device)
                    + utils.l2_regularizer(self.model, self.device))
                if i == 0:
                    logger.info(
                        "VAL | Step {0} | Loss={1:0.6f}, Reg={2:0.6f}".format(
                            i + 1, loss, reg))
                loss = loss + reg

            # Aggregate val_loss across batches
            val_loss += loss.item()

            # Update confusion matrix counts
            preds = (outputs > self.decision_threshold).bool()
            tn += torch.sum((preds == 0) & (y == 0)).item()
            fn += torch.sum((preds == 0) & (y == 1)).item()
            fp += torch.sum((preds == 1) & (y == 0)).item()
            tp += torch.sum((preds == 1) & (y == 1)).item()

            # Log early, at every eval_T-th step, and on the last batch
            if (i == 2 or i % self.eval_T == self.eval_T - 1
                    or i == len(self.val_loader) - 1):
                curr_loss = val_loss / (i + 1)
                self.val_times[-1].append(time.time() - start_time)
                self.val_loss[-1].append(curr_loss)
                # Per-class accuracy: [specificity, sensitivity]
                self.val_acc[-1].append([tn / (tn + fp), tp / (tp + fn)])
                F1, precision, recall = utils.calc_F1(fp, fn, tp)
                self.val_precision[-1].append(precision)
                self.val_recall[-1].append(recall)
                self.val_F1[-1].append(F1)
                logger.info(
                    'VAL | Step {0} | Loss={1:0.6f}, P={2:0.6f}, R={3:0.6f}, F1={4:0.6f}'
                    .format(i + 1, curr_loss, precision, recall, F1))

    # Confusion matrix for the epoch
    self.val_conf_matrix.append([[tn, fp], [fn, tp]])
    logger.info("VAL | Loss={0:0.6f}, Conf. matrix={1}".format(
        self.val_loss[-1][-1], [[tn, fp], [fn, tp]]))
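# A minimal sketch of the utils.calc_F1 helper called above, matching its
# (F1, precision, recall) return order. The zero-division guards are an
# assumption; the source shows only the call site.
def calc_F1(fp, fn, tp):
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    F1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return F1, precision, recall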
# Load a sliding window of consecutive curve checkpoints into a pool of models.
number_points = args.number_points
models = [
    architecture.base(num_classes=10, **architecture.kwargs)
    for i in range(number_points)
]
for m in models:
    m.cuda()
base_model = architecture.base(10, **architecture.kwargs)
base_model.cuda()

criterion = F.cross_entropy
regularizer = utils.l2_regularizer(1e-4)

statistic = []
ind = 47
T = True
# index = list(range(number_points))
while ind < 100 - number_points + 1:
    l = []
    for m in models:
        ckpt = 'curves/curve' + str(ind) + '/checkpoint-100.pt'
        checkpoint = torch.load(ckpt)
        # (The snippet is truncated in the source; loading each consecutive
        # checkpoint into the next model and advancing the index is an
        # assumed continuation.)
        m.load_state_dict(checkpoint['model_state'])
        ind += 1
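# The truncated loop above presumably reduces each loaded model to a flat
# weight vector before computing whatever `statistic` aggregates. A sketch of
# that flattening step, reusing the ravel/concatenate idiom from the plane
# code; the helper name is hypothetical:
def flatten_weights(model):
    return np.concatenate(
        [p.data.cpu().numpy().ravel() for p in model.parameters()])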
def train_one_epoch(self, start_time):
    train_loss = 0.0
    self.train_times.append([])
    self.train_loss.append([])
    self.grad_norms.append([])
    self.grad_entropies.append([])
    self.grad_vars.append([])

    # Zero out gradient accumulators
    for i in range(len(self.gradients)):
        self.accum_gradients[i].zero_()
        self.gradients[i].zero_()

    self.model.train()
    for i, (a, bh_pos, def_pos, y) in enumerate(self.train_loader):
        a = a.to(self.device)
        bh_pos = bh_pos.to(self.device)
        def_pos = def_pos.to(self.device)
        y = y.to(self.device)

        # zero the parameter gradients
        self.optimizer.zero_grad()

        # Forward pass
        outputs = self.model(a, bh_pos, def_pos)

        # Compute loss. As in evaluate_one_epoch, the Full* and Low* variants
        # currently share the same spatial-plus-L2 penalty.
        loss = self.loss_fn(outputs, y.float())
        if self.params.get('reg_coeff'):
            reg = self.params['reg_coeff'] * (
                utils.bball_spatial_regularizer(
                    self.model, self.K_B, self.K_C, self.device)
                + utils.l2_regularizer(self.model, self.device))
            if i == 0:
                logger.info(
                    "TRAIN | Step {0} | Loss={1:0.6f}, Reg={2:0.6f}".format(
                        i + 1, loss, reg))
            loss = loss + reg

        loss.backward()

        # Accumulate gradients
        utils.accum_grad(self.accum_gradients, self.model)

        self.optimizer.step()

        # Constrain weights. The model is wrapped (e.g. in DataParallel), so
        # the constraint method lives on .module.
        if (type(self.model.module).__name__.startswith('Low')
                and self.params.get('nonnegative_weights')):
            with torch.no_grad():
                self.model.module.constrain()

        # Aggregate train_loss across batches
        train_loss += loss.item()

        # Log early, at every train_T-th step, and on the last batch
        if (i == 2 or i % self.train_T == self.train_T - 1
                or i == len(self.train_loader) - 1):
            curr_loss = train_loss / (i + 1)
            self.train_times[-1].append(time.time() - start_time)
            self.train_loss[-1].append(curr_loss)
            # Average accumulated gradients over the batches seen so far
            for p in range(len(self.gradients)):
                self.gradients[p] = self.accum_gradients[p].div(i + 1)
            # Calculate gradient statistics
            grad_norm, grad_entropy, grad_var = utils.grad_stats(
                self.gradients)
            self.grad_norms[-1].append(grad_norm)
            self.grad_entropies[-1].append(grad_entropy)
            self.grad_vars[-1].append(grad_var)
            logger.info(
                'TRAIN | Step {0} | Loss={1:0.6f}, GN={2:0.6e}, GE={3:0.6f}, GV={4:0.6e}'
                .format(i + 1, curr_loss, grad_norm, grad_entropy, grad_var))
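# Minimal sketches of the utils.accum_grad and utils.grad_stats helpers that
# train_one_epoch relies on. Only their call sites appear in the source, so
# the bodies below (in particular the entropy over the normalized absolute
# gradient) are assumptions:
def accum_grad(accum_gradients, model):
    # Add the current parameter gradients into the running accumulators.
    for buf, p in zip(accum_gradients, model.parameters()):
        if p.grad is not None:
            buf.add_(p.grad.detach())


def grad_stats(gradients):
    # Flatten all per-parameter gradients into one vector.
    flat = torch.cat([g.reshape(-1) for g in gradients])
    grad_norm = torch.norm(flat).item()
    # Entropy of the distribution obtained by normalizing |g|.
    probs = flat.abs() / (flat.abs().sum() + 1e-12)
    grad_entropy = -(probs * torch.log(probs + 1e-12)).sum().item()
    grad_var = flat.var().item()
    return grad_norm, grad_entropy, grad_var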