import math
import time

import torch
from torch.utils.data import DataLoader
from sklearn.linear_model import RidgeClassifier
from typing import List

# Project-local names such as `device`, `collate_fn`, `common`, `ESNBase`,
# `ESNMultiringCell`, `SNLIAttention`, `LinSelfAttention` and
# `SingleTargetAttention` are assumed to be imported from the repository's own modules.


class ESNModelSNLI(torch.nn.Module):

    def __init__(self, hp, logname=''):
        super(ESNModelSNLI, self).__init__()

        input_size = 300
        output_size = 3

        self.epochs = hp['epochs']
        self.lr = hp['lr']
        self.batch_size = hp['n_batch']
        self.weight_decay = hp['weight_decay']
        self.n_layers = hp['num_layers']
        self.reservoir_size = hp['reservoir_size']
        #self.dropout = hp['dropout']
        attention_hidden_size = hp['n_attention']
        attention_heads = hp['attention_r']

        num_directions = 2

        def cell_provider(input_size_, reservoir_size_, layer, direction):
            return ESNMultiringCell(
                input_size_,
                reservoir_size_,
                bias=True,
                contractivity_coeff=hp['scale_rec'][layer] if direction == 0 else hp['scale_rec_bw'][layer],
                scale_in=hp['scale_in'][layer] if direction == 0 else hp['scale_in_bw'][layer],
                density_in=hp['density_in'][layer] if direction == 0 else hp['density_in_bw'][layer],
                leaking_rate=hp['leaking_rate'][layer] if direction == 0 else hp['leaking_rate_bw'][layer])

        self.esn = ESNBase(cell_provider,
                           input_size,
                           self.reservoir_size,
                           num_layers=self.n_layers,
                           bidirectional=True).to(device)

        # Dimensionality reduction
        self.ff1 = torch.nn.Linear(
            self.n_layers * num_directions * self.reservoir_size,
            attention_hidden_size).to(device)

        # Pairwise attention for SNLI
        self.attn = SNLIAttention(attention_hidden_size,
                                  r=attention_heads).to(device)

        # Classifier
        self.classifier = torch.nn.Linear(attention_hidden_size,
                                          output_size).to(device)

        # A negative 'epochs' value requests early stopping.
        self.early_stop = self.epochs < 0
        self.epochs = abs(self.epochs)

        self.training_time = -1
        self.actual_epochs = self.epochs

    def forward(self, x1: torch.Tensor, x2: torch.Tensor):
        """
        input: (seq_len, batch_size, input_size)
        output: (batch_size, output_size)
        """
        # s1, s2: (seq_len, batch, num_directions * hidden_size)
        s1, _ = self.esn.forward(x1.to(device))
        s1 = torch.tanh(self.ff1(s1))  # s1: (seq_len, batch, n_attn)

        s2, _ = self.esn.forward(x2.to(device))
        s2 = torch.tanh(self.ff1(s2))  # s2: (seq_len, batch, n_attn)

        # Apply attention over the two state sequences
        embedding = self.attn.forward(s1, s2)

        return self.classifier(embedding)

    def forward_in_batches(self, dataset, batch_size):
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                pin_memory=True,
                                num_workers=6)

        _Xs = []
        for _, minibatch in enumerate(dataloader):
            _Xs += [
                self.forward(minibatch['x1'].to(device, non_blocking=True),
                             minibatch['x2'].to(device, non_blocking=True))
            ]
        return torch.cat(_Xs, dim=0)

    # FIXME Too slow
    def fit(self, train_fold, val_fold):
        """
        Trains the model with Adam, optionally using early stopping on the validation fold.
        :param train_fold: training fold.
        :param val_fold: validation fold.
        :return: validation accuracy after training.
        """
        if self.early_stop and val_fold is None:
            raise Exception(
                "User requested early stopping but a validation set was not provided"
            )

        t_train_start = time.time()

        #weights = 1.0 / torch.Tensor([67, 945, 1004, 949, 670, 727])
        #sampler = torch.utils.data.WeightedRandomSampler(weights, len(train_fold))
        #dataloader = DataLoader(train_fold, batch_size=self.batch_size, collate_fn=collate_fn,
        #                        pin_memory=True, sampler=sampler)
        dataloader = DataLoader(train_fold,
                                shuffle=True,
                                batch_size=self.batch_size,
                                collate_fn=collate_fn,
                                pin_memory=True)

        optimizer = torch.optim.Adam(self.parameters(),
                                     lr=self.lr,
                                     weight_decay=self.weight_decay)
        criterion = torch.nn.CrossEntropyLoss()

        checkpoint = self.state_dict()
        best_val_accuracy = 0
        epochs_without_val_acc_improvement = 0
        patience = 10
        epoch = 0
        #for epoch in tqdm(range(1, epochs + 1), desc="epochs", dynamic_ncols=True):
        for epoch in range(1, self.epochs + 1):
            running_loss = 0.0
            num_minibatches = 0
            for i, data in enumerate(dataloader):
                # Move data to devices
                data_x1 = data['x1'].to(device, non_blocking=True)
                data_x2 = data['x2'].to(device, non_blocking=True)
                data_y = data['y'].to(device, non_blocking=True)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                self.train()
                train_out = self.forward(data_x1, data_x2)
                loss = criterion(train_out, data_y)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                num_minibatches += 1

            curr_avg_loss = running_loss / num_minibatches
            if math.isnan(curr_avg_loss):
                print("Loss is NaN. Stopping.")
                break

            if val_fold is not None:
                _, val_accuracy, _ = self.performance(None, val_fold, None)

                if val_accuracy > best_val_accuracy:
                    epochs_without_val_acc_improvement = 0
                    best_val_accuracy = val_accuracy
                    checkpoint = self.state_dict()
                else:
                    epochs_without_val_acc_improvement += 1

                # Early stopping
                if self.early_stop and epochs_without_val_acc_improvement >= patience:
                    print(
                        f"Epoch {epoch}: no accuracy improvement after {patience} epochs. Early stop."
                    )
                    self.load_state_dict(checkpoint)
                    break

        self.actual_epochs = epoch - patience if self.early_stop else epoch

        t_train_end = time.time()
        self.training_time = t_train_end - t_train_start

        # Compute accuracy on the validation set
        _, val_accuracy, _ = self.performance(None, val_fold, None)
        return val_accuracy

    def performance(self, train_fold, val_fold, test_fold=None, batch_size=None):
        if batch_size is None:
            batch_size = self.batch_size

        if train_fold:
            train_accuracy, train_out, train_expected = self.performance_from_fold(
                train_fold, batch_size)
        else:
            train_accuracy, train_out, train_expected = (0, None, None)

        if val_fold:
            val_accuracy, val_out, val_expected = self.performance_from_fold(
                val_fold, batch_size)
        else:
            val_accuracy, val_out, val_expected = (0, None, None)

        if test_fold:
            test_accuracy, test_out, test_expected = self.performance_from_fold(
                test_fold, batch_size)
        else:
            test_accuracy, test_out, test_expected = (0, None, None)

        save_raw_predictions = False
        if save_raw_predictions:
            raw_preds_filename = '/home/disarli/tmp/predictions.pt'
            try:
                saved = torch.load(raw_preds_filename)
            except FileNotFoundError:
                saved = []
            saved.append({
                'train_out': train_out.cpu(),
                'train_expected': train_expected.cpu(),
                'val_out': val_out.cpu(),
                'val_expected': val_expected.cpu(),
                'test_out': test_out.cpu() if test_fold else None,
                'test_expected': test_expected.cpu() if test_fold else None,
            })
            torch.save(saved, raw_preds_filename)

        return train_accuracy, val_accuracy, test_accuracy

    def performance_from_out(self, output, expected):
        """
        Given a tensor of network outputs and a tensor of expected outputs,
        returns the performance.
        :param output: raw network outputs, one row per sample.
        :param expected: expected class index for each sample.
        :return: accuracy.
        """
        output = output.argmax(dim=1).cpu()
        return common.accuracy(output, expected)

    def performance_from_fold(self, fold, batch_size):
        with torch.no_grad():
            self.eval()

            out = self.forward_in_batches(fold, batch_size)
            expected = torch.Tensor([d['y'] for d in fold])

            perf = self.performance_from_out(out, expected)
            return perf, out, expected
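# Illustrative sketch (not part of the original code): how ESNModelSNLI is typically
# constructed and trained. The hyperparameter values below are placeholders; the
# per-layer entries ('scale_rec', 'scale_in', 'density_in', 'leaking_rate' and their
# '_bw' counterparts) must provide one value per ESN layer.
def _example_snli_usage(train_fold, val_fold, test_fold):
    hp_example = {
        'epochs': -50,          # negative value enables early stopping
        'lr': 1e-3,
        'n_batch': 64,
        'weight_decay': 0.0,
        'num_layers': 1,
        'reservoir_size': 500,
        'n_attention': 100,
        'attention_r': 1,
        'scale_rec': [0.9], 'scale_rec_bw': [0.9],
        'scale_in': [1.0], 'scale_in_bw': [1.0],
        'density_in': [1.0], 'density_in_bw': [1.0],
        'leaking_rate': [0.5], 'leaking_rate_bw': [0.5],
    }
    model = ESNModelSNLI(hp_example)
    model.fit(train_fold, val_fold)
    return model.performance(train_fold, val_fold, test_fold)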
class LeakyESNAttention(torch.nn.Module):

    def __init__(self,
                 input_size,
                 output_size,
                 reservoir_size: int = 100,
                 num_esn_layers: int = 1,
                 mlp_n_hidden: int = 1,
                 mlp_hidden_size: int = 100,
                 dropout: float = 0,
                 attention_type: str = 'LinSelfAttention',
                 attention_hidden_size: int = 100,
                 attention_heads: int = 1,
                 scale_rec: List[float] = (1, ),
                 scale_rec_bw: List[float] = (1, ),
                 scale_in: List[float] = (1, ),
                 scale_in_bw: List[float] = (1, ),
                 density_in: List[float] = (1, ),
                 density_in_bw: List[float] = (1, ),
                 leaking_rate: List[float] = (1, ),
                 leaking_rate_bw: List[float] = (1, )):
        super(LeakyESNAttention, self).__init__()

        bidirectional = True

        self.input_size = input_size
        self.output_size = output_size
        self.n_layers = num_esn_layers
        self.reservoir_size = reservoir_size
        self.dropout = dropout
        self.attention_type = attention_type
        self.mlp_n_hidden = mlp_n_hidden
        self.mlp_hidden_size = mlp_hidden_size

        num_directions = 2 if bidirectional else 1

        def cell_provider(input_size_, reservoir_size_, layer, direction):
            return ESNMultiringCell(
                input_size_,
                reservoir_size_,
                bias=True,
                contractivity_coeff=scale_rec[layer] if direction == 0 else scale_rec_bw[layer],
                scale_in=scale_in[layer] if direction == 0 else scale_in_bw[layer],
                density_in=density_in[layer] if direction == 0 else density_in_bw[layer],
                leaking_rate=leaking_rate[layer] if direction == 0 else leaking_rate_bw[layer])

        self.esn = ESNBase(cell_provider,
                           input_size,
                           reservoir_size,
                           num_layers=self.n_layers,
                           bidirectional=bidirectional)

        if self.attention_type == 'LinSelfAttention':
            self.ff1 = torch.nn.Linear(
                self.n_layers * num_directions * reservoir_size,
                attention_hidden_size)
            self.attn = LinSelfAttention(attention_hidden_size,
                                         r=attention_heads)
            mlp_input_size = self.attn.output_features()
        elif self.attention_type == 'Attention':
            self.ff1 = torch.nn.Linear(
                self.n_layers * num_directions * reservoir_size,
                attention_hidden_size)
            self.attn = SingleTargetAttention(attention_hidden_size)
            mlp_input_size = self.attn.output_features()
        elif self.attention_type == 'MaxPooling':
            self.ff1 = torch.nn.Linear(
                self.n_layers * num_directions * reservoir_size,
                attention_hidden_size)
            mlp_input_size = attention_hidden_size
            self.esn_bn = torch.nn.BatchNorm1d(mlp_input_size)
        elif self.attention_type == 'None':
            mlp_input_size = self.n_layers * num_directions * reservoir_size
        elif self.attention_type == 'Mean':
            mlp_input_size = self.n_layers * num_directions * reservoir_size
        else:
            raise Exception("Invalid attention type: " + self.attention_type)

        if mlp_n_hidden == 0:
            self.mlp_hn = torch.nn.ModuleList([])
            self.mlp_out = torch.nn.Linear(mlp_input_size, output_size)
        else:
            mlp_h1 = torch.nn.Linear(mlp_input_size, mlp_hidden_size)
            self.mlp_hn = torch.nn.ModuleList([
                torch.nn.Linear(mlp_hidden_size, mlp_hidden_size)
                for _ in range(mlp_n_hidden - 1)
            ])
            self.mlp_hn.insert(0, mlp_h1)
            self.mlp_out = torch.nn.Linear(mlp_hidden_size, output_size)

        self._attnweights = None  # Attention weights for the latest minibatch.

    def readout(self, states: torch.Tensor):
        """
        :param states: (seq_len, batch_size, num_directions * hidden_size)
        :return:
        """
        s = states
        for lyr in self.mlp_hn:
            s = torch.nn.functional.dropout(s, p=self.dropout)
            s = torch.relu(lyr(s))
        s = self.mlp_out(s)
        return s

    def forward(self, input: torch.Tensor, seq_lengths=None):
        """
        input: (seq_len, batch_size, input_size)
        lengths: integer list of lengths, one for each sequence in 'input'.
                 If provided, padding states are automatically ignored.
        output: (batch_size, output_size)
        """
        seq_len = input.size(0)
        batch = input.size(1)

        if self.attention_type == 'LinSelfAttention' or self.attention_type == 'Attention':
            n_attn = self.ff1.out_features

            # states: (seq_len, batch, num_directions * hidden_size)
            states, _ = self.esn.forward(input)
            states = torch.tanh(self.ff1(states))  # states: (seq_len, batch, n_attn)

            ## Let the recurrent network compute the states
            #states = torch.empty((seq_len, batch, n_attn), device=input.device)
            #for i, x in enumerate(self.esn.forward_long_sequence_yld(input)):
            #    # x: (num_layers * num_directions, batch, hidden_size)
            #    x = x.permute(1, 0, 2).contiguous().view(x.shape[1], -1)
            #    # Reduce dimensionality. x: (batch, n_attention)
            #    x = torch.tanh(self.ff1(x))
            #    states[i] = x

            # Apply Attention. x: (batch, n_attention)
            x, self._attnweights = self.attn.forward(states)

        elif self.attention_type == 'MaxPooling':
            s = torch.zeros((batch, self.ff1.out_features), device=input.device)
            for i, x in enumerate(self.esn.forward_long_sequence_yld(input)):
                # x: (num_layers * num_directions, batch, hidden_size)
                x = x.permute(1, 0, 2).contiguous().view(x.shape[1], -1)
                # x: (batch, num_layers * num_directions * hidden_size)
                x = torch.tanh(self.ff1(x))
                s = torch.max(torch.stack((s, x)), 0)[0]
            x = self.esn_bn(s)

        elif self.attention_type == 'Mean':
            n_res = self.esn.num_layers * self.esn.num_directions * self.esn.reservoir_size

            # Let the recurrent network compute the states and average them over time
            states = torch.zeros((batch, n_res), device=input.device)
            count = 0
            for i, x in enumerate(self.esn.forward_long_sequence_yld(input)):
                # x: (num_layers * num_directions, batch, hidden_size)
                x = x.permute(1, 0, 2).contiguous().view(x.shape[1], -1)
                states += x
                count += 1
            x = states / count

        elif self.attention_type == 'None':
            x = self.esn.forward_long_sequence(input, seq_lengths=seq_lengths)
            x = x.permute(1, 0, 2).contiguous().view(x.shape[1], -1)

        return self.readout(x)

    def loss_penalty(self):
        if self.attention_type == 'LinSelfAttention' and self.attn.r > 1:
            return LinSelfAttention.loss_penalization(self._attnweights)
        return 0
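# Illustrative sketch (not part of the original code): a minimal forward pass
# through LeakyESNAttention with random data. Shapes follow the docstrings above:
# input is (seq_len, batch_size, input_size); the result has one row of
# `output_size` scores per sequence.
def _example_leaky_esn_attention_forward():
    seq_len, batch_size, input_size, output_size = 20, 4, 300, 6
    model = LeakyESNAttention(input_size,
                              output_size,
                              reservoir_size=100,
                              attention_type='LinSelfAttention',
                              attention_hidden_size=50,
                              attention_heads=2)
    x = torch.randn(seq_len, batch_size, input_size)
    scores = model(x)               # (batch_size, output_size)
    penalty = model.loss_penalty()  # extra loss term when attention_heads > 1
    return scores, penalty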
class ESNModelQC(torch.nn.Module):

    def __init__(self,
                 input_size,
                 reservoir_size,
                 contractivity_coeff=0.9,
                 scale_in=1.0,
                 leaking_rate=1.0,
                 alpha=1e-6,
                 rescaling_method='norm',
                 hp=None):
        super(ESNModelQC, self).__init__()

        bidirectional = True

        self.n_layers = hp['num_layers']
        self.batch_size = hp['n_batch']

        def cell_provider(input_size_, reservoir_size_, layer, direction):
            return ESNMultiringCell(
                input_size_,
                reservoir_size_,
                bias=True,
                contractivity_coeff=hp['scale_rec'][layer] if direction == 0 else hp['scale_rec_bw'][layer],
                scale_in=hp['scale_in'][layer] if direction == 0 else hp['scale_in_bw'][layer],
                density_in=hp['density_in'][layer] if direction == 0 else hp['density_in_bw'][layer],
                leaking_rate=hp['leaking_rate'][layer] if direction == 0 else hp['leaking_rate_bw'][layer],
                rescaling_method=rescaling_method)

        self.esn = ESNBase(cell_provider,
                           input_size,
                           reservoir_size,
                           num_layers=self.n_layers,
                           bidirectional=bidirectional).to(device)

        # self.regr = RidgeClassifier(alpha=alpha, class_weight='balanced', normalize=True)
        self.regr = None
        self.alpha = alpha

        # Cached output for the training set
        self.cached_train_out = None

        self.training_time = -1

    def forward(self, input: torch.Tensor, seq_lengths=None, return_states=False, return_probs=False):
        """
        input: (seq_len, batch_size, input_size)
        lengths: integer list of lengths, one for each sequence in 'input'.
                 If provided, padding states are automatically ignored.
        output: (batch_size,) predicted class indices, unless return_states or return_probs is set.
        """
        # x: (seq_len, batch, num_directions * hidden_size)   last layer only, all steps
        # h: (num_layers * num_directions, batch, hidden_size) last step only, all layers
        x, h = self.esn(input.to(device))

        if self.n_layers == 1:
            state = self.esn.extract_last_states(x, seq_lengths=seq_lengths).cpu()
            concat_states = state  # (batch_size, num_directions * reservoir_size)
        else:
            # (batch_size, num_layers * num_directions * reservoir_size)
            concat_states = h.permute(1, 0, 2).contiguous().view(h.shape[1], -1).cpu()

        if return_states:
            return concat_states

        if return_probs:
            return torch.from_numpy(self.regr.decision_function(concat_states.numpy()))
        else:
            return torch.from_numpy(self.regr.predict(concat_states.numpy()))

    def forward_in_batches(self, dataset, batch_size, return_states=False, return_probs=False):
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                collate_fn=collate_fn)

        _Xs = []
        for _, minibatch in enumerate(dataloader):
            _Xs += [
                self.forward(minibatch['x'],
                             seq_lengths=minibatch['lengths'],
                             return_states=return_states,
                             return_probs=return_probs)
            ]
        return torch.cat(_Xs, dim=0)

    def find_best_alpha(self, train_fold, val_fold, batch_size):
        """
        Fit the model while searching for the best regularization parameter.
        The best regularization parameter is then assigned to self.alpha.
        :param train_fold:
        :param val_fold:
        :param batch_size:
        :return: the best alpha found.
        """
        # Collect the states without computing the readout
        train_states = self.forward_in_batches(train_fold, batch_size, return_states=True)
        train_expected = torch.Tensor([d['y'] for d in train_fold])
        val_states = self.forward_in_batches(val_fold, batch_size, return_states=True)
        val_expected = torch.Tensor([d['y'] for d in val_fold])

        #possible_alpha_exponents = [-6, -5, -4, -3, -2, -1, 0, 1, 0.5, 0.8, 2, 2.5, 2.8, 3, 4, 5, 6]
        #possible_alpha_exponents = [0, 1, 0.5, 0.8, 2, 2.5, 2.8, 3, 4, 5, 6]
        possible_alpha_exponents = [0, 1, 0.5, 0.8, 2, 2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3,
                                    3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.8, 4, 5, 6]
        possible_alphas = [10**v for v in possible_alpha_exponents]

        best_alpha = None
        best_val_perf = -math.inf
        for a in possible_alphas:
            # Fit with alpha = a
            self.alpha = a
            self._fit_from_states(train_states, train_expected)

            # Check the validation accuracy for this alpha
            self.eval()
            val_out = self._predict_from_states(val_states)
            val_perf = common.accuracy(val_out, val_expected)

            # Update the best alpha
            if val_perf > best_val_perf:
                best_val_perf = val_perf
                best_alpha = a

        self.alpha = best_alpha
        return best_alpha

    def fit(self, train_fold):
        """
        Fits the model with self.alpha as regularization parameter.
        :param train_fold: training fold.
        :return:
        """
        self.train()

        # Collect all states
        t_train_start = time.time()
        _X = self.forward_in_batches(train_fold, self.batch_size, return_states=True)
        _y = torch.cat([d['y'] for d in train_fold]).to(device).type(torch.get_default_dtype())

        _X = _X.cpu().numpy()
        _y = _y.cpu().numpy()

        # Actual training
        self._fit_from_states(_X, _y)
        t_train_end = time.time()
        self.training_time = t_train_end - t_train_start

        self.cached_train_out = torch.from_numpy(self.regr.predict(_X))

    def _fit_from_states(self, states, expected):
        """
        Fit the readout from a matrix of states.
        :param states: a tensor or numpy array of shape (batch_size, state_size)
        :param expected: expected class index for each sample
        :return:
        """
        if isinstance(states, torch.Tensor):
            states = states.cpu().numpy()
        if isinstance(expected, torch.Tensor):
            expected = expected.cpu().numpy()

        self.regr = RidgeClassifier(alpha=self.alpha)
        self.regr.fit(states, expected)
        # Closed form: W = (X.t() @ X + self.alpha * torch.eye(X.shape[1], device=device)).inverse() @ X.t() @ y

    def _predict_from_states(self, states):
        if isinstance(states, torch.Tensor):
            states = states.cpu().numpy()
        return torch.from_numpy(self.regr.predict(states))

    def performance(self, train_fold, val_fold, test_fold=None):
        with torch.no_grad():
            # Actual output class indices
            self.eval()
            train_out = self.cached_train_out
            val_out = self.forward_in_batches(val_fold, self.batch_size) if val_fold else None
            test_out = self.forward_in_batches(test_fold, self.batch_size) if test_fold else None

            # Expected output class indices
            train_expected = torch.Tensor([d['y'] for d in train_fold])
            val_expected = torch.Tensor([d['y'] for d in val_fold]) if val_fold else None
            test_expected = torch.Tensor([d['y'] for d in test_fold]) if test_fold else None

            save_raw_predictions = False
            if save_raw_predictions:
                raw_preds_filename = '/home/disarli/tmp/predictions.pt'
                raw_train_out = self.forward_in_batches(train_fold, self.batch_size, return_probs=True)
                raw_val_out = self.forward_in_batches(val_fold, self.batch_size, return_probs=True) if val_fold else None
                raw_test_out = self.forward_in_batches(test_fold, self.batch_size, return_probs=True) if test_fold else None
                try:
                    saved = torch.load(raw_preds_filename)
                except FileNotFoundError:
                    saved = []
                saved.append({
                    'train_out': raw_train_out.cpu(),
                    'train_expected': train_expected.cpu(),
                    'val_out': raw_val_out.cpu() if val_fold else None,
                    'val_expected': val_expected.cpu() if val_fold else None,
                    'test_out': raw_test_out.cpu() if test_fold else None,
                    'test_expected': test_expected.cpu() if test_fold else None,
                })
                torch.save(saved, raw_preds_filename)

            # Compute performance measures
            train_accuracy = common.accuracy(train_out, train_expected)
            val_accuracy = common.accuracy(val_out, val_expected) if val_fold else 0
            test_accuracy = common.accuracy(test_out, test_expected) if test_fold else 0

        return train_accuracy, val_accuracy, test_accuracy

    @staticmethod
    def ensemble_performance(predictions, expected):
        out = torch.stack(predictions).mean(dim=0)
        out = out.argmax(dim=1)
        return common.accuracy(out, expected)
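# Illustrative sketch (not part of the original code): the typical ESNModelQC workflow,
# assuming `hp_example` carries the per-layer reservoir settings consumed by
# `cell_provider` and that the folds are datasets compatible with `collate_fn`.
def _example_qc_usage(hp_example, train_fold, val_fold, test_fold):
    model = ESNModelQC(input_size=300,
                       reservoir_size=500,
                       alpha=1e-6,
                       hp=hp_example)
    # Select the ridge regularization strength on the validation fold,
    # then refit the readout on the training fold with the chosen alpha.
    model.find_best_alpha(train_fold, val_fold, batch_size=model.batch_size)
    model.fit(train_fold)
    return model.performance(train_fold, val_fold, test_fold)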
def __init__(self,
             input_size,
             reservoir_size,
             contractivity_coeff=0.9,
             scale_in=1.0,
             f=torch.tanh,
             leaking_rate=1.0,
             alpha=1e-6,
             rescaling_method='norm',
             hp=None):
    super(ESNModelQC, self).__init__()

    bidirectional = True

    # When True, adds a linear reservoir in addition to the common one.
    # The states are then concatenated before the readout.
    self.dual_reservoir = False

    self.n_layers = hp['num_layers']
    self.batch_size = hp['n_batch']

    def cell_provider(input_size_, reservoir_size_, layer, direction):
        return ESNMultiringCell(
            input_size_,
            reservoir_size_,
            bias=True,
            contractivity_coeff=hp['scale_rec'][layer] if direction == 0 else hp['scale_rec_bw'][layer],
            scale_in=hp['scale_in'][layer] if direction == 0 else hp['scale_in_bw'][layer],
            density_in=hp['density_in'][layer] if direction == 0 else hp['density_in_bw'][layer],
            f=f,
            leaking_rate=hp['leaking_rate'][layer] if direction == 0 else hp['leaking_rate_bw'][layer],
            rescaling_method=rescaling_method)

    self.esn = ESNBase(cell_provider,
                       input_size,
                       reservoir_size,
                       num_layers=self.n_layers,
                       bidirectional=bidirectional).to(device)

    # Add a linear reservoir
    if self.dual_reservoir:
        def dual_cell_provider(input_size_, reservoir_size_, layer, direction):
            return ESNMultiringCell(
                input_size_,
                reservoir_size_,
                bias=True,
                contractivity_coeff=hp['scale_rec_dual'][layer] if direction == 0 else hp['scale_rec_bw_dual'][layer],
                scale_in=hp['scale_in_dual'][layer] if direction == 0 else hp['scale_in_bw_dual'][layer],
                f=lambda x: x,
                leaking_rate=hp['leaking_rate_dual'][layer] if direction == 0 else hp['leaking_rate_bw_dual'][layer],
                rescaling_method=rescaling_method)

        self.dual_esn = ESNBase(dual_cell_provider,
                                input_size,
                                reservoir_size,
                                num_layers=self.n_layers,
                                bidirectional=bidirectional).to(device)

    # self.esn = ESNMultiring(
    #     input_size,
    #     reservoir_size,
    #     contractivity_coeff=contractivity_coeff,
    #     scale_in=scale_in,
    #     bidirectional=bidirectional,
    #     f=f,
    #     leaking_rate=leaking_rate,
    #     rescaling_method=rescaling_method,
    #     num_layers=self.n_layers
    # ).to(device)

    # self.regr = RidgeClassifier(alpha=alpha, class_weight='balanced', normalize=True)
    self.regr = None
    self.alpha = alpha

    # Cached output for the training set
    self.cached_train_out = None

    self.training_time = -1
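# Illustrative sketch (not part of the original code): with the dual reservoir enabled,
# the states of the standard (tanh) reservoir and of the linear reservoir would be
# concatenated feature-wise before reaching the readout, as the comment above describes.
# The exact plumbing in the original forward pass may differ.
def _example_dual_reservoir_states(model, input):
    x, h = model.esn(input.to(device))                # nonlinear reservoir states
    x_lin, h_lin = model.dual_esn(input.to(device))   # linear reservoir states
    # Concatenate the per-sequence state summaries along the feature dimension
    concat = torch.cat([
        h.permute(1, 0, 2).contiguous().view(h.shape[1], -1),
        h_lin.permute(1, 0, 2).contiguous().view(h_lin.shape[1], -1),
    ], dim=1)
    return concat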