def decay(self, decay):
    if not isinstance(decay, tuple):
        raise e.TypeError('`decay` should be a tuple')
    if len(decay) != self.n_layers:
        raise e.SizeError(f'`decay` should have size equal to {self.n_layers}')

    self._decay = decay

def momentum(self, momentum):
    if not isinstance(momentum, tuple):
        raise e.TypeError('`momentum` should be a tuple')
    if len(momentum) != self.n_layers:
        raise e.SizeError(f'`momentum` should have size equal to {self.n_layers}')

    self._momentum = momentum

def lr(self, lr):
    if not isinstance(lr, tuple):
        raise e.TypeError('`lr` should be a tuple')
    if len(lr) != self.n_layers:
        raise e.SizeError(f'`lr` should have size equal to {self.n_layers}')

    self._lr = lr

def steps(self, steps):
    if not isinstance(steps, tuple):
        raise e.TypeError('`steps` should be a tuple')
    if len(steps) != self.n_layers:
        raise e.SizeError(f'`steps` should have size equal to {self.n_layers}')

    self._steps = steps
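# Usage sketch (illustrative only): all four setters above enforce the same
# contract, i.e. one value per stacked RBM. Assuming they are wired up as
# property setters (e.g. via `@lr.setter`) on a model with `n_layers = 3`;
# the model name and values below are hypothetical:
#
#     model.lr = (0.1, 0.01, 0.001)     # one learning rate per layer
#     model.momentum = (0.9, 0.9, 0.9)  # one momentum per layer
#     model.lr = 0.1                    # raises e.TypeError: not a tuple
#     model.lr = (0.1, 0.01)            # raises e.SizeError: wrong length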
def pretrain(self, dataset, batch_size=128, epochs=[10], frames=6):
    """DBN pre-training phase for further DBM initialization.

    Args:
        dataset (torch.utils.data.Dataset | Dataset): A Dataset object containing the training data.
        batch_size (int): Amount of samples per batch.
        epochs (list): Number of training epochs per layer.
        frames (int): Number of input frames.

    Returns:
        MSE (mean squared error) and log pseudo-likelihood from the training step.

    """

    # Checking that the list of epochs has one entry per layer
    if len(epochs) != self.n_layers:
        raise e.SizeError(f'`epochs` should have size equal to {self.n_layers}')

    # Initializing MSE, pseudo-likelihood and cost as lists
    mse, pl, custo = [], [], []

    # For every possible model (RBM)
    for i, model in enumerate(self.models):
        logger.info(f'Fitting layer {i+1}/{self.n_layers} ...')

        if i == 0:
            # Setting the constraints for pre-training the bottom layer's weights
            model.c1 = 2.0
            model.c2 = 1.0

            # Fits the RBM
            model_mse, model_pl, cst = model.fit(dataset, batch_size, epochs[i], frames)

            # Appending the metrics
            mse.append(model_mse.item())
            pl.append(model_pl.item())
            custo.append(cst.item())

            self.dump(mse=model_mse.item(), pl=model_pl.item(), fe=cst.item())
        else:
            # Top layer doubles the hidden constraint; intermediate layers double both
            if i == self.n_layers - 1:
                model.c1 = 1.0
                model.c2 = 2.0
            else:
                model.c1 = 2.0
                model.c2 = 2.0

            batches = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                 num_workers=workers, collate_fn=collate_fn)

            # Iterating over the epochs for model `i`
            for ep in range(epochs[i]):
                logger.info(f'Epoch {ep+1}/{epochs[i]}')

                mse2, pl2, cst2 = 0, 0, 0
                start = time.time()
                ii = 1

                for x, y in tqdm(batches):
                    # Checking whether GPU is available and if it should be used
                    if self.device == 'cuda':
                        x = x.cuda()

                    m2, p2, c2 = 0, 0, 0

                    # Fitting each frame individually and averaging the metrics
                    for fr in range(frames):
                        x_ = x[:, fr, :, :]
                        x_ = x_.view(x.size(0), self.models[0].n_visible).detach()

                        # Standardizing the input (small constant avoids division by zero)
                        x_ = (x_ - torch.mean(x_, 0, True)) / (torch.std(x_, 0, True) + 10e-6)

                        # Propagating the sample through the already-trained models
                        for j in range(i):
                            x_ = self.models[j].forward(x_)

                        model_mse, model_pl, ct = model.fit(x_, len(x_), 1, frames)

                        # Accumulating the partial metrics
                        m2 += model_mse
                        p2 += model_pl
                        c2 += ct

                    m2 /= frames
                    p2 /= frames
                    c2 /= frames

                    mse2 += m2
                    pl2 += p2
                    cst2 += c2

                    if ii % 100 == 99:
                        print('MSE:', (mse2 / ii).item(), 'Cost:', (cst2 / ii).item())
                    ii += 1

                mse2 /= len(batches)
                pl2 /= len(batches)
                cst2 /= len(batches)

                mse.append(mse2.item())
                pl.append(pl2.item())
                custo.append(cst2.item())

                logger.info(f'MSE: {mse2.item()} | log-PL: {pl2.item()} | Cost: {cst2.item()}')

                end = time.time()
                model.dump(mse=mse2.item(), pl=pl2.item(), fe=cst2.item(), time=end - start)
                self.dump(mse=mse2.item(), pl=pl2.item(), fe=cst2.item())

    # Restoring the constraints once pre-training is done
    for model in self.models:
        model.c1 = 1.0
        model.c2 = 1.0

    return mse, pl
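# Usage sketch (illustrative only): pre-training a three-layer stack with
# per-layer epoch counts. The constructor arguments, `video_dataset` and the
# epoch values are hypothetical; the one hard requirement enforced above is
# that `epochs` has exactly `n_layers` entries.
#
#     dbn = DBN(...)  # hypothetical construction with n_layers == 3
#     mse, pl = dbn.pretrain(video_dataset,
#                            batch_size=128,
#                            epochs=[5, 5, 10],
#                            frames=6)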
def plot(*args, labels=None, title='', subtitle='', grid=True, legend=True):
    """Plots the convergence graph of the desired variables.

    Essentially, each variable is a list or numpy array with size equal to (epochs x 1).

    Args:
        labels (list): Labels to be applied for each plot in legend.
        title (str): The title of the plot.
        subtitle (str): The subtitle of the plot.
        grid (bool): If grid should be used or not.
        legend (bool): If legend should be displayed or not.

    """

    # Gathering the amount of possible ticks
    ticks = np.arange(1, len(args[0]) + 1)

    # Creating figure and axis subplots
    _, ax = plt.subplots(figsize=(7, 5))

    # Defining some properties, such as axis labels
    ax.set(xlabel='epoch', ylabel='value')

    # Setting the amount of ticks
    ax.set_xticks(ticks)

    # Setting minimum and maximum possible ticks
    ax.set_xlim(xmin=1, xmax=ticks[-1])

    # Setting both title and subtitle
    ax.set_title(title, loc='left', fontsize=14)
    ax.set_title(subtitle, loc='right', fontsize=8, color='grey')

    # If grid usage is true, adds the grid property to the axis
    if grid:
        ax.grid()

    # If the labels argument exists, it should be a list of the same size as `args`
    if labels:
        if not isinstance(labels, list):
            raise e.TypeError('`labels` should be a list')
        if len(labels) != len(args):
            raise e.SizeError('`args` and `labels` should have the same size')
    # If the labels argument does not exist, creates a list with default indicators
    else:
        labels = [f'variable_{i}' for i in range(len(args))]

    # Plotting each variable against its label
    for (arg, label) in zip(args, labels):
        ax.plot(ticks, arg, label=label)

    # If legend usage is true, adds the legend property to the axis
    if legend:
        ax.legend()

    # Displaying the plot
    plt.show()
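# Usage sketch (illustrative only): plotting the metric lists returned by
# `fit`/`pretrain`. The variable names are hypothetical; each positional
# argument must have the same length, since ticks are taken from the first.
#
#     mse, pl = dbn.fit(video_dataset, epochs=[5, 5, 10])
#     plot(mse, pl,
#          labels=['MSE', 'log-PL'],
#          title='DBN convergence',
#          subtitle='per-layer training')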
def fit(self, dataset, batch_size=128, epochs=[10], frames=6):
    """Fits a new DBN model.

    Args:
        dataset (torch.utils.data.Dataset | Dataset): A Dataset object containing the training data.
        batch_size (int): Amount of samples per batch.
        epochs (list): Number of training epochs per layer.
        frames (int): Number of input frames.

    Returns:
        MSE (mean squared error) and log pseudo-likelihood from the training step.

    """

    # Checking that the list of epochs has one entry per layer
    if len(epochs) != self.n_layers:
        raise e.SizeError(f'`epochs` should have size equal to {self.n_layers}')

    # Initializing MSE, pseudo-likelihood and cost as lists
    mse, pl, custo = [], [], []

    # For every possible model (RBM)
    for i, model in enumerate(self.models):
        logger.info(f'Fitting layer {i+1}/{self.n_layers} ...')

        if i == 0:
            # Fits the RBM
            model_mse, model_pl, cst = model.fit(dataset, batch_size, epochs[i], frames)

            # Appending the metrics
            mse.append(model_mse.item())
            pl.append(model_pl.item())
            custo.append(cst.item())

            self.dump(mse=model_mse.item(), pl=model_pl.item(), fe=cst.item())
        else:
            batches = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                 num_workers=workers, collate_fn=collate_fn)

            # Iterating over the epochs for model `i`
            for ep in range(epochs[i]):
                logger.info(f'Epoch {ep+1}/{epochs[i]}')

                mse2, pl2, cst2 = 0, 0, 0

                # Progress bar over batches (assumes `from tqdm import tqdm`,
                # matching its usage in `pretrain`)
                inner_trans = tqdm(total=len(batches), desc='Batch', position=2)

                start = time.time()

                for ii, batch in enumerate(batches):
                    x, y = batch

                    # Checking whether GPU is available and if it should be used
                    if self.device == 'cuda':
                        x = x.cuda()

                    m2, p2, c2 = 0, 0, 0

                    # Collapsing the clip into a single frame-difference image
                    for fr in range(1, frames):
                        x[:, 0, :, :] -= x[:, fr, :, :]
                    x = x[:, 0, :, :]
                    x = x.view(x.size(0), self.models[0].n_visible).detach()

                    # Standardizing the input (small constant avoids division by zero)
                    x = (x - torch.mean(x, 0, True)) / (torch.std(x, 0, True) + 10e-6)

                    # Propagating the sample through the already-trained models
                    for j in range(i):
                        x, _ = self.models[j].hidden_sampling(x)

                    model_mse, model_pl, ct = model.fit(x, len(x), 1, frames)

                    # Accumulating the partial metrics
                    m2 += model_mse
                    p2 += model_pl
                    c2 += ct

                    mse2 += m2
                    pl2 += p2
                    cst2 += c2

                    if ii % 100 == 99:
                        print('MSE:', (mse2 / ii).item(), 'Cost:', (cst2 / ii).item())
                    inner_trans.update(1)

                mse2 /= len(batches)
                pl2 /= len(batches)
                cst2 /= len(batches)

                mse.append(mse2.item())
                pl.append(pl2.item())
                custo.append(cst2.item())

                logger.info(f'MSE: {mse2.item()} | log-PL: {pl2.item()} | Cost: {cst2.item()}')

                end = time.time()
                model.dump(mse=mse2.item(), pl=pl2.item(), fe=cst2.item(), time=end - start)
                self.dump(mse=mse2.item(), pl=pl2.item(), fe=cst2.item())

    return mse, pl
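# The inner loop above reduces a (batch, frames, H, W) clip to a single
# standardized frame-difference image before it reaches the stacked RBMs.
# A self-contained sketch of that transform; the shapes below are
# hypothetical and `1e-5` equals the `10e-6` constant used above:
#
#     import torch
#
#     x = torch.rand(128, 6, 28, 28)           # (batch, frames, H, W)
#     for fr in range(1, 6):
#         x[:, 0, :, :] -= x[:, fr, :, :]      # accumulate frame differences
#     x = x[:, 0, :, :].reshape(x.size(0), 28 * 28)
#     x = (x - x.mean(0, True)) / (x.std(0, True) + 1e-5)  # per-feature standardization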