import math
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.optim as optim

from transformer.Models import Encoder
from transformer.Optim import ScheduledOptim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Smoke test: overfit a small encoder stack on a single random batch.
# Assumes an Encoder variant that accepts dense feature vectors directly
# (the stock attention-is-all-you-need-pytorch Encoder also expects
# vocabulary/embedding arguments).
encoder_stacks = Encoder(d_model=32, d_inner=64, n_layers=2, n_head=4,
                         d_k=16, d_v=16, dropout=0.1).to(device)
criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.SGD(encoder_stacks.parameters(), lr=1)

src = torch.rand(1, 2, 32, requires_grad=True, device=device)
tgt = torch.rand(1, 2, 32, device=device)
print(src)

encoder_stacks.train()
for i in range(100):
    # Call the module directly instead of .forward() so hooks still run.
    out, attn = encoder_stacks(src, src_mask=None)
    loss = criterion(out, tgt)
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(encoder_stacks.parameters(), 0.5)
    optimizer.step()
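# For reference, ScheduledOptim (used by the Dataset class below) applies the
# warmup schedule from "Attention Is All You Need". A minimal sketch of the
# rule, assuming the interface of the upstream repo; lr_at_step is a
# hypothetical helper for illustration, not part of any library:
def lr_at_step(step, d_model=32, n_warmup_steps=4000, lr_mul=2.0):
    # Linear warmup for n_warmup_steps steps, then inverse-sqrt decay.
    return lr_mul * d_model ** -0.5 * min(step ** -0.5,
                                          step * n_warmup_steps ** -1.5)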
class Dataset:

    def __init__(self, source_dataset, batch_size, epochs, window_size,
                 device, plot_file, train_data, test_data, valid_data,
                 target_column, target_min, target_max, d_inner, n_layers,
                 n_head_, d_k, d_v, n_warmup_steps, criterion, target_name,
                 d_model, model_file=None, load_data=False, load_model=False):
        self.data_frame = self.read_dataset(source_dataset)
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.target_column = target_column
        self.window = window_size
        self.plot_file = plot_file
        self.n_layers = n_layers
        self.n_head = n_head_
        self.d_inner = d_inner
        self.warmup_step = n_warmup_steps
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.target_name = target_name
        self.input_mask = torch.ones([self.batch_size, 1, self.window],
                                     dtype=torch.int, device=device)
        self.target_max = target_max
        self.target_min = target_min
        self.model_file = model_file
        self.prev_epoch = 0

        if load_data:
            self.train_df = pd.read_csv(train_data)
            self.test_df = pd.read_csv(test_data)
            self.valid_df = pd.read_csv(valid_data)
        else:
            self.train_df, self.valid_df, self.test_df = self.organize_dataset(
                train_data, test_data, valid_data)

        # Zero-pad extra columns so the feature width reaches d_model.
        pad_col = ['col' + str(i)
                   for i in range(self.train_df.shape[1], self.d_model)]
        for col in pad_col:
            self.train_df[col] = 0
            self.test_df[col] = 0
            self.valid_df[col] = 0
        self.columns = self.train_df.shape[1]

        # Assumes an Encoder variant that consumes dense feature rows
        # (no token embedding).
        self.model = Encoder(n_position=200, d_word_vec=self.columns,
                             d_model=self.columns, d_inner=d_inner,
                             n_layers=n_layers, n_head=n_head_, d_k=d_k,
                             d_v=d_v, dropout=0).to(device)
        if load_model:
            # Load the checkpoint once instead of reading the file twice.
            checkpoint = torch.load(self.model_file)
            self.model = checkpoint['model'].to(device)
            self.model.eval()
            self.prev_epoch = checkpoint['epoch']

        self.criterion = criterion
        # Resume the LR schedule from the step implied by the loaded epoch;
        # one epoch is floor(rows / (window * batch_size)) optimizer steps.
        # Assumes a ScheduledOptim variant that accepts an initial step count.
        self.optimizer = ScheduledOptim(
            optim.Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            2.0, self.columns, n_warmup_steps,
            n_step=self.prev_epoch * math.floor(
                self.train_df.shape[0] / (self.window * self.batch_size)))
        self.loss_list = []
        self.lr_list = []

    def read_dataset(self, source_dataset):
        return pd.read_csv(source_dataset)

    def organize_dataset(self, train_data, test_data, valid_data):
        # Placeholder split: all three splits currently alias the full frame.
        train_df = self.data_frame
        valid_df = self.data_frame
        test_df = self.data_frame
        return train_df, valid_df, test_df

    def train(self):
        train_tensor = torch.tensor(self.train_df.values, dtype=torch.float,
                                    device=self.device)
        train_rows = self.train_df.shape[0]
        section_size = self.window * self.batch_size
        avg_loss = 0
        self.model.train()
        for i in range(self.epochs):
            # Mask ~10% of the rows: zero the target column of the chosen
            # rows and train the model to reconstruct the original values.
            chosen_idx = np.random.choice(train_rows, replace=True,
                                          size=math.floor(train_rows / 10))
            imputing_df = self.train_df.copy()
            imputing_df.iloc[np.isin(np.arange(train_rows), chosen_idx),
                             self.target_column] = 0
            imputing_tensor = torch.tensor(imputing_df.values,
                                           dtype=torch.float,
                                           device=self.device)
            avg_loss = 0
            lr = 0
            for j in range(math.floor(train_rows / section_size)):
                batch_imputing_tensor = imputing_tensor[
                    j * section_size:(j + 1) * section_size, :]
                batch_train_tensor = train_tensor[
                    j * section_size:(j + 1) * section_size, :]
                input_tensor = self.unsqueeze(batch_imputing_tensor)
                self.optimizer.zero_grad()
                imputed_tensor = self.squeeze(
                    self.model(input_tensor, self.input_mask)[0])
                # RMSE over the masked positions of this section only.
                imputing_idx = np.isin(
                    np.arange(j * section_size, (j + 1) * section_size),
                    chosen_idx)
                imputing_idx_tensor = torch.tensor(imputing_idx,
                                                   device=self.device)
                imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                      self.target_column]
                true_label_tensor = batch_train_tensor[imputing_idx_tensor,
                                                       self.target_column]
                loss = torch.sqrt(
                    self.criterion(imputed_label_tensor, true_label_tensor))
                # loss = self.criterion(imputed_label_tensor, true_label_tensor)
                if imputed_label_tensor.shape[0] > 0:
                    loss.backward()
                    # Assumes a ScheduledOptim variant whose
                    # step_and_update_lr() returns the current rate.
                    lr = self.optimizer.step_and_update_lr()
                    # Accumulate the running average as a float so the old
                    # computation graphs are freed.
                    avg_loss = (j * avg_loss + loss.item()) / (j + 1)
            self.loss_list.append(avg_loss *
                                  (self.target_max - self.target_min))
            self.lr_list.append(10000 * lr)
            self.save_model(i)
            print(avg_loss * (self.target_max - self.target_min))
            self.draw_plots(avg_loss * (self.target_max - self.target_min))

    def validate(self):
        valid_tensor = torch.tensor(self.valid_df.values, dtype=torch.float,
                                    device=self.device)
        valid_rows = self.valid_df.shape[0]
        section_size = self.window * self.batch_size
        chosen_idx = np.random.choice(valid_rows, replace=True,
                                      size=math.floor(valid_rows / 10))
        imputing_df = self.valid_df.copy()
        imputing_df.iloc[np.isin(np.arange(valid_rows), chosen_idx),
                         self.target_column] = 0
        imputing_tensor = torch.tensor(imputing_df.values, dtype=torch.float,
                                       device=self.device)
        avg_loss = 0
        imputed_list = []
        self.model.eval()
        with torch.no_grad():  # no gradients needed during evaluation
            for j in range(math.floor(valid_rows / section_size)):
                batch_imputing_tensor = imputing_tensor[
                    j * section_size:(j + 1) * section_size, :]
                batch_valid_tensor = valid_tensor[
                    j * section_size:(j + 1) * section_size, :]
                input_tensor = self.unsqueeze(batch_imputing_tensor)
                imputed_tensor = self.squeeze(
                    self.model(input_tensor, self.input_mask)[0])
                imputing_idx = np.isin(
                    np.arange(j * section_size, (j + 1) * section_size),
                    chosen_idx)
                imputing_idx_tensor = torch.tensor(imputing_idx,
                                                   device=self.device)
                imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                      self.target_column]
                true_label_tensor = batch_valid_tensor[imputing_idx_tensor,
                                                       self.target_column]
                imputed_list += imputed_tensor[:, self.target_column].tolist()
                # loss = torch.sqrt(self.criterion(imputed_label_tensor, true_label_tensor))
                loss = self.criterion(imputed_label_tensor, true_label_tensor)
                if imputed_label_tensor.shape[0] > 0:
                    avg_loss = (j * avg_loss + loss.item()) / (j + 1)
        print(avg_loss * (self.target_max - self.target_min))
        valid_list = valid_tensor[:, self.target_column].tolist()
        # Splice the imputed values into the true series at the masked
        # positions; a set makes the membership test O(1) per element.
        chosen_set = set(chosen_idx.tolist())
        imputed_list = [
            imputed_list[i] if i in chosen_set else valid_list[i]
            for i in range(len(imputed_list))
        ]
        plt.plot(imputed_list, 'r', label="Imputed")
        plt.plot(valid_list, 'b', label="True")
        plt.legend(loc="upper right")
        plt.show()

    def unsqueeze(self, batch_tensor):
        # (batch * window, columns) -> (batch, window, columns); a reshape
        # is equivalent to the original row-by-row copy loop.
        return batch_tensor.reshape(self.batch_size, self.window,
                                    self.columns)

    def squeeze(self, predict_tensor):
        # (batch, window, columns) -> (batch * window, columns)
        return predict_tensor.reshape(self.batch_size * self.window,
                                      self.columns)

    def draw_plots(self, avg_loss):
        plt.clf()  # start from a clean figure on every call
        plt.plot(self.loss_list, 'r', label="Loss")
        plt.plot(self.lr_list, 'b', label="10000 * Learning Rate")
        title = '\n'.join([
            'n_layers: ' + str(self.n_layers),
            'n_heads: ' + str(self.n_head),
            'd_inner: ' + str(self.d_inner),
            'warmup_step: ' + str(self.warmup_step),
            'd_v: ' + str(self.d_v),
            'd_k: ' + str(self.d_k),
            'd_model: ' + str(self.d_model),
            'window: ' + str(self.window),
            'target_column: ' + self.target_name,
            'Loss_function: ' + str(self.criterion),
            'avg_loss: ' + str(float(avg_loss)),
        ])
        plt.legend(loc="upper right", title=title)
        timestr = time.strftime("%Y%m%d-%H%M%S")
        # The deprecated `quality` kwarg was removed in newer matplotlib.
        plt.savefig(self.plot_file + timestr)

    def save_model(self, epoch):
        checkpoint = {
            'epoch': epoch,
            'lr_list': self.lr_list,
            'loss_list': self.loss_list,
            'model': self.model,
        }
        if self.model_file:
            torch.save(checkpoint, self.model_file)
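# A minimal usage sketch: the file names and hyperparameters below are
# hypothetical placeholders, not values from the original pipeline.
if __name__ == '__main__':
    dataset = Dataset(source_dataset='data.csv', batch_size=8, epochs=10,
                      window_size=25, device=device, plot_file='plots/loss_',
                      train_data='train.csv', test_data='test.csv',
                      valid_data='valid.csv', target_column=0,
                      target_min=0.0, target_max=1.0, d_inner=64, n_layers=2,
                      n_head_=4, d_k=16, d_v=16, n_warmup_steps=4000,
                      criterion=torch.nn.MSELoss(), target_name='target',
                      d_model=32, model_file='checkpoint.pt')
    dataset.train()     # fit the encoder as a masked-value imputer
    dataset.validate()  # plot imputed vs. true values on the validation split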