import random

import torch

# Main Loop
# Assumes config, data, model_type, cuda, n_iter, measure_mode, output_mode,
# train_countries, TransformerModel, LSTMModel, get_data_tensor and
# get_net_output are defined earlier in the script.
while True:
    min_test_loss = 1.e6
    loss = 0.0
    train_loss_seq = []
    test_loss_seq = []
    # Build a fresh model and optimizer for each restart of the outer loop.
    if model_type == 'Transformer':
        model = TransformerModel(config)
    elif model_type == 'LSTM':
        model = LSTMModel(config)
    if cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['train']['learning_rate'],
                                 weight_decay=config['train']['weight_decay'])
    criterion = torch.nn.MSELoss()
    optimizer.zero_grad()
    for it in range(n_iter):
        model.train()
        # Sample a random training country and run a forward pass on it.
        country = random.choice(train_countries)
        inp, target = get_data_tensor(data, country, measure_mode,
                                      output_mode=output_mode, cuda=cuda)
        out_nn, _ = get_net_output(inp, model_type, model, cuda)
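        # --- Hypothetical continuation (not in the original listing) ---
        # The listing above is truncated right after the forward pass. This
        # is a minimal sketch of how one iteration might continue, assuming
        # the standard PyTorch pattern with the `criterion` and `optimizer`
        # defined above and one gradient step per iteration.
        loss = criterion(out_nn, target)    # MSE between prediction and target
        optimizer.zero_grad()               # clear gradients from the last step
        loss.backward()                     # backpropagate through the model
        optimizer.step()                    # update the parameters
        train_loss_seq.append(float(loss))  # track the training-loss curve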
class TransformerBot(BaseBot):
    """Training bot that wraps TransformerModel, adding an optional
    scheduled decay of the teacher-forcing (TF) ratio when propagate=True."""

    def __init__(self, train_dataset, test_dataset, *, val_dataset,
                 n_layers=6, n_head=8, d_model=512, d_inner_hid=1024,
                 d_k=64, d_v=64, edrop=0.25, odrop=0.25, hdrop=0.1,
                 propagate=False, steps=15, avg_window=AVERAGING_WINDOW,
                 clip_grad=5, min_length=TRAIN_PERIODS,
                 tf_decay=0.7 ** (1 / 6), tf_min=0.02,
                 tf_warmup=12000, tf_steps=2000):
        self.name = "transformer"
        if propagate:
            self.name += "_tf"
        super(TransformerBot, self).__init__(
            train_dataset, test_dataset, clip_grad=clip_grad,
            val_dataset=val_dataset, avg_window=avg_window)
        self.model = TransformerModel(
            n_max_seq=TRAIN_PERIODS, n_layers=n_layers, n_head=n_head,
            d_word_vec=d_model, d_model=d_model, d_inner_hid=d_inner_hid,
            d_k=d_k, d_v=d_v, propagate=propagate, hdrop=hdrop,
            edrop=edrop, odrop=odrop, min_length=min_length,
            y_scale_by=1 / self.global_stds[0], steps=steps)
        self.model.cuda()
        # Teacher-forcing schedule state.
        self.current_tf_ratio = 1
        self.best_tf_ratio = 1
        self.tf_min = tf_min
        self.tf_decay = tf_decay
        self.tf_steps = tf_steps
        self.tf_warmup = tf_warmup
        self.logger.info(str(self.model))
        if propagate:
            self.logger.info(
                "TF min: {:.2f} TF decay: {:.4f} TF steps: {:d} TF warmup: {:d}"
                .format(tf_min, tf_decay, tf_steps, tf_warmup))
        self.tbwriter.add_text("model_structure", str(self.model))
        self.tbwriter.add_text(
            "TF_setting",
            "TF min: {:.2f} TF decay: {:.4f} TF steps: {:d} TF warmup: {:d}"
            .format(tf_min, tf_decay, tf_steps, tf_warmup))

    def get_model_params(self, steps=0, is_train=True):
        # Keep the TF ratio at 1 during warmup, then multiply it by
        # tf_decay once every tf_steps steps, floored at tf_min.
        if is_train:
            if steps < self.tf_warmup:
                return {"tf_ratio": 1}
            if (steps - self.tf_warmup) % self.tf_steps == 0:
                self.current_tf_ratio = max(
                    self.current_tf_ratio * self.tf_decay, self.tf_min)
            return {"tf_ratio": self.current_tf_ratio}
        # No teacher forcing at evaluation time.
        return {"tf_ratio": 0}

    def reset_params(self):
        self.current_tf_ratio = 1
        self.best_tf_ratio = 1

    def additional_logging(self, step):
        if self.model.propagate:
            self.logger.info("Current tf_ratio: {:.4f}".format(
                self.current_tf_ratio))
            self.tbwriter.add_scalar("tf_ratio", self.current_tf_ratio, step)

    def save_state(self):
        # Remember where the schedule stood when the best model was saved.
        self.best_tf_ratio = self.current_tf_ratio
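# get_model_params above implements a stepwise teacher-forcing decay: the
# ratio stays at 1 for the first tf_warmup steps, is multiplied by tf_decay
# once every tf_steps steps after that, and is floored at tf_min. With the
# defaults (tf_decay = 0.7 ** (1 / 6), tf_steps = 2000) the ratio therefore
# drops by a factor of 0.7 every 12000 post-warmup steps. The closed-form
# helper below is a sketch for inspecting that curve before training; it is
# not part of the original code and assumes get_model_params is called
# exactly once per training step.
def tf_ratio_at(step, tf_warmup=12000, tf_steps=2000,
                tf_decay=0.7 ** (1 / 6), tf_min=0.02):
    """Teacher-forcing ratio after `step` training steps."""
    if step < tf_warmup:
        return 1.0
    n_decays = (step - tf_warmup) // tf_steps + 1  # one decay per boundary hit
    return max(tf_decay ** n_decays, tf_min)


if __name__ == "__main__":
    # Example: print a few points on the decay curve with the default settings.
    for step in (0, 11999, 12000, 14000, 24000, 100000):
        print(step, round(tf_ratio_at(step), 4))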