def run(self):
    self.dataloader.run()
    self.INPUT_DIM = len(self.dataloader.SRC.vocab)
    self.OUTPUT_DIM = len(self.dataloader.TRG.vocab)
    self.define_model()

    print("Start Training ... ")
    best_valid_loss = float('inf')
    for epoch in range(config.TRAIN_EPOCHS):
        start_time = time.time()
        train_loss, train_bleu = self.train()

        print("Start Evaluation ... ")
        valid_loss, valid_bleu = self.evaluate()
        # train_loss = self.train()
        # valid_loss = self.evaluate()
        end_time = time.time()

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), 'tut3-model.pt')

        print(f"Epoch Num: {epoch}")
        epoch_time(start_time, end_time)
        print_loss(train_loss, valid_loss)
        print_bleu(train_bleu, valid_bleu)
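# Nearly every training loop in this file calls a shared `epoch_time` helper
# that none of the excerpts define. A minimal sketch consistent with how it is
# used everywhere (two time.time() stamps in, whole minutes and seconds out);
# the originals may differ in detail:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs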
def prep_tag_files(src_file, save_path, src_tok, max_len, min_len):
    tagger = SequenceTagger.load("pos-fast")
    good_len_sentences = 0
    # save data to temporary files
    with open(os.path.join(save_path, "temp_src.txt"), "w") as src_sink:
        with open(os.path.join(save_path, "temp_trg.txt"), "w") as trg_sink:
            # count lines without leaving the file handle open
            with open(src_file, "r") as f:
                total_length = sum(1 for _ in f)
            print(f"total number of lines: {total_length}")
            start_time = time.time()
            for i in range(total_length):
                if i != 0 and i % 10000 == 0:
                    end_time = time.time()
                    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
                    print(f"Line {i} | Time: {epoch_mins}m {epoch_secs}s")
                    start_time = time.time()
                # linecache is 1-indexed
                line = linecache.getline(src_file, i + 1)
                line, len_line = clean_and_tok(line, src_tok)
                if max_len or min_len:
                    if min_len <= len_line <= max_len:
                        good_len_sentences += 1
                        src_sink.write(" ".join(line) + "\n")
                        trg_sink.write(" ".join(get_tags(line, tagger=tagger)) + "\n")
    keep_indices = list(range(good_len_sentences))
    print(f"Total number of examples: {len(keep_indices)}")
    return keep_indices
def do_train(model_instance, train_features, train_label, test_features):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Device: {}".format(device))

    optimizer = optim.Adam(model_instance.parameters(), weight_decay=0.01)
    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([0.1, 0.4, 0.5]))
    model_instance = model_instance.to(device)
    criterion = criterion.to(device)

    for epoch in range(N_EPOCHS):
        start_time = time.time()
        epoch_iterator = generate_batches_train(BATCH_SIZE, train_features, train_label)
        train_loss, train_acc, train_f1 = train_epoch(model_instance, epoch_iterator,
                                                      optimizer, criterion, device)
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)
        print(f'Epoch: {epoch + 1:02} | '
              f'Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | '
              f'Train Acc: {train_acc * 100:.2f}% | '
              f'Train F1 hate_class: {utils.get_hate_class_f1(train_f1) * 100:.2f}%')

    # Test process
    test_iterator = generate_batches_test(BATCH_SIZE, test_features)
    results_test, results_test_prob = test(model_instance, test_iterator, device)
    # Export result
    utils.export_result_submit(results_test, './submit/submit_combine.csv',
                               './data-bin/05_sample_submission.csv')
def do_train_model(type_embed, model_instance, model_name):
    file_log = open(f'./train-logs/log_{model_name}.txt', 'w', encoding='utf-8')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utils.print_and_write_log("Device: {}".format(device), file_log)

    optimizer = optim.Adam(model_instance.parameters(), weight_decay=0.01)
    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([0.1, 0.4, 0.5]))
    model_instance = model_instance.to(device)
    criterion = criterion.to(device)

    best_valid_f1 = -float('inf')
    # Change valid set for other model
    load_dataset.valid_data_ids = load_dataset.get_valid_data_ids(shuffer=True)

    # Train process
    for epoch in range(N_EPOCHS):
        start_time = time.time()
        epoch_iterator = load_dataset.generate_batches_train(BATCH_SIZE, type_embed)
        epoch_iterator_valid = load_dataset.generate_batches_valid(BATCH_SIZE, type_embed,
                                                                   shuffler=False)
        train_loss, train_acc, train_f1 = train_epoch(model_instance, epoch_iterator,
                                                      optimizer, criterion, device,
                                                      file_log=file_log)
        valid_loss, valid_acc, valid_f1 = evaluate_epoch(model_instance, epoch_iterator_valid,
                                                         criterion, device, file_log=file_log)
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if utils.get_hate_class_f1(valid_f1) > best_valid_f1:
            utils.print_and_write_log(f'Current best {model_name} epoch {epoch}. Save model....',
                                      file_log)
            best_valid_f1 = utils.get_hate_class_f1(valid_f1)
            torch.save(model_instance.state_dict(), os.path.join(MODEL_PATH, model_name))

        utils.print_and_write_log(f'Epoch: {epoch + 1:02} | '
                                  f'Epoch Time: {epoch_mins}m {epoch_secs}s', file_log)
        utils.print_and_write_log(f'\tTrain Loss: {train_loss:.3f} | '
                                  f'Train Acc: {train_acc * 100:.2f}% | '
                                  f'Train F1 hate_class: {utils.get_hate_class_f1(train_f1) * 100:.2f}%',
                                  file_log)
        utils.print_and_write_log(f'\t Val. Loss: {valid_loss:.3f} | '
                                  f'Val. Acc: {valid_acc * 100:.2f}% | '
                                  f'Val. F1 hate_class: {utils.get_hate_class_f1(valid_f1) * 100:.2f}%',
                                  file_log)

    # Test process
    test_iterator = load_dataset.generate_batches_test(BATCH_SIZE, type_embed)
    model_instance.load_state_dict(torch.load(os.path.join(MODEL_PATH, model_name)))
    results_test, results_test_prob = test(model_instance, test_iterator, device)
    utils.export_result_combine(results_test_prob, f'./submit-combine/{model_name}_test.prob.json')
    # Export result
    utils.export_result_submit(results_test, f'./submit/submit_{model_name}.csv',
                               './data-bin/05_sample_submission.csv')

    # Export probs for samples in train data
    train_iterator_for_combine = load_dataset.generate_batches_for_combine(BATCH_SIZE, type_embed)
    results_probs = evaluate_epoch_export_prob(model_instance, train_iterator_for_combine, device)
    utils.export_result_combine(results_probs, f'./submit-combine/{model_name}.prob.json')
    file_log.close()
def train(self):
    print(self.model)
    print(f'The model has {self.model.count_params():,} trainable parameters')
    best_valid_loss = float('inf')

    for epoch in range(self.params.num_epoch):
        self.model.train()
        epoch_loss = 0
        start_time = time.time()

        for batch in self.train_iter:
            # For each batch, first zero the gradients
            self.optimizer.zero_grad()
            source = batch.kor
            target = batch.eng

            # the decoder input consists of <sos> and the following tokens (except the <eos> token)
            output = self.model(source, target[:, :-1])[0]

            # the ground-truth sentence consists of the tokens and the <eos> token (except the <sos> token)
            output = output.contiguous().view(-1, output.shape[-1])
            target = target[:, 1:].contiguous().view(-1)
            # output = [(batch size * target length - 1), output dim]
            # target = [(batch size * target length - 1)]

            loss = self.criterion(output, target)
            loss.backward()

            # clip the gradients to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params.clip)
            self.optimizer.step()

            # 'item' extracts a scalar from a tensor that contains a single value
            epoch_loss += loss.item()

        train_loss = epoch_loss / len(self.train_iter)
        valid_loss = self.evaluate()
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), self.params.save_model)

        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')
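# `count_params` / `count_parameters` appear throughout these snippets but are
# never defined. A minimal sketch of the usual trainable-parameter counter they
# are assumed to wrap:
def count_parameters(model):
    # count only parameters that will receive gradient updates
    return sum(p.numel() for p in model.parameters() if p.requires_grad)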
def train(train_data, eval_data, train_gloss_dict, eval_gloss_dict, model, optimizer,
          criterion, args):
    epochs = args.epochs
    gloss_bsz = args.gloss_bsz
    max_grad_norm = args.max_grad_norm
    logger = args.logger
    if args.multigpu:
        multigpu = args.multigpu
    else:
        multigpu = False

    print(f"The number of iterations for each epoch is {len(train_data)}")

    # record the labels of the evaluation data
    truth = []
    for data in eval_data:
        sense_ids_org = chain(*[list(sense_d.values()) for sense_d in data[4]])
        truth += sense_ids_org

    # training
    for epoch in range(epochs):
        logger.info(f"Epoch {epoch+1} initialized.")
        model_path = f"{args.checkpoint}/saved_checkpoint_{args.checkpoint_count}"
        start_time = time.time()
        model, optimizer, total_loss = train_one_epoch(train_data, train_gloss_dict, model,
                                                       optimizer, criterion, model_path, args)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # predict on the evaluation dataset
        preds = predict(eval_data, eval_gloss_dict, model)
        assert len(preds) == len(truth)
        eval_acc = np.mean(np.array(preds) == np.array(truth))
        eval_f1 = f1_score(truth, preds, average='weighted')

        logger.info(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        logger.info(f'\tTrain Loss: {total_loss:.3f}')
        logger.info(f'\tEval. Acc: {eval_acc*100:.2f}%')
        logger.info(f'\tEval. F1 : {eval_f1*100:.2f}%')

        # saving
        torch.save(model, f"{args.checkpoint}/saved_checkpoint_{args.checkpoint_count}")
        logger.info(f"Checkpoint saved at {args.checkpoint}/saved_checkpoint_{args.checkpoint_count}")
        args.checkpoint_count += 1
def train(self):
    print(f'The model has {self.model.count_parameters():,} trainable parameters')
    best_valid_loss = float('inf')
    print(self.model)

    for epoch in range(self.config.num_epoch):
        self.model.train()
        epoch_loss = 0
        epoch_acc = 0
        start_time = time.time()

        for batch in self.train_iter:
            # For each batch, first zero the gradients
            self.optimizer.zero_grad()

            # if the Field has include_lengths=False, batch.text is only the padded numericalized tensor
            # if the Field has include_lengths=True, batch.text is a tuple of
            # (padded numericalized tensor, sentence lengths)
            input, input_lengths = batch.text

            predictions = self.model(input, input_lengths).squeeze(1)
            # predictions = [batch size, 1]; after squeeze(1) = [batch size]

            loss = self.criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)

            loss.backward()
            self.optimizer.step()

            # 'item' extracts a scalar from a tensor that contains a single value
            epoch_loss += loss.item()
            epoch_acc += acc.item()

        train_loss = epoch_loss / len(self.train_iter)
        train_acc = epoch_acc / len(self.train_iter)
        valid_loss, valid_acc = self.evaluate()
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), self.config.save_model)

        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
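# `binary_accuracy` is not shown in this excerpt. A minimal sketch, assuming the
# model emits raw logits and the labels are 0/1 floats as in the loop above:
def binary_accuracy(predictions, labels):
    # squash logits to probabilities, then round to the nearest class (0 or 1)
    rounded_preds = torch.round(torch.sigmoid(predictions))
    correct = (rounded_preds == labels).float()
    return correct.sum() / len(correct)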
def main_lm(args):
    # Get data and model
    train_iterator, valid_iterator, test_iterator, src, trg, vec = \
        data.get_lm_data(args)
    model = utils.create_seq2seq_model(args, src, trg, vec)

    best_valid_loss = float('inf')
    best_valid_epoch = 0
    optimizer = optim.Adam(model.parameters())
    pad_idx = src.vocab.stoi['<pad>']
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

    # directory for the saved model; assumed to be the parent of args.save_path
    # (`folder` is not defined in the original excerpt)
    folder = os.path.dirname(args.save_path)

    # Main loop
    for epoch in range(args.num_epochs):
        start_time = time.time()
        train_loss = train_lm(args, model, train_iterator, optimizer, criterion,
                              args.grad_clip)
        valid_loss = evaluate_lm(model, valid_iterator, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            if not os.path.exists(folder):
                os.makedirs(folder)
            best_valid_loss = valid_loss
            best_valid_epoch = epoch
            torch.save(model.state_dict(), args.save_path)

        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.4f} | Train PPL: {math.exp(train_loss):7.4f}')
        print(f'\t Val. Loss: {valid_loss:.4f} | Val. PPL: {math.exp(valid_loss):7.4f}')

    # Post-training eval on test, using evaluate_lm to match the validation calls above
    model.load_state_dict(torch.load(args.save_path))
    test_loss = evaluate_lm(model, test_iterator, criterion)

    print('****RESULTS****')
    print(f'| Best Val. Loss: {best_valid_loss:.4f} | '
          f'Best Val. PPL: {math.exp(best_valid_loss):7.4f} | At epoch: {best_valid_epoch}')
    print(f'| Test Loss with best val model: {test_loss:.4f} | '
          f'Test PPL: {math.exp(test_loss):7.4f} | At epoch: {best_valid_epoch}')
def runner(epochs, model, train_iterator, valid_iterator, optim, writer, config):
    clip = config["clip"]
    save_path = config['data']['path']
    model_name = config['model_name']
    best_valid_loss = float('inf')

    for epoch in range(epochs):
        start_time = time.time()
        train_loss, train_stats = train(model, train_iterator, optim, clip)
        valid_loss, valid_stats = evaluate(model, valid_iterator)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), os.path.join(save_path, model_name))

        logger.info("-------------------------")
        logger.info(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
        logger.info(f'\tTrain Loss: {train_loss:.3f}')
        logger.info(f'\t Val. Loss: {valid_loss:.3f}')
        logger.info(f'\t Train f1: {train_stats[0]} \n Valid f1: {valid_stats[0]}')
        logger.info(f'\t Train action accuracy: {train_stats[1]:.3f} \t '
                    f'Valid action accuracy: {valid_stats[1]:.3f}')
        logger.info(f'\t Train object accuracy: {train_stats[2]:.3f} \t '
                    f'Valid object accuracy: {valid_stats[2]:.3f}')
        logger.info(f'\t Train location accuracy: {train_stats[3]:.3f} \t '
                    f'Valid location accuracy: {valid_stats[3]:.3f}')
        add_to_writer(writer, epoch, train_loss, valid_loss, train_stats, valid_stats, config)

    # dump the config file
    with open(config['log_path'] + "/config.yaml", "w") as file:
        yaml.dump(config, file)
def train(self):
    """Train the model using the train dataset."""
    print(f'The model has {self.model.count_params():,} parameters')
    best_valid_loss = float('inf')

    for epoch in range(self.params.num_epoch):
        self.model.train()
        train_loss = 0
        start_time = time.time()

        for input_ids in self.train_iter:
            self.optimizer.zero_grad()
            input_ids = input_ids.to(self.params.device)

            output = self.model(input_ids[:, :-1])
            preds = output.contiguous().view(-1, output.size(-1))
            # preds = [(batch size * sentence length), vocab size]
            golds = input_ids[:, 1:].contiguous().view(-1)
            # golds = [(batch size * sentence length)]

            loss = self.criterion(preds, golds)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params.clip)
            self.optimizer.step()
            train_loss += loss.item()

        train_loss = train_loss / len(self.train_iter)
        valid_loss = self.validate()
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), self.params.save_dir)

        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')
# history for the learning-rate finder
history_lr_finder = {"lr": [], "loss": []}
best_loss_lr_finder = None
# ExponentialLR here is the LR-finder-style scheduler that sweeps the rate up to end_lr
lr_scheduler = ExponentialLR(optimizer, end_lr=100, num_iter=100)

N_EPOCHS = 150
CLIP = 1
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP, lr_scheduler)
    valid_loss = evaluate(model, valid_iterator, criterion)
    end_time = time.time()
    m, s = epoch_time(start_time, end_time)

    history_lr_finder["lr"].append(lr_scheduler.get_lr()[0])
    lr_scheduler.step()

    if epoch == 0:
        best_loss_lr_finder = valid_loss
    else:
        # exponentially smooth the loss before comparing against the best
        smooth_f = 0.05
        valid_loss = (smooth_f * valid_loss
                      + (1 - smooth_f) * history_lr_finder["loss"][-1])
        if valid_loss < best_loss_lr_finder:
            best_loss_lr_finder = valid_loss

    history_lr_finder["loss"].append(valid_loss)

    # stop early if the loss diverges
    if valid_loss > 5 * best_loss_lr_finder:
        break
def main(args):
    # use cuda if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    SRC = torch.load(os.path.join(args.data_path, "src_vocab.pt"))
    TRG = torch.load(os.path.join(args.data_path, "trg_vocab.pt"))

    # gather parameters from the vocabulary
    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    pad_idx = SRC.vocab.stoi[SRC.pad_token]

    model_dict = torch.load(args.pretrained_model)
    dropout = model_dict["dropout"]
    prev_state_dict = model_dict["model_state_dict"]
    del model_dict

    # gather parameters except dec_hid_dim, since in this model they are the same
    prev_param_dict = get_prev_params(prev_state_dict)

    # create model
    model = Seq2Seq(
        input_dim,
        prev_param_dict["emb_dim"],
        prev_param_dict["enc_hid_dim"],
        output_dim,
        prev_param_dict["enc_layers"],
        dropout,
        prev_param_dict["bidirectional"],
        pad_idx,
        device,
    ).to(device)
    model.load_state_dict(prev_state_dict)
    del prev_state_dict
    print(model)

    test_path = os.path.join(args.data_path, "test.tsv")
    test_set = LazyDataset(test_path, SRC, TRG, "evaluation")
    test_batch_sampler = BucketBatchSampler(test_path, args.batch_size)

    # build dictionary of parameters for the DataLoader
    test_loader_params = {
        # since the bucket sampler returns batches, batch_size is 1
        "batch_size": 1,
        # sort_batch reverse sorts for pack_pad_seq
        "collate_fn": sort_batch,
        "batch_sampler": test_batch_sampler,
        "num_workers": args.num_workers,
        "shuffle": False,
        "pin_memory": True,
        "drop_last": False,
    }
    test_iterator = torch.utils.data.DataLoader(test_set, **test_loader_params)

    start_time = time.time()
    final_preds = []
    final_targets = []
    for i, batch in enumerate(test_iterator):
        source, target_indices, src_len = prep_eval_batch(
            batch, device, TRG.vocab.stoi[TRG.pad_token])
        # get targets from file
        final_targets += [get_target(test_path, idx) for idx in target_indices]

        if args.decode_method == "beam":
            final_preds += preds_to_toks(
                beam_decode(source, src_len, TRG, model, device), TRG)
        elif args.decode_method == "greedy":
            preds = greedy_decode(source, src_len, TRG, model, device)
            # tensor to integer numpy array for quicker processing
            preds = preds.numpy().astype(int)
            final_preds += preds_to_toks(preds, TRG)

        # guard against division by zero for test sets smaller than 100 batches
        if i % max(1, len(test_iterator) // 100) == 0:
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            print(f" batch {i} | Time: {epoch_mins}m {epoch_secs}s")
            start_time = end_time

    if args.save_file:
        sink = open(args.save_file, "w")
        writer = csv.writer(sink, delimiter="\t")
        writer.writerows(zip(final_preds, final_targets))

    if not args.no_bleu:
        final_preds = [p.split() for p in final_preds]
        final_targets = [[t.split()] for t in final_targets]
        print(bleu_score(final_preds, final_targets))
def analyse_sentiments(params=None, model_name='', training_mode=True):
    """Train and evaluate an RNN sentiment classifier.

    :param params: dictionary of data and model hyperparameters
    :param model_name: name under which the best checkpoint is saved
    :param training_mode: if True, train before evaluating
    :return: test loss and test accuracy
    """
    vector_name = params['pretrained_vectors']
    MAX_VOCAB_SIZE = params['MAX_VOCAB_SIZE']
    min_freq = params['min_freq']
    EMBEDDING_DIM = params['embedding_dim']
    FREEZE_EMBEDDINGS = params['RNN_FREEZE_EMDEDDINGS']
    HIDDEN_DIM = params['RNN_HIDDEN_DIM']
    OUTPUT_DIM = 1
    N_LAYERS = params['RNN_N_LAYERS']
    DROPOUT = params['RNN_DROPOUT']
    USE_GRU = params['RNN_USE_GRU']
    N_EPOCHS = params['RNN_EPOCHS']
    BATCH_SIZE = params['RNN_BATCH_SIZE']

    pretrained = vector_name is not None

    TEXT = torchtext.data.Field(lower=True,
                                pad_first=True,
                                batch_first=True,
                                init_token='<sos>',
                                eos_token='<eos>'
                                # include_lengths=True
                                )
    LABEL = torchtext.data.LabelField(dtype=torch.float)
    datafields = [('Sentiment', LABEL), ('SentimentText', TEXT)]
    train_set, val_set, test_set = TabularDataset.splits(path='../data/',
                                                         train='processed_train.csv',
                                                         validation='processed_val.csv',
                                                         test='processed_test.csv',
                                                         format='csv',
                                                         skip_header=True,
                                                         fields=datafields)
    if pretrained:
        vectors = load_vectors(fname=vector_name)
        TEXT.build_vocab(train_set, vectors=vectors, unk_init=torch.Tensor.normal_)
        vectors = TEXT.vocab.vectors
        EMBEDDING_DIM = vectors.shape[1]
    else:
        TEXT.build_vocab(train_set, max_size=MAX_VOCAB_SIZE)
    LABEL.build_vocab(train_set)
    print(f"Most frequent words in vocab: {TEXT.vocab.freqs.most_common(20)}")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Device used is {device}")

    # minimise padding within each batch
    train_iterator, val_iterator, test_iterator = torchtext.data.BucketIterator.splits(
        (train_set, val_set, test_set),
        batch_size=BATCH_SIZE,
        sort_key=lambda x: len(x.SentimentText),
        sort_within_batch=False,
        device=device)

    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    INPUT_DIM = len(TEXT.vocab)
    print(f"Vocab size is {INPUT_DIM}, embedding dim is {EMBEDDING_DIM}")

    model = RNNModel(vocab_size=INPUT_DIM,
                     embedding_dim=EMBEDDING_DIM,
                     hidden_dim=HIDDEN_DIM,
                     output_dim=OUTPUT_DIM,
                     n_layers=N_LAYERS,
                     bidirectional=True,
                     dropout=DROPOUT,
                     pad_idx=pad_idx,
                     use_gru=USE_GRU)
    print(model)

    if pretrained:
        model.embedding.weight.data.copy_(vectors)

    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    init_idx = TEXT.vocab.stoi[TEXT.init_token]
    eos_idx = TEXT.vocab.stoi[TEXT.eos_token]
    print(f"pad_idx {pad_idx}, unk_idx {unk_idx}, init_idx {init_idx}, eos_idx {eos_idx}")
    # zero out the embeddings of the <unk> and <pad> tokens
    model.embedding.weight.data[unk_idx] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[pad_idx] = torch.zeros(EMBEDDING_DIM)

    # optionally freeze embeddings
    model.embedding.weight.requires_grad = not FREEZE_EMBEDDINGS

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.BCEWithLogitsLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    if training_mode:
        best_valid_loss = float('inf')
        for epoch in range(N_EPOCHS):
            start_time = time.time()
            model, train_loss, train_acc = train_epoch(model, train_iterator,
                                                       optimizer, criterion, device)
            valid_loss, valid_acc = evaluate(model, val_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), f"{model_name}.pt")

            print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}%')

    # TODO: DO TESTS AND PLOT RESULT
    # Evaluate model performance
    model.load_state_dict(torch.load(f"{model_name}.pt"))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')
    confusion_matrix(model, test_iterator, device=device, fname=model_name)

    sentence = ("got a whole new wave of depression when i saw it was my rafa's "
                "losing match I HATE YOU SODERLING")
    value = evaluate_sentences(model, sentence, TEXT, device)
    print(f"'{sentence}' sentiment is {value}")

    sentence = "STOKED for the show tomorrow night! 2 great shows combined."
    value = evaluate_sentences(model, sentence, TEXT, device)
    print(f"'{sentence}' sentiment is {value}")

    return test_loss, test_acc
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # device = torch.device('cpu')
    if torch.cuda.is_available():
        print("current device: ", torch.cuda.current_device())

    # special tokens
    SOPH = '<soph>'
    NSOPH = '<nsoph>'

    config = BertConfig.from_pretrained('bert-base-uncased')

    # fix the seed
    SEED = 1234
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    num_added_token = tokenizer.add_tokens([SOPH, NSOPH])

    INPUT_DIM = len(tokenizer)   # len(SRC.vocab)
    OUTPUT_DIM = len(tokenizer)  # len(TRG.vocab)
    HID_DIM = 768
    DEC_LAYERS = 3
    DEC_HEADS = 8
    DEC_PF_DIM = 512
    ENC_DROPOUT = 0.1
    DEC_DROPOUT = 0.1
    SRC_PAD_IDX = 0
    TRG_PAD_IDX = 0
    BATCH_SIZE = 100
    MAX_SEQ_LEN = 50
    N_EPOCHS = 5
    CLIP = 1
    LEARNING_RATE = 0.0005
    SAVE_PATH = 'tut6-model.pt'
    LOAD_PATH = 'tut6-model.pt'
    unfreeze_bert = False
    do_load = False
    do_train = False
    do_eval = False
    do_generate = True

    dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM,
                  DEC_DROPOUT, device)
    model = Seq2Seq(dec, SRC_PAD_IDX, TRG_PAD_IDX, config, device).to(device)

    # resize the encoder's embeddings to account for the added special tokens
    model.bert_encoder.resize_token_embeddings(len(tokenizer))
    model.decoder.apply(initialize_weights)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
    best_valid_loss = float('inf')

    processor = DiscoFuseProcessor()
    valid_iterator, num_val_ex = make_DataLoader(data_dir='./',
                                                 processor=processor,
                                                 tokenizer=tokenizer,
                                                 max_seq_length=MAX_SEQ_LEN,
                                                 batch_size=BATCH_SIZE,
                                                 mode="dev",
                                                 SOPH=SOPH,
                                                 NSOPH=NSOPH,
                                                 domain="sports")

    if do_train:
        for param in model.bert_encoder.parameters():
            param.requires_grad = unfreeze_bert
        print(f'The model has {count_parameters(model):,} trainable parameters')

        train_iterator, num_tr_ex = make_DataLoader(data_dir='./',
                                                    processor=processor,
                                                    tokenizer=tokenizer,
                                                    max_seq_length=MAX_SEQ_LEN,
                                                    batch_size=BATCH_SIZE,
                                                    mode="train",
                                                    SOPH=SOPH,
                                                    NSOPH=NSOPH)
        print("---- Begin Training ----")
        if do_load and os.path.exists(LOAD_PATH):
            print("---- Loading model from {} ----".format(LOAD_PATH))
            model.load_state_dict(torch.load(LOAD_PATH))

        for epoch in range(N_EPOCHS):
            start_time = time.time()
            num_batches_in_epoch = int(num_tr_ex / BATCH_SIZE)  # 10000
            train_loss = train(model, train_iterator, optimizer, criterion, CLIP,
                               num_batches_in_epoch, device=device)
            valid_loss, valid_exact = evaluate(model, valid_iterator, criterion,
                                               device=device, tokenizer=tokenizer)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), SAVE_PATH)

            print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')
            print(f'\t Val. EXACT: {valid_exact:.2f}')

    elif do_eval:
        print("Doing only evaluation")
        model.load_state_dict(torch.load(LOAD_PATH))
        valid_loss, valid_exact = evaluate(model, valid_iterator, criterion,
                                           device=device, tokenizer=tokenizer)
        print(f'\t Val. Loss: {valid_loss:.3f} | Val. EXACT: {valid_exact:3.3f}')

    elif do_generate:
        print("Doing only generation")
        model.load_state_dict(torch.load(LOAD_PATH))
        all_predictions, all_trgs, all_counter_predictions = generate(model, valid_iterator,
                                                                      device, tokenizer)
        # detokenize the WordPiece output
        all_counter_pred_str = [" ".join(a).replace(" ##", "") for a in all_counter_predictions]
        all_pred_str = [" ".join(a).replace(" ##", "") for a in all_predictions]
        all_trgs_str = [" ".join(a).replace(" ##", "") for a in all_trgs]

        with open("generated_fuse.txt", 'a') as fp:
            for i in range(len(all_predictions)):
                counter_pred_line = "Counter pred: " + all_counter_pred_str[i] + "\n"
                pred_line = "Origin pred: " + all_pred_str[i] + "\n"
                trg_line = "origin trg: " + all_trgs_str[i] + "\n\n"
                fp.writelines(counter_pred_line)
                fp.writelines(pred_line)
                fp.writelines(trg_line)
    else:
        raise ValueError("Error - must either train, evaluate, or generate!")
# ------------------------- train & valid --------------------------------
model_init(model)
optimizer = NoamOpt(HID_DIM, factor=1, warmup=2000,
                    optimizer=optim.Adam(model.parameters(), lr=0,
                                         betas=(0.9, 0.98), eps=1e-9))
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

for epoch in range(N_EPOCH):
    start_time = time.time()
    train_loss, train_score = train(model, train_iterator, optimizer, criterion, CLIP, cor)
    valid_loss, valid_score = evaluate(model, valid_iterator, criterion, cor)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time=start_time, end_time=end_time)

    print(f"Epoch No. {epoch} | Time: {epoch_mins}m {epoch_secs}s")
    print(f"\tTrain Loss: {train_loss} | Train PPL: {math.exp(train_loss)} | "
          f"Train BLEU: {train_score}")
    print(f"\tValid Loss: {valid_loss} | Valid PPL: {math.exp(valid_loss)} | "
          f"Valid BLEU: {valid_score}")
def train_model(
    model,
    iterator,
    task,
    optimizer,
    criterion,
    clip,
    device,
    epoch,
    start_time,
    save_path,
    dropout,
    pad_indices,
    num_batches,
    teacher_forcing=None,
    checkpoint=None,
    repr_layer=None,
):
    model.train()
    epoch_loss = 0
    batch_loss = []

    if task == "tagging":
        # save the loss 10 times throughout training
        save_loss = np.linspace(0, num_batches, num=10, dtype=int)
    elif task == "translation":
        # save the loss 100 times throughout training
        save_loss = np.linspace(0, num_batches, num=100, dtype=int)

    try:
        for i, batch in enumerate(iterator):
            source, targets, src_len = prep_batch(batch, device, pad_indices)
            optimizer.zero_grad()
            loss = train_step(
                model,
                source,
                src_len,
                targets,
                task,
                criterion,
                optimizer,
                clip,
                teacher_forcing,
            )
            epoch_loss += loss

            if i in save_loss:
                batch_loss.append(loss)
                end_time = time.time()
                batch_mins, batch_secs = epoch_time(start_time, end_time)
                print(f"epoch {epoch} batch: {i} | Train loss: {loss:.3f} | "
                      f"Time: {batch_mins}m {batch_secs}s")
                start_time = end_time

            # optionally checkpoint
            if checkpoint is not None:
                if i % checkpoint == 0:
                    adam, sparse_adam = optimizer.return_optimizers()
                    torch.save(
                        {
                            "epoch": epoch,
                            "model_state_dict": model.state_dict(),
                            "adam_state_dict": adam.state_dict(),
                            "sparse_adam_state_dict": sparse_adam.state_dict(),
                            "loss": loss,
                            "dropout": dropout,
                            "repr_layer": repr_layer,
                        },
                        os.path.join(save_path, f"checkpoint_{epoch}_{i}.pt"),
                    )
                    print(f"Checkpoint saved at epoch {epoch} batch {i}. "
                          f"Train loss is {loss:.3f}")
    # skip the batch in case of OOM
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"| WARNING: ran out of memory, skipping batch number {i:,}")

    return epoch_loss / num_batches, batch_loss
result = dict()

# training and evaluation
for tokenizer_name, tokenizer in zip(tokenizer_names, tokenizers):
    print('-------------------------------------------------------------')
    print(f'Data loading with {tokenizer_name} tokenizer...')
    start_time = time.time()
    TEXT, LABEL, train_iterator, test_iterator = dataloader(tokenizer,
                                                            args.max_vocab_size,
                                                            args.batch_size,
                                                            device)
    input_dim = len(TEXT.vocab)
    print(f'The number of vocabularies is {input_dim}.')
    end_time = time.time()
    data_loading_time = round(end_time - start_time, 3)
    data_prep_mins, data_prep_secs = epoch_time(start_time, end_time)
    print(f'Data loading Time: {data_prep_mins}m {data_prep_secs}s')

    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    model = RNN(input_dim, args.embedding_dim, args.hidden_dim, 1, args.n_layers,
                args.bidirectional, args.dropout, pad_idx)
    model.embedding.weight.data[pad_idx] = torch.zeros(args.embedding_dim)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()
    model = model.to(device)
    criterion = criterion.to(device)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing
pp_start_time = time.time()
trainloader, testloader = get_dataloaders(args)
pp_end_time = time.time()
# epoch_time takes the two timestamps, not their difference
pp_mins, pp_secs = epoch_time(pp_start_time, pp_end_time)
print(f'Preprocessing time: {pp_mins}m {pp_secs}s')

with wandb.init(project='RegulQuant', entity='womeiyouleezi', config=args):
    if args.run_name:
        wandb.run.name = args.run_name
    if not args.save_file:
        file_name = wandb.run.name
    else:
        file_name = args.save_file

    # make model
    net = get_model(args).to(device)
    # net = ConvNet().to(device)

    # unpack args
def run():
    SEED = 1234
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    train, valid, test, SRC, TRG = dataset.create_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train, valid, test),
        sort_key=lambda x: len(x.source),
        batch_size=config.BATCH_SIZE,
        device=config.device)

    INPUT_DIM = len(SRC.vocab)
    OUTPUT_DIM = len(TRG.vocab)
    ENC_EMB_DIM = config.ENCODER_EMBEDDING_DIMENSION
    DEC_EMB_DIM = config.DECODER_EMBEDDING_DIMENSION
    HID_DIM = config.LSTM_HIDDEN_DIMENSION
    N_LAYERS = config.LSTM_LAYERS
    ENC_DROPOUT = config.ENCODER_DROPOUT
    DEC_DROPOUT = config.DECODER_DROPOUT

    attn = model.Attention(HID_DIM, HID_DIM)
    enc = model.Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, HID_DIM, ENC_DROPOUT)
    dec = model.Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, HID_DIM, DEC_DROPOUT, attn)
    model_rnn = model.Seq2Seq(enc, dec, config.device).to(config.device)

    optimizer = optim.Adam(model_rnn.parameters(), lr=config.LEARNING_RATE)
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    if args.action == 'train':
        model_rnn.apply(utils.init_weights)
        best_valid_loss = float('inf')
        for epoch in range(config.N_EPOCHS):
            start_time = time.time()
            train_loss = engine.train_fn(model_rnn, train_iterator, optimizer,
                                         criterion, config.CLIP)
            valid_loss = engine.evaluate_fn(model_rnn, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model_rnn.state_dict(), config.MODEL_SAVE_FILE)

            with open(config.RESULTS_SAVE_FILE, 'a') as f:
                print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s', file=f)
                print(f'\tTrain Loss: {train_loss:.3f} | '
                      f'Train PPL: {math.exp(train_loss):7.3f}', file=f)
                print(f'\t Val. Loss: {valid_loss:.3f} | '
                      f'Val. PPL: {math.exp(valid_loss):7.3f}', file=f)

    elif args.action == 'test':
        model_rnn.load_state_dict(torch.load(config.TEST_MODEL))
        loss, target, output = engine.test_fn(model_rnn, test_iterator, criterion, SRC, TRG)
        bl = bleu_score(output, target, max_n=1, weights=[1])
        met = 0
        for z in range(len(output)):
            out = ' '.join(output[z][y] for y in range(min(10, len(output[z]))))
            tar = ' '.join(y for y in target[z])
            met = met + metric_utils.compute_metric(out, 1.0, tar)
        with open(config.TEST_RESULTS_FILE, 'w') as f:
            print(f'Test bleu: {bl*100}, Test PPL: {math.exp(loss):7.3f},',
                  'Metric:', met / len(output), file=f)

    elif args.action == 'save_vocab':
        print('Source vocab length', len(SRC.vocab))
        print('Target vocab length', len(TRG.vocab))
        s1 = '\n'.join(k for k in SRC.vocab.itos)
        s2 = '\n'.join(k for k in TRG.vocab.itos)
        with open('NL_vocabulary.txt', 'w') as f:
            f.write(s1)
        with open('Bash_vocabulary.txt', 'w') as f:
            f.write(s2)
def main(args):
    # use cuda if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create directory for saving models if it doesn't already exist
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)

    SRC = torch.load(os.path.join(args.nmt_data_path, "src_vocab.pt"))
    TRG = torch.load(os.path.join(args.data_path, "trg_vocab.pt"))

    # gather parameters from the vocabulary
    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    pad_idx = SRC.vocab.stoi[SRC.pad_token]

    # create lazy dataset and data loader
    train_path = os.path.join(args.data_path, "train.tsv")
    training_set = LazyDataset(train_path, SRC, TRG, "tagging")
    train_batch_sampler = BucketBatchSampler(train_path, args.batch_size)
    # the number of batches comes from the sampler, not the iterator
    num_batches = train_batch_sampler.num_batches

    # build dictionary of parameters for the DataLoader
    train_loader_params = {
        # since the bucket sampler returns batches, batch_size is 1
        "batch_size": 1,
        # sort_batch reverse sorts for pack_pad_seq
        "collate_fn": sort_batch,
        "batch_sampler": train_batch_sampler,
        "num_workers": args.num_workers,
        "shuffle": args.shuffle,
        "pin_memory": True,
        "drop_last": False,
    }
    train_iterator = torch.utils.data.DataLoader(training_set, **train_loader_params)

    # load pretrained model
    prev_state_dict = torch.load(args.pretrained_model, map_location=torch.device("cpu"))
    enc_dropout = prev_state_dict["dropout"]
    prev_state_dict = prev_state_dict["model_state_dict"]
    # gather parameters except dec_hid_dim, since the tagger gets this from args
    prev_param_dict = get_prev_params(prev_state_dict)
    new_state_dict = make_encoder_dict(prev_state_dict)

    if args.repr_layer == "embedding":
        new_dict = {}
        # add embedding layer
        new_dict["enc_embedding.weight"] = new_state_dict["enc_embedding.weight"]
        # replace state dict with new dict
        new_state_dict = new_dict
    elif args.repr_layer == "rnn1":
        new_dict = {}
        # add embedding layer
        new_dict["enc_embedding.weight"] = new_state_dict["enc_embedding.weight"]
        # add first-layer weights and biases
        for k, v in new_state_dict.items():
            if "l0" in k:
                new_dict[k] = v
        # replace state dict with new dict
        new_state_dict = new_dict

    model = Tagger(
        new_state_dict=new_state_dict,
        input_dim=input_dim,
        emb_dim=prev_param_dict["emb_dim"],
        enc_hid_dim=prev_param_dict["enc_hid_dim"],
        dec_hid_dim=args.hid_dim,
        output_dim=output_dim,
        enc_layers=prev_param_dict["enc_layers"],
        dec_layers=args.n_layers,
        enc_dropout=enc_dropout,
        dec_dropout=args.dropout,
        bidirectional=prev_param_dict["bidirectional"],
        pad_idx=pad_idx,
        repr_layer=args.repr_layer,
    ).to(device)

    # optionally randomly initialize weights
    if args.random_init:
        model.apply(random_init_weights)
    print(model)
    print(f"The model has {count_parameters(model):,} trainable parameters")

    optimizer = make_muliti_optim(model.named_parameters(), args.learning_rate)

    if not args.unfreeze_encoder:
        for param in model.encoder.parameters():
            param.requires_grad = False

    SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
    TRG_PAD_IDX = len(TRG.vocab) + 1
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    best_valid_loss = float("inf")
    # ensure valid_loss exists for the per-epoch checkpoint dict below, which is
    # written before the first validation pass
    valid_loss = float("inf")

    # training
    batch_history = []
    epoch_history = []
    for epoch in range(1, args.epochs + 1):
        start_time = time.time()
        train_loss, batch_loss = train_model(
            model=model,
            iterator=train_iterator,
            task="tagging",
            optimizer=optimizer,
            criterion=criterion,
            clip=args.clip,
            device=device,
            epoch=epoch,
            start_time=start_time,
            save_path=args.save_path,
            pad_indices=(SRC_PAD_IDX, TRG_PAD_IDX),
            dropout=(enc_dropout, args.dropout),
            checkpoint=args.checkpoint,
            repr_layer=args.repr_layer,
            num_batches=num_batches,
        )
        batch_history += batch_loss
        epoch_history.append(train_loss)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        model_filename = os.path.join(args.save_path, f"model_epoch_{epoch}.pt")
        adam, sparse_adam = optimizer.return_optimizers()
        if not args.only_best:
            torch.save(
                {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "adam_state_dict": adam.state_dict(),
                    "sparse_adam_state_dict": sparse_adam.state_dict(),
                    "loss": valid_loss,
                    "dropout": (enc_dropout, args.dropout),
                    "repr_layer": args.repr_layer,
                },
                model_filename,
            )

        # optionally validate
        if not args.skip_validate:
            valid_path = os.path.join(args.data_path, "valid.tsv")
            valid_set = LazyDataset(valid_path, SRC, TRG, "tagging")
            valid_batch_sampler = BucketBatchSampler(valid_path, args.batch_size)
            # the number of batches comes from the sampler, not the iterator
            valid_num_batches = valid_batch_sampler.num_batches
            valid_loader_params = {
                # since the bucket sampler returns batches, batch_size is 1
                "batch_size": 1,
                # sort_batch reverse sorts for pack_pad_seq
                "collate_fn": sort_batch,
                "batch_sampler": valid_batch_sampler,
                "num_workers": args.num_workers,
                "shuffle": args.shuffle,
                "pin_memory": True,
                "drop_last": False,
            }
            valid_iterator = torch.utils.data.DataLoader(valid_set, **valid_loader_params)

            valid_loss = evaluate_model(
                model,
                valid_iterator,
                num_batches=valid_num_batches,
                optimizer=optimizer,
                criterion=criterion,
                task="tagging",
                device=device,
                pad_indices=(SRC_PAD_IDX, TRG_PAD_IDX),
            )

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                best_filename = os.path.join(args.save_path, "best_model.pt")
                torch.save(
                    {
                        "epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "adam_state_dict": adam.state_dict(),
                        "sparse_adam_state_dict": sparse_adam.state_dict(),
                        "loss": valid_loss,
                        "dropout": (enc_dropout, args.dropout),
                        "repr_layer": args.repr_layer,
                    },
                    best_filename,
                )

            print(f"Epoch: {epoch:02} | Time: {epoch_mins}m {epoch_secs}s")
            print(f"\t Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}")
            print(f"\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}")
        else:
            print(f"Epoch: {epoch:02} | Time: {epoch_mins}m {epoch_secs}s")
            print(f"\t Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}")

    if args.loss_plot:
        make_loss_plot(batch_history, args.save_path, args.epochs)
model.apply(initialize_weights)
optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=target_pad_idx)

best_val_loss = float('inf')
writer = SummaryWriter(log_dir)
for epoch in range(num_epochs):
    s = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, clip=1)
    val_loss = evaluate(model, val_loader, criterion)
    t = time.time()
    epoch_min, epoch_sec = epoch_time(s, t)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(ckpt_dir, model_name))

    print("Epoch : %02d | Elapsed Time : %02d min %02d sec"
          % (epoch + 1, epoch_min, epoch_sec))
    print("\t Train Loss : %.3f | Train PPL : %7.3f"
          % (train_loss, math.exp(train_loss)))
    print("\t Val Loss : %.3f | Val PPL : %7.3f"
          % (val_loss, math.exp(val_loss)))
    writer.add_scalars(f"{model_name}/Train and Val Loss",
                       {"Train_Loss": train_loss, "Val_Loss": val_loss},
                       epoch + 1)
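# `initialize_weights` (used here and in the BERT-decoder snippet above) is not
# defined in these excerpts. A plausible sketch of the Xavier-style initializer
# commonly applied to Transformer models via model.apply(...); an assumption,
# not the confirmed original:
def initialize_weights(m):
    # only matrices (dim > 1) get Xavier-uniform; 1-D tensors such as biases are skipped
    if hasattr(m, 'weight') and m.weight is not None and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)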
def main():
    g_tokenizer = Tokenizer("de", rev=True)
    e_tokenizer = Tokenizer("en")

    SRC = Field(tokenize=g_tokenizer,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True)
    TRG = Field(tokenize=e_tokenizer,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True)

    train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'),
                                                        fields=(SRC, TRG))
    print(f'train: {len(train_data.examples)}')
    print(f'valid: {len(valid_data.examples)}')
    print(f'test : {len(test_data.examples)}')

    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)
    print(f'SRC vocab: {len(SRC.vocab)}')
    print(f'TRG vocab: {len(TRG.vocab)}')

    train_iter, valid_iter, test_iter = BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_sizes=(BATCH_SIZE, BATCH_SIZE, BATCH_SIZE),
        device=torch.device('cuda'))

    encoder = Encoder(len(SRC.vocab), 256, 512, 2, 0.5)
    decoder = Decoder(len(TRG.vocab), 256, 512, 2, 0.5)
    model = Seq2Seq(encoder, decoder, torch.device('cuda')).cuda()
    model.apply(init_weights)
    print(count_parameters(model))

    optimizer = optim.Adam(model.parameters())
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    n_epochs = 10
    clip = 1
    best_valid_loss = float('inf')
    for epoch in range(n_epochs):
        start_time = time.time()
        train_loss = train(model, train_iter, optimizer, criterion, clip)
        valid_loss = evaluate(model, valid_iter, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'best_model.pth')

        print(f'epoch {epoch} | time: {epoch_mins}m {epoch_secs}s')
        print(f'train loss: {train_loss} | train ppl: {math.exp(train_loss)}')
        print(f'valid loss: {valid_loss} | valid ppl: {math.exp(valid_loss)}')
        print()

    model.load_state_dict(torch.load('best_model.pth'))
    test_loss = evaluate(model, test_iter, criterion)
    print(f'test loss: {test_loss} | test ppl: {math.exp(test_loss)}')
    print()
    print('DONE')
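# `init_weights` for the LSTM seq2seq models above is likewise undefined in
# these excerpts. A sketch of the uniform initializer commonly used for such
# models; treat the range as an assumption rather than the original values:
def init_weights(m):
    # draw every parameter from U(-0.08, 0.08)
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)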
def main_classification(args):
    print('Get data and model')
    ma_iterators, reiss_iterators, text, vec = data.get_cl_data(args)
    maslow_train_it, maslow_valid_it, maslow_test_it, maslow_label = ma_iterators
    reiss_train_it, reiss_valid_it, reiss_test_it, reiss_label = reiss_iterators

    # number of labels per task
    classes = [len(maslow_label.vocab), len(reiss_label.vocab)]

    if args.model == 'seq2seq':
        model = utils.create_seq2seq_model_cl(args, text, text, vec,
                                              maslow_label, reiss_label)
    elif args.model == 'gpt2':
        model = models.GPT2Classifier(classes, args.gpttokenizer).to(device)

    best_valid_loss = float('inf')
    best_valid_epoch = 0
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    print('Training starting...')
    # Main loop
    for epoch in range(args.num_epochs):
        start_time = time.time()
        # Training
        train_loss, ma_tr_pred, ma_tr_true, re_tr_pred, re_tr_true = \
            train_cl(args, model, maslow_train_it, reiss_train_it, optimizer,
                     criterion, args.grad_clip)
        # Validation
        valid_loss, ma_v_pred, ma_v_true, re_v_pred, re_v_true = \
            evaluate_cl(model, maslow_valid_it, reiss_valid_it, criterion)
        # Test
        test_loss, ma_t_pred, ma_t_true, re_t_pred, re_t_true = \
            evaluate_cl(model, maslow_test_it, reiss_test_it, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_valid_epoch = epoch

        # Maslow
        tr_acc = accuracy_score(ma_tr_true, ma_tr_pred)
        v_acc = accuracy_score(ma_v_true, ma_v_pred)
        v_f1 = f1_score(ma_v_true, ma_v_pred, average='macro')
        v_p = precision_score(ma_v_true, ma_v_pred, average='macro')
        v_r = recall_score(ma_v_true, ma_v_pred, average='macro')
        t_acc = accuracy_score(ma_t_true, ma_t_pred)
        t_f1 = f1_score(ma_t_true, ma_t_pred, average='macro')
        t_p = precision_score(ma_t_true, ma_t_pred, average='macro')
        t_r = recall_score(ma_t_true, ma_t_pred, average='macro')
        print('Maslow')
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.4f} | acc: {tr_acc:7.4f}')
        print(f'\t Val. Loss: {valid_loss:.4f} | acc: {v_acc:7.4f} | f1: {v_f1:7.4f} | '
              f'prec: {v_p:7.4f} | rec: {v_r:7.4f}')
        print(f'\t Test Loss: {test_loss:.4f} | acc: {t_acc:7.4f} | f1: {t_f1:7.4f} | '
              f'prec: {t_p:7.4f} | rec: {t_r:7.4f}')

        # Reiss
        tr_acc = accuracy_score(re_tr_true, re_tr_pred)
        v_acc = accuracy_score(re_v_true, re_v_pred)
        v_f1 = f1_score(re_v_true, re_v_pred, average='macro')
        v_p = precision_score(re_v_true, re_v_pred, average='macro')
        v_r = recall_score(re_v_true, re_v_pred, average='macro')
        t_acc = accuracy_score(re_t_true, re_t_pred)
        t_f1 = f1_score(re_t_true, re_t_pred, average='macro')
        t_p = precision_score(re_t_true, re_t_pred, average='macro')
        t_r = recall_score(re_t_true, re_t_pred, average='macro')
        print('Reiss')
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.4f} | acc: {tr_acc:7.4f}')
        print(f'\t Val. Loss: {valid_loss:.4f} | acc: {v_acc:7.4f} | f1: {v_f1:7.4f} | '
              f'prec: {v_p:7.4f} | rec: {v_r:7.4f}')
        print(f'\t Test Loss: {test_loss:.4f} | acc: {t_acc:7.4f} | f1: {t_f1:7.4f} | '
              f'prec: {t_p:7.4f} | rec: {t_r:7.4f}')
# closing argument of the scheduler constructor begun above this excerpt
    verbose=True)

# loss_fn = nn.BCEWithLogitsLoss()
loss_fn = DiceBCELoss()
loss_name = "BCE Dice Loss"
data_str = (f"Hyperparameters:\nImage Size: {size}\nBatch Size: {batch_size}\n"
            f"LR: {lr}\nEpochs: {num_epochs}\n")
data_str += f"Optimizer: Adam\nLoss: {loss_name}\n"
print_and_save(train_log, data_str)

""" Training the model. """
best_valid_loss = float('inf')
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = train(model, train_loader, optimizer, loss_fn, device)
    valid_loss = evaluate(model, valid_loader, loss_fn, device)
    scheduler.step(valid_loss)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), checkpoint_path)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    data_str = f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n'
    data_str += f'\tTrain Loss: {train_loss:.3f}\n'
    data_str += f'\t Val. Loss: {valid_loss:.3f}\n'
    print_and_save(train_log, data_str)
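# `print_and_save` above is not shown in this excerpt. A minimal sketch
# consistent with its calls, assuming `train_log` is a log-file path (both the
# name and the signature are assumptions):
def print_and_save(train_log, data_str):
    # echo to stdout and append the same text to the log file
    print(data_str)
    with open(train_log, "a") as f:
        f.write(data_str + "\n")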
        utils.SaveModel(m2p2_model, MODEL_DIR + 'opt/', mod_weights)

        # train the reference models (slave procedure in alg 1)
        if WITH_HET_MODULE:
            for ref_epoch in range(utils.n_EPOCHS):
                _ = train.train_ref(m2p2_model, ref_model, cri_pers, tra_loader,
                                    ref_model_optim, False)
        # end of slave procedure

        # apply the trained reference models to get the current concat weights
        tilde_mod_weights = train.train_ref(m2p2_model, ref_model, cri_pers, val_loader,
                                            ref_model_optim, True)
        # moving average: combine current concat weights with previous concat weights
        if WITH_HET_MODULE:
            utils.update_mod_weights(mod_weights, tilde_mod_weights)

        # gather information and print in verbose mode
        end_time = time.time()
        epoch_mins, epoch_secs = utils.epoch_time(start_time, end_time)
        if epoch % 1 == 0 and VERBOSE:
            print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
            if WITH_HET_MODULE:
                print('modality weights', mod_weights)
            print(f'\tTrain alignment loss: {train_emb_loss:.5f}\t'
                  f'Train persuasion loss: {train_pers_loss:.5f}')
            print(f'\tVal alignment loss: {val_emb_loss:.5f}\t'
                  f'Val persuasion loss: {val_pers_loss:.5f}')
    ##### end of training process (master procedure in alg 1) #####
else:
    ##### load pre-trained model and test #####
    mod_weights = utils.LoadModelDict(m2p2_model, PRETRAIN_MODEL_DIR)
    tes_emb_loss, tes_pers_loss = train.train_m2p2(m2p2_model, tes_loader, m2p2_optim,
                                                   cri_align, cri_pers, COSINE,
                                                   mod_weights, GAMMA, evaluate=True)
    print('MSE:', round(tes_pers_loss, 3))
    ##### end of testing #####
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        type=str,
        default='rnn',
        help="Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and 'distilbert'\n"
             "Default is 'rnn'")
    parser.add_argument('--train_data_path', type=str, default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path', type=str, default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors', type=str, default='fasttext.simple.300d',
                        help="""
                        Pretrained vectors:
                        Visit https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146
                        for more
                        """)
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    # note: argparse's type=bool treats any non-empty string as True
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=list, default=[3, 4, 5])
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ########## BILSTM ##########
    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path, format='csv',
                                    fields=data_fields, skip_header=True,
                                    csv_reader_params={'delimiter': ","})
        test_data = TabularDataset(args.test_data_path, format='csv',
                                   fields=data_fields, skip_header=True,
                                   csv_reader_params={'delimiter': ","})
        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(args.seed))

        TEXT.build_vocab(train_data, max_size=args.max_vocab_size,
                         vectors=args.vectors, unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]

        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim, args.output_dim,
                       args.n_layers, args.bidirectional, args.dropout, pad_idx)
        pretrained_embeddings = TEXT.vocab.vectors
        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()
        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')
        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):
            start_time = time.time()
            train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), './checkpoints/{}-model.pt'.format(args.model))

            print(f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

        model.load_state_dict(torch.load('./checkpoints/{}-model.pt'.format(args.model)))
        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        # Test Loss: 0.139, Test Acc: 95.27%

    ########## VANILLA RNN ##########
    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path, format='csv',
                                    fields=data_fields, skip_header=True,
                                    csv_reader_params={'delimiter': ","})
        test_data = TabularDataset(args.test_data_path, format='csv',
                                   fields=data_fields, skip_header=True,
                                   csv_reader_params={'delimiter': ","})
        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(args.seed))

        TEXT.build_vocab(train_data, max_size=args.max_vocab_size, vectors=args.vectors)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)

        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)
        pretrained_embeddings = TEXT.vocab.vectors
        model.embedding.weight.data.copy_(pretrained_embeddings)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()
        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')
        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):
            start_time = time.time()
            train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), './checkpoints/{}-model.pt'.format(args.model))

            print(f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

        model.load_state_dict(torch.load('./checkpoints/{}-model.pt'.format(args.model)))
        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        # Test Loss: 0.138, Test Acc: 95.05%
def train(self):
    print(f'The model has {self.model.count_parameters():,} trainable parameters')
    best_valid_loss = float('inf')

    # apply the appropriate initialization method for the model
    if self.params.model == 'seq2seq':
        self.model.apply(init_weights)
    elif self.params.model == 'seq2seq_gru':
        self.model.apply(init_weights_gru)
    elif self.params.model == 'seq2seq_attention':
        self.model.apply(init_weights_attention)
    print(self.model)

    for epoch in range(self.params.num_epoch):
        self.model.train()
        epoch_loss = 0
        start_time = time.time()

        for batch in self.train_iter:
            # For each batch, first zero the gradients
            self.optimizer.zero_grad()
            sources, sources_lengths = batch.kor
            targets = batch.eng
            predictions = self.model(sources, sources_lengths, targets)
            # targets = [target length, batch size]
            # predictions = [target length, batch size, output dim]

            # flatten the ground truth and predictions, since CrossEntropyLoss takes
            # 2D predictions with 1D targets
            # +) in this process, skip the 0-th token, since it is the <sos> token
            targets = targets[1:].view(-1)
            predictions = predictions[1:].view(-1, predictions.shape[-1])
            # targets = [(target sentence length - 1) * batch size]
            # predictions = [(target sentence length - 1) * batch size, output dim]

            loss = self.criterion(predictions, targets)
            loss.backward()

            # clip the gradients after the backward pass (they only exist once
            # backward has run) to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params.clip)
            self.optimizer.step()

            # 'item' extracts a scalar from a tensor that contains a single value
            epoch_loss += loss.item()

        train_loss = epoch_loss / len(self.train_iter)
        valid_loss = self.evaluate()
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), self.params.save_model)

        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        print(f'\tVal. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')