def main(args):
    # Data Load
    with open(args.data_path, 'rb') as f:
        data = pickle.load(f)

    # Dataset setting
    dataset_dict = {
        'train': CustomDataset(data['train']['title_encode'], data['train']['body_encode'],
                               data['train']['ans_encode'],
                               min_len=args.min_len, max_len=args.max_len),
        'valid': CustomDataset(data['valid']['title_encode'], data['valid']['body_encode'],
                               data['valid']['ans_encode'],
                               min_len=args.min_len, max_len=args.max_len),
        'test': CustomDataset(data['test']['title_encode'], data['test']['body_encode'],
                              data['test']['ans_encode'],
                              min_len=args.min_len, max_len=args.max_len)
    }
    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=True,
                            pin_memory=True, batch_size=args.batch_size),
        'valid': DataLoader(dataset_dict['valid'], collate_fn=PadCollate(), drop_last=True,
                            pin_memory=True, batch_size=args.batch_size),
        'test': DataLoader(dataset_dict['test'], collate_fn=PadCollate(), drop_last=True,
                           pin_memory=True, batch_size=args.batch_size)
    }

    # Word2Vec initialization
    word2vec = Word2Vec.load(args.embedding_path)
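# PadCollate is used as the collate_fn throughout these scripts but its definition is
# not shown here. The class below is only an illustrative sketch of the usual pattern
# (pad every sequence in a batch to the longest one along a chosen dimension); the
# actual implementation in each project may differ in its return values and arguments.
import torch

class PadCollate:
    def __init__(self, dim=0, pad_value=0):
        self.dim = dim              # dimension along which sequence lengths differ
        self.pad_value = pad_value  # value used for padding positions

    def pad_tensor(self, vec, target_len):
        pad_size = list(vec.shape)
        pad_size[self.dim] = target_len - vec.size(self.dim)
        return torch.cat([vec, vec.new_full(pad_size, self.pad_value)], dim=self.dim)

    def __call__(self, batch):
        # batch: list of (sequence_tensor, label) tuples produced by the Dataset
        max_len = max(sample[0].size(self.dim) for sample in batch)
        xs = torch.stack([self.pad_tensor(sample[0], max_len) for sample in batch])
        ys = torch.tensor([sample[1] for sample in batch])
        return xs, ys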
def write_validation_results(dataset, model, helper, outfile="temp/results.json"):
    """ Rescore validation detections and write them to file """
    batch_size = 1024  # can increase size if enough GPU space to allow faster evaluation
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             collate_fn=PadCollate())
    images = helper.images
    categories = helper.categories
    js_out = []
    start = time.time()
    for i, (input_tensor, target_tensor, lengths) in enumerate(dataloader):
        mask = (target_tensor != -1).float()
        prediction = model.forward(input_tensor, lengths, mask)
        for batch in range(input_tensor.size(0)):
            img_id = dataset.get_id(i * batch_size + batch)
            H, W = images[img_id]["height"], images[img_id]["width"]
            seq_len = (target_tensor[batch] != -1).sum()
            for j in range(seq_len):
                pred_score = round(input_tensor[batch, j, 0].item(), 4)
                x, y, w, h = input_tensor[batch, j, 81:85].tolist()
                x = round(x * W, 2)
                y = round(y * H, 2)
                w = round(w * W, 2)
                h = round(h * H, 2)
                bbox = [x, y, w, h]
                _, category = input_tensor[batch, j, 1:81].max(0)
                category = category.item()
                category = categories[helper.category_index[category]]["id"]
                rescore = round(prediction[batch, j].item(), 4)
                js = {
                    "image_id": img_id,
                    "category_id": category,
                    "bbox": bbox,
                    "score": rescore,
                }
                js_out.append(js)
    print("Generated evaluation results (t={:.2f}s). Writing to {}".format(
        time.time() - start, outfile))
    with open(outfile, "w") as f:
        json.dump(js_out, f)
def training(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #===================================#
    #============Data Load==============#
    #===================================#

    # 1) Data open
    print('Data Load & Setting!')
    with open(os.path.join(args.save_path, 'processed.pkl'), 'rb') as f:
        data_ = pickle.load(f)
    train_indices = data_['train_indices']
    valid_indices = data_['valid_indices']
    train_title_indices = data_['train_title_indices']
    valid_title_indices = data_['valid_title_indices']
    train_total_indices = data_['train_total_indices']
    valid_total_indices = data_['valid_total_indices']
    train_label = data_['train_label']
    valid_label = data_['valid_label']
    word2id = data_['word2id']
    id2word = data_['id2word']
    vocab_num = len(word2id.keys())
    del data_

    dataset_dict = {
        'train': CustomDataset(train_total_indices, train_indices, train_title_indices,
                               train_label, max_len=args.max_len),
        'valid': CustomDataset(valid_total_indices, valid_indices, valid_title_indices,
                               valid_label, max_len=args.max_len),
    }
    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=True,
                            batch_size=args.batch_size, shuffle=True, pin_memory=True,
                            num_workers=args.num_workers),
        'valid': DataLoader(dataset_dict['valid'], collate_fn=PadCollate(), drop_last=True,
                            batch_size=args.batch_size, shuffle=True, pin_memory=True,
                            num_workers=args.num_workers)
    }
    print(f"Total number of trainingsets iterations - {len(dataset_dict['train'])}, {len(dataloader_dict['train'])}")

    #===================================#
    #===========Model setting===========#
    #===================================#

    # 1) Model initiating
    print("Instantiating models...")
    model = Transformer(vocab_num=vocab_num, pad_idx=args.pad_idx, bos_idx=args.bos_idx,
                        eos_idx=args.eos_idx, max_len=args.max_len, d_model=args.d_model,
                        d_embedding=args.d_embedding, n_head=args.n_head,
                        dim_feedforward=args.dim_feedforward, n_layers=args.n_layers,
                        dropout=args.dropout, embedding_dropout=args.embedding_dropout,
                        device=device)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer = Ralamb(params=filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr, weight_decay=args.w_decay)
    # scheduler = WarmupLinearSchedule(optimizer, warmup_steps=len(dataloader_dict['train'])*3,
    #                                  t_total=len(dataloader_dict['train'])*args.num_epochs)
    criterion = nn.CrossEntropyLoss()
    model = model.train()
    model = model.to(device)

    # 2) Model resume
    start_epoch = 0
    if args.resume:
        checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # The scheduler above is commented out, so there is no scheduler state to restore here.
        # scheduler.load_state_dict(checkpoint['scheduler'])
        del checkpoint

    #===================================#
    #=========Model Train Start=========#
    #===================================#

    # 1) Pre-setting
    best_val_f1 = 0
    best_epoch = 0
    freq = 0

    # 2) Training start
    for e in range(start_epoch, args.num_epochs):
        start_time_e = time.time()
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            if phase == 'valid':
                print('Validation start...')
                model.eval()
                val_loss = 0
                val_f1 = 0
            for i, (total, segment, label) in enumerate(dataloader_dict[phase]):
                # Source, Target setting
                total = total.to(device)
                segment = segment.to(device)
                label = label.to(device)

                # Optimizer setting
                optimizer.zero_grad()

                # Model / Calculate loss
                with torch.set_grad_enabled(phase == 'train'):
                    output = model(total, segment)
                    output_cls_token = output[:, 0]
                    loss = F.cross_entropy(output_cls_token, label)

                # F1-Score calculate (y_true first, y_pred second)
                predicted = output_cls_token.max(dim=1)[1]
                f1_score_macro = round(f1_score(label.tolist(), predicted.tolist(),
                                                average='macro'), 2)

                # If phase train, then backward loss and step optimizer and scheduler
                if phase == 'train':
                    loss.backward()
                    # clip_grad_norm_(model.parameters(), args.grad_norm)
                    optimizer.step()
                    # scheduler.step()

                    # Print loss value only during training
                    if i == 0 or freq == args.print_freq or i == len(dataloader_dict['train']) - 1:
                        total_loss = loss.item()
                        print("[Epoch:%d][%d/%d] train_loss:%5.3f | train_f1:%2.2f | learning_rate:%3.6f | spend_time:%3.2fmin"
                              % (e + 1, i, len(dataloader_dict['train']), total_loss, f1_score_macro,
                                 optimizer.param_groups[0]['lr'], (time.time() - start_time_e) / 60))
                        freq = 0
                    freq += 1

                if phase == 'valid':
                    val_loss += loss.item()
                    val_f1 += f1_score_macro

            # Finishing iteration
            if phase == 'valid':
                val_loss /= len(dataloader_dict['valid'])
                val_f1 /= len(dataloader_dict['valid'])
                print("[Epoch:%d] val_loss:%5.3f | val_f1:%2.2f | spend_time:%5.2fmin"
                      % (e + 1, val_loss, val_f1, (time.time() - start_time_e) / 60))
                if val_f1 > best_val_f1:
                    print("[!] saving model...")
                    if not os.path.exists(args.save_path):
                        os.mkdir(args.save_path)
                    torch.save(model.state_dict(),
                               os.path.join(args.save_path, 'model_testing.pt'))
                    best_epoch = e
                    best_val_f1 = val_f1

    # 3)
    print(f'Best Epoch: {best_epoch}')
    print(f'Best F1-Score: {round(best_val_f1, 2)}')
import os

import numpy as np
from tqdm import trange

from dataset import Dictionary, VQAFeatureDataset, PadCollate
import base_model
import utils
from train_all import train, evaluate

parser = parse_args()
args = parser.parse_args([])

dictionary = Dictionary.load_from_file('data/dictionary.pkl')
test_dset = VQAFeatureDataset('test', dictionary)
batch_size = args.batch_size
test_loader = DataLoader(test_dset, batch_size, shuffle=False, num_workers=4,
                         collate_fn=PadCollate(dim=0))

n_models = 18
pred_list_sum = 0
models_root_dir = 'saved_models_trainall'
for idx in trange(n_models):
    print(idx)
    args.seed = idx
    args.output = '%s/exp%02d' % (models_root_dir, idx)
    args.init_from = os.path.join(args.output, 'model.pth')
def train(): parser = argparse.ArgumentParser(description="recognition argument") parser.add_argument("dir", default="models") parser.add_argument("--arch", choices=[ 'BLSTM', 'LSTM', 'VGGBLSTM', 'VGGLSTM', 'LSTMrowCONV', 'TDNN_LSTM', 'BLSTMN' ], default='BLSTM') parser.add_argument("--min_epoch", type=int, default=15) parser.add_argument("--output_unit", type=int) parser.add_argument("--lamb", type=float, default=0.1) parser.add_argument("--hdim", type=int, default=512) parser.add_argument("--layers", type=int, default=6) parser.add_argument("--dropout", type=float, default=0.5) parser.add_argument("--batch_size", type=int, default=256) parser.add_argument("--feature_size", type=int, default=120) parser.add_argument("--data_path") parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--stop_lr", type=float, default=0.00001) parser.add_argument("--resume", type=bool, default=False) parser.add_argument("--pretrained_model_path") args = parser.parse_args() os.makedirs(args.dir + '/board', exist_ok=True) writer = SummaryWriter(args.dir + '/board') # save configuration with open(args.dir + '/config.json', "w") as fout: config = { "arch": args.arch, "output_unit": args.output_unit, "hdim": args.hdim, "layers": args.layers, "dropout": args.dropout, "feature_size": args.feature_size, } json.dump(config, fout) model = Model(args.arch, args.feature_size, args.hdim, args.output_unit, args.layers, args.dropout, args.lamb) if args.resume: print("resume from {}".format(args.pretrained_model_path)) pretrained_dict = torch.load(args.pretrained_model_path) model.load_state_dict(pretrained_dict) device = torch.device("cuda:0") model.cuda() model = nn.DataParallel(model) model.to(device) lr = args.lr optimizer = optim.Adam(model.parameters(), lr=lr) tr_dataset = SpeechDatasetMem(args.data_path + "/tr.hdf5") tr_dataloader = DataLoader(tr_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=0, collate_fn=PadCollate()) cv_dataset = SpeechDatasetMem(args.data_path + "/cv.hdf5") cv_dataloader = DataLoader(cv_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=0, collate_fn=PadCollate()) prev_t = 0 epoch = 0 prev_cv_loss = np.inf model.train() while True: # training stage torch.save(model.module.state_dict(), args.dir + "/best_model") epoch += 1 for i, minibatch in enumerate(tr_dataloader): print("training epoch: {}, step: {}".format(epoch, i)) logits, input_lengths, labels_padded, label_lengths, path_weights = minibatch sys.stdout.flush() model.zero_grad() optimizer.zero_grad() loss = model(logits, labels_padded, input_lengths, label_lengths) partial_loss = torch.mean(loss.cpu()) weight = torch.mean(path_weights) real_loss = partial_loss - weight loss.backward(loss.new_ones(len(TARGET_GPUS))) optimizer.step() t2 = timeit.default_timer() writer.add_scalar('training loss', real_loss.item(), (epoch - 1) * len(tr_dataloader) + i) prev_t = t2 # save model torch.save(model.module.state_dict(), args.dir + "/model.epoch.{}".format(epoch)) # cv stage model.eval() cv_losses_sum = [] count = 0 for i, minibatch in enumerate(cv_dataloader): print("cv epoch: {}, step: {}".format(epoch, i)) logits, input_lengths, labels_padded, label_lengths, path_weights = minibatch loss = model(logits, labels_padded, input_lengths, label_lengths) loss_size = loss.size(0) count = count + loss_size partial_loss = torch.mean(loss.cpu()) weight = torch.mean(path_weights) real_loss = partial_loss - weight real_loss_sum = real_loss * loss_size 
cv_losses_sum.append(real_loss_sum.item()) print("cv_real_loss: {}".format(real_loss.item())) cv_loss = np.sum(np.asarray(cv_losses_sum)) / count writer.add_scalar('mean_cv_loss', cv_loss, epoch) if epoch < args.min_epoch or cv_loss <= prev_cv_loss: torch.save(model.module.state_dict(), args.dir + "/best_model") prev_cv_loss = cv_loss else: print( "cv loss does not improve, decay the learning rate from {} to {}" .format(lr, lr / 10.0)) adjust_lr(optimizer, lr / 10.0) lr = lr / 10.0 if (lr < args.stop_lr): print("learning rate is too small, finish training") break model.train() ctc_crf_base.release_env(gpus)
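# adjust_lr() is a project helper that is not shown in these snippets. A minimal sketch
# of what such a helper typically does (an assumption, not the original implementation):
def adjust_lr(optimizer, new_lr):
    # Set the new learning rate on every parameter group of the optimizer
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr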
def training(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #===================================#
    #============Data Load==============#
    #===================================#

    print('Data Load & Setting!')
    with open(os.path.join(args.save_path, 'preprocessed.pkl'), 'rb') as f:
        data_ = pickle.load(f)
    train_text_indices = data_['train_text_indices']
    valid_text_indices = data_['valid_text_indices']
    train_author_indices = data_['train_author_indices']
    valid_author_indices = data_['valid_author_indices']
    train_index_indices = data_['train_index_indices']
    valid_index_indices = data_['valid_index_indices']
    vocab_list = data_['vocab_list']
    vocab_num = len(vocab_list)
    word2id = data_['word2id']
    del data_

    dataset_dict = {
        'train': CustomDataset(train_text_indices, train_author_indices, train_index_indices,
                               min_len=args.min_len, max_len=args.max_len),
        'valid': CustomDataset(valid_text_indices, valid_author_indices, valid_index_indices,
                               min_len=args.min_len, max_len=args.max_len)
    }
    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=True,
                            batch_size=args.batch_size, shuffle=True, pin_memory=True),
        'valid': DataLoader(dataset_dict['valid'], collate_fn=PadCollate(), drop_last=True,
                            batch_size=args.batch_size, shuffle=True, pin_memory=True)
    }
    print(f"Total number of trainingsets iterations - {len(dataset_dict['train'])}, {len(dataloader_dict['train'])}")

    #===================================#
    #===========Model Setting===========#
    #===================================#

    print("Build model")
    model = Total_model(vocab_num, author_num=5, pad_idx=args.pad_idx, bos_idx=args.bos_idx,
                        eos_idx=args.eos_idx, max_len=args.max_len, d_model=args.d_model,
                        d_embedding=args.d_embedding, n_head=args.n_head, d_k=args.d_k,
                        d_v=args.d_v, dim_feedforward=args.dim_feedforward,
                        dropout=args.dropout, bilinear=args.bilinear,
                        num_transformer_layer=args.num_transformer_layer,
                        num_rnn_layer=args.num_rnn_layer, device=device)
    # optimizer = Ralamb(params=filter(lambda p: p.requires_grad, model.parameters()),
    #                    lr=args.max_lr, weight_decay=args.w_decay)
    # optimizer = optim_lib.Lamb(params=model.parameters(),
    #                            lr=args.max_lr, weight_decay=args.w_decay)
    optimizer = optim.SGD(model.parameters(), lr=args.max_lr, momentum=args.momentum,
                          weight_decay=args.w_decay)
    if args.n_warmup_epochs != 0:
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=args.n_warmup_epochs * len(dataloader_dict['train']),
                                         t_total=len(dataloader_dict['train']) * args.num_epoch)
    else:
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
                                      patience=len(dataloader_dict['train']) / 1.5)
    model.to(device)

    #===================================#
    #===========Model Training==========#
    #===================================#

    best_val_loss = None
    freq = 0

    for e in range(args.num_epoch):
        start_time_e = time.time()
        print(f'Model Fitting: [{e+1}/{args.num_epoch}]')
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            if phase == 'valid':
                model.eval()
                val_loss = 0
                val_acc = 0
            for i, (src, trg, index_) in enumerate(dataloader_dict[phase]):
                # Optimizer setting
                optimizer.zero_grad()

                # Source, Target sentence setting
                src = src.to(device)
                trg = trg.to(device)

                # Model / Calculate loss
                with torch.set_grad_enabled(phase == 'train'):
                    predicted_logit = model(src)
                    loss = F.cross_entropy(predicted_logit, trg)

                # If phase train, then backward loss and step optimizer and scheduler
                if phase == 'train':
                    loss.backward()
                    # Clip gradients before the optimizer step; clipping afterwards has no effect
                    clip_grad_norm_(model.parameters(), args.grad_clip)
                    optimizer.step()
                    if args.n_warmup_epochs != 0:
                        scheduler.step()
                    else:
                        scheduler.step(loss)

                    # Print loss value only during training
                    if freq == args.print_freq or i == 0 or i == len(dataloader_dict['train']) - 1:
                        total_loss = loss.item()
                        _, predicted = predicted_logit.max(dim=1)
                        accuracy = sum(predicted == trg).item() / predicted.size(0)
                        print("[Epoch:%d][%d/%d] train_loss:%5.3f | Accuracy:%2.3f | lr:%1.6f | spend_time:%5.2fmin"
                              % (e + 1, i, len(dataloader_dict['train']), total_loss, accuracy,
                                 optimizer.param_groups[0]['lr'], (time.time() - start_time_e) / 60))
                        freq = 0
                    freq += 1

                if phase == 'valid':
                    val_loss += loss.item()
                    _, predicted = predicted_logit.max(dim=1)
                    accuracy = sum(predicted == trg).item() / predicted.size(0)
                    val_acc += accuracy

            # Finishing iteration
            if phase == 'valid':
                val_loss /= len(dataloader_dict['valid'])
                val_acc /= len(dataloader_dict['valid'])
                print("[Epoch:%d] val_loss:%5.3f | Accuracy:%5.2f | spend_time:%5.2fmin"
                      % (e + 1, val_loss, val_acc, (time.time() - start_time_e) / 60))
                if not best_val_loss or val_loss < best_val_loss:
                    print("[!] saving model...")
                    if not os.path.exists(args.save_path):
                        os.mkdir(args.save_path)
                    torch.save(model.state_dict(),
                               os.path.join(args.save_path, 'model_saved2.pt'))
                    best_val_loss = val_loss
def training(args): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #===================================# #============Data Load==============# #===================================# train_dat = pd.read_csv(os.path.join(args.data_path, 'news_train.csv')) train_dat_num = int(len(train_dat) * (1-args.valid_percent)) print('Data Load & Setting!') with open(os.path.join(args.save_path, 'preprocessed.pkl'), 'rb') as f: data_ = pickle.load(f) src_vocab_num_dict = dict() total_train_text_indices_spm = data_['total_train_text_indices_spm'] total_valid_text_indices_spm = data_['total_valid_text_indices_spm'] total_train_text_indices_khaiii = data_['total_train_text_indices_khaiii'] total_valid_text_indices_khaiii = data_['total_valid_text_indices_khaiii'] total_train_text_indices_konlpy = data_['total_train_text_indices_konlpy'] total_valid_text_indices_konlpy = data_['total_valid_text_indices_konlpy'] train_content_indices_spm = data_['train_content_indices_spm'] valid_content_indices_spm = data_['valid_content_indices_spm'] train_content_indices_khaiii = data_['train_content_indices_khaiii'] valid_content_indices_khaiii = data_['valid_content_indices_khaiii'] train_content_indices_konlpy = data_['train_content_indices_konlpy'] valid_content_indices_konlpy = data_['valid_content_indices_konlpy'] train_date_list = data_['train_date_list'] valid_date_list = data_['valid_date_list'] train_ord_list = data_['train_ord_list'] valid_ord_list = data_['valid_ord_list'] train_id_list = data_['train_id_list'] valid_id_list = data_['valid_id_list'] train_info_list = data_['train_info_list'] valid_info_list = data_['valid_info_list'] word2id_spm = data_['word2id_spm'] word2id_khaiii = data_['word2id_khaiii'] word2id_konlpy = data_['word2id_konlpy'] src_vocab_num_dict['spm'] = len(word2id_spm.keys()) src_vocab_num_dict['khaiii'] = len(word2id_khaiii.keys()) src_vocab_num_dict['konlpy'] = len(word2id_konlpy.keys()) del data_ dataset_dict = { 'train': CustomDataset(total_train_text_indices_spm, total_train_text_indices_khaiii, total_train_text_indices_konlpy, train_content_indices_spm, train_content_indices_khaiii, train_content_indices_konlpy, train_date_list, train_ord_list, train_id_list, train_info_list, isTrain=True, min_len=args.min_len, max_len=args.max_len), 'valid': CustomDataset(total_valid_text_indices_spm, total_valid_text_indices_khaiii, total_valid_text_indices_konlpy, valid_content_indices_spm, valid_content_indices_khaiii, valid_content_indices_konlpy, valid_date_list, valid_ord_list, valid_id_list, valid_info_list, isTrain=True, min_len=args.min_len, max_len=args.max_len), } dataloader_dict = { 'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=True, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, pin_memory=True), 'valid': DataLoader(dataset_dict['valid'], collate_fn=PadCollate(), drop_last=True, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, pin_memory=True) } print(f'Total number of trainingsets iterations - {len(dataset_dict["train"])}, {len(dataloader_dict["train"])}') print(f'{train_dat_num - len(dataset_dict["train"])} data is exceptd.') #===================================# #===========Model Setting===========# #===================================# print("Build model") model = Total_model(args.model_type, src_vocab_num_dict, trg_num=2, pad_idx=args.pad_idx, bos_idx=args.bos_idx, eos_idx=args.eos_idx, max_len=args.max_len, d_model=args.d_model, d_embedding=args.d_embedding, n_head=args.n_head, 
d_k=args.d_k, d_v=args.d_v, dim_feedforward=args.dim_feedforward, dropout=args.dropout, bilinear=args.bilinear, num_transformer_layer=args.num_transformer_layer, num_rnn_layer=args.num_rnn_layer, device=device) if args.Ralamb: optimizer = Ralamb(params=filter(lambda p: p.requires_grad, model.parameters()), lr=args.max_lr, weight_decay=args.w_decay) else: optimizer = optim.SGD(model.parameters(), lr=args.max_lr, momentum=args.momentum, weight_decay=args.w_decay) # optimizer = optim_lib.Lamb(params=model.parameters(), # lr=args.max_lr, weight_decay=args.w_decay) if args.n_warmup_epochs != 0: scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.n_warmup_epochs*len(dataloader_dict['train']), t_total=len(dataloader_dict['train'])*args.num_epoch) else: scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=len(dataloader_dict['train'])/1.5) criterion = LabelSmoothingLoss(classes=2, smoothing=args.label_smoothing) model.to(device) #===================================# #===========Model Training==========# #===================================# best_val_loss = None if not os.path.exists(args.model_path): os.mkdir(args.model_path) for e in range(args.num_epoch): start_time_e = time.time() print(f'Model Fitting: [{e+1}/{args.num_epoch}]') for phase in ['train', 'valid']: if phase == 'train': model.train() freq = 0 if phase == 'valid': model.eval() val_loss = 0 val_acc = 0 false_id_list, false_logit_list = list(), list() for i, (total_src_spm, total_src_khaiii, total_src_konlpy, src_spm, src_khaiii, src_konlpy, date, order, id_, trg) in enumerate(dataloader_dict[phase]): # Optimizer setting optimizer.zero_grad() # Source, Target sentence setting total_src_spm = total_src_spm.to(device) total_src_khaiii = total_src_khaiii.to(device) total_src_konlpy = total_src_konlpy.to(device) src_spm = src_spm.to(device) src_khaiii = src_khaiii.to(device) src_konlpy = src_konlpy.to(device) trg = trg.to(device) # Model / Calculate loss with torch.set_grad_enabled(phase == 'train'): predicted_logit = model(total_src_spm, total_src_khaiii, total_src_konlpy, src_spm, src_khaiii, src_konlpy) # If phase train, then backward loss and step optimizer and scheduler if phase == 'train': loss = criterion(predicted_logit, trg) loss.backward() clip_grad_norm_(model.parameters(), args.grad_clip) optimizer.step() if args.n_warmup_epochs != 0: scheduler.step() else: scheduler.step(loss) # Print loss value only training if freq == args.print_freq or freq == 0 or i == len(dataloader_dict['train']): total_loss = loss.item() _, predicted = predicted_logit.max(dim=1) accuracy = sum(predicted == trg).item() / predicted.size(0) print("[Epoch:%d][%d/%d] train_loss:%5.3f | Accuracy:%2.3f | lr:%1.6f | spend_time:%5.2fmin" % (e+1, i, len(dataloader_dict['train']), total_loss, accuracy, optimizer.param_groups[0]['lr'], (time.time() - start_time_e) / 60)) freq = 0 freq += 1 if phase == 'valid': loss = F.cross_entropy(predicted_logit, trg) val_loss += loss.item() _, predicted = predicted_logit.max(dim=1) # Setting predicted_matching = (predicted == trg) logit_clone = F.softmax(predicted_logit.cpu().clone(), dim=1).numpy() # Calculate accuracy = sum(predicted_matching).item() / predicted.size(0) false_id_list.extend([id_[i] for i, x in enumerate(predicted_matching) if not x]) false_logit_list.extend(logit_clone[[i for i, x in enumerate(predicted_matching) if not x]]) val_acc += accuracy # Finishing iteration if phase == 'valid': val_loss /= len(dataloader_dict['valid']) val_acc /= len(dataloader_dict['valid']) 
print("[Epoch:%d] val_loss:%5.3f | Accuracy:%5.2f | spend_time:%5.2fmin" % (e+1, val_loss, val_acc, (time.time() - start_time_e) / 60)) if not best_val_loss or val_loss < best_val_loss: print("[!] saving model...") if not os.path.exists(args.save_path): os.mkdir(args.save_path) torch.save(model.state_dict(), os.path.join(args.model_path, f'model_saved.pt')) best_val_loss = val_loss wrong_id_list = false_id_list wrong_logit_list = false_logit_list #===================================# #============Result save============# #===================================# # 1) Path setting if not os.path.exists(args.results_path): os.mkdir(args.results_path) if not os.path.isfile(os.path.join(args.results_path, 'results.csv')): column_list_results = ['date_time', 'best_val_loss', 'tokenizer', 'valid_percent', 'vocab_size', 'num_epoch', 'batch_size', 'max_len', 'n_warmup_epochs', 'max_lr', 'momentum', 'w_decay', 'dropout', 'grad_clip', 'model_type', 'bilinear', 'num_transformer_layer', 'num_rnn_layer', 'd_model', 'd_embedding', 'd_k', 'd_v', 'n_head', 'dim_feedforward'] pd.DataFrame(columns=column_list_results).to_csv(os.path.join(args.results_path, 'results.csv'), index=False) if not os.path.isfile(os.path.join(args.results_path, 'wrong_list.csv')): column_list_wrong = ['date_time', 'id_', 'title', 'content', '0', '1', 'info'] pd.DataFrame(columns=column_list_wrong).to_csv(os.path.join(args.results_path, 'wrong_list.csv'), index=False) results_dat = pd.read_csv(os.path.join(args.results_path, 'results.csv')) wrong_dat_total = pd.read_csv(os.path.join(args.results_path, 'wrong_list.csv')) # 2) Model setting save new_row = { 'date_time':datetime.datetime.today().strftime('%m/%d/%H:%M'), 'best_val_loss': best_val_loss, 'tokenizer': args.sentencepiece_tokenizer, 'valid_percent': args.valid_percent, 'vocab_size': args.vocab_size, 'num_epoch': args.num_epoch, 'batch_size': args.batch_size, 'max_len': args.max_len, 'n_warmup_epochs': args.n_warmup_epochs, 'max_lr': args.max_lr, 'momentum': args.momentum, 'w_decay': args.w_decay, 'dropout': args.dropout, 'grad_clip': args.grad_clip, 'model_type': args.model_type, 'bilinear': args.bilinear, 'num_transformer_layer': args.num_transformer_layer, 'num_rnn_layer': args.num_rnn_layer, 'd_model': args.d_model, 'd_embedding': args.d_embedding, 'd_k': args.d_k, 'd_v': args.d_v, 'n_head': args.n_head, 'dim_feedforward': args.dim_feedforward, 'label_smoothing': args.label_smoothing } results_dat = results_dat.append(new_row, ignore_index=True) results_dat.to_csv(os.path.join(args.results_path, 'results.csv'), index=False) # 3) Worng ID list save train_dat['id_'] = train_dat['n_id'] + '_' + train_dat['ord'].astype(str) wrong_dat = pd.DataFrame(np.stack(wrong_logit_list)) wrong_dat['date_time'] = [datetime.datetime.today().strftime('%m/%d/%H:%M') for _ in range(len(wrong_dat))] wrong_dat['id_'] = wrong_id_list wrong_dat = wrong_dat.merge(train_dat[['id_', 'title', 'content', 'info']], on='id_') wrong_dat = wrong_dat[['date_time', 'id_', 'title', 'content', 0, 1, 'info']] wrong_dat_total = pd.concat([wrong_dat_total, wrong_dat], axis=0) wrong_dat_total.to_csv(os.path.join(args.results_path, 'wrong_list.csv'), index=False)
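# LabelSmoothingLoss is imported from the project and is not defined in the script above.
# The class below is a common implementation of label smoothing, written as a sketch under
# the assumption that it matches the classes/smoothing signature used above; the project's
# own version may differ in detail.
import torch
import torch.nn as nn

class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.1, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.classes = classes
        self.dim = dim

    def forward(self, pred, target):
        # pred: raw logits of shape (batch, classes); target: class indices of shape (batch,)
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Spread `smoothing` mass uniformly over the wrong classes,
            # keep `1 - smoothing` on the true class
            true_dist = torch.full_like(pred, self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))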
def train(): parser = argparse.ArgumentParser(description="recognition argument") parser.add_argument("--min_epoch", type=int, default=15) parser.add_argument("--output_unit", type=int) parser.add_argument("--lamb", type=float, default=0.1) parser.add_argument("--hdim", type=int, default=512) parser.add_argument("--layers", type=int, default=6) parser.add_argument("--dropout", type=float, default=0.5) parser.add_argument("--batch_size", type=int, default=256) parser.add_argument("--feature_size", type=int, default=120) parser.add_argument("--data_path") parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--stop_lr", type=float, default=0.00001) args = parser.parse_args() batch_size = args.batch_size model = Model(args.feature_size, args.hdim, args.output_unit, args.layers, args.dropout, args.lamb) device = torch.device("cuda:0") model.cuda() model = nn.DataParallel(model) model.to(device) lr = args.lr optimizer = optim.Adam(model.parameters(), lr=lr) tr_dataset = SpeechDatasetMem(args.data_path + "/data/hdf5/tr.hdf5") tr_dataloader = DataLoader(tr_dataset, batch_size=batch_size, shuffle=True, num_workers=16, collate_fn=PadCollate()) cv_dataset = SpeechDatasetMem(args.data_path + "/data/hdf5/cv.hdf5") cv_dataloader = DataLoader(cv_dataset, batch_size=batch_size, shuffle=False, num_workers=16, collate_fn=PadCollate()) prev_t = 0 epoch = 0 prev_cv_loss = np.inf model.train() while True: # training stage torch.save(model.module.state_dict(), args.data_path + "/models/best_model") epoch += 1 for i, minibatch in enumerate(tr_dataloader): print("training epoch: {}, step: {}".format(epoch, i)) logits, input_lengths, labels_padded, label_lengths, path_weights = minibatch sys.stdout.flush() model.zero_grad() optimizer.zero_grad() loss = model(logits, labels_padded, input_lengths, label_lengths) partial_loss = torch.mean(loss.cpu()) weight = torch.mean(path_weights) real_loss = partial_loss - weight loss.backward(loss.new_ones(len(TARGET_GPUS))) optimizer.step() t2 = timeit.default_timer() print("time: {}, tr_real_loss: {}, lr: {}".format( t2 - prev_t, real_loss.item(), optimizer.param_groups[0]['lr'])) prev_t = t2 # save model torch.save(model.module.state_dict(), args.data_path + "/models/model.epoch.{}".format(epoch)) # cv stage model.eval() cv_losses = [] cv_losses_sum = [] count = 0 for i, minibatch in enumerate(cv_dataloader): print("cv epoch: {}, step: {}".format(epoch, i)) logits, input_lengths, labels_padded, label_lengths, path_weights = minibatch loss = model(logits, labels_padded, input_lengths, label_lengths) loss_size = loss.size(0) count = count + loss_size partial_loss = torch.mean(loss.cpu()) weight = torch.mean(path_weights) real_loss = partial_loss - weight real_loss_sum = real_loss * loss_size cv_losses_sum.append(real_loss_sum.item()) print("cv_real_loss: {}".format(real_loss.item())) cv_loss = np.sum(np.asarray(cv_losses_sum)) / count print("mean_cv_loss: {}".format(cv_loss)) if epoch < args.min_epoch or cv_loss <= prev_cv_loss: torch.save(model.module.state_dict(), args.data_path + "/models/best_model") prev_cv_loss = cv_loss else: print( "cv loss does not improve, decay the learning rate from {} to {}" .format(lr, lr / 10.0)) adjust_lr(optimizer, lr / 10.0) lr = lr / 10.0 if (lr < args.stop_lr): print("learning rate is too small, finish training") break model.train() ctc_crf_base.release_env(gpus)
def train(trn_dir, dev_dir, exp_dir, resume):
    lang_dict, lang_list = utils.ReadLang2UttGetLangLabel(os.path.join(trn_dir, "spk2utt"))
    hparams.lang = lang_list
    in_domain_classes_num = len(lang_list)

    Model = eval(hparams.model_type)
    model = Model(in_domain_classes_num, activation='logsoftmax')
    if hparams.use_cuda:
        model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                                 eps=1e-08, weight_decay=0., amsgrad=True)

    best_cavg = 9999.9
    best_cavg_acc = "UNK"
    best_cavg_eer = "UNK"
    best_cavg_epo = 0
    best_cavg_loss = 999.9
    current_epoch = 0
    losses = []

    if resume is not None:
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        current_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        losses = checkpoint['losses']
        if 'best_cavg' in checkpoint:
            best_cavg = checkpoint['best_cavg']
    print(model)

    # Data generator
    data_set_trn = KaldiDataSet(trn_dir)
    data_set_dev = KaldiDataSet(dev_dir)
    dataloader_trn = DataLoader(data_set_trn, collate_fn=PadCollate(dim=1),
                                batch_size=hparams.batch_size, shuffle=True)
    dataloader_dev = DataLoader(data_set_dev, collate_fn=PadCollate(dim=1),
                                batch_size=hparams.batch_size, shuffle=True)

    criterion = nn.NLLLoss()
    log_interval = 10

    while current_epoch < hparams.max_epoch:
        total_loss = 0
        batch = 0
        model.train()
        for x, targets in dataloader_trn:
            x = torch.FloatTensor(x).to(device)
            targets = torch.LongTensor(targets).to(device)
            batch_output = model(x)
            loss = criterion(batch_output, targets)
            losses.append(loss.item())
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch += 1

        acc, eval_loss, confusion_matrix, cavg, eer, thd = Evaluate(model, criterion,
                                                                    dataloader_dev, exp_dir)
        if best_cavg > cavg:
            best_cavg = cavg
            best_cavg_acc = acc
            best_cavg_eer = eer
            best_cavg_epo = current_epoch
            best_cavg_loss = eval_loss
            torch.save(
                {
                    "epoch": current_epoch,
                    "cavg": cavg,
                    "acc": acc,
                    "eer": eer,
                    "losses": losses,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                }, os.path.join(exp_dir, 'bestcavg.pth'))
        print(": Epoch {} Best[Cavg:{} Acc:{:.2f}% EER%:{:.2f} Epoch:{} Loss:{:.4f}]".format(
            current_epoch, best_cavg, best_cavg_acc * 100, best_cavg_eer,
            best_cavg_epo, best_cavg_loss))
        current_epoch += 1
if __name__ == '__main__':
    args = parse_args().parse_args()
    print(args)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    # train_dset = eval_dset
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.init_from is not None:
        print('Init from: ' + args.init_from)
        init_model = torch.load(args.init_from)
        model.load_state_dict(init_model)

    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=4,
                              collate_fn=PadCollate(dim=0))
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True, num_workers=4,
                             collate_fn=PadCollate(dim=0))
    train(model, train_loader, eval_loader, args.epochs, args.output)
def main(config, params, dataset): helper = Helper("data/annotations/instances_val2017.json") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") start = time() print("Loading train dataset...") train_dataset = Dataset("data/preprocessed/preprocessed_train2017_" + dataset + ".pt") torch.cuda.empty_cache() print("Loading validation set...") val_dataset = Dataset("data/preprocessed/preprocessed_val2017_" + dataset + ".pt") torch.cuda.empty_cache() print("Loaded validation set. (t=%.1f seconds)" % (time() - start)) val_params = { "batch_size": params["val_batch_size"], "collate_fn": PadCollate() } val_dataloader = torch.utils.data.DataLoader(val_dataset, **val_params) train_params = { "batch_size": params["batch_size"], "shuffle": True, "collate_fn": PadCollate(shuffle_rate=params["shuffle_rate"]), } train_dataloader = torch.utils.data.DataLoader(train_dataset, **train_params) # Train loop model = ContextualRescorer(params).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"]) scheduler = LrScheduler(optimizer) logger = Logger(config, params, dataset=dataset) early_stopping_params = {"mode": "max", "patience": 20, "delta": 0.0001} early_stopper = EarlyStopping(**early_stopping_params) start = time() for epoch in range(params["n_epochs"]): loss, corrects, total = 0, 0, 0 prog_bar = ProgressBar(len(train_dataloader)) for i, (input_batch, target_batch, lengths) in enumerate(train_dataloader): batch_loss, corrects_, total_ = training_step( model, optimizer, input_batch, target_batch, lengths) loss += batch_loss corrects += corrects_ total += total_ prog_bar.update() loss = loss / (i + 1) accuracy = corrects / total * 100 # Measure loss and accuracy on validation set val_loss, val_accuracy = validate(val_dataloader, model) # Evaluate the AP on the validation set model.eval() print("\n --> Evaluating AP") write_validation_results(val_dataset, model, helper) stats = coco_eval() ap = stats[0] print("AP: {} \n\n".format(ap)) if scheduler.step(ap): print(" --> Backtracking to best model") model.load_state_dict(logger.best_model) # Logging and early stopping logger.epoch(model, loss, accuracy, val_loss, val_accuracy, ap, optimizer.param_groups[0]["lr"]) if early_stopper.step(ap): print(" --> Early stopping") break logger.close() #visualize_model(helper, params, logger.best_model, val_dataset) print(config)
#with open(os.path.join(exp_dir, "config.json")) as f:
#    hparams.parse_json(f.read())
print(hparams_debug_string())

Model = eval(hparams.model_type)  # TODO
model = Model(10, activation='logsoftmax')

print("Load the model: %s" % os.path.join(exp_dir, 'best.pth'))
checkpoint = torch.load(os.path.join(exp_dir, 'best.pth'))
model.load_state_dict(checkpoint['model_state_dict'])
epoch = checkpoint['epoch']
print("The epoch of the best model is {}".format(epoch))

criterion = nn.NLLLoss()
if hparams.use_cuda:
    model.cuda()

data_set_dev = KaldiDataSet(dev_dir)
dataloader_dev = DataLoader(data_set_dev, collate_fn=PadCollate(dim=1),
                            batch_size=hparams.batch_size, shuffle=True)
Evaluate(model, criterion, dataloader_dev, exp_dir)
def testing(args): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #===================================# #============Data Load==============# #===================================# print('Data Load & Setting!') with open(os.path.join(args.save_path, 'test_preprocessed.pkl'), 'rb') as f: data_ = pickle.load(f) test_text_indices = data_['test_text_indices'] test_index_indices = data_['test_index_indices'] vocab_list = data_['vocab_list'] vocab_num = len(vocab_list) word2id = data_['word2id'] del data_ test_dataset = CustomDataset(test_text_indices, test_index_indices, test_index_indices, min_len=args.min_len, max_len=args.max_len) test_dataloader = DataLoader(test_dataset, collate_fn=PadCollate(), drop_last=False, batch_size=args.batch_size, shuffle=True, pin_memory=True) print(f"Total number of testsets iterations - {len(test_dataset)}, {len(test_dataloader)}") #===================================# #===========Model Setting===========# #===================================# print("Build model") model = Total_model(vocab_num, author_num=5, pad_idx=args.pad_idx, bos_idx=args.bos_idx, eos_idx=args.eos_idx, max_len=args.max_len, d_model=args.d_model, d_embedding=args.d_embedding, n_head=args.n_head, d_k=args.d_k, d_v=args.d_v, dim_feedforward=args.dim_feedforward, dropout=args.dropout, bilinear=args.bilinear, num_transformer_layer=args.num_transformer_layer, num_rnn_layer=args.num_rnn_layer, device=device) model.load_state_dict(torch.load(os.path.join(args.save_path, 'model_saved2.pt'))) model = model.to(device) model = model.eval() freq = 0 start_time = time.time() for i, (src, _, index_) in enumerate(test_dataloader): src = src.to(device) trg_softmax = nn.Softmax(dim=1) with torch.no_grad(): predicted_logit = model(src) predicted_logit_clone = trg_softmax(predicted_logit.clone().detach()) index_clone = index_.clone().detach() if i == 0: predicted_total = torch.cat((index_clone.type('torch.FloatTensor').unsqueeze(1), predicted_logit_clone.cpu()), dim=1) else: predicted = torch.cat((index_clone.type('torch.FloatTensor').unsqueeze(1), predicted_logit_clone.cpu()), dim=1) predicted_total = torch.cat((predicted_total, predicted), dim=0) if freq == 100 or i == 0 or i == len(test_dataloader): spend_time = time.time() - start_time print('testing...[%d/%d] %2.2fmin spend' % (i, len(test_dataloader), spend_time / 60)) freq = 0 freq += 1 #===================================# #======Submission csv setting=======# #===================================# submission_dat = pd.DataFrame(predicted_total.numpy()) submission_dat[0] = submission_dat[0].astype(int) submission_dat.columns = ['index', 0, 1, 2, 3, 4] submission_dat = submission_dat.sort_values(by=['index'], ascending=True) submission_dat.to_csv(os.path.join(args.save_path, 'submission.csv'), index=False, encoding='utf-8')
def training(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #===================================#
    #============Data Load==============#
    #===================================#

    # 1) Data open
    print('Data Load & Setting!')
    with open(os.path.join(args.preprocess_path, 'processed.pkl'), 'rb') as f:
        data_ = pickle.load(f)
    train_comment_indices = data_['train_comment_indices']
    test_comment_indices = data_['test_comment_indices']
    train_label = data_['train_label']
    test_label = data_['test_label']
    del data_

    if args.augmentation_data_training:
        with open(os.path.join(args.preprocess_path, 'augmented_processed.pkl'), 'rb') as f:
            data_ = pickle.load(f)
        train_comment_indices = data_['augmented_comment_indices']
        train_label = data_['augmented_label']

    # 2) Dataloader setting
    dataset_dict = {
        'train': CustomDataset(train_comment_indices, train_label,
                               min_len=args.min_len, max_len=args.max_len),
        'test': CustomDataset(test_comment_indices, test_label,
                              min_len=args.min_len, max_len=args.max_len)
    }
    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=True,
                            batch_size=args.batch_size, shuffle=True, pin_memory=True,
                            num_workers=args.num_workers),
        'test': DataLoader(dataset_dict['test'], collate_fn=PadCollate(), drop_last=False,
                           batch_size=args.batch_size, shuffle=True, pin_memory=True,
                           num_workers=args.num_workers)
    }
    print(f"Total number of trainingsets iterations - {len(dataset_dict['train'])}, {len(dataloader_dict['train'])}")

    #===================================#
    #===========Model setting===========#
    #===================================#

    # 1) Model initiating
    print("Instantiating models...")
    model = BertForSequenceClassification.from_pretrained('bert-large-cased')
    model = model.train()
    # Freeze the BERT encoder so that only the classification head is fine-tuned
    for para in model.bert.parameters():
        para.requires_grad = False
    model = model.to(device)

    # Optimizer setting
    # optimizer = AdamW(model.parameters(), lr=args.lr, eps=1e-8)
    optimizer = optimizer_select(model, args)
    scheduler = shceduler_select(optimizer, dataloader_dict, args)

    # 2) Model resume
    start_epoch = 0
    # if args.resume:
    #     checkpoint = torch.load('./checkpoint_testing.pth.tar', map_location='cpu')
    #     start_epoch = checkpoint['epoch'] + 1
    #     model.load_state_dict(checkpoint['model'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     scheduler.load_state_dict(checkpoint['scheduler'])
    #     del checkpoint

    #===================================#
    #=========Model Train Start=========#
    #===================================#

    best_test_acc = 0
    print('Train start!')

    for epoch in range(start_epoch, args.num_epochs):
        start_time_e = time.time()
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            if phase == 'test':
                print('Test start...')
                test_loss = 0
                test_acc = 0
                model.eval()
            for i, batch in enumerate(dataloader_dict[phase]):
                # Optimizer setting
                optimizer.zero_grad()

                # Input, output setting
                src_seq = batch[0].to(device)
                label = batch[1].to(device)

                if phase == 'train':
                    with torch.set_grad_enabled(True):
                        out = model(src_seq, attention_mask=src_seq != 0, labels=label)
                        acc = sum(out.logits.max(dim=1)[1] == label) / len(label)

                        # Loss backpropagation
                        out.loss.backward()
                        clip_grad_norm_(model.parameters(), 5)
                        optimizer.step()
                        if args.scheduler in ['warmup', 'reduce_train']:
                            scheduler.step()

                        # Print loss value only during training
                        if i == 0 or freq == args.print_freq or i == len(dataloader_dict['train']) - 1:
                            print("[Epoch:%d][%d/%d] train_loss:%3.3f | train_acc:%3.3f | learning_rate:%3.6f | spend_time:%3.3fmin"
                                  % (epoch + 1, i, len(dataloader_dict['train']),
                                     out.loss.item(), acc.item(),
                                     optimizer.param_groups[0]['lr'],
                                     (time.time() - start_time_e) / 60))
                            freq = 0
                        freq += 1

                if phase == 'test':
                    with torch.no_grad():
                        out = model(src_seq, attention_mask=src_seq != 0, labels=label)
                        acc = sum(out.logits.max(dim=1)[1] == label) / len(label)
                        test_loss += out.loss.item()
                        test_acc += acc.item()
                        if args.scheduler in ['reduce_valid', 'lambda']:
                            scheduler.step()

            if phase == 'test':
                test_loss /= len(dataloader_dict[phase])
                test_acc /= len(dataloader_dict[phase])
                print(f'Test Loss: {test_loss:3.3f}')
                print(f'Test Accuracy: {test_acc*100:2.2f}%')
                if test_acc > best_test_acc:
                    print('Checkpoint saving...')
                    torch.save(
                        {
                            'epoch': epoch,
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        }, 'checkpoint_testing3.pth.tar')
                    best_test_acc = test_acc
                    best_epoch = epoch

    # 3)
    print(f'Best Epoch: {best_epoch}')
    print(f'Best Accuracy: {round(best_test_acc, 2)}')
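# A quick sanity check (an illustrative sketch, not part of the original script): with
# model.bert frozen as in the function above, only the classification head of
# BertForSequenceClassification should remain trainable.
def list_trainable_parameters(model):
    trainable = [name for name, p in model.named_parameters() if p.requires_grad]
    print(f'{len(trainable)} trainable parameter tensors, e.g. {trainable[:2]}')
    return trainable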
def training(args): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #===================================# #==============Logging==============# #===================================# logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) handler = TqdmLoggingHandler() handler.setFormatter( logging.Formatter(" %(asctime)s - %(message)s", "%Y-%m-%d %H:%M:%S")) logger.addHandler(handler) logger.propagate = False #===================================# #============Data Load==============# #===================================# # 1) Dataloader setting write_log(logger, "Load data...") gc.disable() dataset_dict = { 'train': CustomDataset(data_path=args.preprocessed_path, phase='train'), 'valid': CustomDataset(data_path=args.preprocessed_path, phase='valid'), 'test': CustomDataset(data_path=args.preprocessed_path, phase='test') } unique_menu_count = dataset_dict['train'].unique_count() dataloader_dict = { 'train': DataLoader(dataset_dict['train'], drop_last=True, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers, collate_fn=PadCollate()), 'valid': DataLoader(dataset_dict['valid'], drop_last=False, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers, collate_fn=PadCollate()), 'test': DataLoader(dataset_dict['test'], drop_last=False, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers, collate_fn=PadCollate()) } gc.enable() write_log( logger, f"Total number of trainingsets iterations - {len(dataset_dict['train'])}, {len(dataloader_dict['train'])}" ) #===================================# #===========Model setting===========# #===================================# # 1) Model initiating write_log(logger, "Instantiating models...") model = Transformer(model_type=args.model_type, input_size=unique_menu_count, d_model=args.d_model, d_embedding=args.d_embedding, n_head=args.n_head, dim_feedforward=args.dim_feedforward, num_encoder_layer=args.num_encoder_layer, dropout=args.dropout) model = model.train() model = model.to(device) # 2) Optimizer setting optimizer = optimizer_select(model, args) scheduler = shceduler_select(optimizer, dataloader_dict, args) criterion = nn.MSELoss() scaler = GradScaler(enabled=True) model, optimizer = amp.initialize(model, optimizer, opt_level='O1') # 2) Model resume start_epoch = 0 if args.resume: checkpoint = torch.load(os.path.join(args.model_path, 'checkpoint.pth.tar'), map_location='cpu') start_epoch = checkpoint['epoch'] + 1 model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) scheduler.load_state_dict(checkpoint['scheduler']) model = model.train() model = model.to(device) del checkpoint #===================================# #=========Model Train Start=========# #===================================# best_val_rmse = 9999999 write_log(logger, 'Train start!') for epoch in range(start_epoch, args.num_epochs): for phase in ['train', 'valid']: if phase == 'train': model.train() train_start_time = time.time() freq = 0 elif phase == 'valid': model.eval() val_loss = 0 val_rmse = 0 for i, (src_menu, label_lunch, label_supper) in enumerate(dataloader_dict[phase]): # Optimizer setting optimizer.zero_grad() # Input, output setting src_menu = src_menu.to(device, non_blocking=True) label_lunch = label_lunch.float().to(device, non_blocking=True) label_supper = label_supper.float().to(device, non_blocking=True) # Model with torch.set_grad_enabled(phase == 'train'): with autocast(enabled=True): if 
args.model_type == 'sep': logit = model(src_menu) logit_lunch = logit[:, 0] logit_supper = logit[:, 0] elif args.model_type == 'total': logit = model(src_menu) logit_lunch = logit[:, 0] logit_supper = logit[:, 1] # Loss calculate loss_lunch = criterion(logit_lunch, label_lunch) loss_supper = criterion(logit_supper, label_supper) loss = loss_lunch + loss_supper # Back-propagation if phase == 'train': scaler.scale(loss).backward() scaler.unscale_(optimizer) clip_grad_norm_(model.parameters(), args.clip_grad_norm) scaler.step(optimizer) scaler.update() # Scheduler setting if args.scheduler in ['constant', 'warmup']: scheduler.step() if args.scheduler == 'reduce_train': scheduler.step(loss) # Print loss value rmse_loss = torch.sqrt(loss) if phase == 'train': if i == 0 or freq == args.print_freq or i == len( dataloader_dict['train']): batch_log = "[Epoch:%d][%d/%d] train_MSE_loss:%2.3f | train_RMSE_loss:%2.3f | learning_rate:%3.6f | spend_time:%3.2fmin" \ % (epoch+1, i, len(dataloader_dict['train']), loss.item(), rmse_loss.item(), optimizer.param_groups[0]['lr'], (time.time() - train_start_time) / 60) write_log(logger, batch_log) freq = 0 freq += 1 elif phase == 'valid': val_loss += loss.item() val_rmse += rmse_loss.item() if phase == 'valid': val_loss /= len(dataloader_dict['valid']) val_rmse /= len(dataloader_dict['valid']) write_log(logger, 'Validation Loss: %3.3f' % val_loss) write_log(logger, 'Validation RMSE: %3.3f' % val_rmse) if val_rmse < best_val_rmse: write_log(logger, 'Checkpoint saving...') if not os.path.exists(args.save_path): os.mkdir(args.save_path) torch.save( { 'epoch': epoch, 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'scaler': scaler.state_dict() }, os.path.join(args.save_path, f'checkpoint_cap.pth.tar')) best_val_rmse = val_rmse best_epoch = epoch else: else_log = f'Still {best_epoch} epoch RMSE({round(best_val_rmse, 3)}) is better...' write_log(logger, else_log) # 3) write_log(logger, f'Best Epoch: {best_epoch+1}') write_log(logger, f'Best Accuracy: {round(best_val_rmse, 3)}')
def augmenting(args): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") start_time = time.time() #===================================# #============Data Load==============# #===================================# # 1) Data open print('Data Load & Setting!') with open(os.path.join(args.preprocess_path, 'processed.pkl'), 'rb') as f: data_ = pickle.load(f) train_comment_indices = data_['train_comment_indices'] train_label = data_['train_label'] del data_ # 2) Dataloader setting dataset_dict = { 'train': CustomDataset(train_comment_indices, train_label, min_len=args.min_len, max_len=args.max_len) } dataloader_dict = { 'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(), drop_last=False, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers) } print( f"Total number of trainingsets iterations - {len(dataset_dict['train'])}, {len(dataloader_dict['train'])}" ) # model = Custom_ConditionalBERT(mask_id_token=103, device=device) model = model.to(device) model = model.eval() tokenizer = BertTokenizer.from_pretrained('bert-base-cased') #===================================# #===========Augmentation============# #===================================# augmented_dataset = pd.DataFrame() augmented_count = 0 original_count = 0 with torch.no_grad(): for batch in tqdm(dataloader_dict['train']): src_seq = batch[0].to(device) label = batch[1].tolist() mlm_logit, ner_masking_tensor = model(src_seq) # Pre-setting i = 0 old_masking_token_count = 0 label_pop_list = list() augmented_tensor = torch.LongTensor([]).to(device) top_3_predicted = mlm_logit[ner_masking_tensor == 103].topk(3, 1)[1] # Augmentation for n_i, n in enumerate(ner_masking_tensor): if (n == 103).sum().item() == 0: # label = torch.cat([label[0:n_i], label[n_i+1:]]) label_pop_list.append(n_i) continue else: for k in range(args.augment_top_k): n_augmented = n.clone().detach() masking_token_count = (n_augmented == 103).sum().item() for ix in (n_augmented == 103).nonzero( as_tuple=True)[0]: n_augmented[ix] = top_3_predicted[i][k] i += 1 if i == masking_token_count + old_masking_token_count: i = old_masking_token_count augmented_tensor = torch.cat( (augmented_tensor, n_augmented.unsqueeze(0)), dim=0) i += masking_token_count old_masking_token_count += masking_token_count # Counting augmented_count += augmented_tensor.size(0) original_count += len(label_pop_list) # Process non NER masking sequence if len(label_pop_list) != 0: for i, original_ix in enumerate(label_pop_list): if i == 0: original_seq = src_seq[original_ix].unsqueeze(0) else: original_seq = torch.cat( (original_seq, src_seq[original_ix].unsqueeze(0)), dim=0) # Concat augmented_text = tokenizer.batch_decode( augmented_tensor, skip_special_tokens=True) augmented_text = augmented_text + tokenizer.batch_decode( original_seq, skip_special_tokens=True) original_label = [ value for i, value in enumerate(label) if i in label_pop_list ] label = [ i for j, i in enumerate(label) if j not in label_pop_list ] augmented_label = [ item for item in label for i in range(args.augment_top_k) ] augmented_label = augmented_label + original_label # If NER_mask in none in sequence else: augmented_text = tokenizer.batch_decode( augmented_tensor, skip_special_tokens=True) label = [ i for j, i in enumerate(label) if j not in label_pop_list ] augmented_label = [ item for item in label for i in range(args.augment_top_k) ] new_dat = pd.DataFrame({ 'comment': augmented_text, 'sentiment': augmented_label }) augmented_dataset = pd.concat([augmented_dataset, 
new_dat], axis=0) print(f'Augmented data size: {augmented_count}') print(f'Non NER_Masking data size: {original_count}') print(f'Total data size: {augmented_dataset.shape[0]}') augmented_dataset.to_csv(os.path.join(args.preprocess_path, 'augmented_train.csv'), index=False) #===================================# #==============Saving===============# #===================================# print('Cleansing...') # 1) Cleansing augmented_dataset['comment'] = encoding_text(augmented_dataset['comment'], tokenizer, args.max_len) # 2) Training pikcle saving with open(os.path.join(args.preprocess_path, 'augmented_processed.pkl'), 'wb') as f: pickle.dump( { 'augmented_comment_indices': augmented_dataset['comment'].tolist(), 'augmented_label': augmented_dataset['sentiment'].tolist(), }, f) print(f'Done! ; {round((time.time()-start_time)/60, 3)}min spend')
def train(model, dataloader, epochs, criterion):
    print("CUDA")
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    hidden_state = None
    for epoch in range(epochs):
        for ind_batch, (batch_vectors, batch_labels) in enumerate(dataloader):
            if cuda:
                batch_vectors = batch_vectors.to(device="cuda")
                batch_labels = batch_labels.to(device="cuda")

            optimizer.zero_grad()
            output, hidden_state = model(batch_vectors, None)
            loss = criterion(torch.squeeze(output[:, -1]), batch_labels.type(torch.float))
            loss.backward()
            optimizer.step()

            if ind_batch % 10 == 0:
                # loss.data[0] is deprecated; use loss.item() to get the Python float
                print("[Epoch {}, Batch {}/{}]: [Loss: {:03.2f}]".format(
                    epoch, ind_batch, len(dataloader), loss.item()))


if __name__ == "__main__":
    r_model = RNNRegressor()
    dataset = VectDataset(DATASET_SIZE)
    dataloader = data.DataLoader(dataset, batch_size=BATCH_SIZE, collate_fn=PadCollate(dim=0))
    train(r_model, dataloader=dataloader, epochs=1000, criterion=CRITERION)
def main_worker(gpu, ngpus_per_node, args): csv_file = None csv_writer = None args.gpu = gpu args.rank = args.start_rank + gpu TARGET_GPUS = [args.gpu] logger = None ckpt_path = "models" os.system("mkdir -p {}".format(ckpt_path)) if args.rank == 0: logger = init_logging(args.model, "{}/train.log".format(ckpt_path)) args_msg = [ ' %s: %s' % (name, value) for (name, value) in vars(args).items() ] logger.info('args:\n' + '\n'.join(args_msg)) csv_file = open(args.csv_file, 'w', newline='') csv_writer = csv.writer(csv_file) csv_writer.writerow(header) gpus = torch.IntTensor(TARGET_GPUS) ctc_crf_base.init_env(args.den_lm_fst_path, gpus) dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=args.world_size, rank=args.rank) torch.cuda.set_device(args.gpu) model = CAT_Model(args.arch, args.feature_size, args.hdim, args.output_unit, args.layers, args.dropout, args.lamb, args.ctc_crf) if args.rank == 0: params_msg = params_num(model) logger.info('\n'.join(params_msg)) lr = args.origin_lr optimizer = optim.Adam(model.parameters(), lr=lr) epoch = 0 prev_cv_loss = np.inf if args.checkpoint: checkpoint = torch.load(args.checkpoint) epoch = checkpoint['epoch'] lr = checkpoint['lr'] prev_cv_loss = checkpoint['cv_loss'] model.load_state_dict(checkpoint['model']) model.cuda(args.gpu) model = nn.parallel.DistributedDataParallel(model, device_ids=TARGET_GPUS) tr_dataset = SpeechDatasetPickel(args.tr_data_path) tr_sampler = DistributedSampler(tr_dataset) tr_dataloader = DataLoader(tr_dataset, batch_size=args.gpu_batch_size, shuffle=False, num_workers=args.data_loader_workers, pin_memory=True, collate_fn=PadCollate(), sampler=tr_sampler) cv_dataset = SpeechDatasetPickel(args.dev_data_path) cv_dataloader = DataLoader(cv_dataset, batch_size=args.gpu_batch_size, shuffle=False, num_workers=args.data_loader_workers, pin_memory=True, collate_fn=PadCollate()) prev_epoch_time = timeit.default_timer() while True: # training stage epoch += 1 tr_sampler.set_epoch(epoch) # important for data shuffle gc.collect() train(model, tr_dataloader, optimizer, epoch, args, logger) cv_loss = validate(model, cv_dataloader, epoch, args, logger) # save model if args.rank == 0: save_ckpt( { 'cv_loss': cv_loss, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict(), 'lr': lr, 'epoch': epoch }, cv_loss <= prev_cv_loss, ckpt_path, "model.epoch.{}".format(epoch)) csv_row = [ epoch, (timeit.default_timer() - prev_epoch_time) / 60, lr, cv_loss ] prev_epoch_time = timeit.default_timer() csv_writer.writerow(csv_row) csv_file.flush() plot_train_figure(args.csv_file, args.figure_file) if epoch < args.min_epoch or cv_loss <= prev_cv_loss: prev_cv_loss = cv_loss else: args.annealing_epoch = 0 lr = adjust_lr_distribute(optimizer, args.origin_lr, lr, cv_loss, prev_cv_loss, epoch, args.annealing_epoch, args.gpu_batch_size, args.world_size) if (lr < args.stop_lr): print("rank {} lr is too slow, finish training".format(args.rank), datetime.datetime.now(), flush=True) break ctc_crf_base.release_env(gpus)
def main(args):
    # Settings
    warnings.simplefilter("ignore", UserWarning)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Data loading
    print('Data loading and data splitting...')
    with open(args.data_path, 'rb') as f:
        data = pickle.load(f)

    src_word2id = data['hanja_word2id']
    src_vocab = [k for k in src_word2id.keys()]
    trg_word2id = data['korean_word2id']
    trg_vocab = [k for k in trg_word2id.keys()]

    train_src_list = data['train_hanja_indices']
    train_trg_list = data['train_korean_indices']
    train_add_hanja = data['train_additional_hanja_indices']
    valid_src_list = data['valid_hanja_indices']
    valid_trg_list = data['valid_korean_indices']
    valid_add_hanja = data['valid_additional_hanja_indices']

    src_vocab_num = len(src_vocab)
    trg_vocab_num = len(trg_vocab)

    del data
    print('Done!')

    # Dataset & DataLoader setting
    dataset_dict = {
        'train': CustomDataset(train_src_list, train_trg_list,
                               mask_idx=args.mask_idx, min_len=args.min_len,
                               src_max_len=args.src_max_len,
                               trg_max_len=args.trg_max_len),
        'valid': CustomDataset(valid_src_list, valid_trg_list,
                               mask_idx=args.mask_idx, min_len=args.min_len,
                               src_max_len=args.src_max_len,
                               trg_max_len=args.trg_max_len)
    }
    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'], collate_fn=PadCollate(),
                            drop_last=True, batch_size=args.batch_size,
                            shuffle=True, pin_memory=True),
        'valid': DataLoader(dataset_dict['valid'], collate_fn=PadCollate(),
                            drop_last=True, batch_size=args.batch_size,
                            shuffle=True, pin_memory=True)
    }
    print(f"Total number of training set iterations - "
          f"{len(dataset_dict['train'])}, {len(dataloader_dict['train'])}")

    # Model setting
    print("Instantiating models...")
    encoder = Encoder(src_vocab_num, args.embed_size, args.hidden_size,
                      n_layers=args.n_layers, pad_idx=args.pad_idx,
                      dropout=args.dropout,
                      embedding_dropout=args.embedding_dropout)
    decoder = Decoder(args.embed_size, args.hidden_size, trg_vocab_num,
                      n_layers=args.n_layers, pad_idx=args.pad_idx,
                      dropout=args.dropout,
                      embedding_dropout=args.embedding_dropout)
    seq2seq = Seq2Seq(encoder, decoder, device)

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, seq2seq.parameters()),
                           lr=args.lr, weight_decay=args.w_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step,
                                          gamma=args.lr_decay)
    # criterion = nn.CrossEntropyLoss(ignore_index=args.pad_idx)
    print(seq2seq)

    print('Model train start...')
    best_val_loss = None
    seq2seq.to(device)
    teacher_forcing_ratio = 1.0

    if not os.path.exists('./rnn_based/save'):
        os.mkdir('./rnn_based/save')

    for e in range(args.num_epoch):
        start_time_e = time.time()
        for phase in ['train', 'valid']:
            if phase == 'train':
                seq2seq.train()
            if phase == 'valid':
                seq2seq.eval()
                val_loss = 0

            total_loss_list = list()
            freq = args.print_freq - 1
            for (src, trg, _, _) in tqdm(dataloader_dict[phase]):
                # Source, Target sentence setting
                src = src.transpose(0, 1).to(device)
                trg = trg.transpose(0, 1).to(device)

                # Optimizer setting
                optimizer.zero_grad()

                # Model forward / loss calculation
                with torch.set_grad_enabled(phase == 'train'):
                    # Disable teacher forcing during validation
                    teacher_forcing_ratio_ = teacher_forcing_ratio if phase == 'train' else 0
                    output = seq2seq(src, trg,
                                     teacher_forcing_ratio=teacher_forcing_ratio_)
                    # Skip the first (BOS) time step when computing the loss
                    loss = F.cross_entropy(
                        output[1:].transpose(0, 1).contiguous().view(-1, trg_vocab_num),
                        trg[1:].transpose(0, 1).contiguous().view(-1),
                        ignore_index=args.pad_idx)
                    if phase == 'valid':
                        val_loss += loss.item()

                # If training, backpropagate and step the optimizer
                if phase == 'train':
                    loss.backward()
                    # Gradient clipping belongs here, after backward and before
                    # the optimizer step (it was originally applied once before
                    # the training loop, where it has no effect)
                    torch_utils.clip_grad_norm_(seq2seq.parameters(), args.grad_clip)
                    optimizer.step()

                # Periodically print the running loss and perplexity
                freq += 1
                if freq == args.print_freq:
                    total_loss = loss.item()
                    print("[loss:%5.2f][pp:%5.2f]" % (total_loss, math.exp(total_loss)))
                    total_loss_list.append(total_loss)
                    freq = 0

            # Finishing iteration
            if phase == 'train':
                pd.DataFrame(total_loss_list).to_csv(
                    './rnn_based/save/{} epoch_loss.csv'.format(e), index=False)
            if phase == 'valid':
                val_loss /= len(dataloader_dict['valid'])
                print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f | spend_time:%5.2fmin"
                      % (e, val_loss, math.exp(val_loss),
                         (time.time() - start_time_e) / 60))
                if not best_val_loss or val_loss < best_val_loss:
                    print("[!] saving model...")
                    torch.save(seq2seq.state_dict(),
                               './rnn_based/save/seq2seq_{}.pt'.format(e))
                    best_val_loss = val_loss

        scheduler.step()
        teacher_forcing_ratio *= 0.9

    print('Done!')
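# --- Note (not part of the original scripts) -----------------------------
# The loop above passes teacher_forcing_ratio to seq2seq(...) and decays it
# by 0.9 per epoch, forcing it to 0 during validation. The helper below is
# only a sketch of how such a ratio is usually consumed inside a decoder
# loop; the project's actual Seq2Seq/Decoder interfaces are not shown here,
# so the decoder call signature used below is an assumption.
import random

import torch


def decode_with_teacher_forcing(decoder, hidden, trg, teacher_forcing_ratio):
    """Illustrative decoding loop: with probability teacher_forcing_ratio the
    gold token is fed at the next step, otherwise the model's own prediction."""
    outputs = []
    input_token = trg[0]  # BOS token ids for the batch (trg is time-major)
    for t in range(1, trg.size(0)):
        logits, hidden = decoder(input_token, hidden)  # assumed interface
        outputs.append(logits)
        use_teacher = random.random() < teacher_forcing_ratio
        input_token = trg[t] if use_teacher else logits.argmax(dim=-1)
    return torch.stack(outputs), hidden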
def testing(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #===================================#
    #============Data Load==============#
    #===================================#

    print('Data Load & Setting!')
    with open(os.path.join(args.save_path, 'test_preprocessed.pkl'), 'rb') as f:
        data_ = pickle.load(f)

    src_vocab_num_dict = dict()

    total_test_text_indices_spm = data_['total_test_text_indices_spm']
    test_title_indices_spm = data_['test_title_indices_spm']
    test_content_indices_spm = data_['test_content_indices_spm']
    total_test_text_indices_khaiii = data_['total_test_text_indices_khaiii']
    test_title_indices_khaiii = data_['test_title_indices_khaiii']
    test_content_indices_khaiii = data_['test_content_indices_khaiii']
    total_test_text_indices_konlpy = data_['total_test_text_indices_konlpy']
    test_title_indices_konlpy = data_['test_title_indices_konlpy']
    test_content_indices_konlpy = data_['test_content_indices_konlpy']
    test_date_list = data_['test_date_list']
    test_ord_list = data_['test_ord_list']
    test_id_list = data_['test_id_list']
    word2id_spm = data_['word2id_spm']
    word2id_khaiii = data_['word2id_khaiii']
    word2id_konlpy = data_['word2id_konlpy']

    src_vocab_num_dict['spm'] = len(word2id_spm.keys())
    src_vocab_num_dict['khaiii'] = len(word2id_khaiii.keys())
    src_vocab_num_dict['konlpy'] = len(word2id_konlpy.keys())

    del data_

    test_dataset = CustomDataset(total_test_text_indices_spm,
                                 total_test_text_indices_khaiii,
                                 total_test_text_indices_konlpy,
                                 test_content_indices_spm,
                                 test_content_indices_khaiii,
                                 test_content_indices_konlpy,
                                 test_date_list, test_ord_list, test_id_list,
                                 isTrain=False,
                                 min_len=args.min_len, max_len=args.max_len)
    test_dataloader = DataLoader(test_dataset,
                                 collate_fn=PadCollate(isTrain=False),
                                 drop_last=False,
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False, pin_memory=True)
    print(f"Total number of test set iterations - "
          f"{len(test_dataset)}, {len(test_dataloader)}")
    print(f'{len(total_test_text_indices_spm) - len(test_dataset)} examples were excluded.')

    #===================================#
    #============Model load=============#
    #===================================#

    print("Load model")
    model = Total_model(args.model_type, src_vocab_num_dict, trg_num=2,
                        pad_idx=args.pad_idx, bos_idx=args.bos_idx,
                        eos_idx=args.eos_idx, max_len=args.max_len,
                        d_model=args.d_model, d_embedding=args.d_embedding,
                        n_head=args.n_head, d_k=args.d_k, d_v=args.d_v,
                        dim_feedforward=args.dim_feedforward,
                        dropout=args.dropout, bilinear=args.bilinear,
                        num_transformer_layer=args.num_transformer_layer,
                        num_rnn_layer=args.num_rnn_layer, device=device)
    model.load_state_dict(torch.load(os.path.join(args.model_path, 'model_saved.pt')))
    model = model.to(device)
    model = model.eval()

    #===================================#
    #=============Testing===============#
    #===================================#

    freq = 0
    start_time = time.time()

    for i, (total_src_spm, total_src_khaiii, total_src_konlpy,
            src_spm, src_khaiii, src_konlpy,
            date, order, id_) in enumerate(test_dataloader):

        # Source sentence setting
        total_src_spm = total_src_spm.to(device)
        total_src_khaiii = total_src_khaiii.to(device)
        total_src_konlpy = total_src_konlpy.to(device)
        src_spm = src_spm.to(device)
        src_khaiii = src_khaiii.to(device)
        src_konlpy = src_konlpy.to(device)

        with torch.no_grad():
            predicted_logit = model(total_src_spm, total_src_khaiii, total_src_konlpy,
                                    src_spm, src_khaiii, src_konlpy)
            predicted = predicted_logit.max(dim=1)[1].clone().tolist()
            if i == 0:
                id_list = id_
                info_list = predicted
            else:
                id_list = id_list + id_
                info_list = info_list + predicted

        # Progress print: first batch, last batch, and every test_print_freq batches
        if freq == args.test_print_freq or i == 0 or i == len(test_dataloader) - 1:
            spend_time = time.time() - start_time
            print('testing...[%d/%d] %2.2fmin spend'
                  % (i, len(test_dataloader), spend_time / 60))
            freq = 0
        freq += 1

    #===================================#
    #============Rule-base==============#
    #===================================#

    submission_id = pd.read_csv(os.path.join(args.data_path, 'sample_submission.csv'))['id']
    submission_pre = pd.DataFrame({
        'id': id_list,
        'info': info_list
    })
    submission_dat = pd.merge(pd.DataFrame(submission_id), submission_pre,
                              on='id', how='left')

    test_dat = pd.read_csv(os.path.join(args.data_path, 'news_test.csv'))
    nan_content = pd.merge(test_dat[['id', 'content']],
                           submission_dat.loc[submission_dat['info'].isnull()],
                           on='id', how='right')
    submission_dat = submission_dat.dropna()

    # Rule-based labelling for rows the model did not score.
    # Keywords (Korean): 무료 "free", 증권방송 "securities broadcast", 바로가기 "shortcut/link"
    rule_base_list = ['무료', '증권방송', '바로가기']
    for i, content in enumerate(nan_content['content']):
        # .loc avoids the chained-assignment pitfall of df['info'].iloc[i] = ...
        if any(rule in content for rule in rule_base_list):
            nan_content.loc[nan_content.index[i], 'info'] = 1
        else:
            nan_content.loc[nan_content.index[i], 'info'] = 0

    submission_dat = pd.concat([submission_dat, nan_content[['id', 'info']]])
    submission_dat = pd.merge(pd.DataFrame(submission_id), submission_dat,
                              on='id', how='left')

    # Cast predictions to integer labels
    submission_dat['info'] = submission_dat['info'].apply(int)

    #===================================#
    #======Submission csv setting=======#
    #===================================#

    submission_dat.to_csv(os.path.join(args.results_path, 'submission.csv'),
                          index=False, encoding='utf-8')