import os

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

import dataset  # assumed local module providing getDataLoader, as used below
import HyperParameters as hp
import resnet
import transformer.Constants as Constants
from transformer.Models import Transformer
from transformer.Translator import Translator

if __name__ == "__main__":
    torch.cuda.set_device(hp.gpu)
    testLoader = dataset.getDataLoader(is_train=False, batch_size=5, shuffle=False)

    net1 = resnet.resnet34()
    net2 = Transformer(len_encoder=hp.enc_input_len,
                       n_tgt_vocab=hp.num_classes,
                       len_max_seq=hp.max_seq_len,
                       n_layers=hp.n_layers)
    net2.word_prob_prj = nn.LogSoftmax(dim=1)

    net1.cuda().eval()
    #net2.cuda().eval()

    path_to_restore = os.path.join(hp.checkpoint_path,
                                   hp.model_path_pre + "_" + str(hp.model_path_idx) + ".pth")
    if os.path.exists(path_to_restore):
        print("restore from:", path_to_restore)
        checkpoint = torch.load(path_to_restore)
        net1.load_state_dict(checkpoint["state_dict_net1"])
        net2.load_state_dict(checkpoint["state_dict_net2"])
        print("restore successfully!")
    else:
        print("checkpoint not found:", path_to_restore)
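# For reference, a checkpoint in the format loaded above would have been
# written by the training side with something like the following sketch
# (whether extra fields such as optimizer state or epoch are included is
# an assumption; only the two state_dict keys are confirmed by the loader):
#
#   torch.save({"state_dict_net1": net1.state_dict(),
#               "state_dict_net2": net2.state_dict()},
#              path_to_restore)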
import os
import time
import datetime
from glob import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils as torch_utils
from tqdm import tqdm

# CustomDataset, getDataLoader, and littleBert are project-local;
# their module paths are not shown in this fragment.


def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load data
    print('Data Loading...')
    start_time = time.time()
    data_list = sorted(glob(os.path.join(args.data_path, args.data_type)))
    dataset_dict = {
        'train': CustomDataset(data_list[0]),
        'valid': CustomDataset(data_list[1])
    }
    dataloader_dict = {
        'train': getDataLoader(dataset_dict['train'], shuffle=True, drop_last=True,
                               pin_memory=True, batch_size=args.batch_size,
                               num_workers=args.num_workers),
        'valid': getDataLoader(dataset_dict['valid'], shuffle=True, drop_last=True,
                               pin_memory=True, batch_size=args.batch_size,
                               num_workers=args.num_workers)
    }

    # Model setting
    model = littleBert(pad_idx=args.pad_idx, bos_idx=args.bos_idx, eos_idx=args.eos_idx,
                       max_len=args.max_len, d_model=args.d_model,
                       d_embedding=args.d_embedding, n_head=args.n_head,
                       dim_feedforward=args.dim_feedforward, n_layers=args.n_layers,
                       dropout=args.dropout, device=device)
    model = model.to(device)

    # Optimizer setting
    criterion = nn.MSELoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=args.learning_rate)
    # Decay LR by a factor of lr_decay_gamma every lr_decay_step epochs
    lr_step_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step,
                                                  gamma=args.lr_decay_gamma)

    # Preparing
    best_val_loss = None
    total_loss_list = []
    now = datetime.datetime.now()
    nowDatetime = now.strftime('%Y-%m-%d %H_%M_%S')
    if not os.path.exists('./save/'):
        os.mkdir('./save/')
    if not os.path.exists(f'./save/save_{nowDatetime}'):
        os.mkdir(f'./save/save_{nowDatetime}')

    hyper_parameter_setting = dict()
    hyper_parameter_setting['n_layers'] = args.n_layers
    hyper_parameter_setting['d_model'] = args.d_model
    hyper_parameter_setting['n_head'] = args.n_head
    hyper_parameter_setting['d_embedding'] = args.d_embedding
    hyper_parameter_setting['dim_feedforward'] = args.dim_feedforward
    with open(f'./save/save_{nowDatetime}/hyper_parameter_setting.txt', 'w') as f:
        for key in hyper_parameter_setting.keys():
            f.write(str(key) + ': ' + str(hyper_parameter_setting[key]))
            f.write('\n')

    spend_time = round((time.time() - start_time) / 60, 4)
    print(f'Setting done...! / {spend_time}min spend...!')

    for epoch in range(args.num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.num_epochs))
        start_time_e = time.time()
        for phase in ['train', 'valid']:
            running_loss = 0
            val_loss = 0
            freq = args.print_freq - 1
            if phase == 'train':
                model.train()
            else:
                model.eval()
            print(dataloader_dict[phase])

            # Iterate over data
            for i, input_ in enumerate(tqdm(dataloader_dict[phase])):
                # Move inputs to device (CUDA) and split
                src = input_[0].to(device)
                src_hour = input_[2].to(device)
                src_weekday = input_[3].to(device)
                trg = input_[4].to(device)
                src_location = input_[5].to(device)

                optimizer.zero_grad()

                # Model training & validation
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(src, src_hour, src_weekday, src_location)

                    # Backpropagate loss
                    loss = criterion(outputs, trg.to(torch.float))
                    if phase == 'train':
                        loss.backward()
                        torch_utils.clip_grad_norm_(model.parameters(), args.grad_clip)
                        optimizer.step()

                        # Print every print_freq iterations
                        freq += 1
                        if freq == args.print_freq:
                            total_loss = loss.item()
                            print("[loss:%5.2f]" % (total_loss))
                            total_loss_list.append(total_loss)
                            freq = 0

                if phase == 'valid':
                    val_loss += loss.item()

            # Save model and view total loss
            if phase == 'valid':
                print('=' * 45)
                val_loss /= len(dataloader_dict['valid'])
                print("[Epoch:%d] val_loss:%5.3f | spend_time:%5.2fmin"
                      % (epoch + 1, val_loss, (time.time() - start_time_e) / 60))
                if not best_val_loss or val_loss < best_val_loss:
                    print("[!] saving model...")
                    val_loss_save = round(val_loss, 2)
                    torch.save(model.state_dict(),
                               f'./save/save_{nowDatetime}/model_{epoch + 1}_{val_loss_save}.pt')
                    best_val_loss = val_loss

        # Learning-rate scheduler step
        lr_step_scheduler.step()
import torch
import torch.nn as nn
import torch.optim as optim

# PreprocessedData, classifier, getDataLoader, run, and makePlotData are
# project-local; CUDA and DEVICE are module-level globals defined elsewhere.


def main():
    num_workers = 8 if CUDA else 0
    nepochs = 20
    lr = 1e-4

    # Criterion
    criterion = nn.CrossEntropyLoss()

    # Hyperparameter grid
    num_layers = [2, 4, 6]
    embedding_dims = [128, 256, 512]
    drop_outs = [.2, .4]
    batches = [64, 16]
    max_lengths = [64, 32]
    equal = [True, False]

    # Run training and testing loop over the full grid
    for eqs in equal:
        eq_name = 'equal' if eqs else 'unequal'
        for d in drop_outs:
            for embedding_dim in embedding_dims:
                for nLayer in num_layers:
                    for b in batches:
                        train_losses = []
                        train_accs = []
                        test_losses = []
                        test_accs = []
                        ms = []
                        for m in max_lengths:
                            max_sentence_length = m
                            print('Batchsize: %d, %s, Length: %d, nLayers:%d, EmbedDim:%d Dropout:%f'
                                  % (b, eq_name, m, nLayer, embedding_dim, d))
                            dataset = PreprocessedData(
                                ["./data/architecture_dz-cleaned-tagged.json",
                                 "./data/design_dz-cleaned-tagged.json",
                                 "./data/technology_dz-cleaned-tagged.json"],
                                ["./data/architecture_dz-cleaned.json",
                                 "./data/design_dz-cleaned.json",
                                 "./data/technology_dz-cleaned.json"],
                                max_sentence_length, eqs)

                            # Hyperparameters
                            batch_size = b
                            vocab_size = dataset.vocab_size
                            num_hidden_nodes = embedding_dim
                            num_output_nodes = 2
                            bidirection = True
                            dropout = d

                            train_loader = getDataLoader(batch_size, num_workers,
                                                         dataset, CUDA, True)
                            dev_loader = getDataLoader(batch_size, num_workers,
                                                       dataset, CUDA, False)

                            # Instantiate model and optimizer
                            model = classifier(vocab_size, embedding_dim, num_hidden_nodes,
                                               num_output_nodes, nLayer,
                                               bidirectional=bidirection, dropout=dropout)
                            model.to(DEVICE)
                            optimizer = optim.Adam(model.parameters(), lr=lr)

                            data = run(model, optimizer, criterion,
                                       train_loader, dev_loader, nepochs)
                            train_l, train_a, test_l, test_a = data
                            train_losses.append(train_l)
                            train_accs.append(train_a)
                            test_losses.append(test_l)
                            test_accs.append(test_a)
                            ms.append(m)

                            del model
                            torch.cuda.empty_cache()

                        # Make plot data
                        name_starter = ('Batch:%d-%s-Length:%d-LR:%f-nLayers:%d-EmbedDim:%d-Drop%.2f'
                                        % (b, eq_name, m, lr, nLayer, embedding_dim, d))
                        makePlotData(ms, train_losses, test_losses, train_accs, test_accs,
                                     eq_name, name_starter)
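# A conventional entry point (not part of the original fragment). Note the
# size of the sweep above: 2 (equal) x 2 (dropouts) x 3 (embedding dims)
# x 3 (layer counts) x 2 (batch sizes) = 72 configurations, each trained
# once per max length (2 values), i.e. 144 calls to run(...) at 20 epochs each.
if __name__ == '__main__':
    main()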
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR

import dataset  # assumed local module providing getDataLoader
import HyperParameters as hp
import resnet
import transformer.Constants as Constants
from transformer.Models import Transformer

# Tail of an elided checkpoint helper in the original fragment:
#     torch.save(save_model, save_name)


if __name__ == "__main__":
    torch.cuda.set_device(hp.gpu)

    net1 = resnet.resnet34()
    net2 = Transformer(len_encoder=hp.enc_input_len,
                       n_tgt_vocab=hp.num_classes,
                       len_max_seq=hp.MAX_LEN,
                       n_layers=hp.n_layers)
    net1 = net1.cuda()
    net2 = net2.cuda()

    trainLoader = dataset.getDataLoader(is_train=True, batch_size=hp.BATCH_SIZE, shuffle=True)
    iter_one_epoch = len(trainLoader)
    print("iteration_every_epoch: ", iter_one_epoch)
    #testloader = dataset.getDataLoader(is_train=False, batch_size=BATCH_SIZE, shuffle=False)

    # PAD positions are masked out of the loss
    lossFunction = nn.CrossEntropyLoss(ignore_index=Constants.PAD)
    optimizer_ = optim.Adam(
        [{'params': net1.parameters()},
         {'params': filter(lambda x: x.requires_grad, net2.parameters())}],
        betas=[0.9, 0.98],
        lr=hp.LEARNING_RATE)
    optimizer = optimizer_
    optimizer_scheduler = ExponentialLR(optimizer_, 0.98)
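    # A minimal sketch of the training loop these objects would drive. The
    # batch layout (images plus target token sequences), the way net1's CNN
    # features feed net2, net2's forward signature, and hp.EPOCHS are all
    # assumptions; the original loop is not part of this fragment.
    #
    #   for epoch in range(hp.EPOCHS):
    #       for images, tgt_seq in trainLoader:
    #           images, tgt_seq = images.cuda(), tgt_seq.cuda()
    #           optimizer.zero_grad()
    #           enc_features = net1(images)            # CNN feature extractor
    #           logits = net2(enc_features, tgt_seq)   # assumed forward signature
    #           loss = lossFunction(logits.view(-1, hp.num_classes), tgt_seq.view(-1))
    #           loss.backward()
    #           optimizer.step()
    #       optimizer_scheduler.step()                 # decay LR by 0.98 per epoch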
parser.add_argument('-seq_length', type=int, required=True,
                    help='sequences length')
parser.add_argument('-cnn_type', type=str, required=True,
                    help='features extractor cnn type')
parser.add_argument('-gpu', action="store_true",
                    help='use gpu or not')
args = parser.parse_args()
print(args.model)
print(args.gpu)

model = getModel(model_type=args.model, use_gpu=args.gpu)

train_loader = getDataLoader(args.seq_dir, args.seq_dir + '/train_metadata.txt',
                             args.seq_length, args.cnn_type)
print('get train loader done')
val_loader = getDataLoader(args.seq_dir, args.seq_dir + '/test_metadata.txt',
                           args.seq_length, args.cnn_type)
print('get val loader done')

checkpoints_path = os.path.join(conf.CHECKPOINTS_PATH, args.model,
                                datetime.now().isoformat())
if not os.path.exists(checkpoints_path):
    os.makedirs(checkpoints_path)
checkpoints_path = os.path.join(checkpoints_path, '{model}-{epoch}-{type}.pth')

loss_function = nn.CrossEntropyLoss()
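# `checkpoints_path` above ends up as a str.format template. During training,
# a concrete checkpoint file name would be produced like this (the epoch
# number and the 'best' tag are illustrative values, not from the original):
#
#   save_path = checkpoints_path.format(model=args.model, epoch=10, type='best')
#   torch.save(model.state_dict(), save_path)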