Example 1
import os

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

import dataset
import HyperParameters as hp
import resnet
import transformer.Constants as Constants
from transformer.Models import Transformer
from transformer.Translator import Translator

if __name__ == "__main__":
	torch.cuda.set_device(hp.gpu)
	testLoader = dataset.getDataLoader(is_train=False, batch_size=5, shuffle=False)

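	# net1: ResNet-34 backbone; net2: Transformer that decodes a label sequence (hp.num_classes vocabulary) from the CNN features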
	net1 = resnet.resnet34()
	net2 = Transformer(len_encoder=hp.enc_input_len, n_tgt_vocab=hp.num_classes, len_max_seq=hp.max_seq_len, n_layers=hp.n_layers)
	net2.word_prob_prj = nn.LogSoftmax(dim=1)
	net1.cuda().eval()
	#net2.cuda().eval()

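	# restore weights for both networks from the checkpoint path built from HyperParameters, if it exists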
	path_to_restore = os.path.join(hp.checkpoint_path, hp.model_path_pre+"_"+str(hp.model_path_idx) + ".pth")
	if os.path.exists(path_to_restore):
		print("restore from:", path_to_restore)
		checkpoint = torch.load(path_to_restore)
		net1.load_state_dict(checkpoint["state_dict_net1"])
		net2.load_state_dict(checkpoint["state_dict_net2"])
		print("restore successfully!")
	else:
		print("checkpoint not found:", path_to_restore)
Example 2
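# Imports this snippet appears to rely on (a sketch; CustomDataset, getDataLoader and
# littleBert are project-specific helpers defined elsewhere, and torch_utils is assumed
# to be torch.nn.utils):
import os
import time
import datetime
from glob import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils as torch_utils
from tqdm import tqdm
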
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Data
    print('Data Loading...')
    start_time = time.time()

    data_list = sorted(glob(os.path.join(args.data_path, args.data_type)))


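    # data_list is sorted, so index 0 is expected to be the train split and index 1 the valid split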
    dataset_dict = {
        'train': CustomDataset(data_list[0]),
        'valid': CustomDataset(data_list[1])
    }
    
    dataloader_dict = {
        'train':  getDataLoader(dataset_dict['train'], 
                                shuffle=True,
                                drop_last=True, 
                                pin_memory=True, 
                                batch_size=args.batch_size,
                                num_workers=args.num_workers),
        'valid':  getDataLoader(dataset_dict['valid'], 
                                shuffle=True, 
                                drop_last=True, 
                                pin_memory=True, 
                                batch_size=args.batch_size,
                                num_workers=args.num_workers)}


    # Model setting
    model = littleBert(pad_idx=args.pad_idx, bos_idx=args.bos_idx, eos_idx=args.eos_idx, 
                       max_len=args.max_len, d_model=args.d_model, d_embedding=args.d_embedding, 
                       n_head=args.n_head, dim_feedforward=args.dim_feedforward,
                       n_layers=args.n_layers, dropout=args.dropout, device=device)
    model = model.to(device)

    # Optimizer Setting
    criterion = nn.MSELoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.learning_rate)
    lr_step_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step, gamma=args.lr_decay_gamma) # Decay LR by a factor of 0.1 every step_size

    # Preparing
    best_val_loss = None
    total_loss_list = list()    # collects the periodic training-loss printouts
    now = datetime.datetime.now()
    nowDatetime = now.strftime('%Y-%m-%d %H_%M_%S')
    if not os.path.exists('./save/'):
        os.mkdir('./save/')
    if not os.path.exists(f'./save/save_{nowDatetime}'):
        os.mkdir(f'./save/save_{nowDatetime}')
    hyper_parameter_setting = dict()
    hyper_parameter_setting['n_layers'] = args.n_layers
    hyper_parameter_setting['d_model'] = args.d_model
    hyper_parameter_setting['n_head'] = args.n_head
    hyper_parameter_setting['d_embedding'] = args.d_embedding
    hyper_parameter_setting['dim_feedforward'] = args.dim_feedforward
    with open(f'./save/save_{nowDatetime}/hyper_parameter_setting.txt', 'w') as f:
        for key in hyper_parameter_setting.keys():
            f.write(str(key) + ': ' + str(hyper_parameter_setting[key]))
            f.write('\n')

    spend_time = round((time.time() - start_time) / 60, 4)
    print(f'Setting done...! / {spend_time}min spend...!')

    for epoch in range(args.num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.num_epochs))
        start_time_e = time.time()
        for phase in ['train', 'valid']:
            val_loss = 0
            freq = args.print_freq - 1
            if phase == 'train':
                model.train()
            else:
                model.eval()
           
            print(dataloader_dict[phase])
           

            # Iterate over data
            for i, input_ in enumerate(tqdm(dataloader_dict[phase])):

               # Input to Device(CUDA) and split
                src = input_[0].to(device)
                src_hour = input_[2].to(device)
                src_weekday = input_[3].to(device)
                trg = input_[4].to(device)
                src_location = input_[5].to(device)

                # Optimizer Setting
                optimizer.zero_grad()

                # Model Training & Validation
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(src, src_hour, src_weekday, src_location)

                    # Backpropagate Loss
                    loss = criterion(outputs, trg.to(torch.float))
                    if phase == 'train':
                        loss.backward()
                        torch_utils.clip_grad_norm_(model.parameters(), args.grad_clip)
                        optimizer.step()

                        # Print every print_frequency
                        freq += 1
                        if freq == args.print_freq:
                            total_loss = loss.item()
                            print("[loss:%5.2f]" % (total_loss))
                            total_loss_list.append(total_loss)
                            freq = 0
                    if phase == 'valid':
                        val_loss += loss.item()

            # Save model and view total loss
            if phase == 'valid': 
                print('='*45)
                val_loss /= len(dataloader_dict['valid'])
                print("[Epoch:%d] val_loss:%5.3f | spend_time:%5.2fmin"
                        % (e, val_loss, (time.time() - start_time_e) / 60))
                if not best_val_loss or val_loss < best_val_loss:
                    print("[!] saving model...")
                    val_loss_save = round(val_loss, 2)
                    torch.save(model.state_dict(), f'./save/save_{nowDatetime}/model_{epoch + 1}_{val_loss_save}.pt')
                    best_val_loss = val_loss

        # Gradient Scheduler Step
        lr_step_scheduler.step()
Example 3
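# Globals and imports this snippet appears to rely on (a sketch; PreprocessedData,
# getDataLoader, classifier, run and makePlotData are project-specific helpers, and
# save_model / save_name near the end are likewise assumed to be defined elsewhere):
import torch
import torch.nn as nn
import torch.optim as optim

CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if CUDA else "cpu")
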
def main():
    num_workers = 8 if CUDA else 0
    nepochs = 20
    lr = 1e-4

    # Criterion & Optimizer
    criterion = nn.CrossEntropyLoss()
    num_layers = [2, 4, 6]
    embedding_dims = [128, 256, 512]
    drop_outs = [.2, .4]

    # Run training and testing loop
    batches = [64, 16]
    max_lengths = [64, 32]
    equal = [True, False]
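    # exhaustive grid search: one training run per combination of the settings above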
    for eqs in equal:
        eq_name = 'equal' if eqs else 'unequal'
        for d in drop_outs:
            for embedding_dim in embedding_dims:
                for nLayer in num_layers:
                    for b in batches:
                        train_losses = []
                        train_accs = []
                        test_losses = []
                        test_accs = []
                        ms = []
                        for m in max_lengths:
                            max_sentence_length = m
                            print(
                                'Batchsize: %d, %s, Length: %d, nLayers:%d, EmbedDim:%d Dropout:%f'
                                % (b, eq_name, m, nLayer, embedding_dim, d))
                            dataset = PreprocessedData([
                                "./data/architecture_dz-cleaned-tagged.json",
                                "./data/design_dz-cleaned-tagged.json",
                                "./data/technology_dz-cleaned-tagged.json"
                            ], [
                                "./data/architecture_dz-cleaned.json",
                                "./data/design_dz-cleaned.json",
                                "./data/technology_dz-cleaned.json"
                            ], max_sentence_length, eqs)

                            # Hyperparameters
                            batch_size = b

                            vocab_size = dataset.vocab_size

                            num_hidden_nodes = embedding_dim
                            num_output_nodes = 2

                            bidirection = True
                            dropout = d

                            train_loader = getDataLoader(
                                batch_size, num_workers, dataset, CUDA, True)
                            dev_loader = getDataLoader(batch_size, num_workers,
                                                       dataset, CUDA, False)

                            # Instantiate
                            model = classifier(vocab_size,
                                               embedding_dim,
                                               num_hidden_nodes,
                                               num_output_nodes,
                                               nLayer,
                                               bidirectional=bidirection,
                                               dropout=dropout)
                            model.to(DEVICE)
                            optimizer = optim.Adam(model.parameters(), lr=lr)

                            data = run(model, optimizer, criterion,
                                       train_loader, dev_loader, nepochs)

                            train_l, train_a, test_l, test_a = data
                            train_losses.append(train_l)
                            train_accs.append(train_a)
                            test_losses.append(test_l)
                            test_accs.append(test_a)
                            ms.append(m)

                            del model
                            torch.cuda.empty_cache()

                        # Make plot data
                        name_starter = 'Batch:%d-%s-Length:%d-LR:%f-nLayers:%d-EmbedDim:%d-Drop%.2f' % (
                            b, eq_name, m, lr, nLayer, embedding_dim, d)
                        makePlotData(ms, train_losses, test_losses, train_accs,
                                     test_accs, eq_name, name_starter)
            torch.save(save_model, save_name)


Example 4

if __name__ == "__main__":
    torch.cuda.set_device(hp.gpu)

    net1 = resnet.resnet34()
    net2 = Transformer(len_encoder=hp.enc_input_len,
                       n_tgt_vocab=hp.num_classes,
                       len_max_seq=hp.MAX_LEN,
                       n_layers=hp.n_layers)
    net1 = net1.cuda()
    net2 = net2.cuda()

    trainLoader = dataset.getDataLoader(is_train=True,
                                        batch_size=hp.BATCH_SIZE,
                                        shuffle=True)
    iter_one_epoch = len(trainLoader)
    print("iteration_every_epoch: ", iter_one_epoch)
    #testloader = dataset.getDataLoader(is_train=False, batch_size=BATCH_SIZE, shuffle=False)
    lossFunction = nn.CrossEntropyLoss(ignore_index=Constants.PAD)
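    # a single Adam optimizer updates the ResNet and the trainable Transformer parameters together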
    optimizer_ = optim.Adam(
        [{
            'params': net1.parameters()
        }, {
            'params': filter(lambda x: x.requires_grad, net2.parameters())
        }],
        betas=[0.9, 0.98],
        lr=hp.LEARNING_RATE)
    optimizer = optimizer_
    optimizer_scheduler = ExponentialLR(optimizer_, 0.98)
Example 5

    # earlier argparse setup (parser creation and other arguments) is not shown here
    parser.add_argument('-seq_length',
                        type=int,
                        required=True,
                        help='sequences length')
    parser.add_argument('-cnn_type',
                        type=str,
                        required=True,
                        help='features extractor cnn type')
    parser.add_argument('-gpu', action="store_true", help='use gpu or not')
    args = parser.parse_args()

    print(args.model)
    print(args.gpu)

    model = getModel(model_type=args.model, use_gpu=args.gpu)

    train_loader = getDataLoader(args.seq_dir,
                                 args.seq_dir + '/train_metadata.txt',
                                 args.seq_length, args.cnn_type)
    print('get train loader done')
    val_loader = getDataLoader(args.seq_dir,
                               args.seq_dir + '/test_metadata.txt',
                               args.seq_length, args.cnn_type)
    print('get val loader done')

    checkpoints_path = os.path.join(conf.CHECKPOINTS_PATH, args.model,
                                    datetime.now().isoformat())
    if not os.path.exists(checkpoints_path):
        os.makedirs(checkpoints_path)
    checkpoints_path = os.path.join(checkpoints_path,
                                    '{model}-{epoch}-{type}.pth')

    loss_function = nn.CrossEntropyLoss()