def main(args):
    pl.seed_everything(args.seed)
    torch.multiprocessing.set_sharing_strategy('file_system')
    args.multigpu = torch.cuda.device_count() > 1

    train_data = load_data(args.train, args.add_eos, args.cat_sent, args.max_len)
    valid_data = load_data(args.valid, args.add_eos, args.cat_sent, args.max_len)

    os.makedirs(args.root_dir, exist_ok=True)
    vocab_file = os.path.join(args.root_dir, 'vocab.txt')
    if not os.path.isfile(vocab_file):
        # Build the vocabulary once; only the 'lblm' model type uses a maximum blank length.
        max_blank_len = args.max_len if args.model_type == 'lblm' else None
        Vocab.build(train_data, vocab_file, args.vocab_size, max_blank_len)
    vocab = Vocab(vocab_file)
    args.vocab_size = vocab.size

    # Worker processes are disabled when running multi-GPU (DDP).
    train_dl = get_train_dataloader(
        train_data, vocab, args.max_tok,
        data_workers=args.data_workers if not args.multigpu else 0,
        model_type=args.model_type)
    val_dl = get_eval_dataloader(
        valid_data, vocab, args.eval_max_tok,
        data_workers=args.data_workers if not args.multigpu else 0,
        model_type=args.model_type)

    model = get_model_class(args.model_type)(args)

    trainer = pl.Trainer(
        accumulate_grad_batches=args.accum_grad,
        max_steps=args.max_steps,
        callbacks=[LearningRateMonitor()] if args.lr_schedule != 'fixed' else None,
        val_check_interval=args.val_check_interval if args.val_check_interval > 0 else 1.0,
        gpus=args.gpus,
        distributed_backend='ddp' if args.multigpu else None,
        amp_level=args.fp16_opt_level,
        precision=16 if args.fp16 else 32,
        default_root_dir=args.root_dir,
        resume_from_checkpoint=args.load_checkpoint)
    trainer.fit(model, train_dataloader=train_dl, val_dataloaders=val_dl)
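# --------------------------------------------------------------------------
# Hedged sketch (not part of the original source): one plausible way the
# token-budget batching behind get_train_dataloader (args.max_tok) could
# work. The helper name `make_token_batches` and the exact grouping policy
# are assumptions; the real dataloader may differ.
def make_token_batches(sequences, max_tok):
    """Group token-id lists into batches whose padded size stays under max_tok."""
    batches, current, current_max_len = [], [], 0
    # Sorting by length keeps padding waste low within a batch.
    for seq in sorted(sequences, key=len):
        new_max_len = max(current_max_len, len(seq))
        if current and new_max_len * (len(current) + 1) > max_tok:
            batches.append(current)
            current, current_max_len = [], 0
            new_max_len = len(seq)
        current.append(seq)
        current_max_len = new_max_len
    if current:
        batches.append(current)
    return batches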
model.apply(inplace_relu)

# Alternative optimizers, kept for reference:
# optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
# AdaBound starts out Adam-like and transitions towards SGD as the step size is bounded by final_lr.
optimizer = adabound.AdaBound(model.parameters(), lr=args.lr, final_lr=0.1)
criterion = RegressionLoss()

tensorboard_dir = os.path.join(args.runs, args.name)
writer = SummaryWriter(tensorboard_dir)

checkpoint_path = os.path.join(args.checkpoints_dir, args.name)
if not os.path.exists(checkpoint_path):
    os.makedirs(checkpoint_path)
checkpoints = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')

best_criterion = -1
lr = args.lr
train_loader, val_loader = get_train_dataloader(args)
test_loader = get_test_dataloader(args)

for epoch in range(1, args.epochs + 1):
    y_l = np.array([])  # labels accumulated over the epoch
    y_p = np.array([])  # predictions accumulated over the epoch
    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']
    time_cost, y_l, y_p = train(epoch, y_l, y_p)
    print("====================Epoch:{}==================== Learning Rate:{:.5f}"
          .format(epoch, current_lr))
    SROCC, KROCC, PLCC, RMSE, Acc = evaluate(y_l, y_p)
    writer.add_scalar('Train/SROCC', SROCC, epoch)
    print(
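# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): how evaluate(y_l, y_p) could
# compute the correlation metrics logged above, using scipy. The threshold
# used for Acc is an assumption; the real implementation may bucket scores
# differently.
import numpy as np
from scipy import stats

def evaluate_sketch(y_label, y_pred):
    srocc = stats.spearmanr(y_label, y_pred)[0]       # rank correlation
    krocc = stats.kendalltau(y_label, y_pred)[0]      # rank correlation
    plcc = stats.pearsonr(y_label, y_pred)[0]         # linear correlation
    rmse = np.sqrt(np.mean((y_label - y_pred) ** 2))  # regression error
    # Assumed accuracy proxy: fraction of predictions within 0.5 of the label.
    acc = np.mean(np.abs(y_label - y_pred) < 0.5)
    return srocc, krocc, plcc, rmse, acc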
if __name__ == '__main__':
    args = parse_args()
    print(args)

    use_cuda = not args.use_cpu and torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    bs = args.train_batch_size

    dataset_version = args.dataset_version
    # Evaluation batch size depends on the dataset version.
    eval_batch = 2 if dataset_version == 'flip' else (1 if dataset_version == 'single' else 6)

    train_dataloader = get_train_dataloader(args.data_dir,
                                            args.train_batch_size,
                                            dataset_version,
                                            shuffle=True,
                                            use_transforms=args.augmentations)
    test_dataloader = get_test_dataloader(args.data_dir,
                                          args.test_batch_size,
                                          dataset_version,
                                          shuffle=True,
                                          use_transforms=args.augmentations)
    # Un-augmented, fixed-order loaders used only for computing metrics.
    metrics_train_dataloader = get_train_dataloader(args.data_dir,
                                                    eval_batch,
                                                    dataset_version,
                                                    shuffle=False,
                                                    use_transforms=False)
    metrics_test_dataloader = get_test_dataloader(args.data_dir,
                                                  eval_batch,
                                                  dataset_version,
                                                  shuffle=False,
                                                  use_transforms=False)
def main():
    args = parse_args()

    # Set the GPU to use
    torch.cuda.set_device(args.gpu)

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    vqa_loader = dataset.get_train_dataloader(osp.expanduser(args.annotations),
                                              osp.expanduser(args.questions),
                                              args.images, args,
                                              raw_images=args.raw_images,
                                              transforms=transform)
    # We always use the vocab from the training set
    vocab = vqa_loader.dataset.vocab
    maps = {
        "vocab": vocab,
        "word_to_wid": vqa_loader.dataset.word_to_wid,
        "wid_to_word": vqa_loader.dataset.wid_to_word,
        "ans_to_aid": vqa_loader.dataset.ans_to_aid,
        "aid_to_ans": vqa_loader.dataset.aid_to_ans,
    }

    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    val_loader = dataset.get_val_dataloader(osp.expanduser(args.val_annotations),
                                            osp.expanduser(args.val_questions),
                                            args.val_images, args,
                                            raw_images=args.raw_images,
                                            maps=maps, vocab=vocab,
                                            shuffle=False,
                                            transforms=val_transform)

    arch = Models[args.arch].value
    model = arch(len(vocab),
                 output_dim=args.top_answer_limit,
                 raw_images=args.raw_images)

    if args.resume:
        state = torch.load(args.resume)
        model.load_state_dict(state["model"])

    model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           betas=tuple(args.betas),
                           weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.decay_interval,
                                    gamma=args.lr_decay)

    if args.visualize:
        vis = visualize.Visualizer(args.port)
    else:
        vis = None

    print("Beginning training")
    print("#" * 80)

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        trainer.train(model, vqa_loader, criterion, optimizer, epoch, args, vis=vis)
        trainer.evaluate(model, val_loader, criterion, epoch, args, vis=vis)

    print("Training complete!")
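# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): a checkpoint format compatible
# with the resume path above, which loads state["model"]. The extra fields
# (optimizer state, epoch) are assumptions about what else would be saved.
import torch

def save_checkpoint_sketch(model, optimizer, epoch, path):
    torch.save({
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch,
    }, path)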
def main():
    args = parse_args()

    # Set the GPU to use
    torch.cuda.set_device(args.gpu)

    annotations = osp.expanduser(args.annotations)
    questions = osp.expanduser(args.questions)
    vqa_loader = dataset.get_train_dataloader(annotations, questions,
                                              args.images, args)
    # We always use the vocab from the training set
    vocab = vqa_loader.dataset.vocab
    maps = {
        "word_to_wid": vqa_loader.dataset.word_to_wid,
        "wid_to_word": vqa_loader.dataset.wid_to_word,
        "ans_to_aid": vqa_loader.dataset.ans_to_aid,
        "aid_to_ans": vqa_loader.dataset.aid_to_ans,
    }
    val_loader = dataset.get_val_dataloader(osp.expanduser(args.val_annotations),
                                            osp.expanduser(args.val_questions),
                                            args.val_images, args,
                                            maps=maps, vocab=vocab,
                                            shuffle=False)

    arch = Models[args.arch].value
    model = arch(len(vocab), output_dim=args.top_answer_limit)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           betas=tuple(args.betas),
                           weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.decay_interval,
                                    gamma=args.lr_decay)

    vis = visualize.Visualizer(args.port)

    print("Beginning training")
    print("#" * 80)

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        trainer.train(model, vqa_loader, criterion, optimizer, epoch, args, vis=vis)
        trainer.evaluate(model, val_loader, criterion, epoch, args, vis=vis)

    print("Training complete!")
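# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): a minimal version of what
# trainer.train() is assumed to do each epoch for this answer-classification
# setup. The batch keys ("question", "image", "answer_id") and the model call
# signature are assumptions about the dataset and architecture.
import torch

def train_epoch_sketch(model, loader, criterion, optimizer, device="cuda"):
    model.train()
    running_loss = 0.0
    for batch in loader:
        questions = batch["question"].to(device)
        images = batch["image"].to(device)
        targets = batch["answer_id"].to(device)

        optimizer.zero_grad()
        logits = model(questions, images)   # scores over the top answers
        loss = criterion(logits, targets)   # cross-entropy over answer ids
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss / max(len(loader), 1)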
def main():
    args = parse_args()

    # Set the GPU to use
    torch.cuda.set_device(args.gpu)

    vqa_loader = dataset.get_train_dataloader(osp.expanduser(args.annotations),
                                              osp.expanduser(args.questions),
                                              args.images, args,
                                              raw_images=args.raw_images,
                                              transforms=None)
    # We always use the vocab from the training set
    vocab = vqa_loader.dataset.vocab
    maps = {
        "vocab": vocab,
        "word_to_wid": vqa_loader.dataset.word_to_wid,
        "wid_to_word": vqa_loader.dataset.wid_to_word,
        "ans_to_aid": vqa_loader.dataset.ans_to_aid,
        "aid_to_ans": vqa_loader.dataset.aid_to_ans,
    }

    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    val_loader = dataset.get_val_dataloader(osp.expanduser(args.val_annotations),
                                            osp.expanduser(args.val_questions),
                                            args.val_images, args,
                                            raw_images=args.raw_images,
                                            maps=maps, vocab=vocab,
                                            shuffle=False,
                                            transforms=val_transform)

    arch = Models[args.arch].value
    model = arch(len(vocab),
                 output_dim=args.top_answer_limit,
                 raw_images=args.raw_images)

    if args.resume:
        state = torch.load(args.resume)
        model.load_state_dict(state["model"])
    else:
        print("No trained model weights provided. Don't expect the answers to be meaningful.")

    if torch.cuda.is_available():
        model.cuda()

    with torch.no_grad():
        results = evaluate(model, val_loader)

    # Report per-answer-type accuracy (mean of the 0/1 correctness flags).
    for k in results.keys():
        results[k] = np.asarray(results[k])
        acc = results[k].sum() / results[k].shape[0]
        print("Accuracy for {0} type answers: \t\t{1}".format(k, acc))
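# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): the shape of the `results`
# dict consumed above. It is assumed that evaluate() groups a 0/1 correctness
# flag per sample under its answer type (e.g. "yes/no", "number", "other");
# the batch keys and type names are assumptions.
from collections import defaultdict
import torch

def evaluate_sketch(model, loader, device="cuda"):
    model.eval()
    results = defaultdict(list)
    for batch in loader:
        logits = model(batch["question"].to(device), batch["image"].to(device))
        predictions = logits.argmax(dim=1)
        correct = (predictions == batch["answer_id"].to(device)).long().tolist()
        for answer_type, flag in zip(batch["answer_type"], correct):
            results[answer_type].append(flag)
    return results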
from metrics import compute_accuracy

save_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'models')
embeddings = []

if __name__ == '__main__':
    args = parse_args()
    print(args)

    use_cuda = not args.use_cpu and torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    bs = args.train_batch_size
    best_acc = 0

    train_dataloader = get_train_dataloader(args.data_dir,
                                            args.train_batch_size,
                                            embedding=args.embedding)
    test_dataloader = get_test_dataloader(args.data_dir,
                                          args.train_batch_size,
                                          embedding=args.embedding)
    # metrics_train_dataloader = None  # get_train_dataloader(args.data_dir, eval_batch, dataset_version, shuffle=False, use_transforms=False)
    # metrics_test_dataloader = None  # get_test_dataloader(args.data_dir, eval_batch, dataset_version, shuffle=False, use_transforms=False)

    model = dispatch_model(args, device)

    wandb.init(project=args.project_name, name=args.run_name, config=args)
    wandb.watch(model, log='all')
    config = wandb.config

    loss_function = CrossEntropyLoss(reduction='mean')
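# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): one plausible implementation
# of the imported compute_accuracy helper, assuming (inputs, labels) batches
# and a classifier that returns class logits. The real metrics module may
# compute this differently.
import torch

def compute_accuracy_sketch(model, dataloader, device="cpu"):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            logits = model(inputs.to(device))
            predictions = logits.argmax(dim=1)
            correct += (predictions == labels.to(device)).sum().item()
            total += labels.size(0)
    return correct / max(total, 1)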
import os

import torch
import wandb
from torch.nn import CrossEntropyLoss
from torch.optim import SGD

from models import SimpleModel
from training import warmup, dispatch_lr_scheduler, get_lr, dispatch_optimizer
from metrics import compute_accuracy, compute_confusion_matrix, compute_loss
from dataset import get_train_dataloader, get_test_dataloader
from utils import parse_args

if __name__ == '__main__':
    args = parse_args()

    use_cuda = not args.use_cpu and torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    bs = args.train_batch_size

    train_dataloader = get_train_dataloader(os.path.join(args.data_dir, 'train/'),
                                            args.train_batch_size,
                                            args.augmentation)
    test_dataloader = get_test_dataloader(os.path.join(args.data_dir, 'test/'),
                                          args.test_batch_size)

    model = SimpleModel(use_bn=args.use_bn).to(device)

    wandb.init(project="classifying-celebrities", config=args)
    wandb.watch(model, log='all')
    config = wandb.config

    loss_function = CrossEntropyLoss(reduction='mean')
    optimizer = dispatch_optimizer(model, args)
    lr_scheduler = dispatch_lr_scheduler(optimizer, args)
    iteration = 0
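# --------------------------------------------------------------------------
# Hedged sketch (not from the original source): how the dispatch_optimizer and
# dispatch_lr_scheduler helpers from `training` might select components from
# the parsed args. The argument names (args.optimizer, args.momentum,
# args.lr_step, args.lr_gamma) are assumptions.
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import StepLR

def dispatch_optimizer_sketch(model, args):
    if getattr(args, 'optimizer', 'sgd') == 'adam':
        return Adam(model.parameters(), lr=args.lr)
    return SGD(model.parameters(), lr=args.lr,
               momentum=getattr(args, 'momentum', 0.9))

def dispatch_lr_scheduler_sketch(optimizer, args):
    # Step decay is only one possibility; the real helper may implement warmup
    # or other schedules (a `warmup` function is imported alongside it).
    return StepLR(optimizer,
                  step_size=getattr(args, 'lr_step', 30),
                  gamma=getattr(args, 'lr_gamma', 0.1))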