def save_model(self, model, optimizer, save_variable_list):
    """Persist training state to ``self.args['save_path']``.

    Writes three artifacts: the run configuration (``config.json``),
    a combined checkpoint (extra variables such as step/learning_rate
    merged with model and optimizer state dicts), and the entity /
    relation embeddings as ``.npy`` files.
    """
    save_dir = self.args['save_path']

    # Dump the run configuration so the checkpoint is self-describing.
    with open(os.path.join(save_dir, 'config.json'), 'w') as fjson:
        json.dump(self.args, fjson)

    # Checkpoint = caller-supplied variables + model/optimizer state.
    checkpoint = dict(save_variable_list)
    checkpoint['model_state_dict'] = model.state_dict()
    checkpoint['optimizer_state_dict'] = optimizer.state_dict()
    torch.save(checkpoint, os.path.join(save_dir, 'checkpoint'))

    # Export both embedding tables to NumPy for downstream analysis.
    for attr in ('entity_embedding', 'relation_embedding'):
        embedding = getattr(model, attr).detach().cpu().numpy()
        np.save(os.path.join(save_dir, attr), embedding)
def _save_model(self, save_path: str, model: PreTrainedModel,
                tokenizer: PreTrainedTokenizer, iteration: int,
                optimizer: torch.optim.Optimizer = None,
                save_as_best: bool = False, extra: dict = None,
                include_iteration: bool = True, name: str = 'model'):
    """Save a transformers model, its tokenizer and extra training state.

    Fixes over the previous version: the ``optimizer`` parameter was
    annotated with its own name (``optimizer: optimizer``), which raises
    a NameError when the ``def`` is evaluated unless an unrelated global
    ``optimizer`` happens to exist; and ``include_iteration`` was
    annotated ``int`` although it is used as a flag — both annotations
    are corrected. Behavior is unchanged.

    Args:
        save_path: root directory for all checkpoints.
        model: model to persist via ``save_pretrained``.
        tokenizer: tokenizer persisted alongside the model.
        iteration: current training iteration, stored in the extra state.
        optimizer: if given, its ``state_dict`` is stored as well.
        save_as_best: write into ``<name>_best`` instead of a
            per-iteration directory.
        extra: additional key/value pairs merged into the extra state.
        include_iteration: append the iteration to the directory name
            (ignored when ``save_as_best`` is set).
        name: base name for the checkpoint directory.
    """
    extra_state = dict(iteration=iteration)
    if optimizer is not None:
        extra_state['optimizer'] = optimizer.state_dict()
    if extra:
        extra_state.update(extra)

    if save_as_best:
        dir_path = os.path.join(save_path, '%s_best' % name)
    else:
        dir_name = '%s_%s' % (name, iteration) if include_iteration else name
        dir_path = os.path.join(save_path, dir_name)

    util.create_directories_dir(dir_path)

    # save model — DataParallel wraps the real model in `.module`
    if isinstance(model, DataParallel):
        model.module.save_pretrained(dir_path)
    else:
        model.save_pretrained(dir_path)

    # save vocabulary
    tokenizer.save_pretrained(dir_path)

    # save extra training state
    state_path = os.path.join(dir_path, 'extra.state')
    torch.save(extra_state, state_path)
def main():
    """Train a ResNet-18 on CIFAR-10 and checkpoint after every epoch."""
    expdir = os.path.join("exp", "train_" + args.tag)
    model_dir = os.path.join(expdir, "models")
    args.model_dir = model_dir

    # Create exp/, the experiment dir and the model dir when missing.
    for directory in ("exp", expdir, model_dir):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    # Standard CIFAR-10 augmentation for training; plain tensors for val.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    cifar_train = datasets.CIFAR10("./dataset_cache", train=True,
                                   download=True, transform=train_transform)
    cifar_val = datasets.CIFAR10("./dataset_cache", train=False,
                                 download=True,
                                 transform=transforms.ToTensor())
    train_loader = DataLoader(cifar_train, shuffle=True,
                              batch_size=args.batch,
                              num_workers=args.workers)
    val_loader = DataLoader(cifar_val, shuffle=False,
                            batch_size=args.batch,
                            num_workers=args.workers)

    # model = get_architecture(args.arch)
    model = torchvision.models.resnet18(pretrained=False, progress=True,
                                        num_classes=10).to(device)

    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                    weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_step_size,
                       gamma=args.gamma)

    for epoch in range(args.epochs):
        train(train_loader, model, criterion, optimizer, scheduler, epoch)
        acc = validate(val_loader, model, criterion)
        # Checkpoint the model + optimizer once per epoch.
        torch.save({
            'epoch': epoch,
            'arch': args.arch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, os.path.join(args.model_dir, "ep{}_acc_{}.pth".format(epoch, acc)))
def main():
    """Encode labels, split the data, train for N_EPOCHS and checkpoint.

    Relies on module-level ``load_data``, ``path``, ``train``, ``net``,
    ``optimizer`` and ``N_EPOCHS``.

    Returns:
        int: 0 on completion.
    """
    X_train, y_train = load_data(path)

    # Fit the encoder on the unique label set. The previous version
    # called ``fit_transform`` and discarded the transformed output —
    # only the fit side effect was wanted, so ``fit`` is the right call.
    labels = np.unique(y_train).tolist()
    le = preprocessing.LabelEncoder()
    le.fit(labels)
    y_train = torch.as_tensor(le.transform(y_train))

    # NOTE(review): X_val / y_val are never used below — this split
    # currently only shrinks the training set. Confirm whether a
    # validation pass was intended here.
    X_train_, X_val, y_train_, y_val = train_test_split(
        X_train, y_train, test_size=0.33, random_state=40, shuffle=True)
    data = [X_train_, y_train_]

    for epoch in range(N_EPOCHS):
        train(epoch, data)
        # Checkpoint after every epoch (overwrites the same file).
        torch.save({
            'epoch': epoch,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, "../models/model_CNN/model.pt")
    return 0
def main():
    """Train a torchvision ResNet on CIFAR-10 with per-epoch logging
    and checkpoints.

    Creates the experiment/model/log directories, logs the
    hyperparameters, builds the data loaders, selects the architecture
    from ``args.arch``, then trains for ``args.epochs`` epochs —
    logging a TSV row and writing a checkpoint after each one.
    """
    expdir = os.path.join("exp", "train_" + args.tag)
    model_dir = os.path.join(expdir, "models")
    log_dir = os.path.join(expdir, "log")
    # args.model_dir = model_dir
    for directory in ("exp", expdir, model_dir, log_dir):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    logfilename = os.path.join(log_dir, "log.txt")
    init_logfile(
        logfilename,
        "arch={} epochs={} batch={} lr={} lr_step={} gamma={} noise_sd={} k_value={} eps_step={}".format(
            args.arch, args.epochs, args.batch, args.lr, args.lr_step_size,
            args.gamma, args.noise_sd, args.k_value, args.eps_step))
    log(logfilename, "epoch\ttime\tlr\ttrain loss\ttrain acc\tval loss\tval acc")

    cifar_train = datasets.CIFAR10(
        "./dataset_cache", train=True, download=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]))
    cifar_val = datasets.CIFAR10("./dataset_cache", train=False,
                                 download=True,
                                 transform=transforms.ToTensor())
    train_loader = DataLoader(cifar_train, shuffle=True,
                              batch_size=args.batch,
                              num_workers=args.workers)
    val_loader = DataLoader(cifar_val, shuffle=False,
                            batch_size=args.batch,
                            num_workers=args.workers)

    # model = get_architecture(args.arch)
    # Constructor lookup replaces the previous copy-pasted if/elif
    # chain; an unknown arch falls back to resnet18, exactly like the
    # original else-branch did.
    constructors = {
        "resnet18": torchvision.models.resnet18,
        "resnet34": torchvision.models.resnet34,
        "resnet50": torchvision.models.resnet50,
    }
    arch_fn = constructors.get(args.arch, torchvision.models.resnet18)
    model = arch_fn(pretrained=False, progress=True,
                    num_classes=get_num_classes()).to(device)

    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                    weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_step_size,
                       gamma=args.gamma, verbose=True)

    for i in range(args.epochs):
        before = time.time()
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, scheduler, i)
        val_loss, val_acc = validate(val_loader, model, criterion)
        after = time.time()

        # One TSV row per epoch: epoch, wall time, lr, train/val metrics.
        log(logfilename, "{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}".format(
            i, float(after - before), float(scheduler.get_last_lr()[0]),
            train_loss, train_acc, val_loss, val_acc))

        # Checkpoint the model + optimizer once per epoch.
        torch.save({
            'epoch': i,
            'arch': args.arch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, os.path.join(model_dir, "ep{}.pth".format(i)))
writer.add_scalar("val", val_loss, epoch) writer.add_scalars("train and val losses", { "train": train_loss, "val": val_loss }, epoch) if epoch % 25 == 0: print(epoch) writer.close() # torch.save(model.state_dict(), os.path.join(models_weights, exp + ".pt")) checkpoint = { "epoch": epochs, "model": model.state_dict(), "optimizer": optimizer.state_dict(), } # torch.save(checkpoint, os.path.join(optimizer_chp, exp + ".pt")) torch.save(checkpoint, os.path.join(optimizer_chp, "exp_24_MAE.pt")) # checkpoint = torch.load('checkpoint.pth') plt.figure() plt.plot(list(range(1, epochs + 1)), train_losses, label="train") plt.plot(list(range(1, epochs + 1)), val_losses, label="val") plt.legend() plt.savefig(os.path.join(loss_curves, exp + ".png")) print("") # wrap model and optimizer with NVIDIA's apex