Example #1
	def save_model(self, model, optimizer, save_variable_list):
		'''
		Save the parameters of the model and the optimizer,
		as well as some other variables such as step and learning_rate.
		'''
		with open(os.path.join(self.args['save_path'], 'config.json'), 'w') as fjson:
			json.dump(self.args, fjson)

		torch.save({
			**save_variable_list,
			'model_state_dict': model.state_dict(),
			'optimizer_state_dict': optimizer.state_dict()},
			os.path.join(self.args['save_path'], 'checkpoint')
		)

		entity_embedding = model.entity_embedding.detach().cpu().numpy()
		np.save(
			os.path.join(self.args['save_path'], 'entity_embedding'),
			entity_embedding
		)

		relation_embedding = model.relation_embedding.detach().cpu().numpy()
		np.save(
			os.path.join(self.args['save_path'], 'relation_embedding'),
			relation_embedding
		)
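
A minimal loading counterpart, as a sketch: it assumes the same save_path layout that save_model writes (config.json, the checkpoint file, and the two .npy arrays) and that model and optimizer have already been constructed; 'step' is only present if save_variable_list contained it.

# Sketch: restoring what save_model wrote (names mirror the example above).
with open(os.path.join(save_path, 'config.json')) as fjson:
    args = json.load(fjson)

checkpoint = torch.load(os.path.join(save_path, 'checkpoint'))
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
step = checkpoint.get('step')  # extra entries come from save_variable_list

entity_embedding = np.load(os.path.join(save_path, 'entity_embedding.npy'))
relation_embedding = np.load(os.path.join(save_path, 'relation_embedding.npy'))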
Example #2
    def _save_model(self, save_path: str, model: PreTrainedModel, tokenizer: PreTrainedTokenizer,
                    iteration: int, optimizer: Optimizer = None, save_as_best: bool = False,
                    extra: dict = None, include_iteration: bool = True, name: str = 'model'):
        extra_state = dict(iteration=iteration)

        if optimizer:
            extra_state['optimizer'] = optimizer.state_dict()

        if extra:
            extra_state.update(extra)

        if save_as_best:
            dir_path = os.path.join(save_path, '%s_best' % name)
        else:
            dir_name = '%s_%s' % (name, iteration) if include_iteration else name
            dir_path = os.path.join(save_path, dir_name)

        util.create_directories_dir(dir_path)

        # save model
        if isinstance(model, DataParallel):
            model.module.save_pretrained(dir_path)
        else:
            model.save_pretrained(dir_path)

        # save vocabulary
        tokenizer.save_pretrained(dir_path)

        # save extra
        state_path = os.path.join(dir_path, 'extra.state')
        torch.save(extra_state, state_path)
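
A hedged loading sketch for the directory _save_model produces: from_pretrained restores the model and tokenizer, and the extra state is a plain torch.load. AutoModel and AutoTokenizer are illustrative stand-ins for whatever concrete transformers classes the project uses, and the optimizer must be rebuilt before its state can be restored.

# Sketch: loading a directory written by _save_model (class names illustrative).
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained(dir_path)
tokenizer = AutoTokenizer.from_pretrained(dir_path)

extra_state = torch.load(os.path.join(dir_path, 'extra.state'))
iteration = extra_state['iteration']
if 'optimizer' in extra_state:
    optimizer.load_state_dict(extra_state['optimizer'])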
Example #3
def main():

    expdir = os.path.join("exp", "train_" + args.tag)
    model_dir = os.path.join(expdir, "models")
    args.model_dir = model_dir
    for x in ['exp', expdir, model_dir]:
        if not os.path.isdir(x):
            os.mkdir(x)

    cifar_train = datasets.CIFAR10("./dataset_cache",
                                   train=True,
                                   download=True,
                                   transform=transforms.Compose([
                                       transforms.RandomCrop(32, padding=4),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor()
                                   ]))
    cifar_val = datasets.CIFAR10("./dataset_cache",
                                 train=False,
                                 download=True,
                                 transform=transforms.ToTensor())

    train_loader = DataLoader(cifar_train,
                              shuffle=True,
                              batch_size=args.batch,
                              num_workers=args.workers)
    val_loader = DataLoader(cifar_val,
                            shuffle=False,
                            batch_size=args.batch,
                            num_workers=args.workers)

    # model = get_architecture(args.arch)
    model = torchvision.models.resnet18(pretrained=False,
                                        progress=True,
                                        num_classes=10).to(device)

    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.lr_step_size,
                       gamma=args.gamma)

    for i in range(args.epochs):
        train(train_loader, model, criterion, optimizer, scheduler, i)
        acc = validate(val_loader, model, criterion)

        torch.save(
            {
                'epoch': i,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, os.path.join(args.model_dir, "ep{}_acc_{}.pth".format(i, acc)))
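
Resuming from one of these per-epoch checkpoints is the mirror image; a sketch, with an illustrative filename since the validation accuracy is baked into the real one:

# Sketch: resuming from a saved epoch (filename illustrative).
checkpoint = torch.load(os.path.join(args.model_dir, "ep9_acc_87.1.pth"),
                        map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # continue where training stopped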
Example #4
def main():
    X_train, y_train = load_data(path)
    labels = np.unique(y_train).tolist()
    le = preprocessing.LabelEncoder()
    le.fit(labels)
    y_train = torch.as_tensor(le.transform(y_train))
    X_train_, X_val, y_train_, y_val = train_test_split(X_train,
                                                        y_train,
                                                        test_size=0.33,
                                                        random_state=40,
                                                        shuffle=True)

    data = [X_train_, y_train_]
    for epoch in range(N_EPOCHS):
        train(epoch, data)
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, "../models/model_CNN/model.pt")
    return 0
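
Because the save path is constant, each epoch overwrites the previous file and only the final checkpoint survives. A loading sketch for inference, assuming net, le, and X_val from above are in scope and X_val is a tensor the network accepts:

# Sketch: loading the last checkpoint and decoding predictions (assumes net, le, X_val).
checkpoint = torch.load("../models/model_CNN/model.pt")
net.load_state_dict(checkpoint['model_state_dict'])
net.eval()

with torch.no_grad():
    preds = net(X_val).argmax(dim=1)
predicted_labels = le.inverse_transform(preds.numpy())  # map indices back to raw labels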
Example #5
def main():
    
    expdir = os.path.join("exp", "train_" + args.tag)
    model_dir = os.path.join(expdir, "models")
    log_dir = os.path.join(expdir, "log")
    # args.model_dir = model_dir
    for x in ['exp', expdir, model_dir, log_dir]:
        if not os.path.isdir(x):
            os.mkdir(x)

    logfilename = os.path.join(log_dir, "log.txt")
    init_logfile(
        logfilename,
        "arch={} epochs={} batch={} lr={} lr_step={} gamma={} noise_sd={} k_value={} eps_step={}".format(
            args.arch, args.epochs, args.batch, args.lr, args.lr_step_size,
            args.gamma, args.noise_sd, args.k_value, args.eps_step))
    log(logfilename, "epoch\ttime\tlr\ttrain loss\ttrain acc\tval loss\tval acc")

    
    cifar_train = datasets.CIFAR10("./dataset_cache", train=True, download=True, transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ]))
    cifar_val = datasets.CIFAR10("./dataset_cache", train=False, download=True, transform=transforms.ToTensor())


    train_loader = DataLoader(cifar_train, shuffle=True, batch_size=args.batch, num_workers=args.workers)
    val_loader = DataLoader(cifar_val, shuffle=False, batch_size=args.batch, num_workers=args.workers)
    
    # model = get_architecture(args.arch)
    if args.arch == "resnet18":
        model = torchvision.models.resnet18(pretrained=False, progress=True, num_classes=get_num_classes()).to(device)
    elif args.arch == "resnet34":
        model = torchvision.models.resnet34(pretrained=False, progress=True, num_classes=get_num_classes()).to(device)
    elif args.arch == "resnet50":
        model = torchvision.models.resnet50(pretrained=False, progress=True, num_classes=get_num_classes()).to(device)
    else:  # unknown arch: fall back to resnet18
        model = torchvision.models.resnet18(pretrained=False, progress=True, num_classes=get_num_classes()).to(device)
    
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma, verbose=True)

    for i in range(args.epochs):
        before = time.time()
        train_loss, train_acc = train(train_loader, model, criterion, optimizer, scheduler, i)
        val_loss, val_acc = validate(val_loader, model, criterion)
        after = time.time()

        log(logfilename, "{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}".format(
            i, float(after - before),
            float(scheduler.get_last_lr()[0]), train_loss, train_acc, val_loss, val_acc))

        torch.save(
            {
                'epoch': i,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, os.path.join(model_dir, "ep{}.pth".format(i)))
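
The if/elif chain above can be collapsed into a lookup table; a behavior-preserving sketch, with resnet18 kept as the fallback:

# Sketch: table-driven alternative to the if/elif architecture selection.
ARCHS = {
    "resnet18": torchvision.models.resnet18,
    "resnet34": torchvision.models.resnet34,
    "resnet50": torchvision.models.resnet50,
}
constructor = ARCHS.get(args.arch, torchvision.models.resnet18)
model = constructor(pretrained=False, progress=True,
                    num_classes=get_num_classes()).to(device)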
Example #6
        writer.add_scalar("val", val_loss, epoch)
        writer.add_scalars("train and val losses", {
            "train": train_loss,
            "val": val_loss
        }, epoch)

        if epoch % 25 == 0:
            print(epoch)
    writer.close()

    # torch.save(model.state_dict(), os.path.join(models_weights, exp + ".pt"))

    checkpoint = {
        "epoch": epochs,
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
    }
    # torch.save(checkpoint, os.path.join(optimizer_chp, exp + ".pt"))
    torch.save(checkpoint, os.path.join(optimizer_chp, "exp_24_MAE.pt"))
    # checkpoint = torch.load('checkpoint.pth')

    plt.figure()
    plt.plot(list(range(1, epochs + 1)), train_losses, label="train")
    plt.plot(list(range(1, epochs + 1)), val_losses, label="val")
    plt.legend()

    plt.savefig(os.path.join(loss_curves, exp + ".png"))

    print("")

    # wrap model and optimizer with NVIDIA's apex
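
For that apex note, the classic pattern looked roughly like the sketch below; apex.amp has since been superseded by torch.cuda.amp, so this is only the legacy API in outline:

# Sketch: legacy NVIDIA apex mixed-precision wrapping (superseded by torch.cuda.amp).
from apex import amp

model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# inside the training loop, the backward pass goes through amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()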