Example #1
def run(config, num_checkpoints, cuda=False):

    train_joint_transform_list, train_img_transform, train_label_transform = get_transforms(
        config, mode="train")
    val_joint_transform_list, val_img_transform, val_label_transform = None, None, None

    train_dataset = DataSet(mode="train",
                            joint_transform_list=train_joint_transform_list,
                            img_transform=train_img_transform,
                            label_transform=train_label_transform)
    val_dataset = DataSet(mode="val",
                          joint_transform_list=val_joint_transform_list,
                          img_transform=val_img_transform,
                          label_transform=val_label_transform)

    train_loader = data.DataLoader(train_dataset,
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.num_workers,
                                   drop_last=True)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size=config.batch_size,
                                 shuffle=False,
                                 num_workers=config.num_workers)

    criterion, val_criterion = get_loss(config, cuda=cuda)

    model = get_net(config, criterion, cuda=cuda)

    checkpoints = get_checkpoints(config, num_checkpoints)
    print("[*] Checkpoints as follow:")
    pprint.pprint(checkpoints)

    util_checkpoint.load_checkpoint(model, None, checkpoints[0])
    for i, checkpoint in enumerate(checkpoints[1:]):
        model2 = get_net(config, criterion, cuda=cuda)

        util_checkpoint.load_checkpoint(model2, None, checkpoint)
        swa.moving_average(model, model2, 1. / (i + 2))

    with torch.no_grad():
        swa.update_bn(train_loader, model, cuda=cuda)

    output_name = "model-swa.pth"
    print(f"[*] SAVED: to {output_name}")
    checkpoint_dir = os.path.join(ROOT_DIR, LOG_DIR,
                                  os.path.basename(config.model_dir))
    util_checkpoint.save_checkpoint(checkpoint_dir, output_name, model)

    # test the model
    scores = validation(config,
                        val_loader,
                        model,
                        val_criterion,
                        "swa",
                        cuda=cuda,
                        is_record=False)
    print(scores)
    with open(os.path.join(checkpoint_dir, "swa-scores.json"), "w") as f:
        json.dump(scores["FWIOU"], f)
Example #2
def main(mode, epoches, learning_rate, train_path, val_path, batch_size,
         model_path):

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    model = Network(7).to(device)

    if os.path.isfile(model_path):
        print("loading model")
        model.load_state_dict(torch.load(model_path, map_location=device))

    if mode == "train":
        train_loader = dataloader(train_path, batch_size)
        val_loader = dataloader(val_path, batch_size)

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        loss_F = nn.NLLLoss()
        train(epoches, model, train_loader, val_loader, optimizer, loss_F,
              device)
        torch.save(model.state_dict(), model_path)
    else:
        val_loader = dataloader(val_path, batch_size)
        acc = validation(model, val_loader, device)
        print("\taccuracy: %.2f%%" % (acc))
Example #3
    # train
    train_loss, train_f1_macro, train_f1_micro, train_stat = train(
        epoch, model, args.output_dim, optimizer, scheduler, criterion,
        params['alpha'], train_loader, device, args.log_interval,
        append_line_to_log, checkPath)
    #train_loss, train_f1 = train(epoch, model, optimizer, scheduler, criterion, train_loader, device, args.log_interval, append_line_to_log, checkPath)
    history["train_loss"].append(train_loss)
    history["train_f1_macro"].append(train_f1_macro)
    history["train_f1_micro"].append(train_f1_micro)
    train_stat_filename = stat_train_path + "/train_" + str(epoch) + ".pkl"
    with open(train_stat_filename, "wb") as fout:
        pickle.dump(train_stat, fout)

    # validation
    valid_loss, valid_f1_macro, valid_f1_micro, valid_stat = validation(
        epoch, model, args.output_dim, criterion, params['alpha'],
        valid_loader, device, append_line_to_log)
    #valid_loss, valid_f1 = validation(epoch, model, criterion, valid_loader, device, append_line_to_log)
    history["valid_loss"].append(valid_loss)
    history["valid_f1_macro"].append(valid_f1_macro)
    history["valid_f1_micro"].append(valid_f1_micro)
    valid_stat_filename = stat_valid_path + "/valid_" + str(epoch) + ".pkl"
    with open(valid_stat_filename, "wb") as fout:
        pickle.dump(valid_stat, fout)

    scheduler.step(train_loss)

    # save the model of this epoch
    model_file = "/model_" + str(epoch) + ".pth"
    model_file = modelPath + model_file
    torch.save(model.state_dict(), model_file)
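
# The per-epoch train/valid statistics pickled above can be reloaded later for analysis.
# A short usage sketch (the file layout is assumed from the paths built in the loop):
def load_epoch_stats(stat_dir, prefix, epoch):
    with open(stat_dir + "/" + prefix + "_" + str(epoch) + ".pkl", "rb") as fin:
        return pickle.load(fin)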
def pseudo_labeling(num_epochs, model, data_loader, val_loader,
                    unlabeled_loader, device, val_every, file_name):
    # Instead of using the current epoch we use a "step" variable to calculate alpha_weight;
    # this helps the model converge faster (a sketch of one possible alpha_weight ramp is
    # given after this function).
    from torch.optim.swa_utils import AveragedModel, SWALR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP

    criterion = [
        SoftCrossEntropyLoss(smooth_factor=0.1),
        JaccardLoss('multiclass', classes=12)
    ]
    optimizer = AdamP(params=model.parameters(), lr=0.0001, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=0.0001)
    swa_model = AveragedModel(model)
    optimizer = Lookahead(optimizer, la_alpha=0.5)

    step = 100
    size = 256
    best_mIoU = 0
    model.train()
    print('Start Pseudo-Labeling..')
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))
        for batch_idx, (imgs, image_infos) in enumerate(unlabeled_loader):

            # Forward Pass to get the pseudo labels
            # --------------------------------------------- pass the unlabeled batch through the model
            model.eval()
            outs = model(torch.stack(imgs).to(device))
            oms = torch.argmax(outs.squeeze(), dim=1).detach()

            # --------------------------------------------- training step

            model.train()
            # Now calculate the unlabeled loss using the pseudo label
            imgs = torch.stack(imgs)
            imgs = imgs.to(device)
            # preds_array = preds_array.to(device)

            output = model(imgs)
            loss = 0
            for each in criterion:
                loss += each(output, oms)

            unlabeled_loss = alpha_weight(step) * loss

            # Backpropagate
            optimizer.zero_grad()
            unlabeled_loss.backward()
            optimizer.step()
            output = torch.argmax(output.squeeze(),
                                  dim=1).detach().cpu().numpy()
            hist = add_hist(hist,
                            oms.detach().cpu().numpy(),
                            output,
                            n_class=12)

            if (batch_idx + 1) % 25 == 0:
                acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU:{:.4f}'.
                      format(epoch + 1, num_epochs, batch_idx + 1,
                             len(unlabeled_loader), unlabeled_loss.item(),
                             mIoU))
            # Every 50 batches, train one epoch on the labeled data
            if batch_idx % 50 == 0:

                # Normal training procedure
                # iterate the labeled loader without shadowing the outer batch_idx
                for images, masks, _ in data_loader:
                    labeled_loss = 0
                    images = torch.stack(images)
                    # (batch, channel, height, width)
                    masks = torch.stack(masks).long()

                    # move tensors to the device for GPU computation
                    images, masks = images.to(device), masks.to(device)

                    output = model(images)

                    for each in criterion:
                        labeled_loss += each(output, masks)

                    optimizer.zero_grad()
                    labeled_loss.backward()
                    optimizer.step()

                # Now we increment step by 1
                step += 1

        if (epoch + 1) % val_every == 0:
            avrg_loss, val_mIoU = validation(epoch + 1, model, val_loader,
                                             criterion, device)
            if val_mIoU > best_mIoU:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_mIoU = val_mIoU
                save_model(model, file_name=file_name)

        model.train()

        if epoch > 3:
            swa_model.update_parameters(model)
            swa_scheduler.step()
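
# alpha_weight is called above but not defined in this example. Pseudo-label training
# typically ramps the unlabeled-loss weight up with the step counter; the following is a
# minimal sketch of such a ramp (the constants T1, T2 and af are assumptions, not values
# taken from the original code):
def alpha_weight(step, T1=100, T2=700, af=3.0):
    # Linearly ramp the weight from 0 to af between steps T1 and T2, then hold it at af.
    if step < T1:
        return 0.0
    if step > T2:
        return af
    return af * (step - T1) / (T2 - T1)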
def main():
    # argument parsing
    parser = argparse.ArgumentParser()

    parser.add_argument('--max-epochs', type=int, default=2)
    parser.add_argument('--batch-size', type=int, default=4)
    parser.add_argument('--max-sequence-length', type=int, default=128)

    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--data-dir', type=str, default='data')
    parser.add_argument('--real-dataset', type=str, default='webtext')
    parser.add_argument('--fake-dataset', type=str, default='xl-1542M-nucleus')
    parser.add_argument('--save-dir', type=str, default='bert_logs')

    parser.add_argument('--learning-rate', type=float, default=2e-5)
    parser.add_argument('--weight-decay', type=float, default=0)
    parser.add_argument('--model-name', type=str, default='bert-base-cased')
    # argparse's type=bool would treat any non-empty string as True, so parse the flag explicitly
    parser.add_argument('--wandb', type=lambda s: s.lower() == 'true', default=True)

    args = parser.parse_args()
    if args.wandb:
        wandb.init(project=args.model_name)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # config, tokenizer, model
    config = AutoConfig.from_pretrained(
        args.model_name,
        num_labels=2
    )

    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    tokenization_utils.logger.setLevel('DEBUG')

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name,
        config=config
    )
    model.to(device)

    # load data
    train_loader, validation_loader, test_loader = load_datasets(args, tokenizer)

    # my model
    optimizer = AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

    best_val = 0.
    for epoch in range(args.max_epochs):
        train(model, optimizer, train_loader, args, device)
        val_acc = validation(model, validation_loader, args, device)
        test_acc = test(model, test_loader, args, device)

        print(f"Epoch {epoch + 1} | val_acc: {val_acc} test_acc: {test_acc}")

        if val_acc > best_val:
            os.makedirs(args.save_dir, exist_ok=True)
            model_name = 'baseline_' + args.model_name + '.pt'
            model_to_save = model.module if hasattr(model, 'module') else model
            torch.save(dict(
                epoch=epoch+1,
                model_state_dict=model_to_save.state_dict(),
                optimizer_state_dict=optimizer.state_dict(),
                args=args
            ),
                os.path.join(args.save_dir, model_name)
            )
            print("Model saved to", args.save_dir)
            best_val = val_acc
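
The checkpoint saved above is a plain dict; restoring it later would look roughly like this (a sketch assuming the same save layout and file name):

checkpoint = torch.load(os.path.join(args.save_dir, model_name), map_location=device)
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
start_epoch = checkpoint["epoch"]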
Example #6
train_data_set = CIFAR10('./data', train=True, download=True,
                                    transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                ]))

test_data_set = CIFAR10('./data', train=False, download=True,
                                    transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                ]))

triplet_train_data_loader, test_data_loader = createTripletDataLoaders(train_data_set, test_data_set)

model = TripletNet()

argumento = input('Type train for Train or val for Validation: ')


if argumento == 'train':
    train(model, triplet_train_data_loader, device, PATH)

elif argumento == 'val':
    config = {
        'batch_size': 128,
        'num_workers': 2
    }

    train_data_loader = DataLoader(train_data_set, **config)

    validation(train_data_loader, test_data_loader, device, PATH)

Example #7
# Loop through each epoch.
print('Epoch')
for epoch in tqdm(range(epochs)):
    print()
    print('Training on batches...')

    # Perform one full pass over the training set.
    train_labels, train_predict, train_loss = train(train_dataloader, model,
                                                    optimizer, scheduler,
                                                    device, scaler)

    train_acc = accuracy_score(train_labels, train_predict)

    # Get predictions from the model on validation data.
    print('Validation on batches...')
    valid_labels, valid_predict, val_loss = validation(valid_dataloader, model,
                                                       device)
    val_acc = accuracy_score(valid_labels, valid_predict)

    # Print loss and accuracy values to see how training evolves.
    print(
        "  train_loss: %.5f - val_loss: %.5f - train_acc: %.5f - valid_acc: %.5f"
        % (train_loss, val_loss, train_acc, val_acc))
    print()

    # Store the loss value for plotting the learning curve.
    all_loss['train_loss'].append(train_loss)
    all_loss['val_loss'].append(val_loss)
    all_acc['train_acc'].append(train_acc)
    all_acc['val_acc'].append(val_acc)

save_path = "/home/jovyan/data-vol-1/gpt2/fine_tuned_models/test_gp2_full"  #test_gp2_full"
Example #8
# initialize the best validation loss to infinity
best_val_loss = np.inf

# training process
start_epoch = 1
for epoch in range(start_epoch, args.epochs + 1):
    # train
    train_loss, train_acc = train(epoch, model, optimizer, criterions,
                                  train_loader, device, args.log_interval,
                                  append_line_to_log, checkPath)
    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)

    # validation
    valid_loss, valid_acc = validation(epoch, model, criterions, valid_loader,
                                       device, append_line_to_log)
    history["valid_loss"].append(valid_loss)
    history["valid_acc"].append(valid_acc)

    #scheduler.step(valid_loss)

    # save the best model
    is_best = valid_loss < best_val_loss
    best_val_loss = min(valid_loss, best_val_loss)

    if is_best:
        best_model_file = "/best_model_" + str(epoch) + ".pth"
        best_model_file = bestPath + best_model_file
        torch.save(model.state_dict(), best_model_file)

    # save the model of this epoch