Example #1
def run():
    df = pd.read_csv(config.TRAIN_PATH)
    kfold = KFold(n_splits=5, random_state=config.SEED, shuffle=True)
    fold_losses = []

    for i, (train_idx, val_idx) in enumerate(kfold.split(df)):
        print("-------------------------------------------------------")
        print(f"Training fold {i}")
        print("-------------------------------------------------------")
        train = df.iloc[train_idx]
        validation = df.iloc[val_idx]
        train_dataset = PicDataset(train)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.BATCH_SIZE
        )

        val_dataset = PicDataset(validation)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=config.BATCH_SIZE
        )

        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        model = DNN()
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
        loss = 0

        for _ in range(config.EPOCHS):
            engine.train_fn(train_data_loader, model, optimizer, device)
            loss = engine.eval_fn(val_data_loader, model, device)
        print(f"Loss on fold {i} is {loss}")
        fold_losses.append(loss)
        torch.save(model.state_dict(), f'./models/model_{i}.bin')

    print(f"Average loss on cross validation is {sum(fold_losses) / 5}")
Example #2
def train(args, config, io):
    train_loader, validation_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")
    # print(len(train_loader), len(validation_loader))

    #Try to load models
    model = DNN(args).to(device)
    """if device == torch.device("cuda"):
        model = nn.DataParallel(model)"""
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))

    # for para in list(model.parameters())[:-5]:
    #     para.requires_grad=False
    # print(model)

    if args.use_sgd:
        # print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        # print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        """opt = optim.Adam([
        {'params': list(model.parameters())[:-1], 'lr':args.lr/50, 'weight_decay': 1e-4},
        {'params': list(model.parameters())[-1], 'lr':args.lr, 'weight_decay': 1e-4}
        ])
        """

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()

    best_test_loss = 9999999.
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        train_dis = 0.0
        count = 0.0
        model.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            data = drop(jitter(data, device), device)
            # data = jitter(data, device, delta=0.05)
            batch_size = data.shape[0]
            logits = model(data)
            loss = criterion(logits, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            dis = distance(logits, label)
            count += batch_size
            train_loss += loss.item() * batch_size
            train_dis += dis.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f, distance: %.6f' % (
            epoch, train_loss * 1.0 / count, train_dis * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        test_dis = 0.0
        count = 0.0
        model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data)
                loss = criterion(logits, label)
                dis = distance(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
                test_dis += dis.item() * batch_size
        outstr = 'Test %d, loss: %.6f, distance: %.6f' % (
            epoch, test_loss * 1.0 / count, test_dis * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
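Example #2 leans on three helpers it never defines: jitter (the commented-out call at the top of the training loop shows it accepts a delta keyword), drop, and distance. A plausible sketch, assuming jitter adds Gaussian noise, drop zeroes a random fraction of entries, and distance is the mean Euclidean distance between prediction and target; the default magnitudes and the drop/distance semantics are assumptions:

import torch


def jitter(data, device, delta=0.01):
    # Perturb every element with zero-mean Gaussian noise of scale delta.
    return data + torch.randn(data.shape, device=device) * delta


def drop(data, device, p=0.1):
    # Randomly zero a fraction p of the entries (dropout-style corruption).
    mask = (torch.rand(data.shape, device=device) > p).to(data.dtype)
    return data * mask


def distance(pred, target):
    # Mean Euclidean distance between predicted and target vectors.
    return torch.norm(pred - target, dim=-1).mean()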
Example #3
# NOTE: this snippet starts mid-function; the enclosing scaffolding,
# reconstructed from how the fragment uses its names, is assumed to be:
def train_and_test(args, model, train_loader, test_loader, criterion,
                   optimizer, device):
    for epoch in range(args.epochs):
        correct, total = 0, 0
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            _, pred = torch.max(y_pred.data, 1)
            total += y.size(0)
            correct += (pred == y).sum().item()
            # print(correct, total)

            loss = criterion(y_pred, y.long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (idx + 1) % 100 == 0:
                print(
                    'Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}, Acc: {:.4f}'.
                    format(epoch + 1, args.epochs, idx + 1, len(train_loader),
                           loss.item(), 100 * correct / total))

    # Save the model parameters
    torch.save(
        model.state_dict(),
        os.path.join(
            './log', '{}_{}_{}.ckpt'.format(args.model, args.dataset,
                                            args.epochs)))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for idx, (x, y) in enumerate(test_loader):
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            _, y_pred = torch.max(y_pred.data, 1)
            total += y.size(0)
            correct += (y_pred == y).sum().item()
            # print(result)
            if idx % 100 == 0:
                # The original snippet is truncated here; a running-accuracy
                # print is a plausible reconstruction of the missing body.
                print('Step: [{}/{}], Acc: {:.4f}'.format(
                    idx + 1, len(test_loader), 100 * correct / total))
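The reconstructed scaffold above takes its model, loaders, criterion, and optimizer as arguments. A self-contained stand-in for those objects, with dummy tensors in place of the real dataset; every name below is inferred from how the fragment uses it, not taken from the original:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).to(device)
criterion = nn.CrossEntropyLoss()  # matches criterion(y_pred, y.long())
optimizer = optim.Adam(model.parameters())

# Placeholder tensors standing in for the real dataset.
X, y = torch.randn(1024, 1, 28, 28), torch.randint(0, 10, (1024,))
train_loader = DataLoader(TensorDataset(X, y), batch_size=128, shuffle=True)
test_loader = DataLoader(TensorDataset(X, y), batch_size=128)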
Example #4
def train(args, config, io):
    train_loader, validation_loader, unlabelled_loader = get_loader(
        args, config)

    device = torch.device("cuda" if args.cuda else "cpu")

    #Try to load models
    model = DNN(args).to(device)
    ema_model = DNN(args).to(device)
    for param in ema_model.parameters():
        param.detach_()
    if device == torch.device("cuda"):
        model = nn.DataParallel(model)
        ema_model = nn.DataParallel(ema_model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
        ema_model.load_state_dict(torch.load(args.model_path))

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()
    consistency_criterion = nn.MSELoss()

    best_test_loss = 9999999.
    global_step = 0
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        ema_model.train()
        for i, ((data, label), (u, _)) in enumerate(
                zip(cycle(train_loader), unlabelled_loader)):
            if data.shape[0] != u.shape[0]:
                bt_size = np.minimum(data.shape[0], u.shape[0])
                data = data[0:bt_size]
                label = label[0:bt_size]
                u = u[0:bt_size]
            data, label, u = data.to(device), label.to(device), u.to(device)
            batch_size = data.shape[0]
            logits = model(data)
            class_loss = criterion(logits, label)

            u_student = jitter(u, device)
            u_teacher = jitter(u, device)
            logits_unlabeled = model(u_student)
            ema_logits_unlabeled = ema_model(u_teacher)
            # Detach so no gradient flows back into the teacher.
            ema_logits_unlabeled = ema_logits_unlabeled.detach()
            consistency_loss = consistency_criterion(logits_unlabeled,
                                                     ema_logits_unlabeled)
            if epoch < args.consistency_rampup_starts:
                consistency_weight = 0.0
            else:
                consistency_weight = get_current_consistency_weight(
                    args, args.final_consistency, epoch, i,
                    len(unlabelled_loader))

            consistency_loss = consistency_weight * consistency_loss
            loss = class_loss + consistency_loss

            opt.zero_grad()
            loss.backward()
            opt.step()

            global_step += 1
            # print(global_step)
            update_ema_variables(model, ema_model, args.ema_decay, global_step)

            count += batch_size
            train_loss += loss.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f' % (epoch, train_loss * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        ema_model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = ema_model(data)
                loss = criterion(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
        outstr = 'Test %d, loss: %.6f' % (epoch, test_loss * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(ema_model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(ema_model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
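Example #4 is the mean-teacher pattern: the student is updated by gradient descent and the teacher by an exponential moving average of the student's weights, with a consistency loss whose weight ramps up after consistency_rampup_starts. The two undefined helpers typically look like the sketch below; the sigmoid ramp-up shape and the consistency_rampup_ends attribute are assumptions, since the original only shows the call sites:

import numpy as np


def update_ema_variables(model, ema_model, alpha, global_step):
    # Teacher weights track an exponential moving average of the student's;
    # the decay ramps up from 0 towards alpha over the first steps.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)


def get_current_consistency_weight(args, final_weight, epoch, step,
                                   steps_per_epoch):
    # Sigmoid ramp-up of the consistency weight once it is switched on.
    rampup_length = args.consistency_rampup_ends - args.consistency_rampup_starts
    if rampup_length <= 0:
        return final_weight
    current = epoch - args.consistency_rampup_starts + step / steps_per_epoch
    phase = 1.0 - np.clip(current, 0.0, rampup_length) / rampup_length
    return final_weight * float(np.exp(-5.0 * phase * phase))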
Example #5
def main():
    print('> Starting execution...')

    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fit',
                       action='store_true',
                       help='fit the tuned model on digits 0-4')
    group.add_argument('--transfer',
                       action='store_true',
                       help='train a pretrained model on digits 5-9')

    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        metavar='E',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        metavar='L',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--early-stopping',
                        type=int,
                        default=7,
                        metavar='E',
                        help='early stopping (default: 7 epochs)')
    parser.add_argument(
        '--size',
        type=int,
        default=100,
        metavar='S',
        help='size of the training data for transfer learning (default: 100)')

    parser.add_argument('--seed',
                        type=int,
                        default=23,
                        metavar='S',
                        help='random seed (default: 23)')

    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()  # use cuda if available
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)  # random seed

    print('> Loading MNIST data')
    train_set = datasets.MNIST(MNIST_DATA_DIR,
                               train=True,
                               download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307, ), (0.3081, ))
                               ]))

    test_set = datasets.MNIST(MNIST_DATA_DIR,
                              train=False,
                              download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307, ), (0.3081, ))
                              ]))

    # .targets replaces the deprecated .train_labels/.test_labels attributes
    train_digits_04 = np.where(train_set.targets < 5)[0]
    train_digits_59 = np.where(train_set.targets > 4)[0]

    test_digits_04 = np.where(test_set.targets < 5)[0]
    test_digits_59 = np.where(test_set.targets > 4)[0]

    if args.fit:
        # Training the tuned model on digits 0-4
        print('> Training a new model on MNIST digits 0-4')

        X_train_04, y_train_04, X_valid_04, y_valid_04 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_04, test_digits_04)

        torch.manual_seed(args.seed)

        print('> Initializing the model')

        model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True)
        model.apply(init_he_normal)  # He initialization

        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_04,
                                  y_train_04,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_04,
                                  y_valid=y_valid_04,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_04_PATH}')
        torch.save(model.state_dict(), MODEL_04_PATH)
    elif args.transfer:
        # Transfer learning
        print(
            '> Training a model on MNIST digits 5-9 from a pretrained model for digits 0-4'
        )

        if os.path.isfile(MODEL_04_PATH):
            print('> Loading the pretrained model')

            model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM,
                        batch_norm=True).to(device)
            model.load_state_dict(torch.load(MODEL_04_PATH))

            for param in model.parameters():
                param.requires_grad = False

            # Parameters of newly constructed modules have requires_grad=True by default
            model.fc4 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.fc5 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.out = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

            print('> Using saved model state')
        else:
            print(
                '> Model state file is not found, fit a model before the transfer learning'
            )
            print('> Stopping execution')
            return

        X_train_59, y_train_59, X_valid_59, y_valid_59 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_59[:args.size],
            test_digits_59)

        # fixing the issues with labels
        y_train_59 = y_train_59 - 5
        y_valid_59 = y_valid_59 - 5

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_59,
                                  y_train_59,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_59,
                                  y_valid=y_valid_59,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_59_PATH}')
        torch.save(model.state_dict(), MODEL_59_PATH)
    else:
        print('> Incorrect mode, try either `--fit` or `--transfer`')
        print('> Stopping execution')
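Example #5 initializes the network with model.apply(init_he_normal) but never shows the hook. With nn.Module.apply, such a hook is a per-layer function; this sketch assumes the DNN is built from nn.Linear layers with ReLU activations:

import torch.nn as nn


def init_he_normal(m):
    # He (Kaiming) normal initialization for linear layers; biases zeroed.
    # Layer types other than nn.Linear are left untouched.
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)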
Example #6
# NOTE: this snippet also starts mid-function; the enclosing loops,
# reconstructed from how the fragment uses its names, are assumed to be:
def train_denoiser(model, train_loader, optimizer, num_epochs, device,
                   tbwriter, checkpoint_path):
    MSE = nn.MSELoss()
    total_steps = 0
    for epoch in range(num_epochs):
        for i, (noisy_batch, clean_batch) in enumerate(train_loader):
            clean_batch_var = clean_batch.to(device)
            noisy_batch_var = noisy_batch.to(device)
            outputs = model(noisy_batch_var)
            loss = MSE(outputs, clean_batch_var)
            # back-propagate and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # record scalar data for tensorboard
            tbwriter.add_scalar('loss', loss.item(), total_steps)
            if (i + 1) % 100 == 0:
                print(
                    'Epoch {}\t'
                    'Step {}\t'
                    'loss {:.5f}'
                    .format(epoch + 1, i + 1, loss.item()))
            #  print(outputs)
            #  print(clean_batch_var)
            total_steps += 1  # advance the tensorboard step once per batch
    # save various states
    state_path = os.path.join(checkpoint_path, 'state-{}.pkl'.format(epoch + 1))
    state = {
        'DNN': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, state_path)


    tbwriter.close()
    print('Finished Training!')
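The loop in Example #6 consumes paired noisy/clean batches. One minimal way to produce such pairs, assuming additive Gaussian corruption of clean samples; the noise model and the class name are assumptions, since the original never shows its loader:

import torch
from torch.utils.data import Dataset


class NoisyCleanDataset(Dataset):
    # Yields (noisy, clean) pairs by corrupting clean samples on the fly.
    def __init__(self, clean_tensors, sigma=0.1):
        self.clean = clean_tensors
        self.sigma = sigma

    def __len__(self):
        return len(self.clean)

    def __getitem__(self, idx):
        clean = self.clean[idx]
        noisy = clean + torch.randn_like(clean) * self.sigma
        return noisy, clean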