Example #1
import click
import torch

# Net, train_net and test_net are assumed to come from the surrounding project.
def train(load_model: str, save_model: str, train_dataset: str,
          test_dataset: str, no_train: bool, no_test: bool, epochs: int,
          batch_size: int, learning_rate: float):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    click.secho('Using device={}'.format(device), fg='blue')

    net = Net()
    net.to(device)

    if load_model is not None:
        click.secho('Loading model from \'{}\''.format(load_model),
                    fg='yellow')
        net.load_state_dict(torch.load(load_model, map_location=device))

    if not no_train:
        click.echo('Training model using {}'.format(train_dataset))
        net.train()
        train_net(net,
                  data_path=train_dataset,
                  batch_size=batch_size,
                  num_epochs=epochs,
                  learning_rate=learning_rate)

    if not no_train and save_model is not None:
        click.secho('Saving model as \'{}\''.format(save_model), fg='yellow')
        torch.save(net.state_dict(), save_model)

    if not no_test:
        click.echo('Testing model using {}'.format(test_dataset))
        net.eval()
        accuracy = test_net(net, data_path=test_dataset, batch_size=batch_size)
        color = 'green' if accuracy > 97. else 'red'
        click.secho('Accuracy={}'.format(accuracy), fg=color)
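
The heavy lifting is delegated to the project's train_net and test_net helpers. As a rough sketch of the contract they have to satisfy (the signatures match the calls above; the bodies and the load_dataset helper are assumptions, not the project's actual code):

def train_net(net, data_path, batch_size, num_epochs, learning_rate):
    # Sketch only: load_dataset is a hypothetical stand-in for whatever
    # the project uses to turn data_path into an iterable of batches.
    loader = load_dataset(data_path, batch_size, train=True)
    device = next(net.parameters()).device
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
    for _ in range(num_epochs):
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(net(inputs), targets)
            loss.backward()
            optimizer.step()


def test_net(net, data_path, batch_size):
    # Sketch only: returns accuracy as a percentage, matching the
    # `accuracy > 97.` threshold in the caller.
    loader = load_dataset(data_path, batch_size, train=False)
    device = next(net.parameters()).device
    correct = total = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            preds = net(inputs).argmax(dim=1)
            correct += (preds == targets).sum().item()
            total += targets.size(0)
    return 100.0 * correct / total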
Example #2
import gc
import os
import time

import torch
from torch.utils.data import DataLoader

# MyDataset, Net, FocalLoss, Ranger, evaluate, accuracy, config, device,
# model_name, train_transform, val_transform and model_score are assumed
# to come from the surrounding project.
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)

    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False)

    model = Net(model_name).to(device)
    # criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss(0.5)
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    optimizer = Ranger(model.parameters(), lr=1e-3, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=4)

    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    # if os.path.isfile(model_save_path):
    #     print('Loading the previously trained model')
    #     model.load_state_dict(torch.load(model_save_path))

    best_val_score = 0
    best_val_score_cnt = 0
    last_improved_epoch = 0
    adjust_lr_num = 0
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch:{}, step:{}'.format(cur_epoch + 1, len(train_loader)))
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            probs = model(batch_x)

            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader), train_loss.item(), train_acc[0].item()))
        val_loss, val_score = evaluate(model, val_loader, criterion)
        if val_score >= best_val_score:
            if val_score == best_val_score:
                best_val_score_cnt += 1
            best_val_score = val_score
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_score,
                         end_time - start_time, improved_str))
        if cur_epoch - last_improved_epoch >= config.patience_epoch or best_val_score_cnt >= 3:
            if adjust_lr_num >= config.adjust_lr_num:
                print("No optimization for a long time, auto stopping...")
                break
            print("No optimization for a long time, adjust lr...")
            # scheduler.step()
            last_improved_epoch = cur_epoch  # reset here, otherwise this branch fires again on every following epoch
            adjust_lr_num += 1
            best_val_score_cnt = 0
        scheduler.step()
    del model
    gc.collect()

    if fold_idx is not None:
        model_score[fold_idx] = best_val_score
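
This variant swaps the usual cross-entropy for FocalLoss(0.5) and Adam for the Ranger optimizer. The project's FocalLoss is not shown; below is a sketch of a standard multi-class focal loss (Lin et al., 2017), reading the single constructor argument as the focusing parameter gamma, which is an assumption:

import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, gamma=0.5):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        log_probs = F.log_softmax(logits, dim=1)
        log_pt = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()
        # Down-weight well-classified examples by (1 - p_t) ** gamma so the
        # loss concentrates on hard examples.
        return (-(1.0 - pt) ** self.gamma * log_pt).mean()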
Example #3
import gc
import os
import time

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

# MyDataset, Net, evaluate, accuracy, config, device, model_name,
# train_transform and val_transform are assumed to come from the
# surrounding project.
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)

    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False)

    model = Net(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    # criterion = FocalLoss(0.5)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # config.model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))

    best_val_acc = 0
    last_improved_epoch = 0
    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch: ', cur_epoch + 1)
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            probs = model(batch_x)

            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader), train_loss.item(), train_acc[0].item()))
        val_loss, val_acc = evaluate(model, val_loader, criterion)
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        # msg = 'the current epoch: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%},  ' \
        #       'val loss: {4:>5.2}, val acc: {5:>6.2%}, {6}'
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_acc,
                         end_time - start_time, improved_str))
        scheduler.step()
        if cur_epoch - last_improved_epoch > config.patience_epoch:
            print("No optimization for a long time, auto-stopping...")
            break
    del model
    gc.collect()
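
Examples #2 and #3 both rely on evaluate and accuracy helpers that are not shown. A minimal sketch consistent with how they are called above (accuracy returns one value per requested k, hence the train_acc[0] indexing; device is the same global the train functions use; both bodies are assumptions):

import torch

def accuracy(output, target, topk=(1,)):
    # Fraction of samples whose true label is among the top-k predictions.
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    return [correct[:k].reshape(-1).float().sum() / target.size(0)
            for k in topk]

def evaluate(model, data_loader, criterion):
    # Average loss and top-1 accuracy over the whole loader, matching the
    # (val_loss, val_acc) pair unpacked by the callers.
    model.eval()
    total_loss, n_correct, n_total = 0.0, 0, 0
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            probs = model(batch_x)
            total_loss += criterion(probs, batch_y).item() * batch_y.size(0)
            n_correct += (probs.argmax(dim=1) == batch_y).sum().item()
            n_total += batch_y.size(0)
    return total_loss / n_total, n_correct / n_total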
Example #4
import numpy as np
import torch
import torch.backends.cudnn as cudnn

# getArgs, Net and MultiBoxLoss are assumed to come from the surrounding project.
def adjust_learning_rate(optimizer, lr, gamma, step):
    lr = lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


if __name__ == "__main__":
    # 1. Parse command-line arguments, build the network, and load its weights
    args = getArgs()
    model = Net('train')
    print('-- Loading weights into state dict...')
    pretrained_dict = torch.load(
        args.weight_path,
        map_location='cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
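    # Keep only the pretrained tensors whose shapes match the current model,
    # so a partially incompatible checkpoint can still be loaded.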
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('-- Loading weights finished.')
    # 2. Multi-GPU data parallelism
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model = model.cuda()
    # 3. Create the class that computes the loss
    criterion = MultiBoxLoss()
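
    # adjust_learning_rate implements manual step decay, lr = base_lr * gamma ** step,
    # writing the result into every parameter group. A hypothetical usage in the
    # style of SSD training loops follows; the optimizer, decay milestones, and
    # hyperparameter values are placeholders, not taken from this project.
    base_lr, gamma = 1e-3, 0.1
    optimizer = torch.optim.SGD(model.parameters(), lr=base_lr, momentum=0.9)
    step_index = 0
    for iteration in range(120000):
        if iteration in (80000, 100000):  # placeholder decay milestones
            step_index += 1
            adjust_learning_rate(optimizer, base_lr, gamma, step_index)
        # ... forward pass, loss = criterion(...), loss.backward(), optimizer.step() ...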