Ejemplo n.º 1
0
def train(args):
    """Train MNISTNet on MNIST and save a checkpoint.

    Args:
        args: parsed CLI namespace; reads num_gpus, seed, train, test,
            batch_size, test_batch_size, beta_1, beta_2, weight_decay,
            epochs, log_interval and model_dir.
    """
    use_cuda = args.num_gpus > 0
    # FIX: `use_cuda` is already a bool; the original compared it to 0 again.
    device = torch.device("cuda" if use_cuda else "cpu")

    # Seed CPU and (if used) GPU RNGs for reproducibility.
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader = DataLoader(
        MNIST(args.train, train=True), batch_size=args.batch_size, shuffle=True
    )
    test_loader = DataLoader(
        MNIST(args.test, train=False), batch_size=args.test_batch_size, shuffle=False
    )

    net = MNISTNet().to(device)
    loss_fn = nn.CrossEntropyLoss()
    # NOTE(review): no lr is passed, so Adam's default (1e-3) is used — confirm intended.
    optimizer = optim.Adam(
        net.parameters(),
        betas=(args.beta_1, args.beta_2),
        weight_decay=args.weight_decay,
    )

    logger.info("Start training ...")
    for epoch in range(1, args.epochs + 1):
        net.train()
        for batch_idx, (imgs, labels) in enumerate(train_loader, 1):
            imgs, labels = imgs.to(device), labels.to(device)
            output = net(imgs)
            loss = loss_fn(output, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % args.log_interval == 0:
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}".format(
                        epoch,
                        batch_idx * len(imgs),
                        len(train_loader.sampler),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                    )
                )

        # evaluate on the held-out split after every epoch
        test(net, test_loader, device)

    # save model checkpoint
    save_model(net, args.model_dir)
Ejemplo n.º 2
0
def main():
    """Train MNISTNet for one epoch, report train/val accuracy, save weights."""
    train_data = MNISTData(dset='train')
    val_data = MNISTData(dset='val')

    train_loader = DataLoader(train_data, batch_size=8)
    val_loader = DataLoader(val_data, batch_size=8)

    loss_func = F.cross_entropy

    net = MNISTNet()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)

    net.train()
    for _epoch in range(1):
        for inputs, targets in tqdm(train_loader):
            loss = loss_func(net(inputs), targets)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # report the loss of the final batch of the epoch
        print(loss.item())

    net.eval()
    print("TRAIN ACCURACY")
    print(accuracy(net, train_loader))

    print("VAL ACCURACY")
    print(accuracy(net, val_loader))

    torch.save(net.state_dict(), './model.pt')
Ejemplo n.º 3
0
def test_gflops(model, input_size):
    """Estimate inference GFLOPs for one forward pass of the named model.

    Args:
        model: one of 'shallowcnn', 'resnet18', 'vgg11'.
        input_size: square input resolution in pixels.

    Returns:
        Estimated GFLOPs (2 * MACs / 1e9; one MAC = one multiply + one add).
    """
    assert model in ['shallowcnn', 'resnet18', 'vgg11']
    # FIX: use an elif chain — after the first branch rebinds `model` to a
    # module, the later string comparisons were dead checks.
    if model == 'shallowcnn':
        model = MNISTNet(3, 10, img_size=input_size)
    elif model == 'resnet18':
        model = models.resnet18(num_classes=10)
    elif model == 'vgg11':
        model = models.vgg11_bn(num_classes=10)
    # FIX: renamed from `input`, which shadowed the builtin.
    sample = torch.randn(1, 3, input_size, input_size)
    macs, params = profile(model, inputs=(sample, ))
    gflops = 2 * macs / 1e9
    print(gflops)
    return gflops
Ejemplo n.º 4
0
def main():
    """Entry point: set up device, model and criterion, then train/test.

    NOTE(review): this snippet is incomplete — the data loaders and the
    optimizer are never created (placeholder comments below), and the final
    `if args.save_model:` has no body, so the code is not runnable as-is.
    """
    use_cuda = not args.use_cpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    #Get dataset handle and train and test dataloader.
    #code goes here.
    
    #get the model.
    model = MNISTNet().to(device)
    #Create optimizer
    #optimizer = 
    #criterion
    criterion = nn.CrossEntropyLoss()
    
    ## Train and test the network.
    # NOTE(review): train_loader/test_loader/optimizer are undefined here —
    # NameError until the placeholders above are filled in.
    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, criterion, epoch)
        test(model, device, test_loader, criterion)

    ## Save the network.
    if args.save_model:
Ejemplo n.º 5
0
def main():
    """Load a saved MNISTNet checkpoint and report train/val accuracy."""
    train_data = MNISTData(dset='train')
    val_data = MNISTData(dset='val')

    train_loader = DataLoader(train_data, batch_size=8)
    val_loader = DataLoader(val_data, batch_size=8)

    net = MNISTNet()
    net.load_state_dict(torch.load('./model.pt'))
    net.eval()

    # Evaluate on both splits in order: train first, then validation.
    for title, loader in (("TRAIN ACCURACY", train_loader),
                          ("VAL ACCURACY", val_loader)):
        print(title)
        print(accuracy(net, loader))
                    action='store_true',
                    default=False,
                    help='enables CUDA training')
if __name__ == "__main__":
    # Parse command-line arguments.
    args = parser.parse_args()
    # Decide whether to train on the GPU.
    use_cuda = args.cuda and torch.cuda.is_available()
    # Device used at runtime.
    device = torch.device("cuda" if use_cuda else "cpu")
    # Use pinned (page-locked) host memory when transferring to the GPU.
    dataloader_kwargs = {'pin_memory': True} if use_cuda else {}
    # Multiprocess training; 'spawn' start method also works on Windows.
    mp.set_start_method('spawn')
    # Move the model to the target device.
    model = MNISTNet().to(device)
    # Share model parameters across the worker processes (Hogwild-style).
    model.share_memory()

    processes = []
    for rank in range(args.num_processes):
        p = mp.Process(target=train,
                       args=(rank, args, model, device, dataloader_kwargs))
        p.start()
        processes.append(p)
    # Wait for every worker to finish before evaluating.
    for p in processes:
        p.join()

    # Evaluate the trained (shared) model.
    test(args, model, device, dataloader_kwargs)
def model_fn(model_dir):
    """Load saved model from file.

    Args:
        model_dir: directory containing the serialized model weights.

    Returns:
        An MNISTNet in eval mode on the available device.
    """
    # FIX: compute the device locally — the original referenced a global
    # `device` that is not defined in this scope (NameError when this
    # function is imported standalone, e.g. by a serving container).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MNISTNet().to(device)
    # TODO(review): the weights in model_dir are never loaded — this returns
    # a freshly initialized network. Once the checkpoint filename written by
    # save_model() is confirmed, add e.g.:
    #   model.load_state_dict(torch.load(os.path.join(model_dir, "model.pth"),
    #                                    map_location=device))
    model.eval()
    return model
Ejemplo n.º 8
0
from thop import profile
from model import MNISTNet
import numpy as np
from pypapi import events, papi_high as high

# https://github.com/Lyken17/pytorch-OpCounter
# https://github.com/sovrasov/flops-counter.pytorch/issues/16


def train_gflops(model, epochs=1, num_train_samples=1, input_size=28):
    """Estimate total training GFLOPs for the named model.

    Counts the backward pass as 2x the forward cost, i.e. each training
    sample costs 2 * forward GFLOPs.

    Args:
        model: model name accepted by test_gflops ('shallowcnn', 'resnet18', 'vgg11').
        epochs: number of training epochs.
        num_train_samples: number of samples per epoch.
        input_size: square input resolution in pixels.

    Returns:
        Estimated training GFLOPs.
    """
    # BUG FIX: test_gflops takes (model, input_size); the original passed an
    # extra positional argument (model, 1, input_size), raising TypeError.
    gflops = epochs * num_train_samples * 2 * test_gflops(model, input_size)
    return gflops


# Measure double-precision FLOPs of one forward pass with PAPI hardware
# counters — an empirical cross-check against thop's analytical MAC count.
himodel = MNISTNet(3, 10, img_size=28).double()
high.start_counters([
    events.PAPI_DP_OPS,
])
# Single forward pass on a random 1x3x28x28 float64 input.
himodel(torch.randn(1, 3, 28, 28).double())
# Print measured GFLOPs (raw counter value / 1e9).
print(high.stop_counters()[0] / 1e9)


def test_gflops(model, input_size):
    """Build the named model for GFLOP profiling.

    NOTE(review): this snippet is truncated — the profiling call and return
    statement that presumably follow are missing from this view.

    Args:
        model: one of 'shallowcnn', 'resnet18', 'vgg11'.
        input_size: square input resolution in pixels.
    """
    assert model in ['shallowcnn', 'resnet18', 'vgg11']
    # NOTE(review): after the first branch rebinds `model` to a module, the
    # later string comparisons are dead checks — an elif chain was intended.
    if model == 'shallowcnn':
        model = MNISTNet(3, 10, img_size=input_size)
    if model == 'resnet18':
        model = models.resnet18(num_classes=10)
    if model == 'vgg11':
        model = models.vgg11_bn(num_classes=10)
Ejemplo n.º 9
0
# Ad-hoc adjustment to make this work on AffMNIST.
if args.dataset == 'AffMNIST':
    po_train_max = 7

# Fix all RNG seeds for reproducibility.
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
# BUG FIX: benchmark=True lets cuDNN auto-tune and pick algorithms
# non-deterministically, defeating deterministic=True above; it must be
# False for reproducible runs.
torch.backends.cudnn.benchmark = False
device = 'cpu'

if __name__ == '__main__':
    # Build the requested architecture (float64 so PAPI's DP-ops counter sees it).
    if args.model == 'vgg11':
        model = models.vgg11_bn(num_classes=num_classes).double().to(device)
    elif args.model == 'shallowcnn':
        model = MNISTNet(3, num_classes, img_size).double().to(device)
    elif args.model == 'resnet18':
        # BUG FIX: this branch was a bare `if`, breaking the elif chain; with
        # an unrecognized model name, `model` was left undefined and the
        # torch.save below raised NameError. Fail fast instead.
        model = models.resnet18(num_classes=num_classes).double().to(device)
    else:
        raise ValueError("unknown model: {}".format(args.model))
    torch.save(model.state_dict(), './model_init.pth')

    model.eval()
    with torch.no_grad():
        # Count double-precision FLOPs of a single forward pass.
        high.start_counters([
            events.PAPI_DP_OPS,
        ])
        x_test_batch = torch.rand(1,
                                  3,
                                  img_size,
                                  img_size,
                                  dtype=torch.float64)
        test_logit = model(x_test_batch)
Ejemplo n.º 10
0
def main():
    """Train MNISTNet on MNIST with a validation split, then plot the loss curves.

    Reads all hyperparameters (device, batch size, epochs, lr, ...) from the
    project-level `config` module.
    """
    full_train_dataset = datasets.MNIST(root='dataset/', train=True, transform=config.transform, download=True)
    train_dataloader, val_dataloader = create_validation_dataset(full_train_dataset)

    test_dataset = datasets.MNIST(root='dataset/', train=False, transform=config.transform, download=True)
    test_dataloader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS,
                                 pin_memory=config.PIN_MEMORY, shuffle=config.SHUFFLE)

    loss_function = nn.CrossEntropyLoss()

    model = MNISTNet()
    model = model.to(config.DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY)
    # NOTE(review): removed an unused torch.cuda.amp.GradScaler() — no
    # autocast/mixed-precision is used anywhere in this function.

    print("Training on:", config.DEVICE)
    for epoch in range(config.NUM_EPOCHS):
        print("Epoch:", epoch)
        train_losses = []
        loop = tqdm(train_dataloader)
        model.train()
        train_loss = 0.0
        for data, targets in loop:
            data = data.to(device=config.DEVICE)
            targets = targets.to(device=config.DEVICE)

            output = model(data)
            train_loss_batch = loss_function(output, targets)

            optimizer.zero_grad()
            train_loss_batch.backward()
            optimizer.step()
            # BUG FIX: accumulate the Python float, not the tensor — summing
            # the loss tensor kept every batch's autograd graph alive,
            # growing memory over the epoch.
            train_loss += train_loss_batch.item()
            train_losses.append(train_loss_batch.item())
        train_loss /= len(train_dataloader)
        print("Train loss:", train_loss)

        loop = tqdm(val_dataloader)
        val_losses = []
        model.eval()
        val_loss = 0.0
        acc = 0.0
        with torch.no_grad():
            for data, targets in loop:
                data = data.to(device=config.DEVICE)
                targets = targets.to(device=config.DEVICE)

                output = model(data)

                val_loss_batch = loss_function(output, targets)
                val_loss += val_loss_batch.item()
                val_losses.append(val_loss_batch.item())
                # BUG FIX: `acc` was never updated, so the printed accuracy
                # was always 0. Accumulate mean per-batch accuracy here.
                acc += (output.argmax(dim=1) == targets).float().mean().item()
        acc /= len(val_dataloader)
        val_loss /= len(val_dataloader)
        print("Validation loss:", val_loss)
        print("Accuracy:", acc)
    # NOTE(review): these lists only hold the LAST epoch's per-batch losses
    # (they are re-created each epoch) — confirm whether all-epoch curves
    # were intended before relying on these plots.
    plt.figure(0)
    plt.title("Train losses")
    plt.plot(train_losses)
    plt.show()
    plt.figure(1)
    plt.title("Validation losses")
    plt.plot(val_losses)
    plt.show()