Ejemplo n.º 1
0
def main():
    """Train and evaluate AlexNet on CIFAR-10/CIFAR-100 with Horovod.

    The function takes no arguments. It reads its configuration from names
    that are not defined inside it (``args``, ``hvd``, ``world_size``,
    ``local_rank``, ``use_cuda``, ``state``) and delegates the per-epoch work
    to ``adjust_learning_rate``, ``train`` and ``test``.

    Each epoch prints one summary line with the rank, learning rate, losses
    and accuracies.
    """
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root=args.dataroot, train=True, download=True, transform=transform_train)
    # Shard the training set across the Horovod workers.
    sampler = torch.utils.data.distributed.DistributedSampler(trainset, num_replicas=hvd.size(), rank=hvd.rank())
    # NOTE(review): the per-rank batch is args.train_batch * world_size even
    # though the sampler already shards the data - confirm this is intended.
    trainloader = data.DataLoader(dataset=trainset, batch_size=args.train_batch * world_size, shuffle=False, sampler=sampler)

    testset = dataloader(root=args.dataroot, train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch * world_size, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format("Alexnet"))
    model = AlexNet(num_classes=num_classes)

    device = torch.device('cuda', local_rank)
    model = model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], output_device=local_rank)
    print('Model on cuda:%d' % local_rank)
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    # Wrap the optimizer with Horovod.
    optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
    # Broadcast the initial parameters from rank 0 to all other workers.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        # Without this call the DistributedSampler reuses the same shuffle
        # order in every epoch.
        sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
        print('Rank:{} Epoch[{}/{}]: LR: {:.3f}, Train loss: {:.5f}, Test loss: {:.5f}, Train acc: {:.2f}, Test acc: {:.2f}.'.format(local_rank,epoch+1, args.epochs, state['lr'], 
        train_loss, test_loss, train_acc, test_acc))
Ejemplo n.º 2
0
def main():
    """Train and evaluate AlexNet on CIFAR-10 or CIFAR-100 on one CUDA device.

    Configuration comes from names that are not defined in this function
    (``args``, ``use_cuda``, ``state``); the per-epoch work is delegated to
    ``adjust_learning_rate``, ``train`` and ``test``. One summary line is
    printed per epoch.
    """
    first_epoch = args.start_epoch  # epoch 0, or the epoch of the last checkpoint

    # ---- data ---------------------------------------------------------
    print('==> Preparing dataset %s' % args.dataset)
    # The same normalisation step is used by both pipelines.
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    # Anything other than 'cifar10' falls back to CIFAR-100.
    is_cifar10 = args.dataset == 'cifar10'
    dataset_cls = datasets.CIFAR10 if is_cifar10 else datasets.CIFAR100
    num_classes = 10 if is_cifar10 else 100

    trainset = dataset_cls(root=args.dataroot, train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(dataset=trainset, batch_size=args.train_batch, shuffle=False)

    testset = dataset_cls(root=args.dataroot, train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # ---- model --------------------------------------------------------
    print("==> creating model '{}'".format("Alexnet"))
    model = AlexNet(num_classes=num_classes).cuda()
    print('Model on cuda')
    cudnn.benchmark = True
    param_count = sum(p.numel() for p in model.parameters())
    print('    Total params: %.2fM' % (param_count/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # ---- train / validate --------------------------------------------
    summary = 'Epoch[{}/{}]: LR: {:.3f}, Train loss: {:.5f}, Test loss: {:.5f}, Train acc: {:.2f}, Test acc: {:.2f}.'
    for epoch in range(first_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
        print(summary.format(epoch+1, args.epochs, state['lr'],
                             train_loss, test_loss, train_acc, test_acc))
def main():
    """Train an AlexNet and a PositionNet one after the other.

    Both models share one ``Dataloaders`` object. Each gets its own AdamW
    optimizer and its own ``MyTrainer``, tagged "Classification" and
    "Regression" respectively.
    """
    image_dir = "/home/gonken2019/Desktop/subProject/dataset45"  # alt: "/home/gonken2019/Desktop/subProject/images"
    labels_dir = "/home/gonken2019/Desktop/subProject/poseData45/"  # alt: "/home/gonken2019/Desktop/subProject/labels/"
    # When changing this, also change the first argument on lines 85 and 113
    # of submodel.py.
    batch_size = 256
    num_epoch = 20  # 20-30 at most

    cuda_available = torch.cuda.is_available()
    device = "cuda" if cuda_available else "cpu"
    if cuda_available:
        print("[Info] Use CUDA")

    classifier = AlexNet()
    regressor = PositionNet()
    dataloaders = Dataloaders(image_dir, labels_dir, batch_size)

    # Previously tried:
    # torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    # The loss turning into NaN is common; when it happens, switch the
    # optimizer or change the learning rate.
    classifier_opt = torch.optim.AdamW(classifier.parameters(), lr=0.00001, weight_decay=5e-4)
    regressor_opt = torch.optim.AdamW(regressor.parameters(), lr=0.0001, weight_decay=5e-4)

    classifier_trainer = MyTrainer(classifier, dataloaders, classifier_opt, device, "Classification")
    regressor_trainer = MyTrainer(regressor, dataloaders, regressor_opt, device, "Regression")

    for trainer in (classifier_trainer, regressor_trainer):
        trainer.run(num_epoch)
Ejemplo n.º 4
0
def main():
    """Train a single AlexNet with AdamW through ``MyTrainer``."""
    image_dir = "/home/gonken2019/Desktop/subProject/images"
    labels_dir = "/home/gonken2019/Desktop/subProject/labels/"

    # Experiment log kept from earlier runs:
    #   BATCH_SIZE = 10 -> RuntimeError: size mismatch, m1: [10 x 12544],
    #     m2: [9216 x 4096] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
    #     (9216 x 4096 = 37748736; 37748736 / 12544 = 3009.306122449; 4096 = 2**12)
    #   BATCH_SIZE = 8  -> RuntimeError: size mismatch, m1: [8 x 12544],
    #     m2: [9216 x 4096] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
    batch_size = 512

    num_epoch = 50  # original note: 20-30 at most

    cuda_available = torch.cuda.is_available()
    device = "cuda" if cuda_available else "cpu"
    if cuda_available:
        print("[Info] Use CUDA")

    net = AlexNet()
    dataloaders = Dataloaders(image_dir, labels_dir, batch_size)

    # Previously tried:
    # torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    # The loss turning into NaN is common; when it happens, switch the
    # optimizer or change the learning rate.
    adamw = torch.optim.AdamW(net.parameters(), lr=0.001, weight_decay=5e-4)

    MyTrainer(net, dataloaders, adamw, device).run(num_epoch)
Ejemplo n.º 5
0
def main():
    """Train a fresh AlexNet with Adam and checkpoint it after every epoch.

    Uses names that are not defined in this function: ``dataset``,
    ``dataset_loader``, ``device``, ``LEARNING_RATE``, ``NUM_EPOCHS``,
    ``BATCH_SIZE``, ``MODEL_PATH`` and ``MODEL_NAME``. The whole model object
    (not just its state dict) is written with ``torch.save``.
    """
    print(f"Train numbers:{len(dataset)}")

    # Start from a freshly constructed network. To resume instead, load the
    # saved model with torch.load (use map_location='cpu' when no GPU is
    # available) and move it to `device`.
    net = AlexNet().to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    adam = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE, weight_decay=1e-8)

    checkpoint_file = MODEL_PATH + '/' + MODEL_NAME
    batches_seen = 0  # running count across all epochs
    for epoch in range(1, NUM_EPOCHS + 1):
        net.train()

        epoch_started = time.time()
        for images, labels in dataset_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass.
            loss = criterion(net(images), labels)

            # Backward pass and parameter update.
            adam.zero_grad()
            loss.backward()
            adam.step()

            batches_seen += 1
            print(f"Step [{batches_seen * BATCH_SIZE}/{NUM_EPOCHS * len(dataset)}], "
                  f"Loss: {loss.item():.8f}.")

        elapsed = time.time() - epoch_started
        print(f"Epoch [{epoch}/{NUM_EPOCHS}], " f"time: {elapsed} sec!")

        # Overwrite the checkpoint with the model as of this epoch.
        torch.save(net, checkpoint_file)
    print(f"Model save to {checkpoint_file}.")
Ejemplo n.º 6
0
def _main(data_dir, batch_size, learning_rate, n_epoch):
    """Build the data loaders and an AlexNet, train it, and save the result.

    Args:
        data_dir: forwarded to ``create_dataloader``.
        batch_size: forwarded to ``create_dataloader``.
        learning_rate: learning rate for the Adam optimizer.
        n_epoch: forwarded to ``train_model``.

    The object returned by ``train_model`` is written to ``model.pt``.
    """
    loaders = create_dataloader(data_dir, batch_size)

    # Use the first CUDA device when one is available, otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    net = AlexNet().to(device)

    # Every parameter of the network is optimised.
    adam = optim.Adam(net.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    trained = train_model(net, loaders, loss_fn, adam, device, n_epoch)
    torch.save(trained, 'model.pt')
Ejemplo n.º 7
0
def main():
    """Train AlexNet on an ImageFolder dataset and keep the best weights.

    Steps: build the train/val pipelines, write the index-to-class mapping
    to a JSON file, train for a fixed number of epochs with Adam, validate
    after every epoch, and save the state dict whenever validation accuracy
    improves.
    """
    # Select the device to run on.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Preprocessing; both pipelines end with the same normalisation step.
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Root directory that holds the "train" and "val" sub-folders.
    image_path = '/home/xulei/数据集大本营/5_flower_data/flower_data'
    # Abort with an error if the directory does not exist.
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    train_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "train"), transform=train_transform)
    train_num = len(train_dataset)  # number of training files

    # class_to_idx maps class name -> index; invert it to index -> class
    # name and write it as JSON (indent=4 puts each key on its own line).
    index_to_class = {
        idx: name for name, idx in train_dataset.class_to_idx.items()
    }
    with open('class_idices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_class, indent=4))

    batch_size = 128
    # Number of DataLoader workers: capped by CPU count, batch size and 8.
    nw = min(os.cpu_count(), batch_size if batch_size > 1 else 0, 8)
    print("using {} dataloader workers every process".format(nw))
    train_loader = datas.DataLoader(train_dataset,
                                    batch_size,
                                    shuffle=True,
                                    num_workers=nw)

    validate_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "val"), transform=val_transform)
    val_num = len(validate_dataset)
    validate_loader = datas.DataLoader(validate_dataset,
                                       batch_size,
                                       shuffle=False,
                                       num_workers=nw)
    print("using {} images for trainning, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5).to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.00004)

    num_epochs = 20
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)  # batches per epoch
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)  # progress bar
        for images, labels in train_bar:
            optimizer.zero_grad()
            loss = loss_function(net(images.to(device)), labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, num_epochs, loss)

        # ---- validation pass ----
        net.eval()
        correct = 0.0  # number of correct predictions in this epoch
        with torch.no_grad():
            for val_images, val_labels in tqdm(validate_loader):
                logits = net(val_images.to(device))
                _, predicted = torch.max(logits, dim=1)
                correct += (predicted == val_labels.to(device)).sum().item()

        val_accurate = correct / val_num
        print('\n[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
    print("Finshed Training")
Ejemplo n.º 8
0
def train():
    """Train AlexNet with Adam and save its state dict after every epoch.

    Reads ``opt``, ``MODEL_PATH``, ``device`` and ``train_dataloader`` from
    outside this function. If ``MODEL_PATH`` already exists its weights are
    loaded before training starts.
    """
    # Best effort: any OSError from creating the directory is ignored.
    try:
        os.makedirs(opt.checkpoints_dir)
    except OSError:
        pass

    model = AlexNet(num_classes=opt.num_classes)
    if torch.cuda.device_count() > 1:
        # More than one GPU visible: wrap the network in DataParallel.
        model = torch.nn.parallel.DataParallel(model)

    if os.path.exists(MODEL_PATH):
        # map_location keeps the loaded tensors on the storage they are
        # deserialised to; the model is moved to `device` right after.
        saved_weights = torch.load(MODEL_PATH,
                                   map_location=lambda storage, loc: storage)
        model.load_state_dict(saved_weights)
    model.to(device)

    # Loss function and Adam optimizer.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)

    for epoch in range(opt.epochs):
        print(f"\nBegin Training Epoch {epoch + 1}")
        # Fresh running meters for every epoch.
        losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

        for batch_idx, (inputs, targets) in enumerate(train_dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_len = inputs.size(0)

            # Forward pass.
            output = model(inputs)
            loss = criterion(output, targets)

            # NOTE(review): topk=(1, 2) is requested but the second value is
            # stored and printed as "Prec@5" - confirm which is intended.
            prec1, prec5 = accuracy(output, targets, topk=(1, 2))
            losses.update(loss.item(), batch_len)
            top1.update(prec1, batch_len)
            top5.update(prec5, batch_len)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # end="\r" makes each status line overwrite the previous one.
            print(
                f"Epoch [{epoch + 1}] [{batch_idx + 1}/{len(train_dataloader)}]\t"
                f"Loss {loss.item():.4f}\t"
                f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t"
                f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})",
                end="\r")

        # Save the weights at the end of every epoch.
        torch.save(model.state_dict(), MODEL_PATH)
Ejemplo n.º 9
0
        (0.4914,0.4822, 0.4465),
        (0.2023, 0.1994, 0.2010))
    ])


cifar_train = CIFAR10(root=data_dir, download=False, train=True, transform=transform)
cifar_test = CIFAR10(root=data_dir, download=False, train=False, transform=transform)

# setup model
model = AlexNet().to(device)

# loss function
loss_fn = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate,
    weight_decay=weight_decay)

# initialize our entropy based select query
select_fn = EntropySelectQuery(model, cifar_train)


# main training loop
unlabeled, labeled = [i for i in range(len(cifar_train))], []
for loop in range(num_loops):

    if loop == 0:
        # randomly select <init_samples> many samples
        selected = random.sample(unlabeled, init_samples) 
    else:
        # select based on entropy
Ejemplo n.º 10
0
def main():
    """Train AlexNet on an ImageFolder dataset and keep the best weights.

    Builds the train/val pipelines, writes the index-to-class mapping to
    ``class_indices.json``, trains for 10 epochs with Adam, validates after
    each epoch and saves the state dict whenever validation accuracy
    improves.

    The reported training loss is the mean over all batches of the epoch.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # pick the device
    print("using {} device.".format(device))

    data_transform = {  # preprocessing
        "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop to 224x224
                                     transforms.RandomHorizontalFlip(),  # random horizontal flip
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "dog_data")  # data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)  # number of training images

    # class_to_idx maps class name -> index; swap keys and values so the
    # JSON file maps index -> class name.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images fot validation.".format(train_num,
                                                                           val_num))

    net = AlexNet(num_classes=5, init_weights=True)  # 5 output classes

    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # Adam over all network parameters; the learning rate was hand-tuned.
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'  # where the best weights are stored
    best_acc = 0.0
    train_steps = len(train_loader)  # batches per epoch
    for epoch in range(10):
        # train
        net.train()  # training mode
        running_loss = 0.0  # sum of batch losses in this epoch
        t1 = time.perf_counter()  # epoch timer
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()  # clear old gradients
            outputs = net(images.to(device))  # forward pass
            loss = loss_function(outputs, labels.to(device))
            loss.backward()  # backward pass
            optimizer.step()  # update parameters

            # print statistics
            running_loss += loss.item()
            # print train process
            rate = (step + 1) / train_steps  # fraction of the epoch done
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter()-t1)

        # validate
        net.eval()  # evaluation mode
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]  # index of the largest logit is the class
                acc += (predict_y == val_labels.to(device)).sum().item()  # count correct predictions
            val_accurate = acc / val_num
            if val_accurate > best_acc:  # better than the best so far
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)  # save the weights
            # Divide by the number of batches. The zero-based `step` index is
            # one less than that and is 0 when there is only a single batch.
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / train_steps, val_accurate))

    print('Finished Training')
Ejemplo n.º 11
0
def train(args):
    """Train AlexNet (100 classes) and record validation metrics.

    Args:
        args: object providing the attributes read below: ``device_id``,
            ``useLRN``, ``useDropOut``, ``lr``, ``batch_size``,
            ``num_workers``, ``useAug`` and ``epoch``.

    Side effects:
        * ``best_model.pth`` - state dict of the epoch with the lowest mean
          validation loss.
        * ``model_checkpoint.pth`` - full checkpoint written every 10 epochs.
        * ``exp_result.txt`` - one summary line appended when training ends.
    """
    device = torch.device(f"cuda:{args.device_id}")
    model = AlexNet(n_cls=100, useLRN=args.useLRN, useDropOut=args.useDropOut)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)

    train_loader, valid_loader = getLoaders(split="train",
                                            batch_size=args.batch_size,
                                            num_workers=args.num_workers,
                                            aug=args.useAug)

    train_loss_arr = []   # one entry per training iteration
    valid_loss_arr = []   # one entry per epoch
    valid_acc_arr = []    # one entry per epoch
    valid_top5_arr = []   # one entry per epoch
    n_iter = 0
    best_loss = float('inf')
    best_top1_acc = 0
    best_top5_acc = 0
    for ep in range(args.epoch):
        model.train()
        for img, label in tqdm(train_loader, total=len(train_loader)):
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            pred = model(img)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()
            train_loss_arr.append(loss.item())
            n_iter += 1

        model.eval()
        ep_valid_loss_arr = []
        ep_acc_arr = []
        ep_top5_arr = []
        with torch.no_grad():
            for img, label in tqdm(valid_loader, total=len(valid_loader)):
                img, label = img.to(device), label.to(device)
                pred = model(img)
                loss = criterion(pred, label)
                # Convert once and reuse for both top-k computations.
                pred_np = pred.detach().cpu().numpy()
                label_np = label.detach().cpu().numpy()
                ep_acc_arr.append(
                    utils.top_k_acc(k=1, pred=pred_np, label=label_np))
                ep_top5_arr.append(
                    utils.top_k_acc(k=5, pred=pred_np, label=label_np))
                ep_valid_loss_arr.append(loss.item())

        valid_loss = np.mean(ep_valid_loss_arr)
        valid_acc = np.mean(ep_acc_arr)
        valid_top5 = np.mean(ep_top5_arr)
        # Mean over the iterations of the epoch that just finished.
        train_loss = np.mean(train_loss_arr[-len(train_loader):])

        valid_loss_arr.append(valid_loss)
        # Record the accuracy history too; otherwise the checkpoint below
        # would always store empty "valid_acc" / "valid_top5" lists.
        valid_acc_arr.append(valid_acc)
        valid_top5_arr.append(valid_top5)

        if valid_loss < best_loss:
            best_loss = valid_loss
            best_top1_acc = valid_acc
            best_top5_acc = valid_top5
            # Move to the CPU before saving, then back to the device.
            model.cpu()
            torch.save(model.state_dict(), "best_model.pth")
            model.to(device)
        if (ep + 1) % 10 == 0:
            model.cpu()
            torch.save(
                {
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "train_loss": train_loss_arr,
                    "valid_loss": valid_loss_arr,
                    "valid_acc": valid_acc_arr,
                    "valid_top5": valid_top5_arr,
                    "best_loss": best_loss,
                    "ep": ep,
                    "n_iter": n_iter,
                }, "model_checkpoint.pth")
            model.to(device)
        print(
            f"[{ep}, {n_iter}] train: {train_loss:.4f}, valid: {valid_loss:.4f}, acc: {valid_acc:.4f}, top5: {valid_top5:.4f}"
        )
    with open("exp_result.txt", "a+") as f:
        f.write(
            f"{args}, loss: {best_loss:.4f}, top1: {best_top1_acc*100:.1f}, top5: {best_top5_acc*100:.1f}\n"
        )
Ejemplo n.º 12
0
def main():
    """Train AlexNet on 360x360 ImageFolder data with TensorBoard logging.

    Builds the train/val pipelines, writes the index-to-class mapping to
    ``class_indices.json``, resumes from ``./log360.pth`` when that file
    exists (otherwise starts from a new 3-class AlexNet), trains with Adam
    and a StepLR schedule, saves the best state dict to ``./AlexNet.pth``
    and finally stores the loss / accuracy histories under ``./save``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("using {} device.".format(device))

    tbwriter = SummaryWriter(log_dir="./logs")

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(360),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val":
        transforms.Compose([
            # The size must be one (h, w) tuple. Passing two positional ints
            # would hand the second one to the `interpolation` parameter.
            transforms.Resize((360, 360)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(),
                                             "./DATA"))  # get data root path
    image_path = os.path.join(data_root, "male")  # data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # Invert class name -> index into index -> class name.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=2)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=nw)

    print("using {} images for training, {} images fot validation.".format(
        train_num, val_num))

    if os.path.exists("./log360.pth"):
        # NOTE(review): torch.load unpickles the file; only resume from a
        # checkpoint you trust. This expects a whole pickled model, whereas
        # the loop below saves a state dict to a different path - confirm.
        net = torch.load("./log360.pth", 'cpu')
        print("continue training")
    else:
        net = AlexNet(num_classes=3, init_weights=True)
        print("start training anew")
    # Move the network to the device in both cases; the resumed model is
    # loaded onto the CPU and the inputs below are sent to `device`.
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.98)

    epochs = 2000
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)

    trainLOSS = []  # mean training loss per epoch
    valACC = []  # validation accuracy per epoch

    for epoch in range(epochs):
        # Report the learning rate the optimizer is actually using.
        print('LR:{}'.format(optimizer.param_groups[0]['lr']))
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num

        tbwriter.add_scalar('train/loss', running_loss / train_steps, epoch)
        tbwriter.add_scalar('val/acc', val_accurate, epoch)

        trainLOSS.append(running_loss / train_steps)
        valACC.append(val_accurate)

        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        print(' ')

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

        # Advance the schedule only after this epoch's optimizer steps.
        scheduler.step()

    tbwriter.close()

    npLOSS = np.array(trainLOSS)
    npVALACC = np.array(valACC)
    # np.save does not create missing directories.
    os.makedirs('./save', exist_ok=True)
    np.save('./save/loss_epoch_{}'.format(epoch), npLOSS)
    np.save('./save/valacc_epoch_{}'.format(epoch), npVALACC)

    print('Finished Training')
Ejemplo n.º 13
0
Archivo: main.py Proyecto: yldang/MLPJ
        # create data loader
        dataloader_train = DataLoader(
            datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ToTensor(),
                normalize,
            ]), download=True),
            batch_size=BATCH_SIZE, shuffle=True,
            num_workers=4, pin_memory=True)

        print("Training dataloader created")

        # create optimizer
        optimizer = optim.SGD(
            params=filter(lambda p: p.requires_grad, alexnet.parameters()),
            lr=LR_INIT,
            momentum=MOMENTUM,
            weight_decay=LR_DECAY
        )
        print("Optimizer created")

        # multiply LR by 1 / 10 after every 30 epochs
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
        print("LR Scheduler created")

        # training
        print("Starting training...")
        total_steps = 1
        for epoch in range(NUM_EPOCHS):
            for imgs, classes in dataloader_train:
Ejemplo n.º 14
0
def main():
    """Train AlexNet on an ImageFolder dataset and keep the best model.

    Images are read from the ``train`` and ``val`` sub-folders of a fixed
    data root. The index-to-class-name mapping is written to
    ``class_indices.json``. The network is trained with Adam for a fixed
    number of epochs; after every epoch it is evaluated on the validation
    set and, whenever the validation accuracy improves, the whole model
    object is saved to ``./AlexNet.pt``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ]),
        "val":
        transforms.Compose([
            transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
            transforms.ToTensor()
        ])
    }

    data_root = "/home/zhongsy/datasets/dataset/"  # get data root path
    train_dataset = datasets.ImageFolder(root=os.path.join(data_root, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # Invert ImageFolder's {class_name: index} mapping so that a predicted
    # index can be turned back into a class name, and persist it as JSON.
    class_to_idx = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in class_to_idx.items())
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # NOTE(review): `batch_size` only feeds the worker-count heuristic below
    # and, being 1, always yields 0 workers; the loaders use their own
    # literal batch sizes (32 for training, 4 for validation).
    batch_size = 1
    # number of workers
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=32,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(
        data_root, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=2, init_weights=True)

    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    save_path = './AlexNet.pt'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data

            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Convert to a Python float once: keeps the statistics free of
            # references to the autograd graph.
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss_value)

        print("[ start val ]")
        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Index of the largest logit is the predicted class.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        # Computed once per epoch, after the loop, so it is defined even when
        # the validation loader yields no batches.
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            # Saves the full pickled module (not just the state_dict), so the
            # AlexNet class must be importable when the file is loaded.
            torch.save(net, save_path)

    print('Finished Training')
Ejemplo n.º 15
0
    # Loader for the held-out test split; order is kept fixed (no shuffling).
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)

    # Number of samples in each split.
    train_datasize = len(train_dataset)
    valid_datasize = len(valid_dataset)
    test_datasize = len(test_dataset)

    num_classes = 2

    # Two-class AlexNet with weights initialised by `weights_init`.
    # NOTE(review): the class count is repeated as a literal here instead of
    # using `num_classes`; keep the two in sync.
    model = AlexNet(num_classes=2)
    model.apply(weights_init)
    # The string literal below is a disabled alternative that would start from
    # torchvision's pretrained AlexNet and replace its last classifier layer.
    '''
    model = models.alexnet(pretrained=True)
    num_ftrs = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
    '''
    if use_gpu:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
    # Multiply the learning rate by 0.2 every 20 scheduler steps.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2)

    # `train_model` is defined outside this fragment; its return value
    # replaces `model`.
    model = train_model(model=model,
                        criterion=criterion,
                        optimizer=optimizer,
                        scheduler=exp_lr_scheduler,
                        num_epochs=80,
                        use_gpu=use_gpu)
Ejemplo n.º 16
0
def main():
    """Train AlexNet on the ``flower_data`` image folders.

    Images are read from ``./flower_data/train`` and ``./flower_data/val``
    relative to the current working directory. The index-to-class-name mapping
    is written to ``class_indices.json``. The network is trained with Adam for
    10 epochs; after every epoch it is evaluated on the validation set and the
    state dict is saved to ``./AlexNet.pth`` whenever accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val":
        transforms.Compose([
            transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./"))
    image_path = os.path.join(data_root, "flower_data")
    train_dataset = datasets.ImageFolder(root=image_path + "/train",
                                         transform=data_transform['train'])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to {index: class_name} so predictions can be mapped back to names.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=0)

    validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=True,
                                                  num_workers=0)

    net = AlexNet(num_class=5)
    print(net)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    # Number of batches per epoch; used for the progress bar and for
    # averaging the loss.
    train_steps = len(train_loader)
    for epoch in range(10):
        # train
        net.train()
        running_loss = 0.0
        t1 = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value
            # print train process as a 50-character text progress bar
            rate = (step + 1) / train_steps
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
                int(rate * 100), a, b, loss_value),
                  end="")
        print()
        print(time.perf_counter() - t1)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch

        # No gradients are tracked during validation.
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
        # Average over the number of batches. The zero-based loop index
        # `step` is one less than that, which skewed the mean and divided by
        # zero when an epoch had a single batch.
        print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

    print('Finished Training')
Ejemplo n.º 17
0
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

from model import AlexNet

# Resume from "net.pkl" when it exists, otherwise start a fresh AlexNet on the
# GPU at epoch 1. The checkpoint is read as a mapping with "model" and "epoch"
# entries.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The restored model is not moved to the GPU here; presumably
# it was saved from a CUDA model - confirm.
if os.path.exists("net.pkl"):
    pkl = torch.load("net.pkl")
    net = pkl.get("model")
    sepoch = pkl.get("epoch")
else:
    net = AlexNet().cuda()
    sepoch = 1

# Cross-entropy loss and SGD with momentum 0.9 at an initial learning rate of
# 0.01.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(params=net.parameters(), lr=1e-2, momentum=9e-1)

# Training images come from the "data/train" folder tree; every image is
# resized to 256x256, converted to a tensor and normalised per channel.
data_loader = DataLoader(dataset=ImageFolder(
    "data/train",
    transform=transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])),
                         batch_size=50,
                         shuffle=True)


def adjust_learning_rate(epoch):
    lr = 1e-2 * 1e-1**(epoch // 20)
Ejemplo n.º 18
0
import os
import sys
pardir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(pardir)
from dataset import *
from result_save_visualization import *
from model import AlexNet
import torch
import torch.optim as optim
import time

# 10-class AlexNet placed on `device`.
# NOTE(review): `device` is not defined in the visible lines; presumably it is
# provided by one of the star imports above - confirm.
net = AlexNet(num_classes=10).to(device)
# SGD with momentum and L2 weight decay.
optimizer = optim.SGD(net.parameters(),
                      lr=0.01,
                      momentum=0.9,
                      weight_decay=5e-4)
num_epochs = 100

# Per-epoch history; each key maps to a list that starts out empty.
data_dict = {
    'epoch': [],
    'time': [],
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': []
}

# Wall-clock reference taken just before the epoch loop begins.
start = time.time()
for epoch in range(num_epochs):
    # train
    net.train()
Ejemplo n.º 19
0
def _episode_forward(model, episode, k, args):
    """Run one few-shot episode through ``model`` and compute its loss.

    Args:
        model: the embedding network; called on image batches.
        episode: a ``(data, label)`` pair as yielded by the data loader.
        k: number of support ("shot") images, ``args.nway * args.kshot``.
        args: namespace providing ``nway``, ``kshot`` and ``query``.

    Returns:
        A ``(loss, logits, labels)`` tuple: the scalar loss tensor, the
        query-to-prototype distances and the query labels re-indexed against
        the sorted set of support labels.
    """
    data, label = episode

    # The first k samples form the support set, the remainder the query set.
    data_shot, data_query = data[:k], data[k:]
    label_shot, label_query = label[:k], label[k:]

    # Map the original class ids of the queries onto their position in the
    # sorted list of distinct support classes.
    # NOTE(review): the prototypes below are built from consecutive runs of
    # kshot support rows, so this assumes the sampler emits the classes in
    # ascending label order - confirm against the sampler.
    label_shot = sorted(set(label_shot.tolist()))
    labels = torch.tensor(
        [label_shot.index(query_label)
         for query_label in label_query.tolist()]).cuda()

    # Class prototypes: mean embedding over each run of kshot support rows.
    features_shot = model(data_shot.cuda())
    n_sample = int(args.query / args.nway)
    features_shot_mean = torch.zeros(args.nway,
                                     features_shot.size(1)).cuda()
    for j in range(int(args.nway)):
        start = j * args.kshot
        end = (j + 1) * args.kshot
        features_shot_mean[j] = features_shot[start:end].mean(dim=0)

    features_query = model(data_query.cuda())
    logits = square_euclidean_metric(features_query, features_shot_mean)

    # Negative log-probability of the labelled class, with the log-softmax
    # taken over negated distances (tensor reshapes kept as in the original
    # implementation).
    labels_expanded = labels.view(args.query, 1, 1)
    labels_expanded = labels_expanded.expand(args.query, args.nway, 1)
    lsoft = F.log_softmax(-logits, dim=1).view(args.kshot, n_sample, -1)
    labels_expanded = labels_expanded.view(lsoft.size())
    loss = -lsoft.gather(2, labels_expanded).squeeze().view(-1).mean()

    return loss, logits, labels


def train(args):
    """Train (or test) the few-shot classifier.

    When ``args.test_mode == 1`` only ``Test_phase`` is run. Otherwise the
    model is trained episode by episode for iterations ``args.se + 1`` up to
    ``TOTAL``; running loss/accuracy are printed every ``PRINT_FREQ``
    iterations, a validation pass runs every ``VAL_FREQ`` iterations, and the
    unwrapped model's state dict is saved under ``checkpoints/`` every
    ``SAVE_FREQ`` iterations.

    Args:
        args: namespace providing ``nway``, ``kshot``, ``query``,
            ``restore_ckpt``, ``test_mode``, ``dpath``, ``se`` and ``name``.
    """
    # the number of N way, K shot images
    k = args.nway * args.kshot

    # TODO 1.a: make your own model for few-shot classification in 'model.py'.
    model = AlexNet()
    model.cuda()
    # TODO 1.a END

    # pretrained model load
    if args.restore_ckpt is not None:
        state_dict = torch.load(args.restore_ckpt)
        model.load_state_dict(state_dict)

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    if args.test_mode == 1:
        Test_phase(model, args, k)
        # Nothing to train in test mode. Falling through would hit the
        # training loop with no data loaders defined.
        return

    # Train data loading
    dataset = Dataset(args.dpath, state='train')
    train_sampler = Train_Sampler(dataset._labels,
                                  n_way=args.nway,
                                  k_shot=args.kshot,
                                  query=args.query)
    data_loader = DataLoader(dataset=dataset,
                             batch_sampler=train_sampler,
                             num_workers=8,
                             pin_memory=True)

    # Validation data loading
    val_dataset = Dataset(args.dpath, state='val')
    val_sampler = Sampler(val_dataset._labels,
                          n_way=args.nway,
                          k_shot=args.kshot,
                          query=args.query)
    val_data_loader = DataLoader(dataset=val_dataset,
                                 batch_sampler=val_sampler,
                                 num_workers=8,
                                 pin_memory=True)

    # TODO 1.b (optional): set an optimizer or scheduler for few-shot
    # classification.
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.001,
                                momentum=0.9,
                                weight_decay=5e-4)
    # TODO 1.b (optional) END

    tl = Averager()  # save average loss
    ta = Averager()  # save average accuracy

    # training start
    print('train start')

    model.train()
    for i in range(args.se + 1, TOTAL):
        for episode in data_loader:
            optimizer.zero_grad()

            loss, logits, labels = _episode_forward(model, episode, k, args)
            acc = count_acc(logits, labels)

            tl.add(loss.item())
            ta.add(acc)

            loss.backward()
            optimizer.step()

            # Drop the references so the graph can be freed before the next
            # episode is processed.
            del loss, logits

        if (i + 1) % PRINT_FREQ == 0:
            print('train {}, loss={:.4f} acc={:.4f}'.format(
                i + 1, tl.item(), ta.item()))

            # initialize loss and accuracy mean
            tl = Averager()
            ta = Averager()

        # validation start
        if (i + 1) % VAL_FREQ == 0:
            print('validation start')
            model.eval()
            vl = Averager()  # save average loss
            va = Averager()  # save average accuracy
            with torch.no_grad():
                for _ in range(VAL_TOTAL):
                    for episode in val_data_loader:
                        loss, logits, labels = _episode_forward(
                            model, episode, k, args)
                        vl.add(loss.item())
                        va.add(count_acc(logits, labels))

            print('val accuracy mean : %.4f' % va.item())
            print('val loss mean : %.4f' % vl.item())

            # Back to training mode; otherwise every later iteration would
            # keep training with the model still in eval mode.
            model.train()

        if (i + 1) % SAVE_FREQ == 0:
            PATH = 'checkpoints/%d_%s.pth' % (i + 1, args.name)
            # Save the wrapped module's weights so the checkpoint loads into
            # a plain (non-DataParallel) model, as done at the top.
            torch.save(model.module.state_dict(), PATH)
            print('model saved, iteration : %d' % i)
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))


# 5-class AlexNet with explicit weight initialisation, moved to `device`
# (defined outside this fragment).
net = AlexNet(num_classes=5, init_weights=True)

net.to(device)
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
# lr: learning rate
optimizer = optim.Adam(net.parameters(), lr=0.0002)

# NOTE(review): absolute, machine-specific Windows path; the script will only
# be able to save on a machine that has this exact directory.
save_path = r'D:\Document\GitHub\deep-learning-for-image-processing\pytorch_classification\Test2_alexnet\AlexNet.pth'
best_acc = 0.0
epo = 20  # number of training epochs

for epoch in range(epo):
    # train
    net.train()
    running_loss = 0.0
    t1 = time.perf_counter()  # start time of this epoch's training pass
    # load one batch at a time from train_loader
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
Ejemplo n.º 21
0
# Pick a fresh output directory: start from "logs" and keep appending "_"
# until the name does not exist yet, then create it.
outf = f'logs'
while os.path.exists(outf):
    outf += '_'
os.mkdir(outf)
batch_size = 128
learning_rate = 0.01
epochs = 100
start_epoch = 1
check_point = ''  # path of a state dict to resume from; empty means none

train_loader, test_loader = load_dataset(batch_size)
model = AlexNet().to(device)
if check_point:
    model.load_state_dict(torch.load(check_point))

optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.99))
criterion = nn.CrossEntropyLoss()

# TensorBoard event files go to "<outf>/exp".
writer = SummaryWriter(outf + '/exp')
best_acc = float('-inf')
best_since_last = 0
for ep in range(start_epoch, epochs + 1):
    # Stop once `best_since_last` reaches 20; at every non-zero multiple of 8
    # call `adjust_learning_rate` with a factor of 0.5.
    # NOTE(review): `best_since_last` is not updated in the visible lines;
    # presumably it counts epochs since the last improvement - confirm.
    if best_since_last == 20: break
    elif best_since_last % 8 == 0 and best_since_last != 0:
        adjust_learning_rate(optimizer, 0.5)

    train_metrics = train(model, train_loader, optimizer, criterion, ep)
    test_metrics = test(model, test_loader, criterion)
    #     print(train_metrics)
    #     print(test_metrics)
    writer.add_scalar('train-acc', train_metrics['acc'], global_step=ep)
Ejemplo n.º 22
0
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from BatchNorm import BatchNorm
from cifar10_dataloader import get_loader
from pipeline import Pipeline

from model import AlexNet as Model
from model_bn import AlexNet_BN as Model_BN
# ---- with BN ----
# NOTE(review): no `pipeline` object is constructed in the visible lines
# before this call; presumably the batch-norm pipeline setup was lost above -
# confirm, otherwise this raises NameError.

pipeline.working()

# ---- without BN ----
# Plain AlexNet trained with SGD (momentum 0.9, weight decay 5e-2) for
# 5 epochs through the project's `Pipeline` helper.
# NOTE(review): `torch` itself is not imported in the visible import lines
# (only its submodules under aliases); confirm `import torch` exists above.
train_loader, test_loader = get_loader(batch_size=128, num_workers=1)
model = Model()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.01,
                            momentum=0.9,
                            weight_decay=5e-2)
pipeline = Pipeline(task_name='alexnet',
                    log_dir='alexnet',
                    model=model,
                    optimizer=optimizer,
                    loss_func=nn.CrossEntropyLoss(),
                    train_loader=train_loader,
                    test_loader=test_loader,
                    epochs=5,
                    cuda=True)
pipeline.working()
Ejemplo n.º 23
0
def main():
    """Train AlexNet on the five-class flower dataset.

    Images are read from ``./data_set/flower_data/{train,val}`` relative to
    the current working directory. The index-to-class-name mapping is written
    to ``class_indices.json``. The network is trained with Adam for a fixed
    number of epochs; after every epoch it is evaluated on the validation set
    and the state dict is saved to ``weights/alexnet.pth`` whenever the
    validation accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 20

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(),
                                             "."))  # get data root path
    image_path = os.path.join(data_root, "data_set",
                              "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to {index: class_name} so predictions can be mapped back to names.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # create model
    net = AlexNet(num_classes=5)

    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = 'weights/alexnet.pth'
    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Index of the largest logit is the predicted class.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
Ejemplo n.º 24
0
class Solver(object):
    """Train and evaluate an AlexNet classifier on CIFAR-10.

    Typical use is ``Solver(config).run()``: it loads the data, builds the
    model/optimizer/scheduler, then alternates training and test passes and
    saves the weights whenever test accuracy improves.

    :param config: object exposing ``lr``, ``epoch``, ``trainBatchSize``,
        ``testBatchSize`` and ``cuda`` attributes.
    """

    def __init__(self, config):
        self.model = None
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        # The members below are populated by load_model() / load_data().
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.device = None
        self.cuda = config.cuda
        self.train_loader = None
        self.test_loader = None
        self.is_board = False

    def load_data(self):
        """Create the CIFAR-10 train/test loaders (downloading if needed)."""
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.ToTensor()])
        test_transform = transforms.Compose([transforms.ToTensor()])
        train_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar',
            train=True,
            download=True,
            transform=train_transform)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_set, batch_size=self.train_batch_size, shuffle=True)
        test_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar',
            train=False,
            download=True,
            transform=test_transform)
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_set, batch_size=self.test_batch_size, shuffle=False)

    def load_model_from_pth(self, model_path):
        """Load pre-trained weights from a checkpoint into ``self.model``.

        The file must contain a dict whose ``'model'`` entry is a state dict.
        If exactly one side (checkpoint keys vs. model parameter names) uses a
        ``module.`` prefix, the checkpoint keys are rewritten to match the
        model before loading.

        :param model_path: path of the checkpoint file.
        :return: ``self.model`` with the weights loaded.
        """
        # self.device stays None until load_model() runs; torch.load accepts
        # map_location=None and then keeps the saved tensor locations.
        checkpoint = torch.load(model_path,
                                map_location=self.device)['model']

        # TODO: find out why the prefix mismatch happens in the first place.
        checkpoint_parameter_name = list(checkpoint.keys())[0]
        model_parameter_name = next(self.model.named_parameters())[0]

        is_checkpoint = checkpoint_parameter_name.startswith('module.')
        is_model = model_parameter_name.startswith('module.')

        if is_checkpoint and not is_model:
            # Strip the 'module.' prefix from the checkpoint keys.
            state_dict = OrderedDict(
                (key[len('module.'):], value)
                for key, value in checkpoint.items()
                if key.startswith('module.'))
        elif not is_checkpoint and is_model:
            # Add the 'module.' prefix so the keys match the model.
            state_dict = OrderedDict(
                ('module.' + key, value)
                for key, value in checkpoint.items()
                if not key.startswith('module.'))
        else:
            state_dict = checkpoint

        # Every branch must actually load the weights; the prefix-adding
        # branch previously built its dict and then discarded it.
        self.model.load_state_dict(state_dict)
        return self.model

    def load_model(self):
        """Select the device and build model, optimizer, scheduler and loss."""
        if self.cuda:
            self.device = torch.device('cuda:0')
            cudnn.benchmark = True
        else:
            self.device = torch.device('cpu')

        # self.model = LeNet().to(self.device)
        self.model = AlexNet().to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer,
                                                        milestones=[75, 150],
                                                        gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def train(self, writer=None):
        """Run one training epoch over ``self.train_loader``.

        :param writer: currently unused; kept for interface compatibility.
        :return: ``(summed batch loss, accuracy in [0, 1])``.
        """
        print("train:")
        self.model.train()
        train_loss = 0
        train_correct = 0
        total = 0

        for batch_num, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            # torch.max over dim 1 returns (values, indices); the indices are
            # the predicted class ids.
            prediction = torch.max(output, 1)
            total += target.size(0)

            # Count the samples whose predicted class equals the target.
            train_correct += np.sum(
                prediction[1].cpu().numpy() == target.cpu().numpy())

            progress_bar(
                batch_num, len(self.train_loader),
                'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                (train_loss / (batch_num + 1), 100. * train_correct / total,
                 train_correct, total))

        return train_loss, train_correct / total

    def test(self):
        """Evaluate on ``self.test_loader`` without tracking gradients.

        :return: ``(summed batch loss, accuracy in [0, 1], seconds elapsed)``.
        """
        print("test:")
        self.model.eval()
        test_loss = 0
        test_correct = 0
        total = 0
        start = time.time()
        with torch.no_grad():
            for batch_num, (data, target) in enumerate(self.test_loader):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                prediction = torch.max(output, 1)
                total += target.size(0)
                test_correct += np.sum(
                    prediction[1].cpu().numpy() == target.cpu().numpy())

                progress_bar(
                    batch_num, len(self.test_loader),
                    'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                    (test_loss / (batch_num + 1), 100. * test_correct / total,
                     test_correct, total))
        end = time.time()
        time_used = end - start

        return test_loss, test_correct / total, time_used

    def save(self):
        """Write the current model weights to ``./best_model_new.pkl``."""
        model_out_path = "./best_model_new.pkl"
        torch.save(self.model.state_dict(), model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))

    def run(self):
        """Train for ``self.epochs`` epochs, saving on every accuracy gain.

        The checkpoint file always holds the best-scoring weights: it is only
        written when test accuracy improves, and is not overwritten with the
        final-epoch weights.
        """
        self.load_data()
        self.load_model()
        accuracy = 0
        writer = SummaryWriter()
        try:
            for epoch in range(1, self.epochs + 1):
                # The explicit epoch argument is kept from the original code to
                # preserve its schedule; recent PyTorch releases flag this
                # calling form as deprecated.
                self.scheduler.step(epoch)
                print("\n===> epoch: %d/%d" % (epoch, self.epochs))

                self.train()
                # test() returns (loss, accuracy, elapsed seconds).
                _, test_acc, _ = self.test()

                if test_acc > accuracy:
                    accuracy = test_acc
                    self.save()
        finally:
            # Release the event-file handle even if training fails.
            writer.close()

        print("===> BEST ACC. PERFORMANCE: %.3f%%" % (accuracy * 100))
Ejemplo n.º 25
0
# Pull one batch from the iterator with the built-in next(); Python 3
# iterators (including DataLoader iterators) do not provide a .next() method.
test_image, test_label = next(test_data_iter)

# def imshow(img):
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# print(' '.join('%5s' % class_names[test_label[j].item()]
#                for j in range(len(test_label))))
# imshow(utils.make_grid(test_image))

epochs = 10
model = AlexNet(num_classes=5)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
best_acc = 0.0

for epoch in range(epochs):
    # Train model
    model.train()
    running_loss = 0.0
    t = time.perf_counter()
    for index, data in enumerate(train_data_loader):
        imgs, labels = data
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_function(outputs, labels)
        # Accumulate a detached copy: adding the graph-attached loss would
        # keep every batch's autograd graph alive for the whole epoch.
        running_loss += loss.detach()
        loss.backward()
        optimizer.step()
Ejemplo n.º 26
0
        # Switch the model to training mode before iterating the batches.
        model.train()
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # Forward pass and loss are computed inside the autocast context
            # (presumably torch.cuda.amp mixed precision -- confirm the import).
            with amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            # Backward runs on the scaled loss; the optimizer step goes through
            # the scaler, which then updates its internal scale.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_batch_loss.append(loss.item())
        # NOTE(review): train_batch_loss is initialised outside this view; if
        # it is not reset per epoch, this mean is cumulative -- confirm.
        train_losses.append(np.mean(train_batch_loss))

    # The collected mean losses are returned as a NumPy array.
    return np.array(train_losses)


if __name__=='__main__':
    # Script entry point: build the model, optimizer and loss, then train.
    model = AlexNet()
    scaler = amp.GradScaler()
    model = nn.DataParallel(model)

    # SGD lives in torch.optim (there is no nn.optim, and "SDG" was a typo).
    optimizer = torch.optim.SGD(model.parameters(), lr=LR,
                                momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    # The cross-entropy criterion class is nn.CrossEntropyLoss.
    criterion = nn.CrossEntropyLoss()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_loader = get_dataloaders(path=PATH, batch_size=BATCH_SIZE)
    train(model, criterion, optimizer, scaler, train_loader, device, EPOCHS)

Ejemplo n.º 27
0
def train(pertrained=False, resume_file=None):
    """Train AlexNet, validating every epoch and checkpointing periodically.

    Per-epoch training statistics go to ``logs/train_logs_<timestamp>.log``;
    the validation log file is handed to ``valid()``. A checkpoint is written
    under ``models/`` every 10th epoch (after epoch 0) and after the last one.

    :param pertrained: when true, build the network with
        ``alexnet(pretrained=True, ...)`` instead of a fresh ``AlexNet``.
        (The misspelled name is kept so keyword callers keep working.)
    :param resume_file: optional path of a checkpoint containing ``'epoch'``
        and ``'model_state_dict'``; if the file exists, the weights are
        restored and training starts from the stored epoch.
    """
    if pertrained:
        from model import alexnet
        net = alexnet(pretrained=True, num_classes=NUMBER_CLASSES)
    else:
        from model import AlexNet
        net = AlexNet(num_classes=NUMBER_CLASSES)
    if CUDA_AVALIABLE:
        # Batches are moved to the GPU below, so the weights must be there too.
        net = net.cuda()
    valid_precision = 0

    optimizer = optim.SGD(net.parameters(),
                          lr=LR,
                          momentum=MOMENTUM,
                          weight_decay=WEIGHT_DECAY)

    stamp = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
    # Context managers guarantee both logs are closed if training raises.
    with open("logs/train_logs_{}.log".format(stamp), "w") as train_log, \
            open("logs/valid_logs_{}.log".format(stamp), "w") as valid_log:
        train_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))
        valid_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))

        # Resume training. start_epoch defaults to 0 so it is always defined,
        # including when resume_file is given but does not exist.
        start_epoch = 0
        if resume_file:
            if os.path.isfile(resume_file):
                print(("=> loading checkpoint '{}'".format(resume_file)))
                checkpoint = torch.load(resume_file)
                # NOTE(review): checkpoints store the epoch that had just
                # finished, so resuming repeats that epoch -- confirm intent.
                start_epoch = checkpoint['epoch']
                net.load_state_dict(checkpoint['model_state_dict'])
                print(("=> loaded checkpoint '{}' (epoch {})".format(
                    resume_file, checkpoint['epoch'])))
            else:
                print(("=> no checkpoint found at '{}'".format(resume_file)))

        for epoch in range(start_epoch, EPOCHES):
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            correct = AverageMeter()
            end = time.time()

            optimizer = adjust_learning_rate(optimizer, epoch, LR, LR_steps,
                                             WEIGHT_DECAY)

            # Re-assert training mode each epoch in case valid() switched the
            # network to evaluation mode.
            net.train()

            for i_batch, sample_batched in enumerate(train_dataloader):
                # measure data loading time
                data_time.update(time.time() - end)
                inputs, labels = sample_batched
                if CUDA_AVALIABLE:
                    outputs = net.forward(inputs.cuda())
                    labels = labels.long().flatten().cuda()
                else:
                    outputs = net.forward(inputs)
                    labels = labels.long().flatten()

                outputs = outputs.reshape([-1, NUMBER_CLASSES])
                loss = criterion(outputs, labels)
                # update running statistics
                losses.update(loss.item(), inputs.size(0))
                _, predicted = torch.max(outputs.data, 1)
                # batch accuracy, weighted by batch size
                correct.update(
                    (predicted == labels.long()).sum().item() / len(labels),
                    inputs.size(0))

                optimizer.zero_grad()
                loss.backward()

                optimizer.step()
                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                if i_batch % 10 == 0:
                    print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                           'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                               epoch,
                               i_batch,
                               len(train_dataloader),
                               batch_time=batch_time,
                               data_time=data_time,
                               loss=losses,
                               top1=correct,
                               lr=optimizer.param_groups[-1]['lr'])))

            train_log.write("{:5d}\t{:.5f}\t{:.5f}\n".format(
                epoch, losses.avg, correct.avg))
            train_log.flush()

            # Validate after every epoch.
            valid_precision = valid(net, epoch, valid_log)
            # save the network
            if (epoch > 0 and epoch % 10 == 0) or epoch == EPOCHES - 1:
                save_path = os.path.join(
                    "models",
                    "{:d}_{}_{:d}_{:d}_{:.5f}.pt".format(int(time.time()),
                                                         "alexnet", epoch,
                                                         BATCHSIZE,
                                                         valid_precision))
                print("[INFO] Save weights to " + save_path)
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': net.state_dict(),
                        # Call state_dict(): the bound method itself was being
                        # stored before. Key name kept for compatibility.
                        'optimizer_state_dir': optimizer.state_dict(),
                        'loss': loss
                    }, save_path)
Ejemplo n.º 28
0
# Build and store the index -> label dictionary.
# class_to_idx maps class name -> index, e.g.
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# Swap the keys and values of flower_list.
cla_dict = dict((val, key) for key, val in flower_list.items())

# Write cla_dict to a JSON file.
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

# Training setup
net = AlexNet(num_classes=5, init_weights=True)  # build the network with 5 output classes (init_weights presumably triggers weight initialisation)
net.to(device)  # move the network to the selected device (GPU/CPU)
loss_function = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.Adam(net.parameters(), lr=0.0002)  # Adam over the network parameters with learning rate 0.0002

save_path = './AlexNet.pth'
best_acc = 0.0

for epoch in range(150):
    ########################################## train ###############################################
    net.train()  # training mode (enables Dropout layers, if the model has any)
    running_loss = 0.0  # running_loss is reset at the start of every epoch
    time_start = time.perf_counter()  # start timing this epoch

    for step, data in enumerate(train_loader, start=0):  # iterate the training set; step counts from 0
        images, labels = data  # unpack the batch into images and labels
        optimizer.zero_grad()  # clear gradients from the previous step

        outputs = net(images.to(device))  # forward pass
Ejemplo n.º 29
0
def main():
    """Train AlexNet on the flower dataset and keep the best-scoring weights.

    Builds train/val ``ImageFolder`` loaders from
    ``<cwd>/../../data_set/flower_data``, writes the index-to-class mapping to
    ``class_indices.json``, trains for 10 epochs with Adam, and saves the
    state dict to ``./AlexNet.pth`` whenever validation accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Training pipeline augments (random crop + flip); validation only resizes.
    train_tf = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    val_tf = transforms.Compose([
        transforms.Resize((224, 224)),  # must be a (224, 224) pair, not 224
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # The data set lives two directories above the working directory.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    train_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "train"), transform=train_tf)
    train_num = len(train_dataset)

    # Invert class_to_idx (name -> index) into index -> name and persist it.
    index_to_class = {
        idx: name for name, idx in train_dataset.class_to_idx.items()
    }
    with open('class_indices.json', 'w') as mapping_file:
        mapping_file.write(json.dumps(index_to_class, indent=4))

    batch_size = 32
    # Worker count: at most 8, the CPU count, or the batch size.
    worker_cap = batch_size if batch_size > 1 else 0
    nw = min(os.cpu_count(), worker_cap, 8)
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=nw)

    validate_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "val"), transform=val_tf)
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(
        validate_dataset, batch_size=4, shuffle=False, num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)

    for epoch_no in range(1, epochs + 1):
        # ---- training pass ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch_no, epochs, loss)

        # ---- validation pass ----
        net.eval()
        hits = 0.0  # number of correctly classified validation samples
        with torch.no_grad():
            for val_images, val_labels in tqdm(validate_loader):
                logits = net(val_images.to(device))
                _, predict_y = torch.max(logits, dim=1)
                hits += (predict_y == val_labels.to(device)).sum().item()

        val_accurate = hits / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch_no, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when accuracy strictly improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
Ejemplo n.º 30
0
def main():
    """Train AlexNet on the flower dataset found under the working directory.

    Builds train/val ``ImageFolder`` loaders from
    ``<cwd>/data_set/flower_data``, writes the index-to-class mapping to
    ``class_indices.json``, trains for 10 epochs with Adam, and saves the
    state dict to ``./AlexNet.pth`` whenever validation accuracy improves.
    """
    # Device used for both the network and every batch.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Pre-processing: random crop + horizontal flip for training, a plain
    # resize for validation; both end with ToTensor and Normalize.
    train_tf = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    val_tf = transforms.Compose([
        transforms.Resize((224, 224)),  # must be a (224, 224) pair, not 224
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    print(os.getcwd())

    # os.getcwd() is the process working directory, not this file's folder;
    # the data set is looked up relative to it.
    data_root = os.path.abspath(os.getcwd())
    image_path = os.path.join(data_root, "data_set", "flower_data")

    train_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "train"), transform=train_tf)
    train_num = len(train_dataset)  # number of training images

    # class_to_idx maps class name -> index; store the inverse as JSON.
    index_to_class = {
        idx: name for name, idx in train_dataset.class_to_idx.items()
    }
    with open('class_indices.json', 'w') as mapping_file:
        mapping_file.write(json.dumps(index_to_class, indent=4))

    batch_size = 32
    # Worker count: at most 8, the CPU count, or the batch size.
    worker_cap = batch_size if batch_size > 1 else 0
    nw = min(os.cpu_count(), worker_cap, 8)
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=nw)

    validate_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "val"), transform=val_tf)
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(
        validate_dataset, batch_size=batch_size, shuffle=False,
        num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # Network, loss and optimizer.
    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'  # where the best weights are written
    best_acc = 0.0  # best validation accuracy seen so far
    train_steps = len(train_loader)

    for epoch_no in range(1, epochs + 1):
        # ---- training pass ----
        net.train()
        running_loss = 0.0  # sum of batch losses for this epoch
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch_no, epochs, loss)

        # ---- validation pass (no gradient tracking) ----
        net.eval()
        hits = 0.0  # number of correctly classified validation samples
        with torch.no_grad():
            for val_images, val_labels in tqdm(validate_loader):
                logits = net(val_images.to(device))
                # Index of the largest logit along dim 1 is the prediction.
                _, predict_y = torch.max(logits, dim=1)
                hits += (predict_y == val_labels.to(device)).sum().item()

        val_accurate = hits / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch_no, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when accuracy strictly improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')