def main():
    # # FOR DATA PARSING
    # parse_data("./data/VOC2007/","./data/VOC2012/", "./output/")

    # # FOR TRAINING
    # train(resume=True, seed=94743, opt_level='O1', resize_dims=(500,500))

    # # FOR EVALUATION AND TESTING
    model = SSD(21)
    state_dict = torch.load("./output/checkpoint.pt", map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)

    # # FOR EVALUATION ON THE TESTING SET
    # test(model, "ALLPT", resize_dims=(500,500))
    # test(model, "11PT", resize_dims=(500,500))

    # # FOR DETECTION ON A SINGLE IMAGE
    # image = "/home/numan947/MyHome/AIMLDL/Re:SSD - An Implementation of Single Shot MultiBox Detector/data/VOC2012/JPEGImages/2011_001254.jpg"
    # original_image = Image.open(image, mode='r')
    # original_image = original_image.convert('RGB')
    # detect_single_image(model, original_image, min_score=0.25, max_overlap=0.4, top_k=200, resize_dims=(500,500)).show()

    # # FOR DEMO USING CAMERA
    # camera(model, min_score=0.3, max_overlap=0.25, top_k=20, resize_dims=(500,500))

    # # FOR GENERATING SHOW CASE
    ds = PascalGeneratorDataset("./output/", split="test")
    generate(model, ds, n=50)
Exemple #2
0
def train(seed, resume=False, opt_level='O1', resize_dims=(300, 300)):
    clear_cuda()
    torch.manual_seed(seed)
    data_folder = "./output/"
    keep_difficult = True
    n_classes = len(label_map)

    other_checkpoint = data_folder+"oth_checkpoint.pt"
    model_checkpoint = data_folder+"checkpoint.pt"
    early_stopping = EarlyStopping(save_model_name=model_checkpoint, patience=3)
    pin_memory = True

    batch_size = 8
    accumulation_factor = 6
    iterations = 100000
    workers = 4*torch.cuda.device_count()
    lr = 1e-3
    decay_lr_at = [80000]
    decay_lr_to = 0.2

    early_stopper_lr_decrease = 0.5
    early_stopper_lr_decrease_count = 7

    momentum = 0.9
    weight_decay = 5e-3
    grad_clip = None
    torch.backends.cudnn.benchmark = True

    optimizer = None
    start_epoch = None

    history = pd.DataFrame()
    history.index.name="Epoch"
    history_path = "./output/HISTORY"+"_SEED_{}".format(seed)+".csv"
    model = SSD(n_classes)
    biases = list()
    not_biases = list()

    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)

    # optimizer = RAdam(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay)
    # optimizer = torch.optim.RMSprop(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay, momentum=momentum)
    # optimizer = torch.optim.SGD(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay, momentum=momentum)
    optimizer = apex.optimizers.FusedLAMB(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay)#, momentum=momentum)
    
    model = model.to(device)
    criterion = MultiBoxLoss(model.priors_cxcy).to(device)
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=1.0)

    start_epoch = 0
    

    if resume and os.path.exists(other_checkpoint):
        ocheckpt = torch.load(other_checkpoint, map_location=device)
        optimizer.load_state_dict(ocheckpt['optimizer_state'])
        start_epoch = ocheckpt['epoch'] + 1
        lr = get_lr(optimizer)
        history = pd.read_csv(history_path, index_col="Epoch")
        model.load_state_dict(torch.load(model_checkpoint, map_location=device))
        # adjust_learning_rate(optimizer, 0.1)
        lr = get_lr(optimizer)

        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

        amp.load_state_dict(ocheckpt['amp_state'])





    train_dataset = PascalDataset(data_folder, split="train", keep_difficult=keep_difficult, resize_dims=resize_dims)
    train_len = int(0.85*len(train_dataset))
    valid_len = len(train_dataset) - train_len

    train_data, valid_data = torch.utils.data.dataset.random_split(train_dataset, [train_len, valid_len])
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=train_dataset.collate_fn,
                                               num_workers=workers, pin_memory=pin_memory)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, shuffle=False, collate_fn=train_dataset.collate_fn,
                                               num_workers=workers, pin_memory=pin_memory)



    

    total_epochs = iterations//(len(train_dataset)//32)
    decay_lr_at = [it//(len(train_dataset)//32) for it in decay_lr_at]

    total_epochs = 125
    decay_lr_at = [100]


    print("Training For: {}".format(total_epochs))
    print("Decay LR at:", decay_lr_at)

    for epoch in range(start_epoch, total_epochs):
        if epoch in decay_lr_at:
            adjust_learning_rate(optimizer, decay_lr_to)
            lr*=decay_lr_to
            print("Learning Rate Adjusted")
        st = time.time()

        print("EPOCH: {}/{} -- Current LR: {}".format(epoch+1, total_epochs, lr))
        clear_cuda()
        tl,ta = train_single_epoch(epoch, model, train_loader, optimizer, criterion, plot=False, clip_grad=grad_clip, accumulation_factor=accumulation_factor)
        clear_cuda()
        vl, va = evaluate(model, valid_loader, criterion)


        print_epoch_stat(epoch, time.time()-st, history=history, train_loss=tl, valid_loss=vl)
        early_stopping(tl, model)
        other_state = {
            'epoch': epoch,
            'optimizer_state': optimizer.state_dict(),
            'amp_state': amp.state_dict()
            }

        torch.save(other_state, other_checkpoint)

        history.loc[epoch, "LR"] = lr
        history.to_csv(history_path)

        if early_stopping.early_stop:
            if early_stopper_lr_decrease_count>0:
                early_stopper_lr_decrease_count = early_stopper_lr_decrease_count-1
                adjust_learning_rate(optimizer, early_stopper_lr_decrease)
                early_stopping.early_stop = False
                early_stopping.counter = 0
                lr*=early_stopper_lr_decrease
                print("Learning Rate Adjusted")
                accumulation_factor*=2
            else:
                break
Exemple #3
0
    def train(self, vis=False):
        print("begin training....")

        if not os.path.exists('weights'):
            os.mkdir('weights')

        # Device settings
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device1 = torch.device(
            "cuda:1" if torch.cuda.is_available() else "cpu")

        if self.multigpu:
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
                self.model = nn.DataParallel(self.model)
        self.model = self.model.to(device)

        eval_model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        eval_model = eval_model.to(device1)

        for item in self.model.parameters():
            print(item.requires_grad)

        total_epoch = cfg.epoch

        criterion = DEC_loss(num_classes=cfg.num_classes,
                             variances=cfg.variance,
                             device=device)

        optimizer = optim.SGD(params=filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                              lr=cfg.init_lr,
                              momentum=0.9,
                              weight_decay=cfg.weight_decay)

        # scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.lr_decay_epoch, gamma=0.1)
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=cfg.milestones,
                                             gamma=0.1)

        print('Loading Datasets...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.train_sets,
                          transform=transforms.DEC_transforms(
                              phase='train',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        dsets_val = PASCALVOC(root=cfg.root,
                              image_sets=cfg.test_sets,
                              transform=transforms.DEC_transforms(
                                  phase='val',
                                  size=cfg.img_size,
                                  mean=cfg.means,
                                  std=cfg.std))

        dset_loaders = torch.utils.data.DataLoader(
            dsets,
            cfg.batch_size,
            num_workers=4,
            shuffle=True,
            collate_fn=detection_collate,
            pin_memory=True)
        if vis:
            viewDatasets_DEC(dset_loaders)

        train_loss_dict = []
        mAP_dict = []
        for epoch in range(total_epoch):
            print('Epoch {}/{}'.format(epoch, total_epoch - 1))
            print('-' * 10)
            for phase in ['train', 'val']:
                if phase == 'train':
                    scheduler.step()
                    self.model.train()
                    running_loss = 0.0
                    for data in dset_loaders:
                        inputs, target = data
                        inputs = inputs.to(device)
                        target = [item.to(device) for item in target]

                        optimizer.zero_grad()

                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = self.model(inputs, phase)
                            # backprop
                            loss_l, loss_c = criterion(outputs, target)
                            loss = loss_l + loss_c

                            loss.backward()
                            optimizer.step()

                        running_loss += loss.item()

                    epoch_loss = running_loss / len(dsets)
                    print('{} Loss: {:.6}'.format(epoch, epoch_loss))

                    train_loss_dict.append(epoch_loss)
                    np.savetxt('train_loss.txt', train_loss_dict, fmt='%.6f')
                    if epoch % 5 == 0:
                        torch.save(
                            self.model.state_dict(),
                            os.path.join(
                                'weights', '{:d}_{:.4f}_model.pth'.format(
                                    epoch, epoch_loss)))
                    torch.save(self.model.state_dict(),
                               os.path.join('weights', 'end_model.pth'))

                else:
                    if epoch % 5 == 0:
                        model_dict = self.model.state_dict()
                        val_dict = {k[7:]: v for k, v in model_dict.items()}
                        eval_model.load_state_dict(val_dict)
                        maps = self.eval(device1, eval_model, dsets_val)
                        mAP_dict.append(maps)
                        np.savetxt('mAP.txt', mAP_dict, fmt='%.6f')
Exemple #4
0
class DEC_Module(object):
    def __init__(self, multigpu, resume):
        self.model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        if resume is not None:
            print('Resuming training weights from {} ...'.format(resume))
            resume_dict = torch.load(resume)
            resume_dict_update = {}
            for k in resume_dict:
                if k.startswith('module') and not k.startswith('module_list'):
                    resume_dict_update[k[7:]] = resume_dict[k]
                else:
                    resume_dict_update[k] = resume_dict[k]
            self.model.load_state_dict(resume_dict_update)
        else:
            resnet = "resnet101"
            print('Resuming weights from {} ...'.format(resnet))
            pre_trained_dict = model_zoo.load_url(model_urls[resnet])
            model_dict = self.model.state_dict()
            updated_dict = {
                k: v
                for k, v in pre_trained_dict.items() if k in model_dict
            }
            model_dict.update(updated_dict)
            self.model.load_state_dict(model_dict)

        self.multigpu = multigpu

    def train(self, vis=False):
        print("begin training....")

        if not os.path.exists('weights'):
            os.mkdir('weights')

        # Device settings
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device1 = torch.device(
            "cuda:1" if torch.cuda.is_available() else "cpu")

        if self.multigpu:
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
                self.model = nn.DataParallel(self.model)
        self.model = self.model.to(device)

        eval_model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        eval_model = eval_model.to(device1)

        for item in self.model.parameters():
            print(item.requires_grad)

        total_epoch = cfg.epoch

        criterion = DEC_loss(num_classes=cfg.num_classes,
                             variances=cfg.variance,
                             device=device)

        optimizer = optim.SGD(params=filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                              lr=cfg.init_lr,
                              momentum=0.9,
                              weight_decay=cfg.weight_decay)

        # scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.lr_decay_epoch, gamma=0.1)
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=cfg.milestones,
                                             gamma=0.1)

        print('Loading Datasets...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.train_sets,
                          transform=transforms.DEC_transforms(
                              phase='train',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        dsets_val = PASCALVOC(root=cfg.root,
                              image_sets=cfg.test_sets,
                              transform=transforms.DEC_transforms(
                                  phase='val',
                                  size=cfg.img_size,
                                  mean=cfg.means,
                                  std=cfg.std))

        dset_loaders = torch.utils.data.DataLoader(
            dsets,
            cfg.batch_size,
            num_workers=4,
            shuffle=True,
            collate_fn=detection_collate,
            pin_memory=True)
        if vis:
            viewDatasets_DEC(dset_loaders)

        train_loss_dict = []
        mAP_dict = []
        for epoch in range(total_epoch):
            print('Epoch {}/{}'.format(epoch, total_epoch - 1))
            print('-' * 10)
            for phase in ['train', 'val']:
                if phase == 'train':
                    scheduler.step()
                    self.model.train()
                    running_loss = 0.0
                    for data in dset_loaders:
                        inputs, target = data
                        inputs = inputs.to(device)
                        target = [item.to(device) for item in target]

                        optimizer.zero_grad()

                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = self.model(inputs, phase)
                            # backprop
                            loss_l, loss_c = criterion(outputs, target)
                            loss = loss_l + loss_c

                            loss.backward()
                            optimizer.step()

                        running_loss += loss.item()

                    epoch_loss = running_loss / len(dsets)
                    print('{} Loss: {:.6}'.format(epoch, epoch_loss))

                    train_loss_dict.append(epoch_loss)
                    np.savetxt('train_loss.txt', train_loss_dict, fmt='%.6f')
                    if epoch % 5 == 0:
                        torch.save(
                            self.model.state_dict(),
                            os.path.join(
                                'weights', '{:d}_{:.4f}_model.pth'.format(
                                    epoch, epoch_loss)))
                    torch.save(self.model.state_dict(),
                               os.path.join('weights', 'end_model.pth'))

                else:
                    if epoch % 5 == 0:
                        model_dict = self.model.state_dict()
                        val_dict = {k[7:]: v for k, v in model_dict.items()}
                        eval_model.load_state_dict(val_dict)
                        maps = self.eval(device1, eval_model, dsets_val)
                        mAP_dict.append(maps)
                        np.savetxt('mAP.txt', mAP_dict, fmt='%.6f')

    def test(self):
        print('testing, evaluation mode...')
        self.model.eval()

        print('loading data...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.test_sets,
                          transform=transforms.DEC_transforms(
                              phase='val',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(device)

        num_imgs = len(dsets)
        test_timer = Timer()
        cv2.namedWindow('img')
        for i in range(num_imgs):
            print('testing {}...'.format(dsets.img_ids[i]))
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            test_timer.tic()
            detections = self.model(x, 'test')
            detect_time = test_timer.toc(average=False)
            print('test time: {}'.format(detect_time))
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                if j:
                    boxes = dets[:, 1:]
                    boxes[:, 0] *= h
                    boxes[:, 1] *= w
                    boxes[:, 2] *= h
                    boxes[:, 3] *= w
                    scores = dets[:, 0].cpu().numpy()
                    for box, score in zip(boxes, scores):
                        y1, x1, y2, x2 = box
                        y1 = int(y1)
                        x1 = int(x1)
                        y2 = int(y2)
                        x2 = int(x2)

                        cv2.rectangle(ori_img, (x1, y1), (x2, y2), (0, 255, 0),
                                      2, 2)
                        cv2.putText(ori_img,
                                    cfg.VOC_CLASSES[int(j)] + "%.2f" % score,
                                    (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX,
                                    0.6, (255, 0, 255))
            cv2.imshow('img', ori_img)
            k = cv2.waitKey(0)
            if k & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                exit()
        cv2.destroyAllWindows()
        exit()

    def eval(self, device, eval_model, dsets):
        # print('evaluation mode...')
        # self.model.eval()
        eval_model.eval()
        output_dir = cfg.output_dir
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        else:
            shutil.rmtree(output_dir)
            os.mkdir(output_dir)

        num_imgs = len(dsets)
        total_time = 0

        det_file = os.path.join(output_dir, 'detections.pkl')
        # print('Detecting bounding boxes...')
        all_boxes = [[[] for _ in range(num_imgs)]
                     for _ in range(cfg.num_classes)]

        _t = {'im_detect': Timer(), 'misc': Timer()}

        for i in range(num_imgs):
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            _t['im_detect'].tic()
            detections = eval_model(x, 'test')
            detect_time = _t['im_detect'].toc(average=False)

            # ignore the background boxes
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                boxes = dets[:, 1:]
                boxes[:, 0] *= h
                boxes[:, 1] *= w
                boxes[:, 2] *= h
                boxes[:, 3] *= w
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                all_boxes[j][i] = cls_dets

            # print('img-detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_imgs, detect_time))
            total_time += detect_time

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
            f.close()

        print('Saving the results...')
        for cls_ind, cls in enumerate(cfg.labelmap):
            # print('Writing {:s} VOC results file'.format(cls))
            filename = dec_eval.get_voc_results_file_template('test', cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(dsets.img_ids):
                    dets = all_boxes[cls_ind + 1][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                index[1], dets[k, -1], dets[k, 0] + 1,
                                dets[k, 1] + 1, dets[k, 2] + 1,
                                dets[k, 3] + 1))

        # print('Evaluating detections....')
        print('average time is {}'.format(float(total_time) / num_imgs))
        maps = dec_eval.do_python_eval(output_dir=output_dir, use_07=True)
        return maps

    def eval_single(self):
        print('evaluation mode...')
        self.model.eval()

        print('loading data...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.test_sets,
                          transform=transforms.DEC_transforms(
                              phase='val',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(device)

        output_dir = cfg.output_dir
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        else:
            shutil.rmtree(output_dir)
            os.mkdir(output_dir)

        num_imgs = len(dsets)

        det_file = os.path.join(output_dir, 'detections.pkl')
        print('Detecting bounding boxes...')
        all_boxes = [[[] for _ in range(num_imgs)]
                     for _ in range(cfg.num_classes)]

        _t = {'im_detect': Timer(), 'misc': Timer()}
        total_time = 0
        for i in range(num_imgs):
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            _t['im_detect'].tic()
            detections = self.model(x, 'test')
            detect_time = _t['im_detect'].toc(average=False)
            total_time += detect_time
            # ignore the background boxes
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                boxes = dets[:, 1:]
                boxes[:, 0] *= h
                boxes[:, 1] *= w
                boxes[:, 2] *= h
                boxes[:, 3] *= w
                boxes[:, 0] = np.maximum(0., boxes[:, 0])
                boxes[:, 1] = np.maximum(0., boxes[:, 1])
                boxes[:, 2] = np.minimum(h, boxes[:, 2])
                boxes[:, 3] = np.minimum(w, boxes[:, 3])
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                all_boxes[j][i] = cls_dets

            print('img-detect: {:d}/{:d} {:.3f}s'.format(
                i + 1, num_imgs, detect_time))

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
            f.close()
        print('average time is {}'.format(float(total_time) / num_imgs))
        print('Saving the results...')
        for cls_ind, cls in enumerate(cfg.labelmap):
            print('Writing {:s} VOC results file'.format(cls))
            filename = dec_eval.get_voc_results_file_template('test', cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(dsets.img_ids):
                    dets = all_boxes[cls_ind + 1][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                index[1], dets[k, -1], dets[k, 0] + 1,
                                dets[k, 1] + 1, dets[k, 2] + 1,
                                dets[k, 3] + 1))

        print('Evaluating detections....')

        dec_eval.do_python_eval(output_dir=output_dir, use_07=True)
Exemple #5
0
def train():
    ssd_cfg = {
        'num_classes': 21,  # 背景クラスを含めた合計クラス数
        'input_size': 300,  # 画像の入力サイズ
        'bbox_aspect_num': [4, 6, 6, 6, 4, 4],  # 出力するDBoxのアスペクト比の種類
        'feature_maps': [38, 19, 10, 5, 3, 1],  # 各sourceの画像サイズ
        'pix_sizes': [8, 16, 32, 64, 100, 300],  # DBOXの大きさを決める
        'min_sizes': [30, 60, 111, 162, 213, 264],  # DBOXの大きさを決める
        'max_sizes': [60, 111, 162, 213, 264, 315],  # DBOXの大きさを決める
        'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    }
    
    # SSDネットワークモデル
    net = SSD(phase="train", cfg=ssd_cfg)

    root_dir  = "./data/VOCdevkit/VOC2012/"
    train_dataset, val_dataset = load_data(root_dir)
    
    # SSDの初期の重みを設定
    # ssdのvgg部分に重みをロードする
    #vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
    #net.vgg.load_state_dict(vgg_weights)
    
    # ssdのその他のネットワークの重みはHeの初期値で初期化
    
    
    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:  # バイアス項がある場合
                nn.init.constant_(m.bias, 0.0)
    
    
    # Heの初期値を適用
    net.extras.apply(weights_init)
    net.loc.apply(weights_init)
    net.conf.apply(weights_init)

    # 損失関数の設定
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)
    
    # 最適化手法の設定
    optimizer = optim.SGD(net.parameters(), lr=1e-3,
                          momentum=0.9, weight_decay=5e-4)
    num_epochs = 10

    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32,
                                         shuffle=False, num_workers=1, collate_fn=od_collate_fn)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32,
                                         shuffle=False, num_workers=1, collate_fn=od_collate_fn)

    print("使用デバイス:", device)

    # ネットワークをGPUへ
    net.to(device)

    # ネットワークがある程度固定であれば、高速化させる
    torch.backends.cudnn.benchmark = True

    # イテレーションカウンタをセット
    iteration = 1
    epoch_train_loss = 0.0  # epochの損失和
    epoch_val_loss = 0.0  # epochの損失和
    logs = []

    # epochのループ
    for epoch in range(num_epochs+1):

        # 開始時刻を保存
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # epochごとの訓練と検証のループ
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # モデルを訓練モードに
                print('(train)')
            else:
                if((epoch+1) % 10 == 0):
                    net.eval()   # モデルを検証モードに
                    print('-------------')
                    print('(val)')
                else:
                    # 検証は10回に1回だけ行う
                    continue

            # データローダーからminibatchずつ取り出すループ
            for images, targets in train_loader:

                # GPUが使えるならGPUにデータを送る
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # リストの各要素のテンソルをGPUへ

                # optimizerを初期化
                optimizer.zero_grad()

                iteration += 1
                # 順伝搬(forward)計算
                with torch.set_grad_enabled(phase == 'train'):
                    # 順伝搬(forward)計算
                    outputs = net(images)

                    # 損失の計算
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()  # 勾配の計算

                        # 勾配が大きくなりすぎると計算が不安定になるので、clipで最大でも勾配2.0に留める
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # パラメータ更新

                        if (iteration % 100 == 0):  # 100iterに1度、lossを表示
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('イテレーション {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()

            for images, targets in val_loader:
                with torch.set_grad_enabled(phase == 'valid'):
                    # 順伝搬(forward)計算
                    outputs = net(images)

                # GPUが使えるならGPUにデータを送る
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # リストの各要素のテンソルをGPUへ