Example #1
    def step_epoch(self):
        '''
        train one epoch
        '''
        lr = -1
        for i, (imgs, boxes, labels, frame_labels, locs,
                scales) in enumerate(self.loader):
            # update the learning rate from the schedule, if one is configured
            if self.lr_func is not None:
                lr = self.lr_func(self.step)
                for param_group in self.opt.param_groups:
                    param_group['lr'] = lr
            if i == 0:
                batch_size = int(imgs.shape[0])
            time_start = time.time()
            self.opt.zero_grad()
            # forward pass: the network returns box losses and frame-level losses
            temp, loss_frame = self.net(imgs, locs, labels, boxes,
                                        frame_labels)
            loss = (get_loss(temp) + get_loss(loss_frame)) / 2
            loss.backward()
            # optional gradient clipping before the optimizer step
            if self.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(self.net.parameters(),
                                               self.grad_clip)
            self.opt.step()
            maxmem = int(
                torch.cuda.max_memory_allocated(device=self.device[0]) / 1024 /
                1024)
            time_end = time.time()
            totaltime = int((time_end - time_start) * 1000)
            print('total_step:%d: epoch:%d, step:%d/%d, loss:%f, maxMem:%dMB, time:%dms, lr:%f' % \
                (self.step, self.epoch, i*batch_size, len(self.dataset), loss, maxmem, totaltime, lr))
            self.step += 1
        self.epoch += 1
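
The helper get_loss used above is not shown on this page. Below is a minimal sketch of what it might look like, assuming the network returns either a single loss tensor or a collection of per-branch loss tensors that only need to be reduced to one scalar; the real helper in these repositories may differ.

import torch

def get_loss(temp):
    # Hypothetical reduction helper (an assumption, not the original code):
    # collapse whatever the network returned into a single scalar loss.
    if torch.is_tensor(temp):
        return temp.mean()
    if isinstance(temp, dict):
        return sum(v.mean() for v in temp.values())
    return sum(t.mean() for t in temp)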
Example #2
    def step_epoch(self):
        '''
        train one epoch
        '''
        lr = -1
        # wrap the loader in a tqdm progress bar (needs: from tqdm import tqdm)
        progressbar = tqdm(enumerate(self.loader), total=len(self.loader))
        for i, (imgs, boxes, labels, locs, scales) in progressbar:
            if self.lr_func is not None:
                lr = self.lr_func(self.step)
                for param_group in self.opt.param_groups:
                    param_group['lr'] = lr
            if i == 0:
                batch_size = int(imgs.shape[0])
            time_start = time.time()
            self.opt.zero_grad()
            temp = self.net(imgs, locs, labels, boxes)
            loss = get_loss(temp)
            loss.backward()
            if self.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), self.grad_clip)
            self.opt.step()
            maxmem = int(torch.cuda.max_memory_allocated(device=self.device[0]) / 1024 / 1024)
            time_end = time.time()
            totaltime = int((time_end - time_start) * 1000)
            # descriptionStr = ("total_step:%d: epoch:%d, step:%d/%d, loss:%f, maxMem:%dMB, time:%dms, lr:%f" % (self.step, self.epoch, i*batch_size, len(self.dataset), loss, maxmem, totaltime, lr))
            progressbar.set_description("epoch: %d, loss: %f, lr: %f" % (self.epoch, loss, lr))

            # write loss and learning-rate scalars to TensorBoard every 10 iterations
            if self.tb_writer and i % 10 == 0:
                totalStep = self.epoch * len(self.dataset) + i * batch_size
                self.tb_writer.add_scalar('training/loss', loss, totalStep)
                self.tb_writer.add_scalar('training/learning rate', lr, totalStep)

            self.step += 1
        self.epoch += 1
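
self.lr_func maps the global step to a learning rate, but its definition is not included in these examples. One possible shape for it is sketched below, assuming a linear warmup followed by step decay; this is purely illustrative, and the schedules actually used by these trainers are not shown.

def make_lr_func(base_lr, warmup_steps=500, milestones=(60000, 80000), gamma=0.1):
    # Hypothetical factory for self.lr_func: linear warmup to base_lr,
    # then multiply by gamma at each milestone step.
    def lr_func(step):
        if step < warmup_steps:
            return base_lr * (step + 1) / warmup_steps
        factor = 1.0
        for milestone in milestones:
            if step >= milestone:
                factor *= gamma
        return base_lr * factor
    return lr_func

With something like this in place, setting self.lr_func = make_lr_func(0.01) would produce the per-step lr updates seen at the top of each loop.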
Example #3
    def step_epoch(self, writer):
        '''
        train one epoch
        '''
        lr = -1

        for i, (img, bbox, label, loc, scale) in enumerate(self.loader):
            if self.lr_func is not None:
                lr = self.lr_func(self.step)
                for param_group in self.opt.param_groups:
                    param_group['lr'] = lr

            if i == 0:
                batch_size = int(img.shape[0])
            time_start = time.time()
            self.opt.zero_grad()
            temp = self.net(img, loc, label, bbox)
            loss = get_loss(temp)
            loss.backward()
            if self.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(self.net.parameters(),
                                               self.grad_clip)
            self.opt.step()
            # self.sch.step()
            # lr = self.sch.get_lr()[-1]
            maxmem = int(
                torch.cuda.max_memory_allocated(device=self.device[0]) / 1024 /
                1024)
            time_end = time.time()
            totaltime = int((time_end - time_start) * 1000)
            # log the batch loss to TensorBoard at the current global step
            writer.add_scalar('loss', loss, self.step)
            print('total_step:%d: epoch:%d, step:%d/%d, loss:%f, maxMem:%dMB, time:%dms, lr:%f' % \
                (self.step, self.epoch, i*batch_size, len(self.dataset), loss, maxmem, totaltime, lr))
            self.step += 1
        self.epoch += 1
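
Unlike the first two examples, this variant receives the TensorBoard writer from the caller. A minimal driver sketch is shown below, assuming a trainer object that exposes step_epoch as above; the names trainer, num_epochs and the log directory are illustrative, not taken from the original code.

from torch.utils.tensorboard import SummaryWriter

def train(trainer, num_epochs=12, log_dir='runs/example'):
    # `trainer` is assumed to be the object that owns step_epoch() above.
    writer = SummaryWriter(log_dir=log_dir)
    for _ in range(num_epochs):
        trainer.step_epoch(writer)
    writer.close()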
Example #4

# Run warmup
WARM_UP_ITERS = 500
WARM_UP_FACTOR = 1.0 / 3.0
if cfg['freeze_bn']:
    net.module.backbone.freeze_bn()
for i, (img, bbox, label, scale, oh, ow) in enumerate(loader_train):
    # linearly ramp the lr multiplier from WARM_UP_FACTOR at i=0 up to 1.0 at i=WARM_UP_ITERS
    alpha = float(i) / WARM_UP_ITERS
    warmup_factor = WARM_UP_FACTOR * (1.0 - alpha) + alpha
    for param_group in opt.param_groups:
        param_group['lr'] = lr * warmup_factor
    time_start = time.time()
    opt.zero_grad()
    temp = net(img, label, bbox)
    loss = get_loss(temp)
    loss.backward()
    clip = cfg['grad_clip']
    torch.nn.utils.clip_grad_norm_(net.parameters(), clip)
    opt.step()
    maxmem = int(torch.cuda.max_memory_allocated(device=cfg['device'][0]) / 1024 / 1024)
    time_end = time.time()
    totaltime = int((time_end - time_start) * 1000)
    print('warmup: step:%d/%d, lr:%f, loss:%f, maxMem:%dMB, time:%dms' % \
                (i, WARM_UP_ITERS, lr * warmup_factor, loss, maxmem, totaltime))
    if i >= WARM_UP_ITERS:
        break
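
# Sanity check of the warmup interpolation above (WARM_UP_FACTOR = 1/3, 500 iters),
# written as comments so the script flow is not interrupted:
#   i = 0   -> warmup_factor = 1/3, so training starts at lr / 3
#   i = 250 -> warmup_factor = (1/3) * 0.5 + 0.5 = 2/3
#   i = 500 -> warmup_factor = 1.0, the full lr, and the loop breaks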


# Run epoch
epoch = 0