Example #1
    def validation(self, val_loader):

        self.model.eval()
        summary_loss = AverageMeter()
        final_scores = RocAucMeter()
        t = time.time()

        for step, (images, targets) in enumerate(val_loader):

            with torch.no_grad():
                batch_size = images.shape[0]
                outputs = self.model(images)
                loss = self.criterion(outputs, targets)
                # final_scores.update can fail for some batches (e.g. when only
                # one class has been seen); log it occasionally and keep going
                try:
                    final_scores.update(targets, outputs)
                except Exception:
                    if step % (self.config.verbose_step * 2) == 0:
                        xm.master_print("final_scores update failed...")
                summary_loss.update(loss.detach().item(), batch_size)

            if self.config.verbose:
                if step % (self.config.verbose_step * 2) == 0:
                    xm.master_print(f"::: Valid Step({step}/{len(val_loader)}) | Loss: {summary_loss.avg:.4f} | AUC: {final_scores.avg:.4f} | Time: {int((time.time() - t))}s")

        return summary_loss, final_scores
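
These loops lean on two metric helpers, AverageMeter and RocAucMeter, whose definitions are not shown. Below is a minimal sketch of what they are assumed to look like; the real implementations may differ, and in particular RocAucMeter is assumed to wrap sklearn's roc_auc_score around a single-logit binary head.

import numpy as np
import torch
from sklearn.metrics import roc_auc_score

class AverageMeter:
    """Running average of a scalar (here: the loss), weighted by batch size."""
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class RocAucMeter:
    """Accumulates targets/predictions and exposes a running ROC AUC as .avg."""
    def __init__(self):
        self.y_true = np.array([], dtype=np.float64)
        self.y_pred = np.array([], dtype=np.float64)
        self.avg = 0.0

    def update(self, targets, outputs):
        y_true = targets.detach().cpu().numpy().reshape(-1)
        y_pred = torch.sigmoid(outputs.detach()).cpu().numpy().reshape(-1)
        self.y_true = np.concatenate([self.y_true, y_true])
        self.y_pred = np.concatenate([self.y_pred, y_pred])
        # raises ValueError while only one class has been seen
        self.avg = roc_auc_score(self.y_true, self.y_pred)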
Example #2
    def validation(self, val_loader):
        self.model.eval()
        summary_loss = AverageMeter()
        final_scores = RocAucMeter()
        t = time.time()
        for step, (images, targets) in enumerate(val_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:

                    print(
                        f"::: Valid Step({step}/{len(val_loader)}) | Loss: {summary_loss.avg:.4f} | AUC: {final_scores.avg:.4f} | Time: {int((time.time() - t))}s"
                    )  # , end='\r')

                    # print(
                    #     f'Val Step {step}/{len(val_loader)}, ' + \
                    #     f'summary_loss: {summary_loss.avg:.5f}, final_score: {final_scores.avg:.5f}, ' + \
                    #     f'time: {(time.time() - t):.5f}') #, end='\r'
                    # )
            with torch.no_grad():
                targets = targets.to(self.device)
                batch_size = images.shape[0]
                images = images.to(self.device).float()
                outputs = self.model(images)
                loss = self.criterion(outputs, targets)
                try:
                    final_scores.update(targets, outputs)
                except Exception:
                    # AUC can be undefined for this batch (e.g. a single class)
                    # print("outputs: ", list(outputs.data.cpu().numpy())[:10])
                    pass
                summary_loss.update(loss.detach().item(), batch_size)

        return summary_loss, final_scores
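
The try/except around final_scores.update in both validation loops guards against batches (typically the first ones) for which ROC AUC is undefined. Assuming the meter delegates to sklearn's roc_auc_score, the failure mode looks like this:

import numpy as np
from sklearn.metrics import roc_auc_score

y_pred = np.array([0.2, 0.7, 0.4])

# only one class seen so far -> AUC is undefined and sklearn raises ValueError
try:
    roc_auc_score(np.array([0.0, 0.0, 0.0]), y_pred)
except ValueError as err:
    print("final_scores update failed...", err)

# once both classes are present the score is well defined
print(roc_auc_score(np.array([0.0, 1.0, 0.0]), y_pred))  # 1.0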
Example #3
    def train_one_epoch(self, train_loader):

        self.model.train()
        summary_loss = AverageMeter()
        final_scores = RocAucMeter()
        t = time.time()

        for step, (images, targets) in enumerate(train_loader):

            t0 = time.time()
            batch_size = images.shape[0]
            outputs = self.model(images)

            self.optimizer.zero_grad()
            loss = self.criterion(outputs, targets)
            loss.backward()  # compute and sum gradients on params
            #torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=global_config.CLIP_GRAD_NORM)

            xm.optimizer_step(self.optimizer)
            if self.config.step_scheduler:
                self.scheduler.step()

            try:
                final_scores.update(targets, outputs)
            except Exception:
                # AUC can be undefined for this batch (e.g. a single class)
                # xm.master_print("outputs: ", list(outputs.data.cpu().numpy())[:10])
                pass
            summary_loss.update(loss.detach().item(), batch_size)

            if self.config.verbose:
                if step % self.config.verbose_step == 0:

                    t1 = time.time()
                    effNet_lr = np.format_float_scientific(
                        self.optimizer.param_groups[0]['lr'],
                        unique=False,
                        precision=1)
                    # assumes the last param group holds the head's LR
                    # (same value as group 0 when only one group is used)
                    head_lr = np.format_float_scientific(
                        self.optimizer.param_groups[-1]['lr'],
                        unique=False,
                        precision=1)
                    xm.master_print(
                        f":::({str(step).rjust(4, ' ')}/{len(train_loader)}) | Loss: {summary_loss.avg:.4f} | AUC: {final_scores.avg:.5f} | LR: {effNet_lr}/{head_lr} | BTime: {t1-t0 :.2f}s | ETime: {int((t1-t0)*(len(train_loader)-step)//60)}m"
                    )

        return summary_loss, final_scores
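
Example #3 is the TPU variant: xm.optimizer_step and xm.master_print come from torch_xla, and the loop never moves tensors to a device because it expects a per-core loader. A hedged sketch of the wiring this assumes follows; build_model, TPUFitter, TrainConfig and train_dataset are placeholders, not part of the original code.

import torch
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp

def _mp_fn(rank, flags):
    # one process per TPU core; xm.xla_device() resolves to this core
    device = xm.xla_device()
    model = build_model().to(device)  # placeholder model factory
    fitter = TPUFitter(model=model, device=device, config=TrainConfig)

    sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=16, sampler=sampler, num_workers=2)

    for epoch in range(TrainConfig.n_epochs):
        # ParallelLoader pushes batches onto the XLA device in the background,
        # which is why train_one_epoch() does not call .to(device) itself
        para_loader = pl.ParallelLoader(train_loader, [device])
        fitter.train_one_epoch(para_loader.per_device_loader(device))

xmp.spawn(_mp_fn, args=({},), nprocs=8, start_method='fork')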
Example #4
    def train_one_epoch(self, train_loader):
        self.model.train()
        summary_loss = AverageMeter()
        final_scores = RocAucMeter()
        t = time.time()
        for step, (images, targets) in enumerate(train_loader):

            t0 = time.time()
            targets = targets.to(self.device)
            images = images.to(self.device).float()
            batch_size = images.shape[0]
            outputs = self.model(images)

            if global_config.ACCUMULATION_STEP > 1:
                loss = self.criterion(outputs, targets)
                # loss = loss / global_config.ACCUMULATION_STEP  # Normalize loss (if averaged)

                # APEX clip grad  # https://nvidia.github.io/apex/advanced.html#gradient-clipping
                if global_config.FP16:
                    # with apex, loss.backward() becomes scaled_loss.backward()
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()  # compute and sum gradients on params

                if (step + 1) % global_config.ACCUMULATION_STEP == 0:
                    print(f"Step: {step} accum_optimizing")
                    # clip grad btw backward() and step() # https://nvidia.github.io/apex/advanced.html#gradient-clipping
                    # if config.FP16:
                    #     torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_norm=config.CLIP_GRAD_NORM)
                    # else:
                    #     torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.CLIP_GRAD_NORM)
                    self.optimizer.step()  # apply the accumulated gradients
                    self.optimizer.zero_grad()  # clear gradients
                    if self.config.step_scheduler:
                        # scheduler.step() after optimizer.step() -> update LR
                        self.scheduler.step()

            else:
                self.optimizer.zero_grad()
                loss = self.criterion(outputs, targets)

                # APEX clip grad  # https://nvidia.github.io/apex/advanced.html#gradient-clipping
                if global_config.FP16:
                    # with apex, loss.backward() becomes scaled_loss.backward()
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(self.optimizer),
                        max_norm=global_config.CLIP_GRAD_NORM)
                else:
                    loss.backward()  # compute and sum gradients on params
                    torch.nn.utils.clip_grad_norm_(
                        self.model.parameters(),
                        max_norm=global_config.CLIP_GRAD_NORM)

                self.optimizer.step()
                if self.config.step_scheduler:
                    self.scheduler.step()

            try:
                final_scores.update(targets, outputs)
            except Exception:
                # AUC can be undefined for this batch (e.g. a single class)
                # print("outputs: ", list(outputs.data.cpu().numpy())[:10])
                pass

            summary_loss.update(loss.detach().item(), batch_size)

            if self.config.verbose:
                if step % self.config.verbose_step == 0:

                    t1 = time.time()
                    effNet_lr = np.format_float_scientific(
                        self.optimizer.param_groups[0]['lr'],
                        unique=False,
                        precision=1)
                    # assumes the last param group holds the head's LR
                    # (same value as group 0 when only one group is used)
                    head_lr = np.format_float_scientific(
                        self.optimizer.param_groups[-1]['lr'],
                        unique=False,
                        precision=1)
                    print(
                        f":::({str(step).rjust(4, ' ')}/{len(train_loader)}) | Loss: {summary_loss.avg:.4f} | AUC: {final_scores.avg:.5f} | LR: {effNet_lr}/{head_lr} | BTime: {t1-t0 :.2f}s | ETime: {int((t1-t0)*(len(train_loader)-step)//60)}m"
                    )  #, end='\r')

        return summary_loss, final_scores
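
The FP16 branch in Example #4 uses NVIDIA apex's (now-deprecated) AMP API; amp.scale_loss and amp.master_params only work after the model and optimizer have been registered with amp.initialize. Here is a minimal sketch of that setup, assuming a hypothetical build_model factory and the same global_config flags as above.

import torch
from apex import amp  # NVIDIA apex, the pre-torch.cuda.amp mixed-precision API

model = build_model().cuda()  # placeholder model factory
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

if global_config.FP16:
    # 'O1' = mixed precision with dynamic loss scaling; after this call,
    # amp.scale_loss(loss, optimizer) and amp.master_params(optimizer)
    # as used in train_one_epoch() become valid
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")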