Example #1
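All of the examples below assume module-level imports and globals that these scraped snippets omit. A minimal reconstruction (a sketch, assuming the loss helpers are defined as after Example #1 and that `optimizer`, `model`, and `backbone` are created by the surrounding training script):

import time

import torch
from torch.autograd import Variable  # legacy wrapper; a no-op in modern PyTorch
from tqdm import tqdm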
def fit_one_epoch(net,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
    total_r_loss = 0
    total_c_loss = 0
    total_loss = 0
    val_loss = 0
    start_time = time.time()

    net.train()
    with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break

            with torch.no_grad():
                if cuda:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)).cuda() for ann in batch]
                else:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in batch]

            batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

            optimizer.zero_grad()

            # The resnet50 backbone returns a single (hm, wh, offset) head;
            # other backbones return one prediction dict per stack, and the
            # losses are averaged over the stacks.
            if backbone == "resnet50":
                hm, wh, offset = net(batch_images)
                c_loss = focal_loss(hm, batch_hms)
                wh_loss = 0.1*reg_l1_loss(wh, batch_whs, batch_reg_masks)
                off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)
                
                loss = c_loss + wh_loss + off_loss

                total_loss += loss.item()
                total_c_loss += c_loss.item()
                total_r_loss += wh_loss.item() + off_loss.item()
            else:
                outputs = net(batch_images)
                loss = 0
                c_loss_all = 0
                r_loss_all = 0
                index = 0
                for output in outputs:
                    hm, wh, offset = output["hm"].sigmoid(), output["wh"], output["reg"]
                    c_loss = focal_loss(hm, batch_hms)
                    wh_loss = 0.1*reg_l1_loss(wh, batch_whs, batch_reg_masks)
                    off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                    loss += c_loss + wh_loss + off_loss

                    c_loss_all += c_loss
                    r_loss_all += wh_loss + off_loss
                    index += 1
                total_loss += loss.item()/index
                total_c_loss += c_loss_all.item()/index
                total_r_loss += r_loss_all.item()/index
            loss.backward()
            optimizer.step()
            
            waste_time = time.time() - start_time
            
            pbar.set_postfix(**{'total_r_loss'  : total_r_loss / (iteration + 1), 
                                'total_c_loss'  : total_c_loss / (iteration + 1),
                                'lr'            : get_lr(optimizer),
                                's/step'        : waste_time})
            pbar.update(1)

            start_time = time.time()

    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            with torch.no_grad():
                if cuda:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)).cuda() for ann in batch]
                else:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in batch]

                batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

                if backbone == "resnet50":
                    hm, wh, offset = net(batch_images)
                    c_loss = focal_loss(hm, batch_hms)
                    wh_loss = 0.1*reg_l1_loss(wh, batch_whs, batch_reg_masks)
                    off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)
                    loss = c_loss + wh_loss + off_loss
                    val_loss += loss.item()
                else:
                    outputs = net(batch_images)
                    index = 0
                    loss = 0
                    for output in outputs:
                        hm, wh, offset = output["hm"].sigmoid(), output["wh"], output["reg"]
                        c_loss = focal_loss(hm, batch_hms)
                        wh_loss = 0.1*reg_l1_loss(wh, batch_whs, batch_reg_masks)
                        off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)
                        loss += c_loss + wh_loss + off_loss
                        index += 1
                    val_loss += loss.item()/index


            pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
            pbar.update(1)
            
    print('Finish Validation')
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))

    print('Saving state, iter:', str(epoch+1))
    torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
    return val_loss/(epoch_size_val+1)
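Every variant calls `focal_loss`, `reg_l1_loss`, and `get_lr` without defining them. A minimal sketch of CenterNet-style implementations (the channels-last target layout and the (B, H, W) mask shape are assumptions about the dataloader, not taken from the snippets):

import torch
import torch.nn.functional as F

def focal_loss(pred, target):
    # Modified focal loss from the CenterNet paper (alpha=2, beta=4):
    # positives are cells where the heatmap target equals 1.
    pos_inds = target.eq(1).float()
    neg_inds = target.lt(1).float()
    neg_weights = torch.pow(1 - target, 4)

    pred = torch.clamp(pred, 1e-6, 1 - 1e-6)
    pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
    neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds

    num_pos = pos_inds.sum()
    if num_pos == 0:
        return -neg_loss.sum()
    return -(pos_loss.sum() + neg_loss.sum()) / num_pos

def reg_l1_loss(pred, target, mask):
    # Masked L1 on wh/offset: only positions that contain an object count.
    pred = pred.permute(0, 2, 3, 1)              # (B, C, H, W) -> (B, H, W, C)
    expand_mask = mask.unsqueeze(-1).expand_as(pred)
    loss = F.l1_loss(pred * expand_mask, target * expand_mask, reduction='sum')
    return loss / (mask.sum() + 1e-4)

def get_lr(optimizer):
    # Learning rate of the first parameter group, for progress display.
    for param_group in optimizer.param_groups:
        return param_group['lr']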
Example #2
def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
    total_r_loss = 0
    total_c_loss = 0
    total_loss = 0
    val_loss = 0


    net.train()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            start_time = time.time()
            if iteration >= epoch_size:
                break
            with torch.no_grad():
                if cuda:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)).cuda() for ann in batch]
                else:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in batch]

            batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

            optimizer.zero_grad()

            hm, wh, offset = net(batch_images)
            c_loss = focal_loss(hm, batch_hms)
            wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
            off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

            loss = c_loss + wh_loss + off_loss

            total_loss += loss.item()
            total_c_loss += c_loss.item()
            total_r_loss += wh_loss.item() + off_loss.item()

            loss.backward()
            optimizer.step()

            waste_time = time.time() - start_time
            pbar.set_postfix(**{'total_r_loss': total_r_loss / (iteration + 1),
                                'total_c_loss': total_c_loss / (iteration + 1),
                                'lr': get_lr(optimizer),
                                's/step': waste_time})
            pbar.update(1)


    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            with torch.no_grad():
                if cuda:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)).cuda() for ann in batch]
                else:
                    batch = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in batch]

                batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch


                hm, wh, offset = net(batch_images)
                c_loss = focal_loss(hm, batch_hms)
                wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)
                loss = c_loss + wh_loss + off_loss
                val_loss += loss.item()


            pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
            pbar.update(1)


    # tensorboardX
    writer.add_scalars('loss', {'train': total_loss / (epoch_size + 1), 'val': val_loss / (epoch_size_val + 1)},
                       epoch)
    writer.add_scalar('lr', get_lr(optimizer), epoch)
    writer.flush()

    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    # log.yaml
    # total_loss and val_loss are accumulated via loss.item(), so they are
    # already Python floats; calling .item() on them would raise AttributeError.
    avg_train_loss = total_loss / (epoch_size + 1)
    avg_val_loss = val_loss / (epoch_size_val + 1)
    log['epoch_number'] += 1
    log['Epoch%03d' % (epoch + 1)] = [avg_train_loss, avg_val_loss]
    if log['best_val_loss'] < 0 or avg_val_loss < log['best_val_loss']:
        log['best_val_loss'] = avg_val_loss
        torch.save(model.state_dict(), 'logs/best.pth')
    with open('logs/log.yaml', 'w', encoding='utf-8') as f:
        yaml.dump(log, f)

    torch.save(model.state_dict(), 'logs/last.pth')

    return val_loss / (epoch_size_val + 1)
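Example #2 also writes TensorBoard scalars and a YAML log, so it assumes a module-level `writer` and `log` dict. A plausible setup, inferred from the `best_val_loss < 0` check above (the initial values are an assumption):

import yaml
from tensorboardX import SummaryWriter

writer = SummaryWriter(log_dir='logs')
log = {'epoch_number': 0, 'best_val_loss': -1.0}  # negative marks "no best yet"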
Example #3
def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Epoch,
                  cuda):
    total_r_loss = 0
    total_c_loss = 0
    total_loss = 0
    val_loss = 0
    start_time = time.time()

    net.train()
    with tqdm(total=epoch_size,
              desc='Epoch {}/{}'.format((epoch + 1), Epoch),
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break

            with torch.no_grad():
                if cuda:
                    batch = [
                        Variable(
                            torch.from_numpy(ann).type(
                                torch.FloatTensor)).cuda() for ann in batch
                    ]
                else:
                    batch = [
                        Variable(
                            torch.from_numpy(ann).type(torch.FloatTensor))
                        for ann in batch
                    ]

            batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

            optimizer.zero_grad()

            hm, wh, offset = net(batch_images)
            c_loss = focal_loss(hm, batch_hms)
            wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
            off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

            loss = c_loss + wh_loss + off_loss

            total_loss += loss.item()
            total_c_loss += c_loss.item()
            total_r_loss += wh_loss.item() + off_loss.item()

            loss.backward()
            optimizer.step()

            waste_time = time.time() - start_time

            pbar.set_postfix(
                **{
                    'Total_Loss': total_loss / (iteration + 1),
                    'lr': get_lr(optimizer),
                    's/step': waste_time
                })
            pbar.update(1)
            start_time = time.time()

    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val,
              desc='Epoch {}/{}'.format((epoch + 1), Epoch),
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_val):
            if iteration >= epoch_size_val:
                break

            with torch.no_grad():
                if cuda:
                    batch = [
                        Variable(
                            torch.from_numpy(ann).type(
                                torch.FloatTensor)).cuda() for ann in batch
                    ]
                else:
                    batch = [
                        Variable(
                            torch.from_numpy(ann).type(torch.FloatTensor))
                        for ann in batch
                    ]

                batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

                hm, wh, offset = net(batch_images)
                c_loss = focal_loss(hm, batch_hms)
                wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                loss = c_loss + wh_loss + off_loss

                val_loss += loss.item()

            pbar.set_postfix(**{'Val_Loss': val_loss / (iteration + 1)})
            pbar.update(1)

    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    torch.save(
        model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1), val_loss /
         (epoch_size_val + 1)))

    return val_loss / (epoch_size_val + 1)
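For context, a minimal sketch of the epoch loop that typically drives the three short variants above (`Init_Epoch` and `lr_scheduler` are assumptions about the surrounding training script, not part of the snippets):

for epoch in range(Init_Epoch, Epoch):
    val_loss = fit_one_epoch(net, epoch, epoch_size, epoch_size_val,
                             gen, gen_val, Epoch, cuda)
    lr_scheduler.step()

The longer variant below instead threads the optimizer, fp16 scaler, and save settings through its parameters rather than relying on module-level globals.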
def fit_one_epoch(model_train,
                  model,
                  loss_history,
                  eval_callback,
                  optimizer,
                  epoch,
                  epoch_step,
                  epoch_step_val,
                  gen,
                  gen_val,
                  Epoch,
                  cuda,
                  fp16,
                  scaler,
                  backbone,
                  save_period,
                  save_dir,
                  local_rank=0):
    total_r_loss = 0
    total_c_loss = 0
    total_loss = 0
    val_loss = 0

    if local_rank == 0:
        print('Start Train')
        pbar = tqdm(total=epoch_step,
                    desc=f'Epoch {epoch + 1}/{Epoch}',
                    postfix=dict,
                    mininterval=0.3)
    model_train.train()
    for iteration, batch in enumerate(gen):
        if iteration >= epoch_step:
            break
        with torch.no_grad():
            if cuda:
                batch = [ann.cuda(local_rank) for ann in batch]
        batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

        #----------------------#
        #   Zero the gradients
        #----------------------#
        optimizer.zero_grad()
        if not fp16:
            if backbone == "resnet50":
                hm, wh, offset = model_train(batch_images)
                c_loss = focal_loss(hm, batch_hms)
                wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                loss = c_loss + wh_loss + off_loss

                total_loss += loss.item()
                total_c_loss += c_loss.item()
                total_r_loss += wh_loss.item() + off_loss.item()
            else:
                outputs = model_train(batch_images)
                loss = 0
                c_loss_all = 0
                r_loss_all = 0
                index = 0
                for output in outputs:
                    hm, wh, offset = output["hm"].sigmoid(), output["wh"], output["reg"]
                    c_loss = focal_loss(hm, batch_hms)
                    wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                    off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                    loss += c_loss + wh_loss + off_loss

                    c_loss_all += c_loss
                    r_loss_all += wh_loss + off_loss
                    index += 1
                total_loss += loss.item() / index
                total_c_loss += c_loss_all.item() / index
                total_r_loss += r_loss_all.item() / index
            loss.backward()
            optimizer.step()
        else:
            from torch.cuda.amp import autocast
            with autocast():
                if backbone == "resnet50":
                    hm, wh, offset = model_train(batch_images)
                    c_loss = focal_loss(hm, batch_hms)
                    wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                    off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                    loss = c_loss + wh_loss + off_loss

                    total_loss += loss.item()
                    total_c_loss += c_loss.item()
                    total_r_loss += wh_loss.item() + off_loss.item()
                else:
                    outputs = model_train(batch_images)
                    loss = 0
                    c_loss_all = 0
                    r_loss_all = 0
                    index = 0
                    for output in outputs:
                        hm, wh, offset = output["hm"].sigmoid(), output["wh"], output["reg"]
                        c_loss = focal_loss(hm, batch_hms)
                        wh_loss = 0.1 * reg_l1_loss(wh, batch_whs,
                                                    batch_reg_masks)
                        off_loss = reg_l1_loss(offset, batch_regs,
                                               batch_reg_masks)

                        loss += c_loss + wh_loss + off_loss

                        c_loss_all += c_loss
                        r_loss_all += wh_loss + off_loss
                        index += 1
                    total_loss += loss.item() / index
                    total_c_loss += c_loss_all.item() / index
                    total_r_loss += r_loss_all.item() / index

            #----------------------#
            #   Backward pass with loss scaling
            #----------------------#
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        if local_rank == 0:
            pbar.set_postfix(
                **{
                    'total_r_loss': total_r_loss / (iteration + 1),
                    'total_c_loss': total_c_loss / (iteration + 1),
                    'lr': get_lr(optimizer)
                })
            pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Train')
        print('Start Validation')
        pbar = tqdm(total=epoch_step_val,
                    desc=f'Epoch {epoch + 1}/{Epoch}',
                    postfix=dict,
                    mininterval=0.3)

    model_train.eval()
    for iteration, batch in enumerate(gen_val):
        if iteration >= epoch_step_val:
            break

        with torch.no_grad():
            if cuda:
                batch = [ann.cuda(local_rank) for ann in batch]
            batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks = batch

            if backbone == "resnet50":
                hm, wh, offset = model_train(batch_images)
                c_loss = focal_loss(hm, batch_hms)
                wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                loss = c_loss + wh_loss + off_loss

                val_loss += loss.item()
            else:
                outputs = model_train(batch_images)
                index = 0
                loss = 0
                for output in outputs:
                    hm, wh, offset = output["hm"].sigmoid(), output["wh"], output["reg"]
                    c_loss = focal_loss(hm, batch_hms)
                    wh_loss = 0.1 * reg_l1_loss(wh, batch_whs, batch_reg_masks)
                    off_loss = reg_l1_loss(offset, batch_regs, batch_reg_masks)

                    loss += c_loss + wh_loss + off_loss
                    index += 1
                val_loss += loss.item() / index

            if local_rank == 0:
                pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
                pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Validation')
        loss_history.append_loss(epoch + 1, total_loss / epoch_step,
                                 val_loss / epoch_step_val)
        eval_callback.on_epoch_end(epoch + 1, model_train)
        print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
        print('Total Loss: %.3f || Val Loss: %.3f ' %
              (total_loss / epoch_step, val_loss / epoch_step_val))

        #-----------------------------------------------#
        #   Save the weights
        #-----------------------------------------------#
        if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
            torch.save(
                model.state_dict(),
                os.path.join(
                    save_dir, 'ep%03d-loss%.3f-val_loss%.3f.pth' %
                    (epoch + 1, total_loss / epoch_step,
                     val_loss / epoch_step_val)))

        if len(loss_history.val_loss) <= 1 or (
                val_loss / epoch_step_val) <= min(loss_history.val_loss):
            print('Save best model to best_epoch_weights.pth')
            torch.save(model.state_dict(),
                       os.path.join(save_dir, "best_epoch_weights.pth"))

        torch.save(model.state_dict(),
                   os.path.join(save_dir, "last_epoch_weights.pth"))
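The fp16 branch of this last variant relies on a GradScaler created once, outside the epoch loop. A minimal sketch of that setup (the `fp16` flag mirrors the function's parameter):

from torch.cuda.amp import GradScaler

scaler = GradScaler() if fp16 else None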