Example #1
import os
from collections import defaultdict

import numpy as np
import torch

# batch_trainer, valid_trainer, get_pedestrian_metrics, return_attr_name_list,
# save_ckpt, and time_str are helpers from the surrounding repository.

def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path, dataset):
    maximum = float(-np.inf)
    best_epoch = 0

    result_list = defaultdict()  # no default factory: behaves like a plain dict

    for i in range(epoch):

        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )

        valid_loss, valid_gt, valid_probs = valid_trainer(
            epoch=i,
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        lr_scheduler.step(metrics=valid_loss)

        train_result = get_pedestrian_metrics(train_gt, train_probs)
        valid_result = get_pedestrian_metrics(valid_gt, valid_probs)

        print(
            'Evaluation on test set,\n',
            'ma: {:.4f},  pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                valid_result.ma, np.mean(valid_result.label_pos_recall),
                np.mean(valid_result.label_neg_recall)),
            'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                valid_result.instance_acc, valid_result.instance_prec,
                valid_result.instance_recall, valid_result.instance_f1))

        # print label metrics ma
        attr_name_list = return_attr_name_list(dataset)
        for attr_name, _ma in zip(attr_name_list, valid_result.label_ma):
            print(f'{attr_name}: {_ma}')

        print(f'{time_str()}')
        print('-' * 60)

        cur_metric = valid_result.ma

        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    torch.save(result_list,
               os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
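
The lr_scheduler.step(metrics=valid_loss) call above implies a plateau-style scheduler. Below is a minimal, hypothetical wiring of this trainer; build_model, train_loader, valid_loader, and dataset are placeholders for the repo's own factories and data:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = build_model()  # hypothetical model factory
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=4)
criterion = torch.nn.BCEWithLogitsLoss()  # a typical multi-label attribute loss

best_ma, best_epoch = trainer(
    epoch=30,
    model=model,
    train_loader=train_loader,  # assumed DataLoader over the train split
    valid_loader=valid_loader,  # assumed DataLoader over the valid split
    criterion=criterion,
    optimizer=optimizer,
    lr_scheduler=lr_scheduler,
    path='exp/ckpt.pth',
    dataset=dataset,  # assumed to provide the attribute name list
)
print(f'best mA {best_ma:.4f} at epoch {best_epoch}')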
Example #2
# Imports as in Example #1; test_alm is a further helper from the repository.

def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path, dataset):
    maximum = float(-np.inf)
    best_epoch = 0

    result_list = defaultdict()

    for i in range(epoch):
        # train for one epoch
        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )
        # eval on the train set (disabled):
        # test_alm(train_loader, model, attr_num=dataset.attr_num, description=dataset.attr_id, set = 'train')
        # eval on the test set
        test_alm(valid_loader,
                 model,
                 attr_num=dataset.attr_num,
                 description=dataset.attr_id,
                 set='test',
                 threshold=0.5)
        valid_loss, valid_gt, valid_probs = valid_trainer(
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        lr_scheduler.step(metrics=valid_loss, epoch=i)

        train_result = get_pedestrian_metrics(train_gt,
                                              train_probs,
                                              threshold=0.5)
        valid_result = get_pedestrian_metrics(valid_gt,
                                              valid_probs,
                                              threshold=0.5)

        print(
            'Evaluation on test set,\n',
            'ma: {:.4f},  pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                valid_result.ma, np.mean(valid_result.label_pos_recall),
                np.mean(valid_result.label_neg_recall)),
            'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                valid_result.instance_acc, valid_result.instance_prec,
                valid_result.instance_recall, valid_result.instance_f1))

        print(f'{time_str()}')
        print('-' * 60)

        cur_metric = valid_result.ma

        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    torch.save(result_list,
               os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
Example #3
# Imports as in Example #1, plus pickle; distribute_bn, get_multilabel_metrics,
# and valid_trainer here are further helpers from the surrounding code.

def trainer(cfg, args, epoch, model, model_ema, train_loader, valid_loader,
            criterion, optimizer, lr_scheduler, path, loss_w, viz, tb_writer):
    maximum = float(-np.inf)
    maximum_ema = float(-np.inf)
    best_epoch = 0

    result_list = defaultdict()

    for e in range(epoch):

        if args.distributed:
            train_loader.sampler.set_epoch(e)  # seed shuffling with the current epoch, not the total count

        lr = optimizer.param_groups[1]['lr']  # assumes a second param group exists

        train_loss, train_gt, train_probs, train_imgs = batch_trainer(
            cfg,
            args=args,
            epoch=e,
            model=model,
            model_ema=model_ema,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
            loss_w=loss_w,
            scheduler=lr_scheduler,
        )

        if args.distributed:
            if args.local_rank == 0:
                print("Distributing BatchNorm running means and vars")
            distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

        # valid_loss, valid_gt, valid_probs, valid_imgs = valid_trainer(
        #     args=args,
        #     model=model,
        #     valid_loader=valid_loader,
        #     criterion=criterion,
        #     loss_w=loss_w
        # )

        # NOTE: the rest of the loop assumes model_ema is not None, since the
        # non-EMA validation call above is commented out.
        if model_ema is not None:  # and not cfg.TRAIN.EMA.FORCE_CPU:

            if args.local_rank == 0:
                print('using model_ema to validate')

            if args.distributed:
                distribute_bn(model_ema, args.world_size,
                              args.dist_bn == 'reduce')

            valid_loss, valid_gt, valid_probs, valid_probs_ema, valid_imgs = valid_trainer(
                args=args,
                model=model,
                ema_model=model_ema.module,
                valid_loader=valid_loader,
                criterion=criterion,
                loss_w=loss_w)

        # if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
        #     lr_scheduler.step(metrics=valid_loss)
        # elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
        #     lr_scheduler.step(epoch=e + 1)
        # else:
        #     lr_scheduler.step()

        if cfg.METRIC.TYPE == 'multi_label':

            train_metric = get_multilabel_metrics(train_gt, train_probs)
            valid_metric = get_multilabel_metrics(valid_gt, valid_probs)

            if model_ema is not None:  # and not cfg.TRAIN.EMA.FORCE_CPU:
                valid_metric_ema = get_multilabel_metrics(
                    valid_gt, valid_probs_ema)

            if args.local_rank == 0:
                print(
                    'Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
                    'CF1: {:.4f}'.format(valid_metric.map, valid_metric.OP,
                                         valid_metric.OR, valid_metric.OF1,
                                         valid_metric.CP, valid_metric.CR,
                                         valid_metric.CF1))
                print(
                    'EMA Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
                    'CF1: {:.4f}'.format(
                        valid_metric_ema.map, valid_metric_ema.OP,
                        valid_metric_ema.OR, valid_metric_ema.OF1,
                        valid_metric_ema.CP, valid_metric_ema.CR,
                        valid_metric_ema.CF1))
                print(f'{time_str()}')
                print('-' * 60)

                tb_writer.add_scalars('train/lr', {'lr': lr}, e)

                tb_writer.add_scalars('train/loss', {
                    'train': train_loss,
                    'test': valid_loss
                }, e)

                tb_writer.add_scalars(
                    'train/perf', {
                        'mAP': train_metric.map,
                        'OP': train_metric.OP,
                        'OR': train_metric.OR,
                        'OF1': train_metric.OF1,
                        'CP': train_metric.CP,
                        'CR': train_metric.CR,
                        'CF1': train_metric.CF1
                    }, e)

                tb_writer.add_scalars(
                    'test/perf', {
                        'mAP': valid_metric.map,
                        'OP': valid_metric.OP,
                        'OR': valid_metric.OR,
                        'OF1': valid_metric.OF1,
                        'CP': valid_metric.CP,
                        'CR': valid_metric.CR,
                        'CF1': valid_metric.CF1
                    }, e)

                tb_writer.add_scalars(
                    'test/ema_perf', {
                        'mAP': valid_metric_ema.map,
                        'OP': valid_metric_ema.OP,
                        'OR': valid_metric_ema.OR,
                        'OF1': valid_metric_ema.OF1,
                        'CP': valid_metric_ema.CP,
                        'CR': valid_metric_ema.CR,
                        'CF1': valid_metric_ema.CF1
                    }, e)

            cur_metric = valid_metric.map
            if cur_metric > maximum:
                maximum = cur_metric
                best_epoch = e
                save_ckpt(model, path, e, maximum)

            cur_metric = valid_metric_ema.map
            if cur_metric > maximum_ema:
                maximum_ema = cur_metric
                best_epoch = e
                # NOTE: this saves the base model's weights; saving the EMA
                # weights (model_ema.module) may be what is intended here.
                save_ckpt(model, path, e, maximum_ema)

            result_list[e] = {
                'train_result': train_metric,
                'valid_result': valid_metric,
                'train_gt': train_gt,
                'train_probs': train_probs,
                'valid_gt': valid_gt,
                'valid_probs': valid_probs
            }
        else:
            assert False, f'{cfg.METRIC.TYPE} is unavailable'

        with open(os.path.join(os.path.dirname(path), 'metric_log.pkl'),
                  'wb') as f:
            pickle.dump(result_list, f)

    return maximum, best_epoch
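
Example #3 validates with an EMA copy of the model (model_ema.module). A minimal sketch of how such a wrapper maintains its weights, loosely modeled on timm's ModelEma and not the repo's exact class:

import copy

import torch


class SimpleModelEma:
    """Exponential moving average of a model's parameters."""

    def __init__(self, model, decay=0.999):
        self.module = copy.deepcopy(model).eval()  # EMA copy used for validation
        self.decay = decay
        for p in self.module.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        # ema_w <- decay * ema_w + (1 - decay) * model_w, called after each optimizer step
        for ema_v, v in zip(self.module.state_dict().values(),
                            model.state_dict().values()):
            if ema_v.dtype.is_floating_point:
                ema_v.mul_(self.decay).add_(v, alpha=1.0 - self.decay)
            else:
                ema_v.copy_(v)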
Example #4
# Imports as in Example #1, plus pandas as pd; writer (a TensorBoard
# SummaryWriter) and csv_file_name are defined at module level in the repo.

def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path):
    maximum = float(-np.inf)
    best_epoch = 0

    result_list = defaultdict()
    
    column_names = [
        'epoch',
        'train_loss', 'train_instance_acc', 'train_instance_prec',
        'train_instance_recall', 'train_instance_f1', 'train_ma',
        'train_pos_recall', 'train_neg_recall',
        'valid_loss', 'valid_instance_acc', 'valid_instance_prec',
        'valid_instance_recall', 'valid_instance_f1', 'valid_ma',
        'valid_pos_recall', 'valid_neg_recall',
    ]
    df_metrics = pd.DataFrame(columns=column_names)


    for i in range(epoch):

        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )

        valid_loss, valid_gt, valid_probs = valid_trainer(
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        lr_scheduler.step(metrics=valid_loss, epoch=i)

        train_result = get_pedestrian_metrics(train_gt, train_probs)
        valid_result = get_pedestrian_metrics(valid_gt, valid_probs)

        # log to TensorBoard: writer.add_scalars(main_tag, tag_scalar_dict, global_step)
        writer_step = i

        writer.add_scalars('Loss', {'Train':train_loss, 'Valid':valid_loss}, writer_step)
        writer.add_scalars('Accuracy', {'Train':train_result.instance_acc, 'Valid':valid_result.instance_acc}, writer_step)
        writer.add_scalars('Precision', {'Train':train_result.instance_prec, 'Valid':valid_result.instance_prec}, writer_step)
        writer.add_scalars('Recall', {'Train':train_result.instance_recall, 'Valid':valid_result.instance_recall}, writer_step)
        writer.add_scalars('F1', {'Train':train_result.instance_f1, 'Valid':valid_result.instance_f1}, writer_step)
        writer.add_scalars('Mean Accuracy', {'Train':train_result.ma, 'Valid':valid_result.ma}, writer_step)
        writer.add_scalars('Pos Recall', {'Train':np.mean(train_result.label_pos_recall), 'Valid':np.mean(valid_result.label_pos_recall)}, writer_step)
        writer.add_scalars('Neg Recall', {'Train':np.mean(train_result.label_neg_recall), 'Valid':np.mean(valid_result.label_neg_recall)}, writer_step)

        print('Evaluation on test set,\n',
              'ma: {:.4f},  pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                  valid_result.ma, np.mean(valid_result.label_pos_recall), np.mean(valid_result.label_neg_recall)),
              'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                  valid_result.instance_acc, valid_result.instance_prec, valid_result.instance_recall,
                  valid_result.instance_f1))

        print(f'{time_str()}')
        print('-' * 60)
        
        # collect this epoch's metrics as a row for the CSV log

        new_metrics = { 
            'epoch':i,
            'train_loss':train_loss,
            'train_instance_acc':train_result.instance_acc,
            'train_instance_prec':train_result.instance_prec,
            'train_instance_recall':train_result.instance_recall,
            'train_instance_f1':train_result.instance_f1,
            'train_ma':train_result.ma,
            'train_pos_recall':np.mean(train_result.label_pos_recall),
            'train_neg_recall':np.mean(train_result.label_neg_recall),
            'valid_loss':valid_loss,
            'valid_instance_acc':valid_result.instance_acc,
            'valid_instance_prec':valid_result.instance_prec,
            'valid_instance_recall':valid_result.instance_recall,
            'valid_instance_f1':valid_result.instance_f1,
            'valid_ma':valid_result.ma,
            'valid_pos_recall':np.mean(valid_result.label_pos_recall),
            'valid_neg_recall':np.mean(valid_result.label_neg_recall)
            }
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead
        df_metrics = pd.concat([df_metrics, pd.DataFrame([new_metrics])],
                               ignore_index=True)
        df_metrics.to_csv(csv_file_name, index=False)

        cur_metric = valid_result.ma

        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    writer.close()

    torch.save(result_list, os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
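
All four examples persist the per-epoch results next to the checkpoint: Examples #1, #2, and #4 write with torch.save, Example #3 with pickle.dump. A short sketch of reading the log back, with a placeholder path:

import pickle

import torch

log_path = 'exp/metric_log.pkl'  # placeholder; written beside the checkpoint

# logs written with torch.save (Examples #1, #2, #4); weights_only needs torch >= 1.13
result_list = torch.load(log_path, weights_only=False)

# the pickle-based log of Example #3 instead needs:
# with open(log_path, 'rb') as f:
#     result_list = pickle.load(f)

for e, entry in sorted(result_list.items()):
    print(e, entry)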