Esempio n. 1
0
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)

    ENCODER = 'timm-tf_efficientnet_lite4'
    ENCODER_WEIGHTS = 'imagenet'
    CLASSES = ['hair']
    ACTIVATION = 'sigmoid'

    # Set device
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Set train result directory
    make_directory(PERFORMANCE_RECORD_DIR)

    # Set system logger
    system_logger = get_logger(name='train', 
                               file_path=os.path.join(PERFORMANCE_RECORD_DIR, 'train_log.log'))
    
    # Unet / PSPNet / DeepLabV3Plus
    if MODEL == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER, 
            encoder_weights=ENCODER_WEIGHTS, 
            classes=len(CLASSES), 
            activation=ACTIVATION,
        )
    elif MODEL == 'pspnet':
        model = smp.PSPNet(
            encoder_name=ENCODER, 
            encoder_weights=ENCODER_WEIGHTS, 
            classes=len(CLASSES), 
            activation=ACTIVATION,
Esempio n. 2
0
"""

from modules.anchor_box_retinanet import anchorBox as RanchorBox
from modules.anchor_box_kmeans import anchorBox as KanchorBox
from modules.detection_loss import FocalLoss
from models.backbone_models import backbone_models
from modules.box_utils import decode
import torch
import math
import pdb
import math
import torch.nn as nn
import modules.utils as utils

logger = utils.get_logger(__name__)


class RetinaNet(nn.Module):
    """Feature Pyramid Network Architecture
    The network is composed of a backbone FPN network followed by the
    added Head conv layers.  
    Each head layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
    See: 
    RetinaNet: https://arxiv.org/pdf/1708.02002.pdf for more details.
    FPN: https://arxiv.org/pdf/1612.03144.pdf

    Args:
        backbone Network:
Esempio n. 3
0
def main():
    parser = argparse.ArgumentParser(
        description='Training single stage FPN with OHEM, resnet as backbone')
    parser.add_argument('DATA_ROOT',
                        help='Location to root directory for dataset reading'
                        )  # /mnt/mars-fast/datasets/
    parser.add_argument(
        'SAVE_ROOT',
        help='Location to root directory for saving checkpoint models'
    )  # /mnt/mars-alpha/
    parser.add_argument(
        'MODEL_PATH',
        help=
        'Location to root directory where kinetics pretrained models are stored'
    )

    parser.add_argument(
        '--MODE',
        default='train',
        help=
        'MODE can be train, gen_dets, eval_frames, eval_tubes define SUBSETS accordingly, build tubes'
    )
    # Name of backbone network, e.g. resnet18, resnet34, resnet50, resnet101 resnet152 are supported
    parser.add_argument('--ARCH',
                        default='resnet50',
                        type=str,
                        help=' base arch')
    parser.add_argument('--MODEL_TYPE',
                        default='I3D',
                        type=str,
                        help=' base model')
    parser.add_argument('--ANCHOR_TYPE',
                        default='RETINA',
                        type=str,
                        help='type of anchors to be used in model')

    parser.add_argument('--SEQ_LEN',
                        default=8,
                        type=int,
                        help='NUmber of input frames')
    parser.add_argument('--TEST_SEQ_LEN',
                        default=8,
                        type=int,
                        help='NUmber of input frames')
    parser.add_argument(
        '--MIN_SEQ_STEP',
        default=1,
        type=int,
        help='DIFFERENCE of gap between the frames of sequence')
    parser.add_argument(
        '--MAX_SEQ_STEP',
        default=1,
        type=int,
        help='DIFFERENCE of gap between the frames of sequence')
    # if output heads are have shared features or not: 0 is no-shareing else sharining enabled
    # parser.add_argument('--MULIT_SCALE', default=False, type=str2bool,help='perfrom multiscale training')
    parser.add_argument('--HEAD_LAYERS',
                        default=3,
                        type=int,
                        help='0 mean no shareding more than 0 means shareing')
    parser.add_argument('--NUM_FEATURE_MAPS',
                        default=5,
                        type=int,
                        help='0 mean no shareding more than 0 means shareing')
    parser.add_argument('--CLS_HEAD_TIME_SIZE',
                        default=3,
                        type=int,
                        help='Temporal kernel size of classification head')
    parser.add_argument('--REG_HEAD_TIME_SIZE',
                        default=3,
                        type=int,
                        help='Temporal kernel size of regression head')

    #  Name of the dataset only voc or coco are supported
    parser.add_argument('--DATASET',
                        default='road',
                        type=str,
                        help='dataset being used')
    parser.add_argument('--TRAIN_SUBSETS',
                        default='train_3,',
                        type=str,
                        help='Training SUBSETS seprated by ,')
    parser.add_argument('--VAL_SUBSETS',
                        default='',
                        type=str,
                        help='Validation SUBSETS seprated by ,')
    parser.add_argument('--TEST_SUBSETS',
                        default='',
                        type=str,
                        help='Testing SUBSETS seprated by ,')
    # Input size of image only 600 is supprted at the moment
    parser.add_argument('--MIN_SIZE',
                        default=512,
                        type=int,
                        help='Input Size for FPN')

    #  data loading argumnets
    parser.add_argument('-b',
                        '--BATCH_SIZE',
                        default=4,
                        type=int,
                        help='Batch size for training')
    parser.add_argument('--TEST_BATCH_SIZE',
                        default=1,
                        type=int,
                        help='Batch size for testing')
    # Number of worker to load data in parllel
    parser.add_argument('--NUM_WORKERS',
                        '-j',
                        default=8,
                        type=int,
                        help='Number of workers used in dataloading')
    # optimiser hyperparameters
    parser.add_argument('--OPTIM',
                        default='SGD',
                        type=str,
                        help='Optimiser type')
    parser.add_argument('--RESUME',
                        default=0,
                        type=int,
                        help='Resume from given epoch')
    parser.add_argument('--MAX_EPOCHS',
                        default=30,
                        type=int,
                        help='Number of training epoc')
    parser.add_argument('-l',
                        '--LR',
                        '--learning-rate',
                        default=0.004225,
                        type=float,
                        help='initial learning rate')
    parser.add_argument('--MOMENTUM', default=0.9, type=float, help='momentum')
    parser.add_argument('--MILESTONES',
                        default='20,25',
                        type=str,
                        help='Chnage the lr @')
    parser.add_argument('--GAMMA',
                        default=0.1,
                        type=float,
                        help='Gamma update for SGD')
    parser.add_argument('--WEIGHT_DECAY',
                        default=1e-4,
                        type=float,
                        help='Weight decay for SGD')

    # Freeze layers or not
    parser.add_argument(
        '--FBN',
        '--FREEZE_BN',
        default=True,
        type=str2bool,
        help='freeze bn layers if true or else keep updating bn layers')
    parser.add_argument(
        '--FREEZE_UPTO',
        default=1,
        type=int,
        help='layer group number in ResNet up to which needs to be frozen')

    # Loss function matching threshold
    parser.add_argument('--POSTIVE_THRESHOLD',
                        default=0.5,
                        type=float,
                        help='Min threshold for Jaccard index for matching')
    parser.add_argument('--NEGTIVE_THRESHOLD',
                        default=0.4,
                        type=float,
                        help='Max threshold Jaccard index for matching')
    # Evaluation hyperparameters
    parser.add_argument(
        '--EVAL_EPOCHS',
        default='30',
        type=str,
        help=
        'eval epochs to test network on these epoch checkpoints usually the last epoch is used'
    )
    parser.add_argument('--VAL_STEP',
                        default=2,
                        type=int,
                        help='Number of training epoch before evaluation')
    parser.add_argument(
        '--IOU_THRESH',
        default=0.5,
        type=float,
        help='Evaluation threshold for validation and for frame-wise mAP')
    parser.add_argument(
        '--CONF_THRESH',
        default=0.025,
        type=float,
        help='Confidence threshold for to remove detection below given number')
    parser.add_argument(
        '--NMS_THRESH',
        default=0.5,
        type=float,
        help='NMS threshold to apply nms at the time of validation')
    parser.add_argument('--TOPK',
                        default=10,
                        type=int,
                        help='topk detection to keep for evaluation')
    parser.add_argument(
        '--GEN_CONF_THRESH',
        default=0.025,
        type=float,
        help='Confidence threshold at the time of generation and dumping')
    parser.add_argument('--GEN_TOPK',
                        default=100,
                        type=int,
                        help='topk at the time of generation')
    parser.add_argument('--GEN_NMS',
                        default=0.5,
                        type=float,
                        help='NMS at the time of generation')
    parser.add_argument('--CLASSWISE_NMS',
                        default=False,
                        type=str2bool,
                        help='apply classwise NMS/no tested properly')
    parser.add_argument(
        '--JOINT_4M_MARGINALS',
        default=False,
        type=str2bool,
        help=
        'generate score of joints i.e. duplexes or triplet by marginals like agents and actions scores'
    )

    ## paths hyper parameters
    parser.add_argument(
        '--COMPUTE_PATHS',
        default=False,
        type=str2bool,
        help=' COMPUTE_PATHS if set true then it overwrite existing ones')
    parser.add_argument(
        '--PATHS_IOUTH',
        default=0.5,
        type=float,
        help='Iou threshold for building paths to limit neighborhood search')
    parser.add_argument(
        '--PATHS_COST_TYPE',
        default='score',
        type=str,
        help=
        'cost function type to use for matching, other options are scoreiou, iou'
    )
    parser.add_argument(
        '--PATHS_JUMP_GAP',
        default=4,
        type=int,
        help=
        'GAP allowed for a tube to be kept alive after no matching detection found'
    )
    parser.add_argument('--PATHS_MIN_LEN',
                        default=6,
                        type=int,
                        help='minimum length of generated path')
    parser.add_argument(
        '--PATHS_MINSCORE',
        default=0.1,
        type=float,
        help='minimum score a path should have over its length')

    ## paths hyper parameters
    parser.add_argument('--COMPUTE_TUBES',
                        default=False,
                        type=str2bool,
                        help='if set true then it overwrite existing tubes')
    parser.add_argument('--TUBES_ALPHA',
                        default=0,
                        type=float,
                        help='alpha cost for changeing the label')
    parser.add_argument('--TRIM_METHOD',
                        default='none',
                        type=str,
                        help='other one is indiv which works for UCF24')
    parser.add_argument('--TUBES_TOPK',
                        default=10,
                        type=int,
                        help='Number of labels to assign for a tube')
    parser.add_argument('--TUBES_MINLEN',
                        default=5,
                        type=int,
                        help='minimum length of a tube')
    parser.add_argument(
        '--TUBES_EVAL_THRESHS',
        default='0.2,0.5',
        type=str,
        help=
        'evaluation threshold for checking tube overlap at evaluation time, one can provide as many as one wants'
    )
    # parser.add_argument('--TRAIL_ID', default=0,
    #                     type=int, help='eval TUBES_Thtrshold at evaluation time')

    ###
    parser.add_argument('--LOG_START',
                        default=10,
                        type=int,
                        help='start loging after k steps for text/tensorboard')
    parser.add_argument('--LOG_STEP',
                        default=10,
                        type=int,
                        help='Log every k steps for text/tensorboard')
    parser.add_argument(
        '--TENSORBOARD',
        default=1,
        type=str2bool,
        help='Use tensorboard for loss/evalaution visualization')

    # Program arguments
    parser.add_argument('--MAN_SEED',
                        default=123,
                        type=int,
                        help='manualseed for reproduction')
    parser.add_argument(
        '--MULTI_GPUS',
        default=True,
        type=str2bool,
        help=
        'If  more than 0 then use all visible GPUs by default only one GPU used '
    )

    # Use CUDA_VISIBLE_DEVICES=0,1,4,6 to select GPUs to use

    ## Parse arguments
    args = parser.parse_args()

    args = utils.set_args(args)  # set directories and SUBSETS fo datasets
    args.MULTI_GPUS = False if args.BATCH_SIZE == 1 else args.MULTI_GPUS
    ## set random seeds and global settings
    np.random.seed(args.MAN_SEED)
    torch.manual_seed(args.MAN_SEED)
    # torch.cuda.manual_seed_all(args.MAN_SEED)
    torch.set_default_tensor_type('torch.FloatTensor')

    args = utils.create_exp_name(args)

    utils.setup_logger(args)
    logger = utils.get_logger(__name__)
    logger.info(sys.version)

    assert args.MODE in [
        'train', 'val', 'gen_dets', 'eval_frames', 'eval_tubes'
    ], 'MODE must be from ' + ','.join(['train', 'test', 'tubes'])

    if args.MODE == 'train':
        args.TEST_SEQ_LEN = args.SEQ_LEN
    else:
        args.SEQ_LEN = args.TEST_SEQ_LEN

    if args.MODE in ['train', 'val']:
        # args.CONF_THRESH = 0.05
        args.SUBSETS = args.TRAIN_SUBSETS
        train_transform = transforms.Compose([
            vtf.ResizeClip(args.MIN_SIZE, args.MAX_SIZE),
            vtf.ToTensorStack(),
            vtf.Normalize(mean=args.MEANS, std=args.STDS)
        ])

        # train_skip_step = args.SEQ_LEN
        # if args.SEQ_LEN>4 and args.SEQ_LEN<=10:
        #     train_skip_step = args.SEQ_LEN-2
        if args.SEQ_LEN > 10:
            train_skip_step = args.SEQ_LEN + (args.MAX_SEQ_STEP - 1) * 2 - 2
        else:
            train_skip_step = args.SEQ_LEN

        train_dataset = VideoDataset(args,
                                     train=True,
                                     skip_step=train_skip_step,
                                     transform=train_transform)
        logger.info('Done Loading Dataset Train Dataset')
        ## For validation set
        full_test = False
        args.SUBSETS = args.VAL_SUBSETS
        skip_step = args.SEQ_LEN * 8
    else:
        args.SEQ_LEN = args.TEST_SEQ_LEN
        args.MAX_SEQ_STEP = 1
        args.SUBSETS = args.TEST_SUBSETS
        full_test = True  #args.MODE != 'train'
        args.skip_beggning = 0
        args.skip_ending = 0
        if args.MODEL_TYPE == 'I3D':
            args.skip_beggning = 2
            args.skip_ending = 2
        elif args.MODEL_TYPE != 'C2D':
            args.skip_beggning = 2

        skip_step = args.SEQ_LEN - args.skip_beggning

    val_transform = transforms.Compose([
        vtf.ResizeClip(args.MIN_SIZE, args.MAX_SIZE),
        vtf.ToTensorStack(),
        vtf.Normalize(mean=args.MEANS, std=args.STDS)
    ])

    val_dataset = VideoDataset(args,
                               train=False,
                               transform=val_transform,
                               skip_step=skip_step,
                               full_test=full_test)
    logger.info('Done Loading Dataset Validation Dataset')

    args.num_classes = val_dataset.num_classes
    # one for objectness
    args.label_types = val_dataset.label_types
    args.num_label_types = val_dataset.num_label_types
    args.all_classes = val_dataset.all_classes
    args.num_classes_list = val_dataset.num_classes_list
    args.num_ego_classes = val_dataset.num_ego_classes
    args.ego_classes = val_dataset.ego_classes
    args.head_size = 256

    if args.MODE in ['train', 'val', 'gen_dets']:
        net = build_retinanet(args).cuda()
        if args.MULTI_GPUS:
            logger.info('\nLets do dataparallel\n')
            net = torch.nn.DataParallel(net)

    for arg in sorted(vars(args)):
        logger.info(str(arg) + ': ' + str(getattr(args, arg)))

    if args.MODE == 'train':
        if args.FBN:
            if args.MULTI_GPUS:
                net.module.backbone.apply(utils.set_bn_eval)
            else:
                net.backbone.apply(utils.set_bn_eval)
        train(args, net, train_dataset, val_dataset)
    elif args.MODE == 'val':
        val(args, net, val_dataset)
    elif args.MODE == 'gen_dets':
        gen_dets(args, net, val_dataset)
        eval_framewise_dets(args, val_dataset)
        build_eval_tubes(args, val_dataset)
    elif args.MODE == 'eval_frames':
        eval_framewise_dets(args, val_dataset)
    elif args.MODE == 'eval_tubes':
        build_eval_tubes(args, val_dataset)