Example #1
def __define_timeception_model():
    """
    Define Timeception classifier.
    """

    # some configurations for the model
    classification_type = config.cfg.MODEL.CLASSIFICATION_TYPE
    solver_name = config.cfg.SOLVER.NAME
    solver_lr = config.cfg.SOLVER.LR
    adam_epsilon = config.cfg.SOLVER.ADAM_EPSILON
    n_tc_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS
    backbone_name = config.cfg.MODEL.BACKBONE_CNN
    feature_name = config.cfg.MODEL.BACKBONE_FEATURE
    n_tc_layers = config.cfg.MODEL.N_TC_LAYERS
    n_classes = config.cfg.MODEL.N_CLASSES
    is_dilated = config.cfg.MODEL.MULTISCALE_TYPE
    n_channels_in, channel_h, channel_w = utils.get_model_feat_maps_info(backbone_name, feature_name)
    n_groups = int(n_channels_in / 128.0)

    # optimizer and loss for either multi-label "ml" or single-label "sl" classification
    if classification_type == 'ml':
        loss = keras_utils.LOSSES[3]
        output_activation = keras_utils.ACTIVATIONS[2]
        metric_function = keras_utils.map_charades
    else:
        loss = keras_utils.LOSSES[0]
        output_activation = keras_utils.ACTIVATIONS[3]
        metric_function = keras_utils.METRICS[0]

    # define the optimizer
    optimizer = SGD(lr=0.01) if solver_name == 'sgd' else Adam(lr=solver_lr, epsilon=adam_epsilon)

    # input layer
    input_shape = (n_tc_timesteps, channel_h, channel_w, n_channels_in)  # (T, H, W, C)
    tensor_input = Input(shape=input_shape, name='input')  # (T, H, W, C)

    # define timeception layers, as a standalone module
    timeception_module = timeception.Timeception(n_channels_in, n_tc_layers, n_groups, is_dilated=is_dilated)
    tensor = timeception_module(tensor_input)  # (T, H, W, C)

    # but if you fancy, you can define timeception layers as a series of layers
    # tensor = timeception.timeception_layers(tensor_input, n_tc_layers, n_groups, is_dilated=is_dilated) # (T, H, W, C)

    # max-pool over space-time
    tensor = MaxLayer(axis=(1, 2, 3), name='maxpool_t_s')(tensor)

    # dense layers for classification
    tensor = Dropout(0.5)(tensor)
    tensor = Dense(512)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)
    tensor_output = Activation(output_activation)(tensor)

    # define the model
    model = Model(inputs=tensor_input, outputs=tensor_output)
    model.compile(loss=loss, optimizer=optimizer, metrics=[metric_function])

    return model
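MaxLayer is not a stock Keras layer; it comes from the repo's keras_utils. As a rough sketch (assuming the Keras 2 custom-layer API; the repo's actual MaxLayer may differ), such a max-reduction layer could be written as:

# Hypothetical sketch of a custom max-reduction layer; the repo's
# keras_utils.MaxLayer may differ.
from keras import backend as K
from keras.layers import Layer

class MaxLayer(Layer):
    def __init__(self, axis, **kwargs):
        self.axis = axis  # axes to reduce, e.g. (1, 2, 3) for (T, H, W)
        super(MaxLayer, self).__init__(**kwargs)

    def call(self, inputs):
        # global max-pooling over the given axes
        return K.max(inputs, axis=self.axis)

    def compute_output_shape(self, input_shape):
        # drop the reduced axes, e.g. (N, T, H, W, C) -> (N, C)
        return tuple(d for i, d in enumerate(input_shape) if i not in self.axis)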
Example #2
    def __init__(self):
        super(Model, self).__init__()

        # some configurations for the model
        n_tc_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS  #32
        backbone_name = config.cfg.MODEL.BACKBONE_CNN  #'i3d_pytorch_charades_rgb'
        feature_name = config.cfg.MODEL.BACKBONE_FEATURE  #'mixed_5c'
        n_tc_layers = config.cfg.MODEL.N_TC_LAYERS  #2
        n_classes = config.cfg.MODEL.N_CLASSES  #157
        is_dilated = config.cfg.MODEL.MULTISCALE_TYPE  # which multiscale (multi-kernel) strategy is used, e.g. 'ks'
        OutputActivation = Sigmoid if config.cfg.MODEL.CLASSIFICATION_TYPE == 'ml' else LogSoftmax
        n_channels_in, channel_h, channel_w = utils.get_model_feat_maps_info(
            backbone_name, feature_name)  #1024, 7, 7
        n_groups = int(n_channels_in / 128.0)  #8

        input_shape = (None, n_channels_in, n_tc_timesteps, channel_h,
                       channel_w)  # (None, C, T, H, W), e.g. (None, 1024, 32, 7, 7); T is user-defined
        self._input_shape = input_shape  # (None, 1024, 32, 7, 7)

        # define the timeception layers
        self.timeception = timeception_pytorch.Timeception(
            input_shape, n_tc_layers, n_groups, is_dilated)

        # get number of output channels after timeception
        n_channels_in = self.timeception.n_channels_out

        # define layers for classifier
        self.do1 = Dropout(0.5)
        self.l1 = Linear(n_channels_in, 512)
        self.bn1 = BatchNorm1d(512)
        self.ac1 = LeakyReLU(0.2)
        self.do2 = Dropout(0.25)
        self.l2 = Linear(512, n_classes)
        self.ac2 = OutputActivation()
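The snippet defines only the constructor; the forward pass is not shown. A plausible forward method that wires these layers together (a sketch under the shapes commented above, not necessarily the repo's exact implementation) max-pools over space-time after timeception, then applies the classifier head:

    # Hypothetical forward pass matching the layers defined above.
    def forward(self, input):
        # input: (N, C, T, H, W)
        tensor = self.timeception(input)  # (N, C', T, H, W)
        # global max-pool over T, H, W, leaving (N, C')
        tensor = tensor.max(dim=4)[0].max(dim=3)[0].max(dim=2)[0]
        tensor = self.do1(tensor)
        tensor = self.ac1(self.bn1(self.l1(tensor)))
        tensor = self.do2(tensor)
        tensor = self.ac2(self.l2(tensor))  # (N, n_classes)
        return tensor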
Example #3
def __define_data_generator(is_training):
    """
    Define data generator.
    """

    # get some configs for the training
    n_classes = config.cfg.MODEL.N_CLASSES
    dataset_name = config.cfg.DATASET_NAME
    backbone_model_name = config.cfg.MODEL.BACKBONE_CNN
    backbone_feature_name = config.cfg.MODEL.BACKBONE_FEATURE
    n_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS

    batch_size_tr = config.cfg.TRAIN.BATCH_SIZE
    batch_size_te = config.cfg.TEST.BATCH_SIZE
    batch_size = batch_size_tr if is_training else batch_size_te

    # size and name of feature
    feature_name = 'features_%s_%s_%sf' % (backbone_model_name, backbone_feature_name, n_timesteps)
    c, h, w = utils.get_model_feat_maps_info(backbone_model_name, backbone_feature_name)
    feature_dim = (n_timesteps, h, w, c)

    # data generators
    params = {
        'batch_size': batch_size,
        'n_classes': n_classes,
        'feature_name': feature_name,
        'feature_dim': feature_dim,
        'is_shuffle': True,
        'is_training': is_training
    }
    data_generator_class = data_utils_keras.KERAS_DATA_GENERATORS_DICT[dataset_name]
    data_generator = data_generator_class(**params)

    return data_generator
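A hedged usage sketch (assuming this helper and the model builder from Example #1 live in the same module; the epoch count is illustrative):

# Hypothetical usage; Keras 2 API.
data_generator_tr = __define_data_generator(is_training=True)
data_generator_te = __define_data_generator(is_training=False)
model = __define_timeception_model()
model.fit_generator(generator=data_generator_tr,
                    validation_data=data_generator_te,
                    epochs=100,  # illustrative value
                    verbose=2)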
Example #4
    def __init__(self):
        super(Model, self).__init__()

        # some configurations for the model
        n_tc_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS  # number of timesteps fed into timeception
        backbone_name = config.cfg.MODEL.BACKBONE_CNN
        feature_name = config.cfg.MODEL.BACKBONE_FEATURE
        n_tc_layers = config.cfg.MODEL.N_TC_LAYERS  # number of timeception layers
        n_classes = config.cfg.MODEL.N_CLASSES  # 157
        is_dilated = config.cfg.MODEL.MULTISCALE_TYPE  # multiscale type (dilated convolutions vs. multiple kernel sizes)
        OutputActivation = Sigmoid if config.cfg.MODEL.CLASSIFICATION_TYPE == 'ml' else LogSoftmax
        n_channels_in, channel_h, channel_w = utils.get_model_feat_maps_info(
            backbone_name, feature_name)
        n_groups = int(n_channels_in / 128.0)  # number of groups

        # input shape for the timeception layers
        input_shape = (None, n_channels_in, n_tc_timesteps, channel_h,
                       channel_w)  # (C, T, H, W)
        self._input_shape = input_shape

        # define the timeception layers
        self.timeception = timeception_pytorch.Timeception(
            input_shape, n_tc_layers, n_groups, is_dilated)  # (C, T, H, W)

        # get number of output channels after timeception
        n_channels_in = self.timeception.n_channels_out

        # define layers for classifier
        self.do1 = Dropout(0.5)
        self.l1 = Linear(n_channels_in, 512)
        self.bn1 = BatchNorm1d(512)
        self.ac1 = LeakyReLU(0.2)  # negative_slope controls the slope for negative inputs; defaults to 0.01
        self.do2 = Dropout(0.25)
        self.l2 = Linear(512, n_classes)
        self.ac2 = OutputActivation()
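Since this constructor mirrors Example #2, a quick smoke test (a sketch; it assumes config.cfg is already loaded, a forward like the one sketched earlier, and the (None, C, T, H, W) shapes and n_classes = 157 from the comments above):

import torch

# Hypothetical smoke test for the model defined above.
model = Model()
x = torch.randn(2, 1024, 32, 7, 7)  # (N, C, T, H, W)
y = model(x)
print(y.shape)  # expected: (2, 157), i.e. (N, n_classes)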
Example #5
def __define_loader(is_training):
    """
    Define data loader.
    """

    # get some configs for the training (parameters for data loading)
    n_classes = config.cfg.MODEL.N_CLASSES  #157
    dataset_name = config.cfg.DATASET_NAME  #charades
    backbone_model_name = config.cfg.MODEL.BACKBONE_CNN  #i3d_pytorch_charades_rgb
    backbone_feature_name = config.cfg.MODEL.BACKBONE_FEATURE  #mixed_5c
    n_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS  #32
    n_workers = config.cfg.TRAIN.N_WORKERS  # number of worker threads for loading data

    batch_size_tr = config.cfg.TRAIN.BATCH_SIZE  #32
    batch_size_te = config.cfg.TEST.BATCH_SIZE  #64
    batch_size = batch_size_tr if is_training else batch_size_te

    # size and name of feature
    feature_name = 'features_%s_%s_%sf' % (
        backbone_model_name, backbone_feature_name, n_timesteps
    )  # e.g. 'features_i3d_pytorch_charades_rgb_mixed_5c_32f'
    c, h, w = utils.get_model_feat_maps_info(backbone_model_name,
                                             backbone_feature_name)
    # feature map dimensions of the backbone output: c, h, w = 1024, 7, 7
    feature_dim = (c, n_timesteps, h, w)  # (1024, 32, 7, 7); n_timesteps is user-defined

    # data generators
    params = {
        'batch_size': batch_size,
        'n_classes': n_classes,
        'feature_name': feature_name,
        'feature_dim': feature_dim,
        'is_training': is_training
    }
    dataset_class = data_utils_pytorch.PYTORCH_DATASETS_DICT[
        dataset_name]  #core.data_utils_pytorch.DatasetCharades
    dataset = dataset_class(**params)
    n_samples = dataset.n_samples  # 7811 (train) / 1814 (test)
    n_batches = dataset.n_batches  # 245 (train) / 37 (test)

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=n_workers,
                             shuffle=True)

    return data_loader, n_samples, n_batches
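For context, a hedged sketch of how this loader is typically consumed in a PyTorch training loop (the Model class is from the earlier examples; loss, optimizer, and n_epochs are illustrative assumptions, not the repo's exact training code):

import torch

# Hypothetical training loop; assumes the dataset yields (features, labels) pairs.
n_epochs = 100  # illustrative value
loader_tr, n_samples_tr, n_batches_tr = __define_loader(is_training=True)
model = Model().cuda()
criterion = torch.nn.BCELoss()  # assuming multi-label targets and Sigmoid output
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(n_epochs):
    for x, y in loader_tr:
        optimizer.zero_grad()
        y_pred = model(x.cuda())
        loss = criterion(y_pred, y.cuda())
        loss.backward()
        optimizer.step()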
Example #6
def __define_loader(is_training):
    """
    Define data loader.
    """

    # get some configs for the training
    n_classes = config.cfg.MODEL.N_CLASSES
    dataset_name = config.cfg.DATASET_NAME
    backbone_model_name = config.cfg.MODEL.BACKBONE_CNN
    backbone_feature_name = config.cfg.MODEL.BACKBONE_FEATURE
    n_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS
    n_workers = config.cfg.TRAIN.N_WORKERS

    batch_size_tr = config.cfg.TRAIN.BATCH_SIZE
    batch_size_te = config.cfg.TEST.BATCH_SIZE
    batch_size = batch_size_tr if is_training else batch_size_te

    # size and name of feature
    feature_name = 'features_%s_%s_%sf' % (backbone_model_name,
                                           backbone_feature_name, n_timesteps)
    c, h, w = utils.get_model_feat_maps_info(backbone_model_name,
                                             backbone_feature_name)
    feature_dim = (c, n_timesteps, h, w)

    # data generators
    params = {
        'batch_size': batch_size,
        'n_classes': n_classes,
        'feature_name': feature_name,
        'feature_dim': feature_dim,
        'is_training': is_training
    }
    dataset_class = data_utils.PYTORCH_DATASETS_DICT[dataset_name]
    dataset = dataset_class(**params)
    n_samples = dataset.n_samples
    n_batches = dataset.n_batches

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=n_workers,
                             shuffle=True)

    return data_loader, n_samples, n_batches
Example #7
def __define_loader(is_training):
    """
    Define data loader.
    """

    # get some configs for the training
    n_classes = config.cfg.MODEL.N_CLASSES  # 157 classes in the Charades dataset
    dataset_name = config.cfg.DATASET_NAME  # charades
    backbone_model_name = config.cfg.MODEL.BACKBONE_CNN  # which backbone cnn is used, e.g. 'i3d_pytorch_charades_rgb'
    backbone_feature_name = config.cfg.MODEL.BACKBONE_FEATURE  # type of feature output from backbone cnn, e.g. 'mixed_5c'
    n_timesteps = config.cfg.MODEL.N_TC_TIMESTEPS  # how many timesteps expected as input to the timeception layers, e.g. 32
    n_workers = config.cfg.TRAIN.N_WORKERS

    batch_size_tr = config.cfg.TRAIN.BATCH_SIZE  # batch size for training, e.g. 32
    batch_size_te = config.cfg.TEST.BATCH_SIZE  # batch size for testing, e.g. 64
    batch_size = batch_size_tr if is_training else batch_size_te

    # size and name of feature
    feature_name = 'features_%s_%s_%sf' % (backbone_model_name,
                                           backbone_feature_name, n_timesteps)
    c, h, w = utils.get_model_feat_maps_info(
        backbone_model_name, backbone_feature_name)  # get the backbone model's output dimensions
    feature_dim = (c, n_timesteps, h, w)

    # data generators
    params = {
        'batch_size': batch_size,
        'n_classes': n_classes,
        'feature_name': feature_name,
        'feature_dim': feature_dim,
        'is_training': is_training
    }
    dataset_class = data_utils.PYTORCH_DATASETS_DICT[dataset_name]
    dataset = dataset_class(**params)
    n_samples = dataset.n_samples
    n_batches = dataset.n_batches

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=n_workers,
                             shuffle=True)

    return data_loader, n_samples, n_batches
Example #8
def train_model_on_pickled_features():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl',
                     (n_nodes, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    # to get better performance, you need to write code that randomly samples new frames and extracts their features every epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # then train on the extracted features, in every epoch. It's computationally heavy, but you cannot avoid random sampling if you want better results.
    # you get even better results if you replace I3D with a 2D/3D CNN that was previously fine-tuned on Epic-Kitchens
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
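The batch counts above rely on utils.calc_num_batches, whose implementation is not shown. Inferred from its call sites (a plausible sketch, not the repo's verified code), it presumably rounds n / batch_size up so a final partial batch is still counted:

import math

# Plausible implementation of utils.calc_num_batches, inferred from usage;
# the repo's actual helper may differ.
def calc_num_batches(n_samples, batch_size):
    return int(math.ceil(n_samples / float(batch_size)))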
Example #9
def train_model_videograph():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes, ))
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly; but remember to save them, as you need them at test time
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
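utils.generate_centroids is not shown here. Inferred from its call site (and hedged accordingly), it plausibly samples random node vectors, one per centroid, which should then be saved for test time as the comment above warns:

import numpy as np

# Hypothetical sketch of utils.generate_centroids, inferred from usage;
# the repo may initialize the nodes differently.
def generate_centroids(n_centroids, n_dims):
    return np.random.rand(n_centroids, n_dims).astype(np.float32)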
Example #10
def train_model_on_pickled_features():
    """
    Train model.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'

    n_centroids = 128
    n_timesteps = 64
    is_spatial_pooling = True
    is_resume_training = False

    batch_size_tr = 12
    batch_size_te = 30
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    features_path = (Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5', (n_timesteps * 8,))
                     if is_spatial_pooling else
                     Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,)))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions.pkl')
    (_, y_tr), (_, y_te) = utils.pkl_load(gt_actions_path)
    centroids = utils.pkl_load(centroids_path)

    n_feat_maps, feat_map_side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim, n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    root_model, model = __load_model_mlp_classifier_video_graph(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))
    print(x_tr.shape)
    print(y_tr.shape)
    print(x_te.shape)
    print(y_te.shape)

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_te, n_batch_te, batch_size_te, n_gpus))

    score_callback = ScoreCallback(model, None, None, x_te, y_te, batch_size_te)
    callbacks = [score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=callbacks,
              verbose=2)
    print('--- finish time')
    print(datetime.datetime.now())
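ScoreCallback here is a repo-specific Keras callback; a minimal sketch of such a per-epoch scoring callback (assuming sklearn's average_precision_score as the metric, which may not match the repo's choice) could look like:

from keras.callbacks import Callback
from sklearn.metrics import average_precision_score

# Hypothetical per-epoch scoring callback matching the positional arguments
# used above: (model, x_tr, y_tr, x_te, y_te, batch_size).
class ScoreCallback(Callback):
    def __init__(self, model, x_tr, y_tr, x_te, y_te, batch_size):
        super(ScoreCallback, self).__init__()
        self.score_model = model  # avoid clashing with Callback.model
        self.x_te = x_te
        self.y_te = y_te
        self.batch_size = batch_size

    def on_epoch_end(self, epoch, logs=None):
        y_pred = self.score_model.predict(self.x_te, batch_size=self.batch_size)
        score = average_precision_score(self.y_te, y_pred, average='macro')
        print('... epoch %d, mAP: %.02f' % (epoch + 1, 100.0 * score))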