Ejemplo n.º 1
0
def __load_model_mlp_classifier_video_graph(centroids, n_classes,
                                            input_shape_x, n_gpus,
                                            is_load_weights, weight_path):
    """
    Build and compile the classifier that stacks the videograph
    node-attention and graph-embedding blocks under an MLP head.

    Args:
        centroids: 2-D array of shape (n_centroids, feature_dim). It is
            embedded in the graph as a constant float32 tensor and exposed
            as the second model input.
        n_classes: number of units of the final Dense layer.
        input_shape_x: 5-item shape (batch, timesteps, height, width,
            channels) of the feature input; the batch entry is ignored.
        n_gpus: when equal to 1 the model itself is compiled; for any other
            value the model is wrapped by multi_gpu_utils.multi_gpu_model
            and only the wrapper is compiled.
        is_load_weights: if truthy, weights are loaded from weight_path
            before compiling.
        weight_path: path handed to model.load_weights.

    Returns:
        Tuple (model, parallel_model). Both names refer to the same object
        when n_gpus == 1.

    Raises:
        ValueError: if centroids is not 2-D.
    """

    # The original code relied on tuple-unpacking of centroids.shape to reject
    # non 2-D input; keep that guarantee, but make it explicit.
    if len(centroids.shape) != 2:
        raise ValueError('centroids must be a 2-D array, got shape %s'
                         % (centroids.shape, ))

    # loss, metric and output activation are picked by index from the
    # project-level tables in keras_utils
    loss = keras_utils.LOSSES[0]
    metrics = [keras_utils.METRICS[0]]
    output_activation = keras_utils.ACTIVATIONS[3]
    optimizer = Adam(lr=0.01, epsilon=1e-4)

    # number of graph-embedding layers
    n_graph_layers = 2

    # time kernel
    t_kernel_size = 7
    t_max_size = 3

    # node (centroid) kernel
    c_kernel_size = 7
    c_max_size = 3
    c_avg_size = 4

    # space kernel
    s_kernel_size = 1

    # Channel count produced by the spatial convolution.
    # NOTE(review): hard-coded to 1024 instead of following the input channel
    # count; presumably it has to equal the centroid feature dimension
    # consumed by node_attention - confirm.
    n_channels_conv = 1024

    # Use the real height and width. The original unpacked both spatial
    # entries into one name, so the width silently replaced the height.
    _, n_timesteps, height, width, n_channels_in = input_shape_x
    t_input_x = Input(shape=(n_timesteps, height, width, n_channels_in),
                      name='input_x')
    t_input_c = Input(tensor=tf.constant(centroids, dtype=tf.float32),
                      name='input_n')
    tensor = t_input_x

    # spatial convolution
    tensor = Conv3D(n_channels_conv, (1, s_kernel_size, s_kernel_size),
                    padding='VALID',
                    name='conv_s')(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)

    # pool over the two spatial axes, keeping them as dimensions
    tensor = MaxLayer(axis=(2, 3), is_keep_dim=True,
                      name='global_pool_s')(tensor)

    # centroid-attention
    # NOTE(review): the pooling axes used below suggest that this inserts a
    # centroid axis at position 1 - confirm against videograph.
    tensor = videograph.node_attention(
        tensor, t_input_c, n_channels_conv,
        activation_type='relu')

    # graph embeddings
    tensor = videograph.graph_embedding(tensor, n_graph_layers, c_avg_size,
                                        c_kernel_size, t_kernel_size,
                                        c_max_size,
                                        t_max_size)

    # centroid pooling
    tensor = MeanLayer(axis=(1, ), name='global_pool_n')(tensor)

    # temporal (and remaining spatial) pooling
    tensor = MaxLayer(axis=(1, 2, 3), name='global_pool_t')(tensor)

    # activity classification
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(512)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)
    t_output = Activation(output_activation)(tensor)

    # Keras 2 keyword names; `input=`/`output=` are the deprecated Keras 1
    # spellings.
    model = Model(inputs=[t_input_x, t_input_c], outputs=t_output)

    if is_load_weights:
        model.load_weights(weight_path)

    if n_gpus == 1:
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
        parallel_model = model
    else:
        # only the multi-gpu wrapper is compiled; the template model is
        # returned uncompiled
        parallel_model = multi_gpu_utils.multi_gpu_model(model, n_gpus)
        parallel_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model, parallel_model
Ejemplo n.º 2
0
def __load_model_videograph(nodes, n_classes, input_shape_x):
    """
    Build and compile the model that stacks the videograph node-attention
    and graph-embedding blocks under an MLP classification head.

    Args:
        nodes: 2-D array of shape (n_nodes, feature_dim). It is embedded in
            the graph as a constant float32 tensor and exposed as the second
            model input.
        n_classes: number of units of the final Dense layer.
        input_shape_x: 5-item shape (batch, timesteps, height, width,
            channels) of the feature input; the batch entry is ignored.

    Returns:
        The compiled Keras model with inputs [input_x, input_n].

    Raises:
        ValueError: if nodes is not 2-D.
    """

    # The original code relied on tuple-unpacking of nodes.shape to reject
    # non 2-D input; keep that guarantee, but make it explicit.
    if len(nodes.shape) != 2:
        raise ValueError('nodes must be a 2-D array, got shape %s'
                         % (nodes.shape, ))

    # loss and output activation are picked by index from the project-level
    # tables in keras_utils
    loss = keras_utils.LOSSES[3]
    output_activation = keras_utils.ACTIVATIONS[2]
    optimizer = Adam(lr=0.01, epsilon=1e-4)

    # number of graph-embedding layers
    n_graph_layers = 2

    # time kernel
    t_kernel_size = 7
    t_max_size = 3

    # node kernel
    n_kernel_size = 7
    n_max_size = 3
    n_avg_size = 4

    # space kernel
    s_kernel_size = 1

    # Use the real height and width. The original unpacked both spatial
    # entries into one name, so the width silently replaced the height.
    _, n_timesteps, height, width, n_channels_in = input_shape_x
    t_input_x = Input(shape=(n_timesteps, height, width, n_channels_in),
                      name='input_x')
    t_input_n = Input(tensor=tf.constant(nodes, dtype=tf.float32),
                      name='input_n')
    tensor = t_input_x

    # spatial convolution; keeps the channel count of the input
    tensor = Conv3D(n_channels_in, (1, s_kernel_size, s_kernel_size),
                    padding='VALID',
                    name='conv_s')(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)

    # pool over the two spatial axes, keeping them as dimensions
    tensor = MaxLayer(axis=(2, 3), is_keep_dim=True,
                      name='global_pool_s')(tensor)

    # node attention
    # NOTE(review): the pooling axes used below suggest that this inserts a
    # node axis at position 1 - confirm against videograph.
    tensor = videograph.node_attention(
        tensor, t_input_n, n_channels_in,
        activation_type='relu')

    # graph embedding
    tensor = videograph.graph_embedding(tensor, n_graph_layers, n_avg_size,
                                        n_kernel_size, t_kernel_size,
                                        n_max_size,
                                        t_max_size)

    # node pooling
    tensor = MeanLayer(axis=(1, ), name='global_pool_n')(tensor)

    # temporal (and remaining spatial) pooling
    tensor = MaxLayer(axis=(1, 2, 3), name='global_pool_t')(tensor)

    # mlp for classification
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(512)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = LeakyReLU(alpha=0.2)(tensor)
    tensor = Dropout(0.25)(tensor)
    tensor = Dense(n_classes)(tensor)
    t_output = Activation(output_activation)(tensor)

    # Keras 2 keyword names; `input=`/`output=` are the deprecated Keras 1
    # spellings.
    model = Model(inputs=[t_input_x, t_input_n], outputs=t_output)
    model.compile(loss=loss, optimizer=optimizer)
    return model