Example #1
def classifier(base_layers,
               input_rois,
               num_rois,
               nb_classes=21,
               trainable=False):

    # Compile times on Theano tend to be very high, so we use smaller ROI pooling regions as a workaround

    pooling_regions = 14
    input_shape = (num_rois, 14, 14, 1024)

    out_roi_pool = RoiPoolingConv(pooling_regions,
                                  num_rois)([base_layers, input_rois])
    out = classifier_layers(out_roi_pool,
                            input_shape=input_shape,
                            trainable=True)

    out = TimeDistributed(Flatten())(out)

    out_class = TimeDistributed(Dense(nb_classes,
                                      activation='softmax',
                                      kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1),
                                     activation='linear',
                                     kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
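The head above depends on the project-specific RoiPoolingConv, but the pattern it illustrates — a classification head and a regression head applied independently to every ROI with shared weights — can be sketched with stock Keras layers. A minimal, hypothetical sketch (the 32 ROIs and the 7x7x512 pooled feature size are illustrative assumptions, not values from the code above):

from tensorflow.keras.layers import Input, TimeDistributed, Flatten, Dense
from tensorflow.keras.models import Model

num_rois, nb_classes = 32, 21  # illustrative values

# One pooled 7x7x512 feature map per ROI, stacked along a "time" axis.
roi_features = Input(shape=(num_rois, 7, 7, 512))

x = TimeDistributed(Flatten())(roi_features)
out_class = TimeDistributed(Dense(nb_classes, activation='softmax'))(x)  # (None, 32, 21)
out_regr = TimeDistributed(Dense(4 * (nb_classes - 1)))(x)               # (None, 32, 80)

head = Model(roi_features, [out_class, out_regr])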
Example #2
def atari_qnet(input_shape, num_actions, net_name, net_size):
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # convolutional layers
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    # if the net is recurrent, reshape the input so frames become timesteps
    if 'drqn' in net_name:
        # recurrent net (drqn)
        # move the frame-stack axis to the front so frames act as timesteps: (B, H, W, F) -> (B, F, H, W)
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        # append a channel axis: (B, F, H, W) -> (B, F, H, W, 1); K.stack([x], axis=4) acts like K.expand_dims(x, 4)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)

        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
    elif 'dqn' in net_name:
        # fully connected net (dqn)
        # extract features with convolutional layers
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)

    # network type: Dense for dqn; LSTM or GRU for drqn
    if 'lstm' in net_name:
        net_type = LSTM
    elif 'gru' in net_name:
        net_type = GRU
    else:
        net_type = Dense

    # dueling or regular dqn/drqn
    if 'dueling' in net_name:
        value1 = net_type(net_size, activation='relu')(feature)
        adv1 = net_type(net_size, activation='relu')(feature)
        value2 = Dense(1)(value1)
        adv2 = Dense(num_actions)(adv1)
        mean_adv2 = Lambda(lambda x: K.mean(x, axis=1))(adv2)
        ones = K.ones([1, num_actions])
        lambda_exp = lambda x: K.dot(K.expand_dims(x, axis=1), -ones)
        exp_mean_adv2 = Lambda(lambda_exp)(mean_adv2)
        sum_adv = add([exp_mean_adv2, adv2])
        exp_value2 = Lambda(lambda x: K.dot(x, ones))(value2)
        q_value = add([exp_value2, sum_adv])
    else:
        hid = net_type(net_size, activation='relu')(feature)
        q_value = Dense(num_actions)(hid)

    # build model
    return Model(inputs=state, outputs=q_value)
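The dueling branch implements Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'); the K.dot products with a row of ones are only there to broadcast the scalar value and the mean advantage across all actions. A sketch of the same combine written with plain TensorFlow broadcasting inside a single Lambda, using the tensors from the dueling branch (value2 is (batch, 1), adv2 is (batch, num_actions)), followed by a hypothetical call assuming the usual 84x84x4 Atari frame stack:

# Equivalent dueling combine: (batch, 1) broadcasts against (batch, num_actions).
q_value = Lambda(
    lambda va: va[0] + va[1] - K.mean(va[1], axis=1, keepdims=True)
)([value2, adv2])

# Hypothetical usage; 'dueling dqn' selects the dueling + Dense branches.
model = atari_qnet((84, 84, 4), num_actions=6, net_name='dueling dqn', net_size=512)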
Example #3
def classifier_layers(x, input_shape, trainable=False):

    # Compile times on Theano tend to be very high, so we use smaller ROI pooling regions as a workaround
    # (hence a smaller stride in the region that follows the ROI pool)
    x = conv_block_td(x,
                      3, [512, 512, 2048],
                      stage=5,
                      block='a',
                      input_shape=input_shape,
                      strides=(2, 2),
                      trainable=trainable)

    x = identity_block_td(x,
                          3, [512, 512, 2048],
                          stage=5,
                          block='b',
                          trainable=trainable)
    x = identity_block_td(x,
                          3, [512, 512, 2048],
                          stage=5,
                          block='c',
                          trainable=trainable)
    x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x)

    return x
Example #4
def atari_acnet(input_shape, num_actions, net_name, net_size):
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # convolutional layers
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    # if the net is recurrent, reshape the input so frames become timesteps
    if 'lstm' in net_name or 'gru' in net_name:
        # recurrent net
        # move the frame-stack axis to the front so frames act as timesteps: (B, H, W, F) -> (B, F, H, W)
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        # append a channel axis: (B, F, H, W) -> (B, F, H, W, 1); K.stack([x], axis=4) acts like K.expand_dims(x, 4)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)

        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)

        # specify net type for the following layer
        if 'lstm' in net_name:
            net_type = LSTM
        elif 'gru' in net_name:
            net_type = GRU
    elif 'fully connected' in net_name:
        # fully connected net
        # extract features with convolutional layers
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)

        # specify net type for the following layer
        net_type = Dense

    # actor (policy) and critic (value) stream
    hid = net_type(net_size, activation='relu')(feature)
    logits = Dense(num_actions, kernel_initializer='zeros')(hid)
    value = Dense(1)(hid)

    # build model
    return Model(inputs=state, outputs=[value, logits])
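Note that net_name drives everything here: a name matching neither the recurrent nor the 'fully connected' branch would leave feature and net_type undefined and raise a NameError. A hypothetical call for the recurrent variant (the 84x84x4 Atari frame stack and the sizes are assumptions, not values from the code above):

model = atari_acnet(input_shape=(84, 84, 4), num_actions=6,
                    net_name='lstm', net_size=256)
value, logits = model.outputs  # critic head (None, 1), actor head (None, 6)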
Example #5
def conv_block_td(input_tensor,
                  kernel_size,
                  filters,
                  stage,
                  block,
                  input_shape,
                  strides=(2, 2),
                  trainable=True):

    # Time-distributed version of a ResNet conv block, applied per ROI

    nb_filter1, nb_filter2, nb_filter3 = filters
    bn_axis = 3

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = TimeDistributed(Convolution2D(nb_filter1, (1, 1),
                                      strides=strides,
                                      trainable=trainable,
                                      kernel_initializer='normal'),
                        input_shape=input_shape,
                        name=conv_name_base + '2a')(input_tensor)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size),
                                      padding='same',
                                      trainable=trainable,
                                      kernel_initializer='normal'),
                        name=conv_name_base + '2b')(x)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = TimeDistributed(Convolution2D(nb_filter3, (1, 1),
                                      kernel_initializer='normal',
                                      trainable=trainable),
                        name=conv_name_base + '2c')(x)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2c')(x)

    shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1),
                                             strides=strides,
                                             trainable=trainable,
                                             kernel_initializer='normal'),
                               name=conv_name_base + '1')(input_tensor)
    shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                               name=bn_name_base + '1')(shortcut)

    x = Add()([x, shortcut])
    x = Activation('relu')(x)
    return x
Example #6
def identity_block_td(input_tensor,
                      kernel_size,
                      filters,
                      stage,
                      block,
                      trainable=True):

    # Time-distributed version of a ResNet identity block, applied per ROI

    nb_filter1, nb_filter2, nb_filter3 = filters
    bn_axis = 3

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = TimeDistributed(Convolution2D(nb_filter1, (1, 1),
                                      trainable=trainable,
                                      kernel_initializer='normal'),
                        name=conv_name_base + '2a')(input_tensor)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size),
                                      trainable=trainable,
                                      kernel_initializer='normal',
                                      padding='same'),
                        name=conv_name_base + '2b')(x)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = TimeDistributed(Convolution2D(nb_filter3, (1, 1),
                                      trainable=trainable,
                                      kernel_initializer='normal'),
                        name=conv_name_base + '2c')(x)
    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis),
                        name=bn_name_base + '2c')(x)

    x = Add()([x, input_tensor])
    x = Activation('relu')(x)

    return x
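Both blocks above rely on the same mechanism: wrapping a 2D layer in TimeDistributed lets it consume a 5D tensor of shape (batch, num_rois, rows, cols, channels), sharing one set of weights across the ROI axis. A quick stand-alone shape check (all sizes illustrative):

from tensorflow.keras.layers import Input, TimeDistributed, Conv2D

rois = Input(shape=(4, 14, 14, 1024))             # (batch, num_rois, H, W, C)
out = TimeDistributed(Conv2D(512, (1, 1)))(rois)
print(out.shape)                                  # (None, 4, 14, 14, 512)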
Example #7
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Converts class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

row, col, pixel = x_train.shape[1:]

# 4D input.
x = Input(shape=(row, col, pixel))

# Encodes a row of pixels using TimeDistributed Wrapper.
encoded_rows = TimeDistributed(LSTM(row_hidden))(x)

# Encodes columns of encoded rows.
encoded_columns = LSTM(col_hidden)(encoded_rows)

# Final predictions and model.
prediction = Dense(num_classes, activation='softmax')(encoded_columns)
model = Model(x, prediction)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Training.
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
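To see the hierarchy, it helps to trace the shapes through the two LSTM stages (assuming MNIST, so row = col = 28 and pixel = 1):

# x:               (batch, 28, 28, 1)       input image, rows as the outer "time" axis
# encoded_rows:    (batch, 28, row_hidden)  one LSTM pass per row of 28 pixels
# encoded_columns: (batch, col_hidden)      one LSTM pass over the 28 row encodings
# prediction:      (batch, num_classes)     softmax over digit classes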
Example #8
def build_network(args, data):
    max_sentence_len = data["train_word"][0].shape[1]
    max_word_len = data["char"][0].shape[-1] // max_sentence_len  # integer division keeps the shape an int
    num_labels = data["label_collection"].size() - 1

    # Input tensor contains char indices
    # for all words in a given batch of sentences
    # Shape: batch_size, max_sentence_len * max_word_len
    char_input = Input(shape=(max_sentence_len * max_word_len, ),
                       name="char_input",
                       dtype="int32")

    # 3D tensor containing char based word embeddings
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len, num_filters
    char_based_word_emb = get_char_based_embeddings(args, data, char_input,
                                                    "char")

    # Input tensor containing word indices
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len
    word_input = Input(shape=(max_sentence_len, ),
                       name="word_input",
                       dtype="int32")

    # 3D tensor containing word embeddings
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len, word_emb_dim
    word_emb = get_word_embeddings(args, data, word_input, "word")

    # Input tensor contains orth char indices
    # for all words in a given batch of sentences
    # Shape: batch_size, max_sentence_len * max_word_len
    orth_char_input = Input(shape=(max_sentence_len * max_word_len, ),
                            name="orth_char_input",
                            dtype="int32")

    # 3D tensor containing orth char based word embeddings
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len, num_filters
    orth_char_based_word_emb = get_char_based_embeddings(
        args, data, orth_char_input, "orth_char")

    # Input tensor containing orth word indices
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len
    orth_word_input = Input(shape=(max_sentence_len, ),
                            name="orth_word_input",
                            dtype="int32")

    # 3D tensor containing orth word embeddings
    # for the given batch of sentences
    # Shape: batch_size, max_sentence_len, orth_word_emb_dim
    orth_word_emb = get_word_embeddings(args, data, orth_word_input,
                                        "orth_word")

    inputs = [
        char_based_word_emb, word_emb, orth_char_based_word_emb, orth_word_emb
    ]

    bi_lstm_output = get_bi_lstm_output(args, data, inputs)

    lstm_output_dim = bi_lstm_output.shape[2]

    hidden_layer_output = TimeDistributed(
        Dense(units=num_labels,
              input_shape=(max_sentence_len, lstm_output_dim)))(bi_lstm_output)

    crf_output = ChainCRF(name="output")(hidden_layer_output)

    model = Model(
        inputs=[char_input, word_input, orth_char_input, orth_word_input],
        outputs=crf_output)

    # model = Model(
    #     inputs=[char_input, word_input, orth_char_input, orth_word_input],
    #     outputs=bi_lstm_output)

    # model.summary()

    return model
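The tagging head at the core of this network is the TimeDistributed(Dense(num_labels)) projection, which scores every token before the CRF. Since ChainCRF and the embedding helpers are project-specific, here is a self-contained sketch of just that projection with stock Keras layers (all sizes are illustrative assumptions):

from tensorflow.keras.layers import (Input, Embedding, Bidirectional, LSTM,
                                     Dense, TimeDistributed)
from tensorflow.keras.models import Model

max_sentence_len, vocab_size, num_labels = 50, 10000, 9  # illustrative

words = Input(shape=(max_sentence_len,), dtype='int32')
emb = Embedding(vocab_size, 100)(words)                        # (None, 50, 100)
states = Bidirectional(LSTM(128, return_sequences=True))(emb)  # (None, 50, 256)
scores = TimeDistributed(Dense(num_labels))(states)            # (None, 50, 9)

tagger = Model(words, scores)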
Example #9
TimeDistributed lets you quickly create models that can process *sequences* of inputs —

- turn an image classification model into a video classification model, in just one line.
"""

from tensorflow.contrib.keras.python.keras.layers import TimeDistributed

# Input tensor for sequences of 20 timesteps,
# each containing a 784-dimensional vector
input_sequences = Input(shape=(20, 784))  # tensor (?, 20, 784)

out = model(input_sequences)
# This applies our previous model to every timestep in the input sequences.
# the output of the previous model was a 10-way softmax,
# so the output of the layer below will be a sequence of 20 vectors of size 10.
processed_sequences = TimeDistributed(model)(input_sequences)
# But what exactly is the difference between `out` and `processed_sequences`?
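# One answer: calling `model(input_sequences)` directly only works here because
# the wrapped model is built from Dense layers, which operate on the last axis
# and so broadcast over the extra time dimension. `TimeDistributed(model)`
# makes the per-timestep application explicit and works for any wrapped layer
# or model, whatever its internals.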
"""
(['class TimeDistributed(Wrapper):\n',

This wrapper allows to apply a layer to every temporal slice of an '
  'input.\n',
  '\n',
  '  The input should be at least 3D, and the dimension of index one\n',
  '  will be considered to be the temporal dimension.\n',
  '\n',
  '  Consider a batch of 32 samples,\n',
  '  where each sample is a sequence of 10 vectors of 16 dimensions.\n',
  '  The batch input shape of the layer is then `(32, 10, 16)`,\n',
  '  and the `input_shape`, not including the samples dimension, is `(10, '
  '16)`.\n',
# The next stage would be training this model on actual data.
"""
### Video question answering model

Now that we have trained our image QA model, we can quickly turn it into a video QA model. With appropriate training, you will be able to show it a short video (e.g. a 100-frame human action clip) and ask a natural language question about the video (e.g. "what sport is the boy playing?" -> "football").

"""

from tensorflow.contrib.keras.python.keras.layers import TimeDistributed

video_input = Input(shape=(100, 224, 224, 3))  # video shape (?, 100, 224, 224, 3)

# This is our video encoded via the previously trained vision_model (weights are reused)
encoded_frame_sequence = TimeDistributed(vision_model)(video_input)  # a sequence of vectors, (?, 100, 160000)

encoded_video = LSTM(256)(encoded_frame_sequence)  # a single vector, (?, 256)

# This is a model-level representation of the question encoder, reusing the same weights as before:
question_encoder = Model(inputs=question_input, outputs=encoded_question)
# built from the question tensors defined in the earlier example

# Let's use it to encode the question:
video_question_input = Input(shape=(100, ), dtype='int32')
encoded_video_question = question_encoder(video_question_input)

# And this is our video question answering model:
merged = concatenate([encoded_video, encoded_video_question])