Example #1
0
    def __init__(self,
                 image_embedding_dim=512,
                 token_embedding_dim=8,
                 token_vocab_size=17,
                 decoder_hidden_dim=1024,
                 mlp_hidden_dim=1024,
                 image_encoder="inceptionv3"):
        """Assemble the sub-models and layers of the EDAXUMLP decoder.

        Args:
            image_embedding_dim: Width of the image feature embedding.
            token_embedding_dim: Width of each token embedding vector.
            token_vocab_size: Number of distinct tokens in the vocabulary.
            decoder_hidden_dim: Hidden-state size of the decoder LSTM.
            mlp_hidden_dim: Hidden-layer size of the output MLP.
            image_encoder: Backbone selector — "inceptionv3" picks
                ImageEncoderIV3; any other value falls back to ImageEncoder.
        """
        super(EDAXUMLP, self).__init__()

        # --- hyper-parameters and run-time bookkeeping --------------------
        self.image_embedding_dim = image_embedding_dim
        self.token_embedding_dim = token_embedding_dim
        self.token_vocab_size = token_vocab_size
        self.decoder_hidden_dim = decoder_hidden_dim
        self.mlp_hidden_dim = mlp_hidden_dim
        self.image_encoder = image_encoder
        # Filled in later (presumably during training/inference) — start empty.
        self.batch_size = None
        self.predictions_file = None
        self.attention_weights = None
        self.attention_predictions = None

        # --- sub-model 1: image encoder -----------------------------------
        # Backbone chosen by the `image_encoder` flag.
        self.model1_image_encoding = (
            ImageEncoderIV3(image_embedding_dim=image_embedding_dim)
            if image_encoder == "inceptionv3"
            else ImageEncoder(image_embedding_dim=image_embedding_dim))

        # --- sub-model 2: token sequence embedding ------------------------
        self.model2_token_embedding = TokenEmbedding(
            token_vocab_size=token_vocab_size,
            token_embedding_dim=token_embedding_dim)

        # --- sub-model 3: attention ---------------------------------------
        # Attention is sized to the image embedding here.
        self.model3_attention = Attention(
            input_embedding_dim=image_embedding_dim,
            decoder_hidden_dim=decoder_hidden_dim)

        # --- sub-model 4: output MLP --------------------------------------
        self.model4_mlp = MLP(
            mlp_hidden_dim=mlp_hidden_dim,
            token_vocab_size=token_vocab_size)

        # --- plain layers / backend ops -----------------------------------
        self.layer1_expand_dims = keras.backend.expand_dims
        self.layer2_concatenate = keras.layers.concatenate
        # TODO: return_sequences=True is kept even for length-1 sequences so
        # that both the output sequence and the final states come back (?).
        self.layer3_lstm = keras.layers.LSTM(decoder_hidden_dim,
                                             return_sequences=True,
                                             return_state=True)
        self.layer4_concatenate = keras.layers.concatenate
Example #2
0
    def __init__(self,
                 image_embedding_dim=512,
                 token_embedding_dim=8,
                 token_vocab_size=17,
                 decoder_hidden_dim=1024,
                 mlp_hidden_dim=1024,
                 image_encoder="inceptionv3"):
        """Assemble the sub-models and layers of the DRAKEDETECTIONS2 decoder.

        Args:
            image_embedding_dim: Width of the image feature embedding.
            token_embedding_dim: Width of each token embedding vector.
            token_vocab_size: Number of distinct tokens in the vocabulary.
            decoder_hidden_dim: Hidden-state size of the decoder LSTM.
            mlp_hidden_dim: Hidden-layer size of the output MLP.
            image_encoder: Backbone selector — "inceptionv3" picks
                ImageEncoderIV3; any other value falls back to ImageEncoder.
        """
        super(DRAKEDETECTIONS2, self).__init__()

        # --- hyper-parameters and run-time bookkeeping --------------------
        self.image_embedding_dim = image_embedding_dim
        self.token_embedding_dim = token_embedding_dim
        self.token_vocab_size = token_vocab_size
        self.decoder_hidden_dim = decoder_hidden_dim
        self.mlp_hidden_dim = mlp_hidden_dim
        self.image_encoder = image_encoder
        # Filled in later (presumably during training/inference) — start empty.
        self.batch_size = None
        self.predictions_file = None

        # --- sub-model 1: image encoder -----------------------------------
        # Backbone chosen by the `image_encoder` flag.
        self.model1_image_encoding = (
            ImageEncoderIV3(image_embedding_dim=image_embedding_dim)
            if image_encoder == "inceptionv3"
            else ImageEncoder(image_embedding_dim=image_embedding_dim))

        # --- sub-model 2: token sequence embedding ------------------------
        self.model2_token_embedding = TokenEmbedding(
            token_vocab_size=token_vocab_size,
            token_embedding_dim=token_embedding_dim)

        # --- sub-model 3: attention ---------------------------------------
        # NOTE(review): attention here is sized to the TOKEN embedding,
        # unlike sibling models that size it to the image embedding —
        # confirm this is intentional.
        self.model3_attention = Attention(
            input_embedding_dim=token_embedding_dim,
            decoder_hidden_dim=decoder_hidden_dim)

        # --- sub-model 4: output MLP --------------------------------------
        self.model4_mlp = MLP(
            mlp_hidden_dim=mlp_hidden_dim,
            token_vocab_size=token_vocab_size)

        # --- sub-model 5: detections -> initial hidden state --------------
        self.model5_detections2hidden = Detections2Hidden(
            decoder_hidden_dim=decoder_hidden_dim)

        # --- plain layers / backend ops -----------------------------------
        self.layer1_expand_dims = keras.backend.expand_dims
        self.layer2_concatenate = keras.layers.concatenate
        # return_sequences + return_state: keep the full output sequence
        # alongside the final hidden/cell states; light input dropout.
        self.layer3_lstm = keras.layers.LSTM(decoder_hidden_dim,
                                             return_sequences=True,
                                             return_state=True,
                                             dropout=0.1)