Example #1
    def _model_tf_v1(n_input, n_output):
        """

        Initialize the tensorflow 1.1X version of the model

        :param int n_input: number of input dimensions (number of ECG + aux channels)
        :param int n_output: number of output (number of EEG channels)

        :return: initialized model
        """
        from tensorflow.python.keras.layers import Input, Bidirectional, CuDNNGRU, Dense, Dropout

        session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=True))
        sess = tf.Session(config=session_config)

        K.set_floatx('float64')
        ecg_input = Input(shape=(None, n_input),
                          dtype='float64',
                          name='ecg_input')

        x = Bidirectional(
            CuDNNGRU(16,
                     return_sequences=True,
                     recurrent_regularizer=l2(0.096),
                     activity_regularizer=l2(0.030)))(ecg_input)

        x = Bidirectional(
            CuDNNGRU(16,
                     return_sequences=True,
                     recurrent_regularizer=l2(0.090),
                     activity_regularizer=l2(0.013)))(x)

        x = Dense(8, activation='relu')(x)
        x = Dropout(0.327)(x)

        x = Bidirectional(
            CuDNNGRU(16,
                     return_sequences=True,
                     recurrent_regularizer=l2(0.024),
                     activity_regularizer=l2(0.067)))(x)

        x = Bidirectional(
            CuDNNGRU(64,
                     return_sequences=True,
                     recurrent_regularizer=l2(2.48e-07),
                     activity_regularizer=l2(0.055)))(x)

        bcg_out = Dense(n_output, activation='linear')(x)
        model = Model(inputs=ecg_input, outputs=bcg_out)

        return model
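A minimal usage sketch for the function above, assuming it is reachable as a plain function and that the surrounding module's tf, K, l2 and Model imports are in scope; the channel counts, sequence length and training arrays below are placeholders, and CuDNNGRU requires a CUDA GPU:

import numpy as np

n_input, n_output = 5, 2                                      # placeholder channel counts
model = _model_tf_v1(n_input, n_output)
model.compile(optimizer='adam', loss='mse')

# fake (batch, time, channels) data just to exercise the graph
ecg = np.random.randn(8, 1000, n_input).astype('float64')    # ECG + aux channels
eeg = np.random.randn(8, 1000, n_output).astype('float64')    # EEG targets
model.fit(ecg, eeg, batch_size=4, epochs=1)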
Example #2
    def get_bidirectional_cudnn_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
        """
        cuDNN-backed bidirectional version, should be much faster
        :param pre_embeddings: pre-trained embedding matrix
        :param dp_rate: dropout rate (disabled when <= 0)
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model
        """
        # The embedding part could also try multiple channels, as in the original paper
        embedding_layer = Embedding(self.max_features,  # vocabulary size
                                    self.embedding_dims,  # word-vector dimensionality
                                    weights=[pre_embeddings],  # pre-trained word vectors
                                    input_length=self.maxlen,  # maximum sentence length
                                    trainable=False  # whether to update the word vectors during training
                                    )
        input = Input((self.maxlen,))
        embedding = embedding_layer(input)
        if use_lstm:
            x = Bidirectional(CuDNNLSTM(RNN_DIM, return_sequences=True))(embedding)  # LSTM
        else:
            x = Bidirectional(CuDNNGRU(RNN_DIM, return_sequences=True))(embedding)  # GRU

        # add none or one of the following attention layers
        x, atten_layer = self.do_attention(x)
        fn = kb.function([input], [atten_layer.att_weights])

        if dp_rate > 0:
            # add a dropout layer
            x = Dropout(dp_rate)(x)

        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=input, outputs=output)

        return model, fn
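A hypothetical usage sketch for the pair returned above; clf, pre_embeddings, x_train/x_val/y_train/y_val and RNN_DIM are stand-ins for objects defined elsewhere in this class's module:

model, att_fn = clf.get_bidirectional_cudnn_model(pre_embeddings, dp_rate=0.3)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
model.fit(x_train, y_train, batch_size=64, epochs=3, validation_data=(x_val, y_val))

# the backend function returned with the model exposes the attention weights
att_weights = att_fn([x_val[:16]])[0]    # weights for 16 padded sequences
print(att_weights.shape)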
Example #3
def build_gru_model():
    model = Sequential()
    # CuDNNGRU instead of GRU, for speed
    model.add(CuDNNGRU(32, input_shape=(None, float_data.shape[-1])))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    return model
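For context, a hedged sketch of feeding this model; float_data here is a random stand-in for the (samples, features) series the surrounding script is expected to define:

import numpy as np

float_data = np.random.randn(5000, 14).astype('float32')     # placeholder time series
model = build_gru_model()

# crude fixed-stride windows; a real script would use a generator
windows = np.stack([float_data[i:i + 240] for i in range(0, 4000, 240)])
targets = float_data[np.arange(240, 4240, 240), 1]           # arbitrary target column
model.fit(windows, targets, batch_size=8, epochs=1)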
Example #4
def build_model_2(time_steps):
    # too many activation functions across the conv layers actually hurts performance here
    model = tf.keras.models.Sequential()
    # each input covers one month of data
    # the 5 input channels are the four prices plus the trading volume
    # the number of time steps is short: start with a small kernel size and then enlarge it;
    # do not keep using 1, otherwise the convolution cannot see relations between neighbours
    # model.add(layers.Conv1D(16, 2, padding='same', activation='tanh', strides=1, input_shape=(time_steps, 5)))
    # model.add(layers.Conv1D(32, 2, padding='same', activation='tanh', strides=1))

    model.add(
        layers.Conv1D(64, 2, padding='same', activation='tanh', strides=1))
    model.add(
        layers.Conv1D(128, 2, padding='same', activation='tanh', strides=1))
    model.add(layers.AveragePooling1D(2))
    # normalize along the filter axis, so each filter gets its own beta and gamma
    # a BN layer does not help much here, possibly because the mean and variance of the stock price are unstable
    # model.add(layers.BatchNormalization(axis=2))
    # activation='relu': the activations of CuDNNGRU/CuDNNLSTM appear to be fixed internally
    # since several dates are fed in at once, temporal memory may not be needed; drop it for now and deepen the network...
    # return_sequences decides whether to return a single hidden state or the hidden states of all time steps
    # without return_sequences on the first GRU you cannot stack two GRUs: the output has no time-step dimension and the shapes do not match
    model.add(CuDNNGRU(128, return_sequences=True))
    # model.add(CuDNNGRU(256, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))
    # tanh in the final output layer converges better and predicts far better than relu,
    # probably because the (-1, 1) output range matches the wide range of the inputs
    model.compile(optimizer='adam', loss='mse')
    return model
Example #5
    def get_cnn_rnn_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False, filter_sizes=[2, 3, 4]):
        """
        first a CNN generates a sequence of feature vectors, then an RNN is applied over that sequence
        :param pre_embeddings:
        :param dp_rate: drop out rate
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model
        """
        # The embedding part could also try multiple channels, as in the original paper
        embedding_layer = Embedding(self.max_features,  # vocabulary size
                                    self.embedding_dims,  # word-vector dimensionality
                                    weights=[pre_embeddings],  # pre-trained word vectors
                                    input_length=self.maxlen,  # maximum sentence length
                                    trainable=False  # whether to update the word vectors during training
                                    )
        input = Input((self.maxlen,))
        embedding = embedding_layer(input)

        # add a convolution layer
        c = Conv1D(NUM_FILTERS, 3, padding='valid', activation='relu')(embedding)
        cc = MaxPooling1D()(c)

        if dp_rate > 0:
            # add a dropout layer
            cc = Dropout(dp_rate)(cc)

        if use_lstm:
            x = CuDNNLSTM(RNN_DIM)(cc)
        else:
            x = CuDNNGRU(RNN_DIM)(cc)

        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=input, outputs=output)

        return model
Example #6
def build_model_3(time_steps):
    model = tf.keras.models.Sequential()

    model.add(
        layers.Conv1D(64,
                      2,
                      padding='same',
                      strides=1,
                      activation='relu',
                      kernel_initializer='uniform',
                      input_shape=(time_steps, 5)))
    # model.add(layers.Conv1D(32, 2, padding='same', strides=1,activation='relu',kernel_initializer='uniform'))
    # model.add(layers.Conv1D(64, 2, padding='same', strides=1,activation='relu',kernel_initializer='uniform'))
    model.add(
        layers.Conv1D(128,
                      2,
                      padding='same',
                      strides=1,
                      activation='relu',
                      kernel_initializer='uniform'))
    model.add(layers.AveragePooling1D(2))
    model.add(CuDNNGRU(128, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))

    # use the adam optimizer; convergence over short horizons is relatively slow
    model.compile(optimizer='adam', loss='mse')
    return model
Example #7
    def get_cudnn_version_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
        """
        cudnn provided versions, should be much faster
        :param pre_embeddings:
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model
        """
        # Embedding part can try multichannel as same as origin paper
        embedding_layer = Embedding(self.max_features,  # 字典长度
                                    self.embedding_dims,  # 词向量维度
                                    weights=[pre_embeddings],  # 预训练的词向量
                                    input_length=self.maxlen,  # 每句话的最大长度
                                    trainable=False  # 是否在训练过程中更新词向量
                                    )
        input = Input((self.maxlen,))
        embedding = embedding_layer(input)
        if use_lstm:
            x = CuDNNLSTM(RNN_DIM)(embedding)   # LSTM
        else:
            x = CuDNNGRU(RNN_DIM)(embedding)    # GRU

        if dp_rate > 0:
            # add a dropout layer
            x = Dropout(dp_rate)(x)

        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=input, outputs=output)

        return model
Example #8
def model(embedding_size, n_a):
    # word embedding matrix
    #word_vec = Input(shape=(embedding_size), name='Words') # batch, 300
    word_vec = tf.constant(answer_emb, name='Words', dtype='float32')    
    # preprocessing sentences into sentence vectors
    sentence = Input(shape=(T, embedding_size), name='Sentences') # batch, 50, 300
    sentence_vec = Bidirectional(CuDNNGRU(units=n_a, return_sequences=False), name='Sentence_Vectors')(sentence) # batch, 300
    # dot
    #product = Dot(axes=-1, normalize=False, name='Matrix')([word_vec, sentence_vec])
    product = tf.matmul(word_vec, sentence_vec, transpose_b=True, name='Matrix')
    key_matrix = K.transpose(product)
    model = Model(inputs= sentence, outputs=key_matrix)
    return model
Example #9
def gru_test():
    '''
    With return_sequences the layer returns the output at every time step;
    without it, only the output of the last step is returned.
    '''
    model = Sequential()
    model.add(CuDNNGRU(128))
    # model.add(CuDNNGRU(128, return_sequences=True))
    model.compile('rmsprop', 'mse')
    input_array = np.random.normal(size=(32, 10, 1))
    output_array = model.predict(input_array)
    print(output_array.shape)
    return model
Example #10
def get_decoder_outputs_gpu(target_length, encoder_states, decoder_inputs,
                            latent_dim):
    # First GRU layer
    decoder_gru1_layer = CuDNNGRU(latent_dim,
                                  input_shape=(None, target_length),
                                  return_sequences=True,
                                  return_state=True,
                                  kernel_constraint=None,
                                  kernel_regularizer=None,
                                  name="decoder_gru1_layer")
    decoder_gru1, state_h = decoder_gru1_layer(decoder_inputs,
                                               initial_state=encoder_states)

    # Second GRU layer
    decoder_gru2_layer = CuDNNGRU(latent_dim,
                                  stateful=False,
                                  return_sequences=True,
                                  return_state=True,
                                  kernel_constraint=None,
                                  kernel_regularizer=None,
                                  name="decoder_gru2_layer")
    decoder_outputs, state_h = decoder_gru2_layer(decoder_gru1)

    return decoder_outputs
Example #11
def get_encoder_states_gpu(input_shape, encoder_inputs, latent_dim):
    encoder = CuDNNGRU(latent_dim,
                       input_shape=(None, input_shape),
                       stateful=False,
                       return_sequences=False,
                       return_state=True,
                       kernel_constraint=None,
                       kernel_regularizer=None,
                       recurrent_initializer='glorot_uniform',
                       name="encoder_gru_layer")
    # 'encoder_outputs' are ignored and only states are kept.
    encoder_outputs, state_h = encoder(encoder_inputs)
    encoder_states = [state_h]

    return encoder_states
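The encoder helper above and the decoder helper from Example #10 look like parts of the same seq2seq module; a hedged sketch of wiring them into one trainable model, with illustrative dimensions and an added output projection:

from tensorflow.keras.layers import Input, Dense   # use whichever Keras namespace the helpers were written against
from tensorflow.keras.models import Model

input_dim, target_dim, latent_dim = 40, 30, 256     # illustrative sizes
encoder_inputs = Input(shape=(None, input_dim))
decoder_inputs = Input(shape=(None, target_dim))

encoder_states = get_encoder_states_gpu(input_dim, encoder_inputs, latent_dim)
decoder_outputs = get_decoder_outputs_gpu(target_dim, encoder_states,
                                          decoder_inputs, latent_dim)
decoder_outputs = Dense(target_dim, activation='softmax')(decoder_outputs)

seq2seq = Model([encoder_inputs, decoder_inputs], decoder_outputs)
seq2seq.compile(optimizer='adam', loss='categorical_crossentropy')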
Example #12
def encoder_bi_GRU_gpu(input_shape, encoder_inputs, latent_dim):
    encoder = Bidirectional(CuDNNGRU(latent_dim,
                                     input_shape=(None, input_shape),
                                     stateful=False,
                                     return_sequences=False,
                                     return_state=True,
                                     kernel_constraint=None,
                                     kernel_regularizer=None,
                                     recurrent_initializer='glorot_uniform'),
                            name="encoder_bi_gru_layer")
    # 'encoder_outputs' are ignored and only states are kept.
    encoder_outputs, forward_h, backward_h = encoder(encoder_inputs)
    state_h = Concatenate()([forward_h, backward_h])
    encoder_states = [state_h]

    return encoder_states
Example #13
def make_discriminator(name, s, adj, node_f, use_gcn=True, use_gru=True):
    n = node_f.shape[0]  # number of nodes
    input_s = Input(shape=(s, n))
    input_f = Input(shape=(n, node_f.shape[1]))
    input_g = Input(shape=(n, n))
    if use_gcn:
        gcov1 = GraphConv(2 * base)([input_f, input_g])
        # gcov2 = GraphConv(base)([gcov1, input_g])
        input_s1 = Dot(axes=(2, 1))(
            [input_s, gcov1])  # batched matrix product over the node axis
    else:
        input_s1 = input_s
    fc1 = Dense(4 * base, activation='relu', input_shape=(n, ))(input_s1)
    fc2 = Dense(8 * base, activation='relu', input_shape=(n, ))(fc1)
    # S*D2

    if use_gru:
        rnn1 = Dropout(dropout)(CuDNNGRU(2 * base, return_sequences=True)(fc2))
    else:
        rnn1 = fc2
    fc3 = Dense(16 * base, activation='relu', input_shape=(n, ))(rnn1)
    out = Dense(1)(Flatten()(fc3))
    return Model(name=name, inputs=[input_s, input_f, input_g], outputs=out)
Example #14
def build_model_1(time_steps):
    model = tf.keras.models.Sequential()
    # the number of time steps is short: start with a small kernel size and then enlarge it;
    # do not keep using 1, otherwise the convolution cannot see relations between neighbours
    model.add(
        layers.Conv1D(16,
                      2,
                      padding='same',
                      strides=1,
                      input_shape=(time_steps, 5)))
    model.add(layers.Conv1D(32, 2, padding='same', strides=1))
    model.add(layers.Conv1D(64, 2, padding='same', strides=1))
    # note: the second size-2 convolution already spans three days of candlesticks, so not many such layers are needed
    model.add(layers.Conv1D(128, 2, padding='same', strides=1))
    model.add(layers.AveragePooling1D(2))
    model.add(CuDNNGRU(128, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    # model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))

    # use the adam optimizer; convergence over short horizons is relatively slow
    model.compile(optimizer='adam', loss='mse')
    return model
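A rough sketch of how these build_model_* functions might be fed, assuming already-scaled OHLCV bars windowed into (time_steps, 5) samples; the random data and the choice of target are placeholders:

import numpy as np

time_steps = 30
bars = np.random.uniform(-1, 1, size=(500, 5)).astype('float32')   # 4 prices + volume, scaled to [-1, 1]
x = np.stack([bars[i:i + time_steps] for i in range(len(bars) - time_steps - 1)])
y = bars[time_steps + 1:]                                           # next bar as the 5-value target

model = build_model_1(time_steps)
model.fit(x, y, batch_size=32, epochs=1, validation_split=0.1)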
Example #15
def one_hot(x, num_classes):
    return K.one_hot(x, num_classes=num_classes)


enc_size = 256
dec_size = 256

# encoder part
enc_inp = Input(shape=(None, ), dtype=tf.int32)
# our inputs are sparse but we need one-hot encoding
enc_one_hot = Lambda(function=one_hot,
                     arguments={'num_classes': eng_vocab_size},
                     output_shape=(max_inp_seq, eng_vocab_size))(enc_inp)
# use CuDNNGRU if a GPU is available; it is roughly 3x faster
if tf.test.is_gpu_available():
    enc_gru = CuDNNGRU(units=enc_size, return_state=True)
    enc_output, enc_state = enc_gru(enc_one_hot)
else:
    enc_gru = GRU(units=enc_size, return_state=True)
    enc_output, enc_state = enc_gru(enc_one_hot)

# decoder part
dec_inp = Input(shape=(None, ), dtype=tf.int32)
# our outputs are sparse but we need one-hot encoding
dec_one_hot = Lambda(function=one_hot,
                     arguments={'num_classes': spa_vocab_size},
                     output_shape=(max_trg_seq, spa_vocab_size))(dec_inp)
# use CuDNNGRU if a GPU is available; it is roughly 3x faster
if tf.test.is_gpu_available():
    dec_gru = CuDNNGRU(units=dec_size,
                       return_sequences=True,
                       return_state=True)
    # presumably followed, as in the encoder part above, by something like:
    # dec_output, dec_state = dec_gru(dec_one_hot, initial_state=enc_state)
Example #16
    def init_model(self,
                   input_shape,
                   num_classes,
                   **kwargs):
        layers = 5
        filters_size = [64, 128, 256, 512, 512]
        kernel_size = (3, 3)
        pool_size = [(2, 2), (2, 2), (2, 2), (4, 1), (4, 1)]

        freq_axis = 2
        channel_axis = 3

        channel_size = 128
        min_size = min(input_shape[:2])
        melgram_input = Input(shape=input_shape)
        # x = ZeroPadding2D(padding=(0, 37))(melgram_input)

        x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)
        x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)

        # Conv block 1
        x = Convolution2D(
            filters=filters_size[0],
            kernel_size=kernel_size,
            padding='same',
            name='conv1')(x)
        x = ELU()(x)
        x = BatchNormalization(axis=channel_axis, name='bn1')(x)
        x = MaxPooling2D(
            pool_size=pool_size[0],
            strides=pool_size[0],
            name='pool1')(x)
        x = Dropout(0.1, name='dropout1')(x)

        min_size = min_size // pool_size[0][0]

        for layer in range(1, layers):
            min_size = min_size // pool_size[layer][0]
            if min_size < 1:
                break
            x = Convolution2D(
                filters=filters_size[layer],
                kernel_size=kernel_size,
                padding='same',
                name='conv' + str(layer + 1))(x)
            x = ELU()(x)
            x = BatchNormalization(axis=channel_axis, name='bn'+str(layer + 1)+'')(x)
            x = MaxPooling2D(
                pool_size=pool_size[layer],
                strides=pool_size[layer],
                name='pool'+str(layer + 1)+'')(x)
            x = Dropout(0.1, name='dropout'+str(layer + 1)+'')(x)

        x = Reshape((-1, channel_size))(x)

        gru_units = 32
        if num_classes > 32:
            gru_units = int(num_classes * 1.5)
        # GRU block 1, 2, output
        x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
        x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
        x = Dropout(0.3)(x)
        outputs = Dense(num_classes, activation='softmax', name='output')(x)

        model = TFModel(inputs=melgram_input, outputs=outputs)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=1e-4,
            amsgrad=True)
        model.compile(
            optimizer=optimizer,
            loss="sparse_categorical_crossentropy",
            metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #17
def gru_model():
    emb_n = 64
    category_num = {
        'adidmd5': (780369, emb_n),
        'idfamd5': (360, emb_n),
        'imeimd5': (1021836, emb_n),
        'macmd5': (329184, emb_n),
        'openudidmd5': (85051, emb_n),
        'ip': (813719, emb_n),
        'reqrealip': (9748, emb_n),
        'adunitshowid': (800, emb_n),
        'apptype': (91, emb_n),
        'carrier': (4, emb_n),
        'city': (331, emb_n),
        'dvctype': (3, emb_n),
        'model': (5923, emb_n),  # 7957 7958  5922
        'make': (1704, emb_n),
        'mediashowid': (313, emb_n),
        'ntt': (7, emb_n),
        'orientation': (2, emb_n),
        'osv': (185, emb_n),
        'pkgname': (2368, emb_n),
        'ppi': (119, emb_n),
        'ver': (3268, emb_n),
        'screen_area': (1396, emb_n),
        'creative_dpi': (1763, emb_n),
        'hour': (24, emb_n),
        'lan': (33, emb_n),
        'h': (985, emb_n),
        'w': (449, emb_n),

    }
    # categorical feature inputs
    category_inp = Input(shape=(len(category),), name='category_inp')
    cat_embeds = []
    for idx, col in enumerate(category):
        x = Lambda(lambda x: x[:, idx, None])(category_inp)
        x = Embedding(category_num[col][0], category_num[col][1], input_length=1)(x)
        cat_embeds.append(x)
    embeds = concatenate(cat_embeds, axis=2)
    embeds = GaussianDropout(0.5)(embeds)
    # numerical feature inputs
    numerical_inp = Input(shape=(len(numerical),), name='continous_inp')
    print('numerical', len(numerical) // 8 * 8 + 8)
    x2 = Dense(len(numerical) // 8 + 8, activation='relu', kernel_initializer='random_uniform',
               bias_initializer='zeros')(
        numerical_inp)
    x2 = Dropout(0.5)(x2)
    x2 = BatchNormalization()(x2)
    x2 = Reshape([1, int(x2.shape[1])])(x2)
    x = concatenate([embeds, x2], axis=2)
    # backbone network
    x = CuDNNGRU(128)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(64, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(32, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    out_p = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[category_inp, numerical_inp], outputs=out_p)
Example #18
embedding_boyut = 50

model.add(
    Embedding(input_dim=max_kelime,
              output_dim=embedding_boyut,
              input_length=max_token,
              name='embedding_katman'))

model.add(CuDNNGRU(units=16, return_sequences=True))
model.add(CuDNNGRU(units=8, return_sequences=True))
model.add(CuDNNGRU(units=4, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

optimizer = Adam(lr=1e-3)

model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
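To round the notebook fragment off, a hedged training call; x_train, y_train and the earlier tokenisation cells (max_kelime, max_token) are assumed:

model.fit(x_train, y_train,          # padded token ids and 0/1 labels from earlier cells
          validation_split=0.1,
          epochs=3,
          batch_size=256)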

Example #19
    def init_model(self, input_shape, num_classes, **kwargs):
        freq_axis = 2
        channel_axis = 3
        channel_size = 128
        min_size = min(input_shape[:2])
        melgram_input = Input(shape=input_shape)
        # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
        # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)

        x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)
        # Conv block 1
        x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x)
        x = BatchNormalization(axis=channel_axis, name='bn1')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
        x = Dropout(0.1, name='dropout1')(x)

        # Conv block 2
        x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x)
        x = BatchNormalization(axis=channel_axis, name='bn2')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
        x = Dropout(0.1, name='dropout2')(x)

        # Conv block 3
        x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x)
        x = BatchNormalization(axis=channel_axis, name='bn3')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
        x = Dropout(0.1, name='dropout3')(x)

        if min_size // 24 >= 4:
            # Conv block 4
            x = Convolution2D(channel_size, 3, 1, padding='same',
                              name='conv4')(x)
            x = BatchNormalization(axis=channel_axis, name='bn4')(x)
            x = ELU()(x)
            x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
            x = Dropout(0.1, name='dropout4')(x)

        x = Reshape((-1, channel_size))(x)

        gru_units = 128
        if num_classes > gru_units:
            gru_units = int(num_classes * 1.5)
        # GRU block 1, 2, output
        x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
        x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
        # x = Dense(max(int(num_classes*1.5), 128), activation='relu', name='dense1')(x)
        x = Dropout(0.3)(x)
        outputs = Dense(num_classes, activation='softmax', name='output')(x)

        model = TFModel(inputs=melgram_input, outputs=outputs)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=1e-4,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss="sparse_categorical_crossentropy",
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True