def nnet(inputs, num_classes, keep_prob=0.2):
    """Build the deep network model used for single-lead signals.

    The backbone output goes through dropout and then two identical stages
    (bidirectional GRU followed by an element-wise softmax gate) before batch
    normalisation, flattening and the final sigmoid layer.

    :param inputs: keras tensor, the sliced and stacked single-lead signal.
    :param num_classes: int, number of target classes.
    :param keep_prob: float, handed to ``Dropout`` unchanged as its rate
        (the original notes describe it as the random masking probability).
    :return: tuple ``(net, features)``: the sigmoid outputs and the flattened
        features that feed the final dense layer.
    """
    backbone_out = Net2.__backbone(inputs)
    features = Dropout(keep_prob)(backbone_out)

    # Each stage: BiGRU (12 units per direction, concatenated) -> Dense(24)
    # scores -> softmax over the default axis -> multiply the GRU output by
    # the resulting weights. The Dense names are fixed and must stay unique.
    for dense_name in ('attention_vec', 'attention_vec1'):
        features = Bidirectional(CuDNNGRU(12, return_sequences=True), merge_mode='concat')(features)
        scores = Dense(24, name=dense_name)(features)
        weights = Softmax()(scores)
        features = Lambda(lambda x: x[0] * x[1])([weights, features])

    features = BatchNormalization()(features)
    features = Flatten()(features)
    net = Dense(units=num_classes, activation='sigmoid')(features)
    return net, features
Beispiel #2
0
def build_MINI_MTL(input_shape, filters, numClasses, i):
    """Build a small two-branch (edge / mask) 3D multi-task model.

    Both branches start from ``RA(input, input, filters)``, are refined by a
    3x3x3 convolution + batch norm + ReLU, and end in a 1x1x1 convolution,
    a channel softmax and an up-sampling by ``2 ** i``.

    :param input_shape: edge length of the cubic input volume.
    :param filters: channel count of the input and of the refinement convs.
    :param numClasses: number of output channels of each head.
    :param i: level index; sets the up-sampling factor and the output names.
    :return: ``(mtl_model, out_mtl)`` where the model outputs
        ``[out_edge, out_mask]`` and ``out_mtl`` is a 1x1x1 convolution over
        the concatenated mask and edge features.
    """
    def refine(tensor):
        # 3x3x3 conv -> batch norm over the last axis -> ReLU
        tensor = Conv3D(filters, (3, 3, 3), padding='same')(tensor)
        tensor = BatchNormalization(axis=-1)(tensor)
        return Activation('relu')(tensor)

    def head(tensor, name_template):
        # per-voxel class probabilities, up-sampled by 2 ** i
        tensor = Conv3D(numClasses, (1, 1, 1), padding='same')(tensor)
        tensor = Softmax(axis=-1)(tensor)
        return UpSampling3D(pow(2, i), name=name_template.format(i))(tensor)

    volume = Input(shape=(input_shape, input_shape, input_shape, filters))

    edge_features = RA(volume, volume, filters)
    mask_features = RA(volume, volume, filters)

    edge_features = refine(edge_features)
    mask_features = refine(mask_features)

    edge_out = head(edge_features, 'out_edge_{}')
    mask_out = head(mask_features, 'out_mask_{}')

    fused = Concatenate()([mask_features, edge_features])
    fused = Conv3D(filters, (1, 1, 1), padding='same')(fused)

    model = Model(inputs=[volume], outputs=[edge_out, mask_out])
    return model, fused
Beispiel #3
0
    def __init__(self, config, human_vocab_size, machine_vocab_size):
        """Store hyper-parameters, create the shared layers and build the model.

        :param config: mapping providing 'Tx', 'Ty', 'layer1_size' and
            'layer2_size'.
        :param human_vocab_size: stored as ``x_vocab_size``.
        :param machine_vocab_size: stored as ``y_vocab_size`` and used as the
            unit count of the output ``Dense`` layer.
        """

        # basic params
        self.Tx = config['Tx']
        self.Ty = config['Ty']

        self.x_vocab_size = human_vocab_size
        self.y_vocab_size = machine_vocab_size

        # net params
        self.layer1_size = config['layer1_size']
        self.layer2_size = config['layer2_size']

        # net func
        # The layers are created once here; presumably so get_model() can
        # reuse the same instances - confirm against get_model().
        self.at_repeat = RepeatVector(self.Tx)  # purpose of this layer: (left blank by the author)
        self.at_concate = Concatenate(axis=-1)  # purpose of this layer: (left blank by the author)
        self.at_dense1 = Dense(8, activation='tanh')
        self.at_dense2 = Dense(1, activation='relu')
        # Softmax is taken over axis 1 rather than the default last axis.
        self.at_softmax = Activation(lambda x: Softmax(axis=1)(x),
                                     name='attention_weights')
        self.at_dot = Dot(axes=1)

        # Output layer; its activation is also a softmax over axis 1.
        self.layer3 = Dense(machine_vocab_size,
                            activation=lambda x: Softmax(axis=1)(x))

        # get model
        self.model = self.get_model()
    def train_student(self,
                      c=0.0,
                      T=1.0,
                      use_mse=False,
                      epochs=200,
                      lr=1e-5,
                      batch_size=32):
        """Train the student head with a distillation loss and save it.

        A small dense model is trained on stored convolution features. The
        loss is applied to the logits, while validation is run on the
        softmax model that shares the same weights.

        :param c: weighting constant forwarded to the distillation loss.
        :param T: temperature forwarded to ``distillation_loss`` (ignored
            when ``use_mse`` is true).
        :param use_mse: select ``distillation_loss_mse`` instead of
            ``distillation_loss``.
        :param epochs: number of train/evaluate rounds. This argument is now
            honoured; previously the loop always ran 200 rounds.
        :param lr: learning rate of the Adam optimizer used for training.
        :param batch_size: batch size for both generators.
        :return: tuple with the validation accuracy after each epoch (empty
            when ``epochs`` is 0).
        """
        if use_mse:
            loss = distillation_loss_mse(c=c)
        else:
            loss = distillation_loss(c=c, T=T)

        # NOTE: pickle executes arbitrary code on load; these files must come
        # from a trusted source. The context managers close the handles.
        with open("train_data.p", "rb") as train_file:
            df_train = pickle.load(train_file)
        with open("test_data.p", "rb") as test_file:
            df_test = pickle.load(test_file)

        g_train = Student.distill_train_generator(df_train,
                                                  batch_size=batch_size)
        g_test = Student.batch_generator(df_test,
                                         "label",
                                         batch_size=batch_size,
                                         shuffle=False)

        # Since MobileNet's convolution features for each image are stored,
        # we only train a small dense model to sit on top of them.
        top_model = keras.models.Sequential()
        top_model.add(Flatten(input_shape=(4, 4, 256)))
        top_model.add(Dense(200, activation="relu"))
        top_model.add(Dropout(0.3))
        top_model.add(Dense(nbr_classes, activation=None, name="logits"))
        top_model.add(Softmax())
        # top_model is compiled only so it can be evaluated below.
        top_model.compile(loss=keras.losses.categorical_crossentropy,
                          optimizer=keras.optimizers.Adam(),
                          metrics=["acc"])

        # The distillation loss functions need access to the logits before the final softmax layer.
        # Weights will be shared between logits_model and top_model,
        # so we train the logits_model, then return the top_model.
        logits_model = keras.models.Model(
            inputs=top_model.layers[0].input,
            outputs=top_model.get_layer("logits").output)
        logits_model.compile(loss=loss, optimizer=keras.optimizers.Adam(lr=lr))

        log = []
        for epoch in range(epochs):
            logits_model.fit_generator(g_train,
                                       steps_per_epoch=len(df_train) //
                                       batch_size,
                                       epochs=1,
                                       verbose=0)
            how_good = top_model.evaluate_generator(g_test,
                                                    steps=len(df_test) //
                                                    batch_size)
            print("Epoch {} validation results are {}".format(epoch, how_good))
            log.append(how_good)

        top_model.save("models/student.h5")

        # Each log entry is [loss, acc]; keep the accuracies only.
        val_accs = tuple(acc for _loss, acc in log)
        if val_accs:
            print("Best ={}".format(max(val_accs)))
        return val_accs
Beispiel #5
0
def attention_weights(input_uid, input_iid, xu, xi,
                      user_num, item_num, embed_id_dim, random_seed,
                      attention_size, l2_reg_lambda):
    """Compute softmax weights over user reviews and over item reviews.

    User/item IDs are embedded, then IDs and review representations are
    projected (bias-free, L2-regularised) into a space of width
    ``attention_size``. Each text projection is multiplied element-wise with
    the projection of the *opposite* ID, a random-initialised bias is added,
    and ReLU -> Dense(1) -> softmax over axis 1 gives the weights.

    :param input_uid: tensor of user IDs fed to the user embedding.
    :param input_iid: tensor of item IDs fed to the item embedding.
    :param xu: user review representations (assumed (batch, reviews, dim) - TODO confirm).
    :param xi: item review representations (same assumption as ``xu``).
    :param user_num: user count; the embedding table has ``user_num + 2`` rows.
    :param item_num: item count; the embedding table has ``item_num + 2`` rows.
    :param embed_id_dim: width of the ID embeddings.
    :param random_seed: seed for the embedding and bias initialisers.
    :param attention_size: width of the shared attention space.
    :param l2_reg_lambda: L2 factor on the four projection kernels.
    :return: ``(out_u, out_i)``, the outputs of the 'user_rev_weights' and
        'item_rev_weights' softmax layers.
    """

    vec_uid = Embedding(user_num + 2, embed_id_dim, embeddings_initializer=RandomUniform(minval=-
                                                                                         0.1, maxval=0.1, seed=random_seed), name='user_id_embed')(input_uid)
    vec_iid = Embedding(item_num + 2, embed_id_dim, embeddings_initializer=RandomUniform(minval=-
                                                                                         0.1, maxval=0.1, seed=random_seed), name='item_id_embed')(input_iid)

    # Mapping user/item ID vectors and semantics of user/item's reviews to the attention space
    vec_uid = Dense(attention_size, activation=None, use_bias=False, kernel_initializer='glorot_uniform',
                    kernel_regularizer=l2(l2_reg_lambda), name='user_id_attention')(vec_uid)
    vec_iid = Dense(attention_size, activation=None, use_bias=False, kernel_initializer='glorot_uniform',
                    kernel_regularizer=l2(l2_reg_lambda), name='item_id_attention')(vec_iid)
    vec_textu = Dense(attention_size, activation=None, use_bias=False, kernel_initializer='glorot_uniform',
                      kernel_regularizer=l2(l2_reg_lambda), name='user_text_attention')(xu)
    vec_texti = Dense(attention_size, activation=None, use_bias=False, kernel_initializer='glorot_uniform',
                      kernel_regularizer=l2(l2_reg_lambda), name='item_text_attention')(xi)

    # Interaction between the user and each item review to learn personalized review-usefulness
    # (user text is paired with the item ID, item text with the user ID)
    out_u = Multiply(name='usertext_itemid_interaction')([vec_textu, vec_iid])
    out_i = Multiply(name='itemtext_userid_interaction')([vec_texti, vec_uid])

    # b_u = np.random.uniform(low=-0.1, high=0.1, size=[attention_size])
    # b_i = np.random.uniform(low=-0.1, high=0.1, size=[attention_size])
    # b_u = b_u.astype(np.float32)
    # b_i = b_i.astype(np.float32)
    # # b_u = np.ndarray(b_u, 'float32')
    # # b_i = np.ndarray(b_i, 'float32')

    def biasadd_layer(x):
        # Creates a fresh uniform(-0.1, 0.1) variable of length attention_size
        # and adds it as a bias along the last axis.
        # NOTE(review): the variable is created inside a Lambda, so it may not
        # be tracked as a trainable weight of the model - confirm.
        # NOTE(review): both Lambda calls below use the same seed, so the two
        # biases presumably start with identical values - confirm intent.
        b = tf.keras.backend.random_uniform_variable(
            [attention_size], low=-0.1, high=0.1, seed=random_seed)
        return tf.keras.backend.bias_add(x, b)
    # out_u = tf.keras.backend.bias_add(out_u, b_u)
    # out_i = tf.keras.backend.bias_add(out_i, b_i)
    out_u = Lambda(biasadd_layer)(out_u)
    out_i = Lambda(biasadd_layer)(out_i)
    # out_u = tf.keras.backend.bias_add(out_u, b_u)
    # out_i = tf.keras.backend.bias_add(out_i, b_i)

    # out_u = Dense(1, activation=None, use_bias=True,
    #               kernel_initializer='ones', bias_initializer='random_uniform')(out_u)
    # out_i = Dense(1, activation=None, use_bias=True,
    #               kernel_initializer='ones', bias_initializer='random_uniform')(out_i)

    # Non-linearity, then collapse the attention space to one score per review.
    out_u = ReLU()(out_u)
    out_i = ReLU()(out_i)
    out_u = Dense(1, activation=None, use_bias=True,
                  kernel_initializer='glorot_uniform', bias_initializer='zeros')(out_u)
    out_i = Dense(1, activation=None, use_bias=True,
                  kernel_initializer='glorot_uniform', bias_initializer='zeros')(out_i)

    # Output the weight (usefulness) for each review
    # (softmax over axis 1, presumably the review axis)
    out_u = Softmax(axis=1, name='user_rev_weights')(out_u)
    out_i = Softmax(axis=1, name='item_rev_weights')(out_i)

    return out_u, out_i
Beispiel #6
0
 def call(self, answer_encoding):
     """Score each answer encoding after mixing in the other encodings.

     A pairwise dot-product matrix is built, its diagonal is zeroed, and a
     softmax over the last axis turns it into mixing weights. The original
     encoding, the mixed encoding and their element-wise product are
     concatenated, passed through ``self.dense_1``, squeezed on the last
     axis and normalised with a final softmax.

     NOTE(review): the diagonal is multiplied by 0 *before* the softmax, so
     self-scores become 0 rather than being excluded - confirm intent.

     :param answer_encoding: rank-3 tensor (it is permuted with ``(0, 2, 1)``);
         axis 1 presumably has ``Params.max_passage_count`` entries.
     :return: softmax-normalised tensor of answer probabilities.
     """
     transposed = K.permute_dimensions(answer_encoding, (0, 2, 1))
     pairwise_scores = tf.matmul(answer_encoding, transposed)

     # 1.0 everywhere except on the diagonal, where it is 0.0
     identity = K.eye(Params.max_passage_count)
     zeros = K.zeros_like(identity)
     off_diagonal = K.cast(K.equal(identity, zeros), dtype="float32")

     pairwise_scores = pairwise_scores * off_diagonal
     mixing_weights = Softmax(axis=-1)(pairwise_scores)
     mixed_encoding = tf.matmul(mixing_weights, answer_encoding)

     combined = K.concatenate(
         [answer_encoding, mixed_encoding, answer_encoding*mixed_encoding])
     logits = self.dense_1(combined)
     logits = K.squeeze(logits, axis=-1)
     return Softmax(axis=-1)(logits)
    def create_model(self, num_class=1000):
        """Build the shared trunk and return the text and pixel-link heads.

        The trunk is grown on ``self.net`` with ``_add_df_layer``. At three
        depths, two shortcuts are taken (one per head). Each head starts from
        a shortcut at the deepest point, is merged with the three shortcuts
        via ``_up_sample`` (deepest first) and ends in a softmax layer.

        :param num_class: unused; only the disabled classification tail
            (average pooling + dense + softmax) referred to it.
        :return: ``(text_net, pixel_net)``.
        """

        self.net.add(Conv2D(filters=round(self.width_multiplier * 32), kernel_size=[3, 3],
                            input_shape=self.input_shape))
        self._add_df_layer(self.net, 64)
        self._add_df_layer(self.net, 128, downsample=True)
        self._add_df_layer(self.net, 128)

        # two different net, so there are two different convolution layer
        shortcut1 = self._shortcut(self.net)
        shortcut_link1 = self._shortcut(self.net, 16)

        self._add_df_layer(self.net, 256, downsample=True)
        self._add_df_layer(self.net, 256)

        shortcut2 = self._shortcut(self.net)
        shortcut_link2 = self._shortcut(self.net, 16)

        self._add_df_layer(self.net, 512, downsample=True)

        self._add_df_layer(self.net, 512)
        self._add_df_layer(self.net, 512)
        self._add_df_layer(self.net, 512)
        self._add_df_layer(self.net, 512)
        self._add_df_layer(self.net, 512)

        shortcut3 = self._shortcut(self.net)
        shortcut_link3 = self._shortcut(self.net, 16)

        self._add_df_layer(self.net, 1024, downsample=True)
        self._add_df_layer(self.net, 1024)

        # the net decide text or not
        text_net = self._shortcut(self.net)
        text_net = self._up_sample(text_net, shortcut3)
        text_net = self._up_sample(text_net, shortcut2)
        text_net = self._up_sample(text_net, shortcut1)
        # Softmax's first positional argument is `axis`; the model itself was
        # previously passed there by mistake. Use the default (last) axis.
        text_net.add(Softmax())

        # the net decide pixel linked or not
        pixel_net = self._shortcut(self.net)
        pixel_net = self._up_sample(pixel_net, shortcut_link3)
        pixel_net = self._up_sample(pixel_net, shortcut_link2)
        pixel_net = self._up_sample(pixel_net, shortcut_link1)
        pixel_net.add(Softmax())

        # last three layers of mobilenet, we may neglect this stuff
        # self.net.add(AveragePooling2D(pool_size=[7, 7]))
        # self.net.add(Dense(num_class))
        # self.net.add(Softmax())

        return text_net, pixel_net
Beispiel #8
0
    def __net(self, data_shape, network_shape):
        """Wire up the encoder, latent layers and decoder as attributes.

        The encoder starts from ``data_co`` and, after every dense layer,
        concatenates ``data_in`` back in. The decoder mirrors the encoder
        widths in reverse, starting from the Gumbel-sampled latent, and also
        re-concatenates ``data_in`` after every layer.

        :param data_shape: shape of both inputs; ``data_shape[0]`` is the
            width of the sigmoid output layer.
        :param network_shape: iterable of dense-layer widths for the encoder
            (reversed for the decoder).
        """
        print('Building Network')

        # Two inputs of the same shape: the conditioning data and the
        # desired output.
        self.data_in = Input(shape=data_shape)
        self.data_co = Input(shape=data_shape)

        # Encoder
        encoded = self.data_co
        for width in network_shape:
            encoded = Dense(width, activation='relu')(encoded)
            encoded = concatenate([encoded, self.data_in])

        # Latent logits, their reshaped softmax view, and the sampled latent
        self.latent = Dense(self.latent_units)(encoded)

        reshaped_latent = Reshape(self.latent_shape)(self.latent)
        self.latent_out = Softmax(name='latent_output')(reshaped_latent)

        sampler = Lambda(self.__gumbelSample,
                         output_shape=(self.latent_units, ))
        self.latent_softmax = sampler(self.latent)

        # Decoder
        decoded = self.latent_softmax
        for index, width in enumerate(network_shape[::-1]):
            decoder_layer = Dense(width,
                                  activation='relu',
                                  name='decoder-{}'.format(index))
            decoded = concatenate([decoder_layer(decoded), self.data_in])

        # Output
        self.data_out = Dense(data_shape[0], activation='sigmoid')(decoded)
Beispiel #9
0
def build_3d_model(settings: ModelSettings,
                   no_of_classes: int,
                   no_of_bands: int):
    """Build and compile a small convolutional classifier.

    Despite the name, the network is made of ``Conv2D`` layers applied to a
    neighbourhood patch with ``no_of_bands`` channels.

    :param settings: supplies ``first_conv_kernel_size`` and
        ``input_neighborhood`` (a tuple, extended with the band count).
    :param no_of_classes: filter count of the last convolution.
    :param no_of_bands: number of input channels.
    :return: compiled ``Sequential`` model (Adam, categorical cross-entropy).
    """
    patch_shape = settings.input_neighborhood + (no_of_bands, )

    model = Sequential([
        Conv2D(filters=200,
               kernel_size=settings.first_conv_kernel_size,
               strides=(1, 1),
               input_shape=patch_shape,
               data_format='channels_last',
               padding='valid'),
        MaxPooling2D(pool_size=(2, 2), padding='valid'),
        Conv2D(filters=200,
               kernel_size=(2, 2),
               padding='same',
               activation='relu'),
        # One filter per class; flattened and normalised below.
        Conv2D(filters=no_of_classes,
               kernel_size=(2, 2),
               padding='valid'),
        Flatten(),
        Softmax(),
    ])
    model.compile(optimizer=Adam(lr=0.001),
                  metrics=['accuracy'],
                  loss='categorical_crossentropy')
    return model
Beispiel #10
0
def CreateCNN(nUnits=(32, 64, 128, 128, 128, 256),
              inShape=(None, None, 3),
              dropProb=.2,
              nClasses=10,
              linearOut=False):
    """Build and compile a fully convolutional image classifier.

    Each entry of ``nUnits`` adds a 3x3 'valid' convolution followed by batch
    normalisation, ReLU and dropout. A 1x1 convolution with ``nClasses``
    filters, global average pooling and a softmax produce the prediction.

    :param nUnits: filter count of each convolutional stage.
    :param inShape: input shape handed to ``Input``.
    :param dropProb: dropout rate used after every stage.
    :param nClasses: number of output classes.
    :param linearOut: accepted but not used by this function.
    :return: compiled ``Model`` (Adam with lr 1e-4, categorical cross-entropy).
    """
    inp = Input(shape=inShape)

    hidden = inp
    for width in nUnits:
        hidden = Conv2D(width, (3, 3), padding='valid', strides=(1, 1))(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dropout(dropProb)(hidden)

    class_maps = Conv2D(nClasses, (1, 1))(hidden)
    pooled = GlobalAveragePooling2D()(class_maps)
    pred = Softmax()(pooled)

    mod = Model(inputs=inp, outputs=pred)
    mod.compile(Adam(lr=.0001), 'categorical_crossentropy', ['accuracy'])
    return mod
def global_context_block(x, channels):
    """Apply a global-context block to a 4-D feature map.

    A softmax mask over all spatial positions pools the features into one
    context vector per sample. Two transforms of that context are then
    applied to ``x``: a sigmoid-gated multiplication followed by an addition.

    Shape annotations in the comments are the author's and depend on the
    external ``backend_*`` helpers - confirm against them.

    :param x: tensor whose static shape unpacks into (batch, h, w, c).
    :param channels: filter count passed to ``conv_block`` for both transforms.
    :return: tensor produced by the final ``Add``.
    """
    _, h, w, c = x.shape.as_list()
    positions = h * w

    # Feature branch
    flat_features = Reshape((positions, c))(x)                            # [N, H*W, C]
    flat_features = Permute((2, 1))(flat_features)                        # [N, C, H*W]
    flat_features = Lambda(backend_expand_dims_1, name='a')(flat_features)  # [N, 1, C, H*W]

    # Mask branch: one score per position, softmax over the position axis
    mask = Conv2D(1, (1, 1), name='gc-conv0')(x)
    mask = Reshape((positions, 1))(mask)                   # [N, H*W, 1]
    mask = Softmax(axis=1)(mask)                           # [N, H*W, 1]
    mask = Permute((2, 1))(mask)                           # [N, 1, H*W]
    mask = Lambda(backend_expand_dims_last, name='b')(mask)  # [N, 1, H*W, 1]

    pooled = Lambda(backend_dot, name='c')([flat_features, mask])
    pooled = Reshape((1, 1, c))(pooled)                    # [N, 1, 1, C]

    # Multiplicative (gating) transform
    gate = conv_block(pooled, channels, 1, strides=1, name='gc-conv1')
    gate = Conv2D(c, (1, 1), name='gc-conv2')(gate)
    gate = Activation('sigmoid')(gate)
    x = Multiply()([x, gate])

    # Additive transform
    shift = conv_block(pooled, channels, 1, strides=1, name='gc-conv3')
    shift = Conv2D(c, (1, 1), name='gc-conv4')(shift)
    return Add()([x, shift])
Beispiel #12
0
    def construct_network(self, log):
        """Build and compile the model predicting this attribute from its parents.

        Every parent attribute gets its own scalar input and a 32-wide,
        zero-initialised embedding. The embeddings are concatenated and passed
        through dropout, a dense layer, flatten and softmax.

        :param log: indexable by attribute name; each entry must offer
            ``.max()`` (presumably a pandas DataFrame - confirm).
        :return: compiled keras ``Model`` with a single output named "output".
        """
        # Input names: spaces and colons become underscores, parentheses are dropped.
        name_cleanup = str.maketrans({" ": "_", "(": None, ")": None, ":": "_"})

        parent_inputs = []
        parent_embeddings = []
        for parent in self.parents:
            parent_input = Input(shape=(1,), name=parent.attr_name.translate(name_cleanup))
            vocabulary = log[parent.attr_name].max() + 2
            embedded = Embedding(vocabulary, 32, embeddings_initializer="zeros")(parent_input)
            parent_inputs.append(parent_input)
            parent_embeddings.append(embedded)

        merged = Concatenate(name="concat")(parent_embeddings)
        merged = Dropout(0.2)(merged)

        class_count = log[self.attr_name].max() + 1
        scores = Dense(class_count)(merged)
        scores = Flatten()(scores)
        output = Softmax(name="output")(scores)

        model = Model(inputs=parent_inputs, outputs=[output])
        optimizer = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999,
                          epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
        model.compile(loss={'output': 'categorical_crossentropy'},
                      optimizer=optimizer)
        model.summary()
        return model
Beispiel #13
0
def self_attn_block(inp, n_c, squeeze_factor=8):
    """ GAN Self Attention Block
    Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow

    Three 1x1 convolutions produce f and g (``n_c // squeeze_factor`` filters)
    and h (``n_c`` filters). The flattened g and f are batch-multiplied, a
    softmax over the last axis gives the attention map, which is applied to
    the flattened h. The result is reshaped to the input's shape, passed
    through ``Scale`` and added to the input.

    :param inp: input tensor; its static shape (minus batch) must be known
        because it is used for the final ``Reshape``.
    :param n_c: channel count; must be at least ``squeeze_factor``.
    :param squeeze_factor: channel reduction used for f and g.
    :return: tensor ``Scale(attention output) + inp``.
    :raises AssertionError: if ``n_c // squeeze_factor`` is 0 (asserts are
        stripped when Python runs with ``-O``).
    """
    msg = "Input channels must be >= {}, recieved nc={}".format(squeeze_factor, n_c)
    assert n_c // squeeze_factor > 0, msg
    var_x = inp
    shape_x = var_x.get_shape().as_list()

    # 1x1 projections, all L2-regularised with GAN22_REGULARIZER
    var_f = Conv2D(n_c // squeeze_factor, 1,
                   kernel_regularizer=regularizers.l2(GAN22_REGULARIZER))(var_x)
    var_g = Conv2D(n_c // squeeze_factor, 1,
                   kernel_regularizer=regularizers.l2(GAN22_REGULARIZER))(var_x)
    var_h = Conv2D(n_c, 1, kernel_regularizer=regularizers.l2(GAN22_REGULARIZER))(var_x)

    # Collapse all non-channel axes into one, keeping the channel axis last
    shape_f = var_f.get_shape().as_list()
    shape_g = var_g.get_shape().as_list()
    shape_h = var_h.get_shape().as_list()
    flat_f = Reshape((-1, shape_f[-1]))(var_f)
    flat_g = Reshape((-1, shape_g[-1]))(var_g)
    flat_h = Reshape((-1, shape_h[-1]))(var_h)

    # g . f^T; note that a Permute layer is instantiated inside the Lambda
    var_s = Lambda(lambda var_x: K.batch_dot(var_x[0],
                                             Permute((2, 1))(var_x[1])))([flat_g, flat_f])

    # Attention map, then attention-weighted combination of h
    beta = Softmax(axis=-1)(var_s)
    var_o = Lambda(lambda var_x: K.batch_dot(var_x[0], var_x[1]))([beta, flat_h])
    var_o = Reshape(shape_x[1:])(var_o)
    # Scale is a project layer; presumably a learnable scalar gain - confirm
    var_o = Scale()(var_o)

    # Residual connection
    out = add([var_o, inp])
    return out
Beispiel #14
0
def cards(hid):
    """Attach the 'cards' output head to ``hid``.

    A dense layer with 36 * 4 units (using the module-level ``activation``)
    is reshaped to (36, 4) and normalised by a softmax over the last axis.

    :param hid: tensor feeding the head.
    :return: output tensor of the softmax layer named 'cards'.
    """
    flat_scores = Dense(units=36 * 4, activation=activation)(hid)
    # no dropout or batch normalisation this close to the network output
    score_grid = Reshape((36, 4))(flat_scores)
    return Softmax(name='cards')(score_grid)
def model_DL2(trainX, trainy, wd=0.005, lr=0.01, lr_decay=1e-4):
    """Build an attention-weighted multi-channel model with a sigmoid score.

    The same ``Inception1D`` feature extractor is applied to every channel.
    A small two-layer scorer yields one value per channel; a softmax over
    channels turns these into weights that scale the per-channel features
    before pooling and the dense output layers.

    :param trainX: array whose shape gives (samples, channels, timesteps).
    :param trainy: array with at least two axes; only its shape is read.
    :param wd: L2 factor for the attention and hidden dense kernels.
    :param lr: accepted but not used here (the model is not compiled).
    :param lr_decay: accepted but not used here.
    :return: uncompiled ``Model`` mapping the input to 'out_score'.
    """
    n_channels, n_timesteps = trainX.shape[1], trainX.shape[2]
    n_outputs = trainy.shape[1]  # not used further; kept from the original shape unpacking

    data_input = Input(shape=(n_channels, n_timesteps), dtype='float32', name='input')

    # Feature extractor shared across channels
    perchan_model = Inception1D((n_timesteps, 1))
    add_feature_axis = Lambda(lambda x: K.expand_dims(x, axis=-1), name='data_input_rs')
    data_processed = TimeDistributed(perchan_model, name='data_before_mil')(add_feature_axis(data_input))

    # Attention block: tanh layer, then one bias-free score per channel
    scorer_hidden = TimeDistributed(
        Dense(32, activation='tanh', kernel_regularizer=l2(wd), use_bias=False))
    scorer_out = TimeDistributed(
        Dense(1, activation=None, kernel_regularizer=l2(wd), use_bias=False))
    attention = scorer_out(scorer_hidden(data_processed))
    attention = Lambda(lambda x: K.squeeze(x, -1))(attention)
    attention = Softmax()(attention)
    attention = Lambda(lambda x: K.expand_dims(x))(attention)
    # Repeat each weight across the feature axis so it can multiply the features.
    attention = Lambda(lambda x: K.repeat_elements(x, data_processed.shape[-1], -1),
                       name='att_mil_weights')(attention)

    # To inspect the attention weights, build Model(inputs=[data_input], outputs=[attention]).

    weighted = Multiply()([data_processed, attention])
    pooled = GlobalAveragePooling1D()(weighted)
    hidden = Dense(32, activation='relu', kernel_regularizer=l2(wd))(pooled)
    score = Dense(1, activation='sigmoid', name='out_score')(hidden)

    return Model(inputs=[data_input], outputs=[score])
    def frontend(x):
        """Classify with attention pooling on top of the feature tensor ``x``.

        4-D inputs are first reduced: when axis 2 is not of size 1 the
        ``l1`` and ``l2`` Lambda outputs are concatenated on the last axis,
        otherwise axis 2 is handled by ``sq(2)``. Two identical dense trunks
        then produce class probabilities and attention scores (softmax over
        axis 1), which ``_attention_pooling`` combines.

        :param x: input feature tensor (rank 3 or 4).
        :return: output of the attention-pooling Lambda.
        """
        def dense_trunk(tensor):
            # 256 -> 128 ReLU layers, each followed by 50 % dropout
            for width in (256, 128):
                tensor = Dense(width, activation='relu')(tensor)
                tensor = K.layers.Dropout(rate=0.5)(tensor)
            return tensor

        if len(x.shape) == 4:
            if x.shape[2] != 1:
                first_view = Lambda(l1)(x)
                second_view = Lambda(l2)(x)
                x = K.layers.Concatenate(axis=-1)([first_view, second_view])
            else:
                x = K.layers.Lambda(sq(2))(x)

        x = K.layers.Dropout(rate=0.5)(x)

        # Classification branch
        cla = dense_trunk(x)
        cla = Dense(classes_num, activation='softmax')(cla)

        # Attention branch: one score per position, normalised over axis 1
        att = dense_trunk(x)
        att = Dense(1, activation=None)(att)
        att = Softmax(axis=1)(att)

        return Lambda(_attention_pooling)([cla, att])
Beispiel #17
0
def Bert(max_seq_length=100, vocabulary_size=100, word_embedding_size=100, use_universal_transformer = 0, transformer_depth=5, num_heads=10, transformer_dropout = 0.1, embedding_dropout = 0.6, l2_reg_penalty = 1e-4):
    """Build a BERT-style model with word-prediction and class outputs.

    Word and segment embeddings are summed (after a coordinate embedding)
    and passed through either a universal transformer (one shared block with
    adaptive computation time) or a stack of independent transformer blocks.

    :param max_seq_length: length of both integer input sequences.
    :param vocabulary_size: size of the word embedding vocabulary.
    :param word_embedding_size: width of word and segment embeddings.
    :param use_universal_transformer: truthy to share one block across steps.
    :param transformer_depth: number of transformer steps / blocks.
    :param num_heads: attention heads per transformer block.
    :param transformer_dropout: residual and attention dropout rate.
    :param embedding_dropout: dropout of the tied output projection.
    :param l2_reg_penalty: L2 factor for embeddings/projection (falsy = none).
    :return: ``Model`` with inputs ``[word_ids, segment_ids]`` and outputs
        ``[word_predictions, class_prediction]``.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    segment_ids = Input(shape=(max_seq_length,), dtype='int32', name='segment_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty else None)

    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size, input_length=max_seq_length,
        name='bpe_embeddings', embeddings_regularizer=l2_regularizer)
    # The segment embedding is added element-wise to the word embedding, so
    # its output width must be word_embedding_size. It used to be
    # max_seq_length, which only worked when the two sizes were equal.
    # NOTE(review): input_dim is left at word_embedding_size as before; it
    # only needs to exceed the largest segment id - confirm and shrink.
    segment_embedding_layer = Embedding(
        word_embedding_size, word_embedding_size, name='segment_embeddings')
    add_segment_layer = Add(name='add_segment')
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    output_softmax_layer = Softmax(name='word_predictions')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        transformer_depth if use_universal_transformer else 1,
        name='coordinate_embedding')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    segment_embeddings = segment_embedding_layer(segment_ids)

    if use_universal_transformer:
        # One shared block applied transformer_depth times, with ACT.
        act_layer = TransformerACT(name='adaptive_computation_time')
        transformer_block = TransformerBlock(
            name='transformer', num_heads=num_heads,
            residual_dropout=transformer_dropout,
            attention_dropout=transformer_dropout, use_masking=False)
        act_output = next_step_input
        for i in range(transformer_depth):
            next_step_input = coordinate_embedding_layer(next_step_input, step=i)
            next_step_input = add_segment_layer([next_step_input, segment_embeddings])
            next_step_input = transformer_block(next_step_input)
            next_step_input, act_output = act_layer(next_step_input)
        act_layer.finalize()
        next_step_input = act_output
    else:
        # Independent blocks; position and segment information added once.
        next_step_input = coordinate_embedding_layer(next_step_input, step=0)
        next_step_input = add_segment_layer([next_step_input, segment_embeddings])
        for i in range(transformer_depth):
            next_step_input = TransformerBlock(
                name='transformer' + str(i), num_heads=num_heads,
                residual_dropout=transformer_dropout,
                attention_dropout=transformer_dropout,
                use_masking=False, vanilla_wiring=True)(next_step_input)

    word_predictions = output_softmax_layer(output_layer([next_step_input, embedding_matrix]))
    # The first position of the sequence feeds the binary classifier.
    cls_node_slice = Lambda(lambda x: x[:, 0], name='cls_node_slicer')(next_step_input)
    class_prediction = Dense(1, name='class_prediction', activation='sigmoid')(cls_node_slice)
    model = Model(inputs=[word_ids, segment_ids], outputs=[word_predictions, class_prediction])
    return model
Beispiel #18
0
def attention_scaled_dot(activations, attention_mask): #, length):
    """Scaled dot-product attention with an additive mask.

    Q, K and V are bias-free per-timestep dense projections of
    ``activations`` (each followed by dropout). The Q.K scores are divided
    by the square root of V's last dimension, ``attention_mask`` is repeated
    for every row and added, and a softmax over the last axis gives the
    weights applied to V.

    :param activations: rank-3 tensor; axes 1 and 2 must have static sizes
        because they are cast with ``int``.
    :param attention_mask: tensor added to the scores after ``RepeatVector``
        (presumably large negative values at masked positions - confirm).
    :return: attention output tensor ``V_prime``.
    """
#https://arxiv.org/pdf/1706.03762.pdf
    units = int(activations.shape[2])
    words = int(activations.shape[1])  # not used below
    _drop_rate_ = .1
    Q = TimeDistributed(Dense(units, activation=None, use_bias=False))(activations)
    Q = Dropout(_drop_rate_)(Q)
    K = TimeDistributed(Dense(units, activation=None, use_bias=False))(activations)
    K = Dropout(_drop_rate_)(K)
    V = TimeDistributed(Dense(units, activation=None, use_bias=False))(activations)
    V = Dropout(_drop_rate_)(V)
    #print(Q.shape)
    # Dot over the feature axis of Q and K gives one score per position pair
    QK_T = Dot(axes=-1, normalize=False)([Q,K]) # list of two tensors
    """normalize: Whether to L2-normalize samples along the dot product axis before taking the dot product. If set to True, then the output of the dot product is the cosine proximity between the two samples."""
    # V is passed in only so its last dimension can be read for the scaling
    QK_T = Lambda( lambda inp: inp[0]/ backend.sqrt(backend.cast(shape_list(inp[1])[-1], backend.floatx())))([QK_T, V])
    #print(QK_T.shape)
    
#     cropping = np.zeros(QK_T.shape[1])
#     cropping[length:] = (-10**6) * np.ones(int(QK_T.shape[1])-length)
#     QK_T = QK_T + cropping
    # Repeat the mask once per score row so it can be added to the scores
    attention_mask__ = RepeatVector(int(QK_T.shape[1]))(attention_mask)
#     print(attention_mask__.shape)
    QK_T = Add()([QK_T, attention_mask__])
    QK_T = Softmax(axis=-1)(QK_T)
    QK_T = Dropout(_drop_rate_)(QK_T)
    #print(V.shape)
    # Swap V's last two axes so that Dot(axes=-1) contracts over positions
    V = Permute([2, 1])(V)
    #print(V.shape)
    V_prime = Dot(axes=-1, normalize=False)([QK_T,V]) # list of two tensors
    #print(V_prime.shape)
    return V_prime
Beispiel #19
0
def build_model(input_shape):
    """
    Build the functional model based on the 1.4.0 version.

    Layout: one fixed (non-trainable) preprocessing convolution, two
    feature-learning stages that each concatenate a dilated and a strided
    convolution branch, and three dense layers ending in a 2-way softmax.

    Parameters:
        input_shape (tuple): The input shape of the model. It should be in the form of (1, ..., ...);
            the leading entry is dropped.

    Returns:
        keras Model: The built (uncompiled) functional model.
    """
    def dual_branch_stage(tensor, dilation):
        # Dilated 5x5 branch and strided 4x4 branch, each batch-normalised,
        # concatenated and passed through LeakyReLU.
        dilated = Conv2D(5, kernel_size=5, dilation_rate=dilation)(tensor)
        dilated = BatchNormalization()(dilated)
        strided = Conv2D(5, kernel_size=4, strides=2)(tensor)
        strided = BatchNormalization()(strided)
        merged = concatenate([dilated, strided])
        return LeakyReLU(alpha=0.1)(merged)

    def dense_stage(tensor):
        # Dropout -> Dense(200) -> LeakyReLU
        tensor = Dropout(0.2)(tensor)
        tensor = Dense(200)(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    model_input = Input(shape=input_shape[1:])

    # Conv Layer 1, Preprocessing (kernels come from the module-level
    # `filters` initializer and are frozen)
    features = Conv2D(4,
                      kernel_size=5,
                      kernel_initializer=filters,
                      strides=2,
                      name="Fixed_Filters",
                      trainable=False)(model_input)
    features = BatchNormalization()(features)
    features = LeakyReLU(alpha=0.1)(features)

    # Conv Layers 2 and 3, Feature learning
    features = dual_branch_stage(features, 8)
    features = dual_branch_stage(features, 4)

    # Fully connected Layers, Binary classification
    features = Flatten()(features)
    features = dense_stage(features)
    features = dense_stage(features)
    features = Dropout(0.2)(features)
    logits = Dense(2)(features)

    probabilities = Softmax()(logits)

    return Model(inputs=model_input, outputs=probabilities)
Beispiel #20
0
    def build_onet(self, input_shape=None):
        """Build the O-Net: four conv stages, a dense layer and three heads.

        :param input_shape: input shape; defaults to ``(48, 48, 3)``.
        :return: ``Model`` whose outputs are, in order, the 4-unit dense
            head, the 10-unit dense head and the 2-unit softmax head.
        """
        if input_shape is None:
            input_shape = (48, 48, 3)

        # (filters, kernel size, max-pooling arguments or None)
        conv_plan = (
            (32, (3, 3), dict(pool_size=(3, 3), strides=(2, 2), padding="same")),
            (64, (3, 3), dict(pool_size=(3, 3), strides=(2, 2), padding="valid")),
            (64, (3, 3), dict(pool_size=(2, 2), strides=(2, 2), padding="same")),
            (128, (2, 2), None),
        )

        o_inp = Input(input_shape)
        features = o_inp
        for n_filters, kernel, pooling in conv_plan:
            features = Conv2D(n_filters, kernel_size=kernel, strides=(1, 1), padding="valid")(features)
            features = PReLU(shared_axes=[1, 2])(features)
            if pooling is not None:
                features = MaxPooling2D(**pooling)(features)

        features = Flatten()(features)
        features = Dense(256)(features)
        features = PReLU()(features)

        # Heads share the 256-wide feature vector.
        two_way = Dense(2)(features)
        two_way = Softmax(axis=1)(two_way)
        four_way = Dense(4)(features)
        ten_way = Dense(10)(features)

        return Model(o_inp, [four_way, ten_way, two_way])
Beispiel #21
0
    def build_rnet(self, input_shape=None):
        """Build the R-Net: three conv stages, a dense layer and two heads.

        :param input_shape: input shape; defaults to ``(24, 24, 3)``.
        :return: ``Model`` whose outputs are, in order, the 4-unit dense
            head and the 2-unit softmax head.
        """
        def conv_prelu(tensor, n_filters, kernel):
            # 'valid' convolution followed by PReLU shared over the spatial axes
            tensor = Conv2D(n_filters, kernel_size=kernel, strides=(1, 1), padding="valid")(tensor)
            return PReLU(shared_axes=[1, 2])(tensor)

        if input_shape is None:
            input_shape = (24, 24, 3)
        r_inp = Input(input_shape)

        trunk = conv_prelu(r_inp, 28, (3, 3))
        trunk = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(trunk)

        trunk = conv_prelu(trunk, 48, (3, 3))
        trunk = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(trunk)

        trunk = conv_prelu(trunk, 64, (2, 2))
        trunk = Flatten()(trunk)
        trunk = Dense(128)(trunk)
        trunk = PReLU()(trunk)

        two_way = Dense(2)(trunk)
        two_way = Softmax(axis=1)(two_way)

        four_way = Dense(4)(trunk)

        return Model(r_inp, [four_way, two_way])
Beispiel #22
0
    def build_pnet(self, input_shape=None):
        """Build the fully convolutional P-Net with two 1x1-conv heads.

        :param input_shape: input shape; defaults to ``(None, None, 3)`` so
            images of any size are accepted.
        :return: ``Model`` whose outputs are, in order, the 4-channel head
            and the 2-channel head with a softmax over axis 3.
        """
        if input_shape is None:
            input_shape = (None, None, 3)

        p_inp = Input(input_shape)

        # (filters, whether a 2x2 max-pooling follows the stage)
        stages = ((10, True), (16, False), (32, False))

        features = p_inp
        for n_filters, pooled in stages:
            features = Conv2D(n_filters,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              padding="valid")(features)
            features = PReLU(shared_axes=[1, 2])(features)
            if pooled:
                features = MaxPooling2D(pool_size=(2, 2),
                                        strides=(2, 2),
                                        padding="same")(features)

        two_channel = Conv2D(2, kernel_size=(1, 1), strides=(1, 1))(features)
        two_channel = Softmax(axis=3)(two_channel)

        four_channel = Conv2D(4, kernel_size=(1, 1), strides=(1, 1))(features)

        return Model(p_inp, [four_channel, two_channel])
Beispiel #23
0
    def create_model(cfg):
        """Build a configurable fully connected ``Sequential`` model.

        Keys read from ``cfg``: 'input_dim', 'gaussian_noise',
        'num_hidden_layers', 'layer_size', 'weight_decay', 'activation',
        'batch_normalization', 'dropout', 'output_dim' and 'task'.

        ``num_hidden_layers - 1`` hidden blocks are created; a softmax is
        appended only when ``cfg['task']`` equals 'classification'.

        :param cfg: configuration mapping with the keys listed above.
        :return: the uncompiled ``Sequential`` model.
        """
        def regularised_dense(units, name):
            # he_normal initialised Dense layer with L2 weight decay
            return Dense(units=units,
                         kernel_initializer='he_normal',
                         kernel_regularizer=regularizers.l2(cfg['weight_decay']),
                         name=name)

        # Keras Model
        model = Sequential()

        # Input layer with additive Gaussian noise
        model.add(InputLayer(batch_input_shape=(None, cfg['input_dim']), name='input'))
        model.add(GaussianNoise(stddev=cfg['gaussian_noise']))

        # Hidden layers: Dense -> activation -> optional batch norm -> dropout
        for index in range(cfg['num_hidden_layers'] - 1):
            model.add(regularised_dense(cfg['layer_size'], 'hidden{}'.format(str(index))))
            model.add(Activation(activation=cfg['activation']))
            if cfg['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Dropout(rate=cfg['dropout']))

        # Output layer
        model.add(regularised_dense(cfg['output_dim'], 'output'))
        if cfg['batch_normalization']:
            model.add(BatchNormalization())

        # Optional softmax layer
        if cfg['task'] == 'classification':
            model.add(Softmax())

        return model
    def train_delinquent(self, epochs=200, lr=1e-5, batch_size=32):
        """Train the baseline ("delinquent") head without distillation.

        Delinquent refers to a model that is trained without using
        distillation. It serves as a baseline to evaluate the benefit of
        distillation.

        :param epochs: number of training epochs.
        :param lr: learning rate of the Adam optimizer.
        :param batch_size: batch size for both generators.
        :return: the object returned by ``fit_generator``.
        """
        # NOTE: pickle executes arbitrary code on load; these files must come
        # from a trusted source. The context managers make sure the file
        # handles are closed, which the bare open() calls did not.
        with open("train_data.p", "rb") as train_file:
            df_train = pickle.load(train_file)
        with open("test_data.p", "rb") as test_file:
            df_test = pickle.load(test_file)

        g_train = Student.batch_generator(df_train,
                                          "label",
                                          batch_size=batch_size)
        g_test = Student.batch_generator(df_test,
                                         "label",
                                         batch_size=batch_size,
                                         shuffle=False)

        # Small dense model on top of the stored (4, 4, 256) features.
        top_model = keras.models.Sequential()
        top_model.add(Flatten(input_shape=(4, 4, 256)))
        top_model.add(Dense(200, activation="relu"))
        top_model.add(Dropout(0.3))
        top_model.add(Dense(nbr_classes, activation=None, name="logits"))
        top_model.add(Softmax())

        top_model.compile(loss=keras.losses.categorical_crossentropy,
                          optimizer=keras.optimizers.Adam(lr=lr),
                          metrics=["acc"])
        results = top_model.fit_generator(
            g_train,
            steps_per_epoch=len(df_train) // batch_size,
            validation_data=g_test,
            validation_steps=len(df_test) // batch_size,
            epochs=epochs)
        return results
def attention_scaled_dot(activations):
    """Apply scaled dot-product self-attention to ``activations``.

    Follows "Attention Is All You Need"
    (https://arxiv.org/pdf/1706.03762.pdf): the input is projected three
    times (queries, keys, values) with bias-free dense layers, the
    query/key dot products are scaled by the square root of the last-axis
    size of the value projection, soft-maxed over the last axis and used to
    weight the values. Dropout with rate 0.2 follows every projection and
    the attention weights.

    :param activations: keras tensor with at least three axes; axis 1 is
        treated as the time axis by ``TimeDistributed`` and axis 2 supplies
        the projection width. Assumes (batch, steps, units) -- TODO confirm.
    :return: keras tensor holding the attention-weighted values.
    """
    # Only the feature-axis size is read from the static shape.
    units = int(activations.shape[2])

    def project(tensor):
        # Bias-free linear projection applied per time step, then dropout.
        projected = TimeDistributed(
            Dense(units, activation=None, use_bias=False))(tensor)
        return Dropout(.2)(projected)

    # Construction order (Q, K, V) is kept so auto-generated layer names
    # stay the same.
    queries = project(activations)
    keys = project(activations)
    values = project(activations)

    # Pairwise query/key dot products over the feature axis.
    scores = Dot(axes=-1, normalize=False)([queries, keys])

    # `values` is passed along only so its last-axis size can be read as the
    # scaling dimension. `shape_list` is a helper defined elsewhere;
    # presumably it returns the tensor's shape as a list -- verify.
    scores = Lambda(lambda inp: inp[0] / backend.sqrt(
        backend.cast(shape_list(inp[1])[-1], backend.floatx())))(
            [scores, values])

    weights = Softmax(axis=-1)(scores)
    weights = Dropout(.2)(weights)

    # Swap the two non-batch axes so the last-axis Dot below contracts the
    # attention weights against the time axis of the values.
    values = Permute([2, 1])(values)
    attended = Dot(axes=-1, normalize=False)([weights, values])
    return attended
Beispiel #26
0
def run():
    """Train a tiny softmax classifier on jets and save it to disk.

    Reads the ``jets`` dataset from the HDF5 file named by
    ``args.input_file``, derives inputs and targets with
    ``preproc_inputs`` / ``make_targets``, fits a single dense layer
    (2 inputs, 3 outputs) followed by a softmax, and writes
    ``architecture.json`` and ``weights.h5`` into ``args.output_dir``.
    """
    args = get_args()

    # Pull the jets out of the input file.
    with h5py.File(args.input_file, 'r') as infile:
        jets = np.asarray(infile['jets'])

    # Build the training dataset.
    input_data = preproc_inputs(jets)
    targets = make_targets(jets)

    # Keras is imported lazily, only once the data has been prepared.
    from keras.layers import Input, Dense, Softmax
    from keras.models import Model

    input_node = Input(shape=(2, ))
    dense = Dense(3)(input_node)
    pred = Softmax()(dense)
    model = Model(inputs=input_node, outputs=pred)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(input_data, targets, epochs=args.epochs)

    # Save the trained network. `makedirs(..., exist_ok=True)` also creates
    # missing parent directories and avoids the check-then-create race of
    # an `isdir` test followed by `mkdir`.
    odir = args.output_dir
    os.makedirs(odir, exist_ok=True)
    with open(f'{odir}/architecture.json', 'w') as arch_file:
        arch_file.write(model.to_json(indent=2))
    model.save_weights(f'{odir}/weights.h5')
Beispiel #27
0
def build_model(x_shape, num_classes):
    """Build an uncompiled three-stage CNN classifier.

    Each convolutional stage is a 5x5 'same' convolution, LeakyReLU(0.2)
    and 2x2 'same' max pooling; the first two stages are followed by
    dropout (0.2 and 0.1). The flattened features pass through dense
    layers of width 64, 32 and 16 (each with ReLU) and a final dense layer
    of width ``num_classes`` followed by a softmax.

    The input shape comes exclusively from ``x_shape``: the first
    convolution no longer carries a separate hard-coded ``input_shape``,
    which could disagree with the ``Input`` tensor it is called on.

    :param x_shape: shape of one input sample (without the batch axis),
        passed to ``Input``.
    :param num_classes: number of output classes.
    :return: keras ``Model`` mapping the input to class probabilities.
    """
    inputs = Input(shape=x_shape)

    # (number of filters, dropout rate or None) for every conv stage.
    conv_stages = ((16, 0.2), (2 * 16, 0.1), (4 * 16, None))

    x = inputs
    for filters, drop_rate in conv_stages:
        x = Conv2D(filters,
                   kernel_size=(5, 5),
                   padding='same',
                   use_bias=True)(x)
        # Batch normalization after the convolution is intentionally left
        # disabled, as in the original layout.
        x = LeakyReLU(alpha=0.2)(x)
        x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
        if drop_rate is not None:
            x = Dropout(drop_rate)(x)

    x = Flatten()(x)
    for width in (64, 32, 16):
        x = Dense(width)(x)
        x = ReLU()(x)

    x = Dense(num_classes)(x)
    predictions = Softmax()(x)
    return Model(inputs=inputs, outputs=predictions)
    def build_actor_baseline(self):
        """Build and compile the two-headed actor/baseline network.

        A shared stack of ReLU dense layers (all widths in
        ``self.options.layers`` except the last) feeds two heads, each with
        its own final hidden layer of the last width:

        * actor head: two tanh dense layers followed by a softmax named
          ``actor_output`` over ``self.env.action_space.n`` actions;
        * baseline head: a ReLU dense layer followed by a single linear
          unit named ``baseline_output``.

        :return: compiled keras ``Model`` with outputs
            ``[actor_output, baseline_output]``.
        """
        widths = self.options.layers

        states = Input(shape=self.state_size)

        # Shared trunk.
        trunk = states
        for width in widths[:-1]:
            trunk = Dense(width, activation='relu')(trunk)

        # Each head gets its own final fully connected layer.
        actor_hidden = Dense(widths[-1], activation='tanh')(trunk)
        actor_logits = Dense(self.env.action_space.n,
                             activation='tanh')(actor_hidden)
        baseline_hidden = Dense(widths[-1], activation='relu')(trunk)

        probs = Softmax(name='actor_output')(actor_logits)
        baseline = Dense(1, activation='linear',
                         name='baseline_output')(baseline_hidden)

        model = Model(inputs=[states], outputs=[probs, baseline])

        # Losses and weights are keyed by output layer name.
        optimizer = Adam(lr=self.options.alpha)
        head_losses = {
            'actor_output': pg_loss(),
            'baseline_output': losses.MeanSquaredError(),
        }
        head_weights = {
            'actor_output': 1.0,
            'baseline_output': 1.0,
        }
        model.compile(optimizer=optimizer,
                      loss=head_losses,
                      loss_weights=head_weights)

        return model
Beispiel #29
0
def Create_Model():
    """Build the multi-receptive-field 3D convolutional model.

    The input of shape ``(XDIM, YDIM, TIMESTEPS, 1)`` goes through one
    strided ``Conv3D`` layer ("Conv1") with ELU activation. Its output
    feeds three branches built by ``create_receptive_field`` (small,
    medium and large receptive fields); the branch outputs are summed
    element-wise and passed through a softmax.

    :return: uncompiled keras ``Model``.
    """
    volume_in = Input(shape=(XDIM, YDIM, TIMESTEPS, 1))

    # First convolution layer, shared by every branch.
    stem = Conv3D(kernel_size=(3, 3, 5),
                  strides=(2, 2, 4),
                  filters=16,
                  name="Conv1")(volume_in)
    stem = Activation('elu')(stem)

    # (size, strides, tag) per branch: small, medium, large receptive field.
    field_specs = (
        (SRF_SIZE, SRF_STRIDES, 'SRF'),
        (MRF_SIZE, MRF_STRIDES, 'MRF'),
        (LRF_SIZE, LRF_STRIDES, 'LRF'),
    )
    branches = [
        create_receptive_field(size, strides, stem, tag)
        for size, strides, tag in field_specs
    ]

    # Sum the branch outputs, then normalize with a softmax.
    summed = Add()(branches)
    out = Softmax()(summed)

    return Model(inputs=volume_in, outputs=out)
Beispiel #30
0
    def define_model(self, model_name):
        """Build a small CNN classifier for 28x28 single-channel inputs.

        Two convolutional stages (4 then 8 filters; each 3x3 'same'
        convolution, batch normalization, ReLU and 2x2 max pooling) are
        followed by dense layers of width 64 and 10 and a softmax.

        :param model_name: name given to the model; also used as the prefix
            of the input layer name (``<model_name>_input``).
        :return: uncompiled keras ``Model`` producing 10 class
            probabilities.
        """
        def conv_stage(tensor, filters, kernel_size, strides, name_prefix):
            # Conv -> batch norm -> ReLU -> 2x2 max pool; halves height/width.
            stage_layers = (
                Conv2D(filters=filters,
                       kernel_size=kernel_size,
                       strides=strides,
                       padding='same',
                       name=f'{name_prefix}conv'),
                BatchNormalization(),
                ReLU(),
                MaxPool2D(pool_size=2),
            )
            for layer in stage_layers:
                tensor = layer(tensor)
            return tensor

        image_in = Input(shape=(28, 28, 1),
                         name=f'{model_name}_input')  # (28, 28, 1)

        # Spatial size after each stage: (14, 14, 4) then (7, 7, 8).
        features = image_in
        for stage_no, n_filters in enumerate((4, 8), start=1):
            features = conv_stage(features,
                                  filters=n_filters,
                                  kernel_size=3,
                                  strides=1,
                                  name_prefix=f'conv_block_{stage_no}')

        features = Flatten()(features)  # (392,)
        features = Dense(64)(features)  # (64,)
        features = Dense(10)(features)  # (10,)
        probabilities = Softmax()(features)

        return Model(inputs=image_in, outputs=probabilities, name=model_name)