class JointSequentialIntentModel(IntentExtractionModel):
    """
    Joint Intent classification and Slot tagging Model
    """

    def __init__(self):
        super(JointSequentialIntentModel, self).__init__()

    def build(self,
              sentence_length,
              vocab_size,
              tag_labels,
              intent_labels,
              token_emb_size=100,
              tagger_hidden=100,
              tagger_dropout=0.5,
              intent_classifier_hidden=100,
              emb_model_path=None):
        """
        Build the model

        Args:
            sentence_length (int): max length of a sentence
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            intent_labels (int): number of intent labels
            token_emb_size (int): token embedding vectors size
            tagger_hidden (int): label tagger LSTM hidden size
            tagger_dropout (float): label tagger dropout rate
            intent_classifier_hidden (int): intent LSTM hidden size
            emb_model_path (str): external embedding model path
        """
        tokens_input, token_emb = self._create_input_embed(sentence_length,
                                                           emb_model_path is not None,
                                                           token_emb_size,
                                                           vocab_size)
        intent_enc = Bidirectional(LSTM(intent_classifier_hidden))(token_emb)
        intent_out = Dense(intent_labels, activation='softmax',
                           name='intent_classifier')(intent_enc)
        intent_vec_rep = RepeatVector(sentence_length)(intent_out)

        slot_emb = Bidirectional(LSTM(tagger_hidden, return_sequences=True))(token_emb)
        tagger_features = concatenate([slot_emb, intent_vec_rep], axis=-1)
        tagger = Bidirectional(
                LSTM(tagger_hidden, return_sequences=True))(tagger_features)
        tagger = Dropout(tagger_dropout)(tagger)
        tagger_out = TimeDistributed(
                Dense(tag_labels, activation='softmax'),
                name='slot_tag_classifier')(tagger)

        self.model = Model(inputs=tokens_input, outputs=[
            intent_out, tagger_out])
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                           loss_weights=[1., 1.], metrics=['categorical_accuracy'])
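

# A minimal usage sketch for the joint intent/slot model defined above; the
# sentence length, vocabulary size and label counts are illustrative
# assumptions rather than values from the original source.
joint_model = JointSequentialIntentModel()
joint_model.build(sentence_length=50, vocab_size=10000,
                  tag_labels=120, intent_labels=7)
joint_model.model.summary()
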
def get_model():
    '''Stacking ensemble meta-model implemented as a small neural network.
    '''
    input_layer = Input(shape=get_ensemble_inputShape())
    layer = Dense(units=128, activation='relu')(input_layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.3)(layer)
    output_layer = Dense(6, activation='sigmoid')(layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])
    return model
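

# Hypothetical training call for the stacking ensemble above, on random
# placeholder data; the input width of 18 (e.g. 3 base models x 6 classes) is
# an assumption and must match get_ensemble_inputShape() in practice.
import numpy as np

X_stack = np.random.rand(1000, 18)
y_stack = np.random.randint(0, 2, size=(1000, 6))
ensemble = get_model()
ensemble.fit(X_stack, y_stack, epochs=5, batch_size=256, validation_split=0.1)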
Example #3
import numpy as np
import tensorflow as tf
import yaml
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects


def dice_loss(onehots_true, probabilities):
    # Soft Dice loss with +1 smoothing (header reconstructed from the body and
    # the compile(loss=dice_loss, ...) call in main() below).
    numerator = tf.reduce_sum(onehots_true * probabilities, axis=0)
    denominator = tf.reduce_sum(onehots_true + probabilities, axis=0)
    loss = 1.0 - 2.0 * (numerator + 1) / (denominator + 1)
    return loss

def main():
    with open(args.config, 'r') as f:
        yam = yaml.safe_load(f)
    img_path = yam['img_path']
    mask_path = yam['mask_path']
    epochs = yam['epochs']
    image_size = yam['image_size']
    start_neurons = yam['start_neurons']
    batch_size = yam['batch_size']
    get_custom_objects().update({'lrelu': Activation(tf.keras.layers.LeakyReLU(alpha=0.3))})
    train_generator = directory_to_generator(img_path, mask_path, image_size)
    steps_per_epoch = int(np.ceil(train_generator.shape[0] / batch_size))
    input_layer = Input((image_size, image_size, 3))
    output_layer = build_model(input_layer, start_neurons)
    model = Model(input_layer, output_layer)
    model.compile(loss=dice_loss, optimizer='adam', metrics=["accuracy"])
    model.fit(train_generator, epochs=epochs, steps_per_epoch=steps_per_epoch, batch_size=batch_size)

if __name__ == "__main__":
    main()
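
# The YAML config read by main() above is expected to provide the keys accessed
# there; the values shown are illustrative assumptions only:
#
#   img_path: ./data/images
#   mask_path: ./data/masks
#   epochs: 30
#   image_size: 128
#   start_neurons: 16
#   batch_size: 8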


Example #4
def ctc_lambda_func( args ):
    y_pred, labels, label_lengths = args
    y_pred_len = [ [y_pred.shape[1] ] ] * batchSize
    #  y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost( labels, K.softmax( y_pred ), y_pred_len, label_lengths )

labels = Input(name='the_labels', shape=[ labelWidth ], dtype='int32')
images = Input(name='the_images', shape=[ targetH, targetW, 1 ], dtype='float32')
label_lengths = Input(name='label_lengths', shape=[1], dtype='int32')

y_pred = backBone( images )
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')( [ y_pred, labels, label_lengths ])
fullModel = Model( inputs=[ images, labels, label_lengths ], outputs=loss_out )
#  plot_model(fullModel, to_file='model2.png', show_shapes=True)

fullModel.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer,  metrics=['accuracy'])


train_loader = DataGenerator( opt.traindata, batchSize=opt.batchSize, limit=opt.traindata_limit, cache=opt.traindata_cache )
test_loader = DataGenerator( opt.valdata, batchSize=opt.batchSize, limit=opt.valdata_limit, cache=opt.valdata_cache )

#  import pdb; pdb.set_trace();
#  import IPython as x; x.embed()

class WeightsSaver(Callback):
    def __init__(self):
        self.fname = opt.outfile + '_' + datetime.now().strftime('%d%m%Y_%H%M%S')
        self.i = 1
        self.j = 1
        self.saveInterval = 100 if 'SAVE_INTERVAL' not in environ else int( environ['SAVE_INTERVAL'])
Example #5
import numpy as np
from keras import Input
from keras import layers
from keras import Model

input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)
model = Model(input_tensor, output_tensor)
# model.summary()
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['acc'])

x_train = np.random.random((1000, 64))
y_train = np.random.randint(0, 10, 1000)
model.fit(x_train, y_train, epochs=10, batch_size=128)
score = model.evaluate(x_train, y_train)

Example #6
def autoencode(pipe: Pipe,
               layer_config: List[Dict],
               from_file: str,
               store_model: str,
               loss: str,
               optimiser: str,
               epochs: int,
               batch_size: int,
               shuffle: bool,
               validation_split: float,
               adjust_weights: float,
               mode: str):
    """Build and train an autoencoder."""
    import keras
    from keras import regularizers, Sequential, Input, Model
    from keras.callbacks import EarlyStopping, TensorBoard
    from keras.engine import InputLayer
    from keras.engine.saving import model_from_yaml, model_from_json
    from keras.layers import Dense
    from numpy.random.mtrand import seed
    from tensorflow import set_random_seed
    from lyner.keras_extras import SignalHandler
    seed(1)
    set_random_seed(2)
    matrix = pipe.matrix.copy()
    if matrix.isnull().values.any():
        LOGGER.warning("Dropping rows containing nan values")
        matrix.dropna(how='any', inplace=True)

    def parse_layout(layer_conf):
        get_layer_type = lambda t: getattr(keras.layers, t, None)
        regdict = {'l1_l2': regularizers.l1_l2, 'l1': regularizers.l1, 'l2': regularizers.l2}
        lc = layer_conf.copy()
        layer_type = lc.get('type', None)
        if layer_type:
            lc['type'] = get_layer_type(layer_type)

        # TODO parse regularizers
        kernel_reg_type = lc.get('kernel_regularizer', None)
        if kernel_reg_type:
            params = []
            if '(' in kernel_reg_type and ')' in kernel_reg_type:
                params = kernel_reg_type[kernel_reg_type.index('(') + 1:kernel_reg_type.index(')')]
                if '+' in params:
                    params = params.split('+')
                else:
                    params = [params]
                params = [float(p) for p in params]
                kernel_reg_type = kernel_reg_type[:kernel_reg_type.index('(')]
            lc['kernel_regularizer'] = regdict[kernel_reg_type](*params)
        return lc.pop('type'), int(lc.pop('n')), lc

    layout = [parse_layout(layer_conf) for layer_conf in layer_config]
    labels = matrix.columns.values.tolist()
    data = matrix.values
    shape = (data.shape[0],)
    data = data.transpose()
    if layout:
        encoding_dim = layout[-1][1]
        encoder = Sequential(name="encoder")
        encoder.add(InputLayer(shape, name="encoder_input"))
        for layer_num, (Layer, n_nodes, extra_args) in enumerate(layout):
            encoder.add(Layer(n_nodes, name=f"encoder_{layer_num}_{n_nodes}", **extra_args))
            # kernel_regularizer=regularizers.l1_l2(0.001, 0.001),
            # kernel_regularizer=regularizers.l1(0.0001),

        decoder = Sequential(name="decoder")
        decoder.add(InputLayer((encoding_dim,), name="decoder_input"))
        for layer_num, (Layer, n_nodes, _) in enumerate(layout[::-1][1:]):
            decoder.add(Layer(n_nodes, name=f"decoder_{layer_num}_{n_nodes}"))
        decoder.add(Dense(shape[0], activation='linear', name="decoder_output"))

        input_layer = Input(shape=shape, name="autoencoder_input")
        encode_layer = encoder(input_layer)
        decode_layer = decoder(encode_layer)

        autoencoder = Model(input_layer, decode_layer)
        if store_model:
            if store_model.endswith('.yaml'):
                model_string = autoencoder.to_yaml()
            elif store_model.endswith('.json'):
                model_string = autoencoder.to_json()
            else:
                model_string = autoencoder.to_yaml()
            with open(store_model, 'wt') as writer:
                writer.write(model_string)
    elif from_file:
        with open(from_file, 'rt') as reader:
            model_string = '\n'.join(reader.readlines())
        if from_file.endswith('.yaml'):
            autoencoder = model_from_yaml(model_string)
        elif from_file.endswith('.json'):
            autoencoder = model_from_json(model_string)
        # TODO set encoder and decoder correctly
    else:
        raise ValueError("No model specified. Use either of --layer-config or --from-file.")
    # from pprint import pprint
    # pprint(autoencoder.get_config())
    autoencoder.compile(optimizer=optimiser, loss=loss, metrics=['mse'], )

    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0000001, patience=50)

    sh = SignalHandler()
    autoencoder.fit(np.vsplit(data, 1), np.vsplit(data, 1),
                    callbacks=[TensorBoard(log_dir='/tmp/autoencoder'), sh, early_stopping],
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=validation_split,
                    shuffle=shuffle
                    )
    sh.uninit()

    class Autoencoder:
        def __init__(self, encoder=None, decoder=None):
            self._encoder = encoder
            self._decoder = decoder

        def inverse_transform(self, data):
            return self._decoder.predict(data).transpose()

        def transform(self, data):
            return self._encoder.predict(data).transpose()

    pipe.decomposition = Autoencoder(encoder, decoder)

    encoded_data = pipe.decomposition.transform(data)
    decoded_data = pipe.decomposition.inverse_transform(encoded_data.T)
    pre_error = ((data.T - decoded_data) ** 2).mean(axis=None)
    print(f"MSE: {pre_error}")

    pipe._index = pipe.matrix.index
    pipe._columns = pipe.matrix.columns
    if adjust_weights:
        quant = float(adjust_weights)
        for i, layer in enumerate(encoder.layers):
            W, b = layer.get_weights()
            low, median, high = np.quantile(W.flatten(), [quant, 0.5, 1 - quant])
            W_low = W * (W < low)
            W_high = W * (W > high)
            selected_weights = W_low + W_high
            # oplot([Histogram(x=W.flatten()), Histogram(x=W[W < low].flatten()), Histogram(x=W[W > high].flatten())])
            layer.set_weights([selected_weights, b])
            break
        encoded_data = pipe.decomposition.transform(data)
        decoded_data = pipe.decomposition.inverse_transform(encoded_data.T)
        post_error = ((data.T - decoded_data) ** 2).mean(axis=None)
        print(f"MSE: {post_error}")
    if 'weights' == mode:
        layer = 0
        layer_weights = encoder.layers[layer].get_weights()
        layer = encoder.layers[layer]
        if len(layer_weights) == 0:
            layer_weights = encoder.layers[0].get_weights()
        if len(layer_weights) >= 2:
            layer_weights = layer_weights[:-1]  # last one is bias
        new_data = layer_weights[0]
        index = [f'Weight_{i}' for i in range(new_data.shape[0])]
        num_nodes = new_data.shape[1]
        columns = [f"{layer.name}_{i}" for i in range(num_nodes)]
    elif 'nodes' == mode:
        new_data = encoder.predict(np.vsplit(data, 1)).transpose()
        columns = labels
        index = [f"{mode}_{i}" for i in range(encoding_dim)]
    elif 'discard' == mode:
        W, b = encoder.layers[0].get_weights()
        W = np.sum(np.abs(W), axis=1)
        W[W != 0] = 1
        print(f"Kept {np.sum(W)} weights")
        v: np.array = pipe.matrix.values
        new_data = (v.T * W).T
        columns = pipe.matrix.columns
        index = pipe.matrix.index
    else:
        raise ValueError(f"Unknown mode {mode}")
    pipe.matrix = pd.DataFrame(data=new_data,
                               columns=columns,
                               index=index,
                               )
    return
Example #7
def main():
    config = InferenceConfig()
    # config.display()

    # Create Mask Model
    mask_model = modellib.MaskRCNN(mode='inference',
                                   model_dir=MASK_LOGS_DIR,
                                   config=config)
    mask_model.load_weights(COCO_MODEL_PATH, by_name=True)

    # Freeze model
    mask_model.keras_model.trainable = False

    class_names = CLASS_NAMES

    file_names = next(os.walk(IMAGE_DIR))[2]
    random.shuffle(file_names)

    if SHOW is True:
        test_file_names = next(os.walk(TESTING_DIR))[2][0:1]
    else:
        test_file_names = next(os.walk(TESTING_DIR))[2][0:10]

    def log_auto_color(s):
        # Append the loss value to a log file; failures are silently ignored.
        try:
            with open("loss_hist_bc_lab.txt", 'a') as log_file:
                log_file.write('{}\n'.format(s))
                # print("{}: {}".format(str(datetime.now()), s)) # For debugging
        except Exception:
            pass

    def report_loss(filenames=test_file_names):
        X_batch, Y_batch, _ = generate_training_datum(filenames,
                                                      image_dir=TESTING_DIR)
        loss = model.evaluate(X_batch, Y_batch)
        print("Loss(bachnorm): {}".format(loss))
        log_auto_color(loss)

    def colorize(filename='000000000643.jpg'):
        """Colorize One picture"""
        X_batch, Y_batch, images = generate_training_datum(
            [filename], image_dir=TESTING_DIR)

        # plt.imshow(images[0])
        # plt.show()

        preds = model.predict(X_batch)
        # print("predict shape: {}".format(preds.shape))

        lab = rgb2lab(images[0])
        pred_image = np.zeros(lab.shape)
        pred_image[:, :, 0] = lab[:, :, 0]
        pred_image[:, :, 1:] = preds[0] * 128

        # pred_image = np.concatenate((images[0], lab2rgb(pred_image)), axis=1) # Damages original image
        pred_image = lab2rgb(pred_image)
        # print(preds)

        if SHOW is True:
            plt.imshow(pred_image)
            plt.show()
        return pred_image

    def get_feature_map(images):
        """ Get the feature map from the trained mask_rcnn """
        result = mask_model.run_graph(
            images,
            [
                ('P2', mask_model.keras_model.get_layer('fpn_p2').output
                 ),  # -> shape: (2, 256, 256, 256)
                ('P3', mask_model.keras_model.get_layer('fpn_p3').output
                 ),  # -> shape: (2, 128, 128, 256)
                ('P4', mask_model.keras_model.get_layer('fpn_p4').output
                 ),  # -> shape: (2, 64, 64, 256)
                ('P5', mask_model.keras_model.get_layer('fpn_p5').output
                 ),  # -> shape: (2, 32, 32, 256)
            ])
        return result

    def generate_training_datum(filenames, image_dir=IMAGE_DIR):
        images = []
        grayscaled_rgbs = []
        Y_batch = []
        for filename in filenames:
            image = skimage.io.imread(os.path.join(image_dir, filename))
            image, _, _, _ = utils.resize_image(image,
                                                min_dim=config.IMAGE_MAX_DIM)

            try:
                image = image[:config.IMAGE_SHAPE[0], :config.
                              IMAGE_SHAPE[1], :]
            except IndexError:
                continue

            images.append(image)
            lab = rgb2lab(image)

            grayscaled_rgb = gray2rgb(rgb2gray(image))
            grayscaled_rgbs.append(grayscaled_rgb)

            Y_batch.append(lab[:, :, 1:] / 128)

        feature_maps = get_feature_map(grayscaled_rgbs)
        # print(feature_maps['P2'].shape) # -> (batch_size, pool_size, pool_size, filter_num)

        grayscaled_rgbs = np.asarray(grayscaled_rgbs)
        Y_batch = np.asarray(Y_batch)
        # print(grayscaled_rgbs.shape) # -> (batch_size, height, width, channels)

        return feature_maps, Y_batch, images

    # generate_training_datum(file_names[0:2])

    # ========= Building the network ========= #
    # Input: https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc
    P5 = Input(shape=(32, 32, 256), name='P5')
    P4 = Input(shape=(64, 64, 256), name='P4')
    P3 = Input(shape=(128, 128, 256), name='P3')
    P2 = Input(shape=(256, 256, 256), name='P2')

    initer = keras.initializers.RandomUniform(minval=-0.5, maxval=0.5)
    # activer = 'relu'
    activer = 'sigmoid'
    # error = 'mse'
    error = 'mean_absolute_error'

    # Decode
    decode_p5 = KL.Conv2D(128, (3, 3),
                          padding='same',
                          bias_initializer=initer,
                          activation=activer,
                          name='decode_p5')(P5)
    # decode_p5 = KL.TimeDistributed(BatchNorm(axis=3), name='p5_bn')(decode_p5)
    # decode_p5 = KL.Activation('relu')(decode_p5)
    decode_p5 = UpSampling2D((2, 2))(decode_p5)

    decode_p4 = Conv2D(128, (1, 1), padding='same',
                       bias_initializer=initer)(P4)
    decode_p4_5 = Add()([decode_p5, decode_p4])
    decode_p4_5 = BatchNorm(axis=3, name='p45_bn')(decode_p4_5)
    decode_p4_5 = KL.Activation(activer)(decode_p4_5)

    decode2_p4_5 = Conv2D(64, (3, 3),
                          activation=activer,
                          padding='same',
                          bias_initializer=initer)(decode_p4_5)
    decode2_p4_5 = UpSampling2D((2, 2))(decode2_p4_5)
    decode2_p3 = Conv2D(64, (3, 3), padding='same',
                        bias_initializer=initer)(P3)
    decode2_p3_4_5 = Add()([decode2_p4_5, decode2_p3])
    decode2_p3_4_5 = BatchNorm(axis=3, name='p345_bn')(decode2_p3_4_5)
    decode2_p3_4_5 = KL.Activation(activer)(decode2_p3_4_5)

    decode3_p345 = Conv2D(32, (3, 3), activation=activer,
                          padding='same')(decode2_p3_4_5)
    decode3_p345 = UpSampling2D((2, 2))(decode3_p345)
    decode3_p2 = Conv2D(32, (1, 1), padding='same',
                        bias_initializer=initer)(P2)
    decode3_p2345 = Add()([decode3_p345, decode3_p2])
    decode3_p2345 = BatchNorm(axis=3, name='p2345_bn')(decode3_p2345)
    decode3_p2345 = KL.Activation(activer)(decode3_p2345)

    decode_out = Conv2D(16, (3, 3),
                        activation=activer,
                        padding='same',
                        bias_initializer=initer)(decode3_p2345)
    decode_out = UpSampling2D((2, 2))(decode_out)
    decode_out = Conv2D(4, (3, 3),
                        activation=activer,
                        padding='same',
                        bias_initializer=initer)(decode_out)
    decode_out = UpSampling2D((2, 2))(decode_out)
    decode_out = Conv2D(2, (3, 3),
                        activation='tanh',
                        padding='same',
                        bias_initializer=initer)(decode_out)

    # build
    tensorboard = TensorBoard(log_dir=TB_LOG_DIR)
    model = Model(inputs=[P5, P4, P3, P2], outputs=decode_out)

    if os.path.isfile('auto_color_batch_norm_lab.h5'):
        print('Found weights')
        model.load_weights('auto_color_batch_norm_lab.h5')

    sgd = optimizers.SGD(lr=0.005, momentum=0.1, decay=0.0, nesterov=False)
    model.compile(optimizer=sgd, loss=error)

    # ========= Training =========== #
    batch_size = BATCH_SIZE
    for i in range(int(len(file_names) / batch_size - 1)):
        # for i in range(30):
        print('(batchnorm) Training on batch {}'.format(i))
        X_batch, Y_batch, _ = generate_training_datum(
            file_names[i * batch_size:(i + 1) * batch_size])
        model.train_on_batch(X_batch, Y_batch)

        if SHOW is True:
            colored = colorize()
            report_loss()

        # color_files = random.choice(test_file_names)

        if i % 10 == 0:
            report_loss()
            colored = colorize()
            skimage.io.imsave(os.path.join(
                TESTING_RESULT_DIR,
                '{}_test_batchnorm_lab_'.format(i) + "00000643.jpg"),
                              arr=colored)

        if i % 300 == 299:
            model.save_weights("{}_color_batchnorm_mrcnn_lab.h5".format(i))

    # ===== Store Model ===== #
    # Save model
    model_json = model.to_json()
    with open("batchnorm_model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("auto_color_batchnorm_final_lab.h5")
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)
encoded_text = layers.LSTM(32)(embedded_text)

question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(
    question_vocabulary_size, 32)(question_input)
encoded_question = layers.LSTM(16)(embedded_question)

concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)
answer = layers.Dense(answer_vocabulary_size,
                      activation='softmax')(concatenated)

model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['acc'])

# %%

num_samples = 1000
max_length = 100

text = np.random.randint(1, text_vocabulary_size,
                         size=(num_samples, max_length))
question = np.random.randint(
    1, question_vocabulary_size, size=(num_samples, max_length))
answers = np.random.randint(1, answer_vocabulary_size, size=(num_samples,))
answers = keras.utils.to_categorical(answers, answer_vocabulary_size)

model.fit({'text': text, 'question': question},
          answers, epochs=10, batch_size=128)
Example #9
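# Fragment: this snippet assumes the tensors time_bn, member_feat_dense and
# question_dense, plus the Input layers time_input, ques_input and
# member_feat_input, are defined earlier in the original script.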
time_dense = PReLU(name='time_dense_prelu')(Dense(units=5,
                                                  name='time_dense')(time_bn))

inv_con = concatenate([time_dense, member_feat_dense, question_dense],
                      name='invite_concatenate')
inv_con_bn = BatchNormalization(name='inv_con_bn')(inv_con)

inv_dense_1 = PReLU(name='inv_dense_1_prelu')(Dense(
    units=512, name='inv_dense_1')(inv_con_bn))
inv_dense_2 = PReLU(name='inv_dense_2_prelu')(Dense(units=128, name='inv_dense_2')(inv_dense_1))
inv_out = Dense(units=1, activation='sigmoid', name='inv_out')(inv_dense_2)

model = Model(inputs=[time_input, ques_input, member_feat_input],
              outputs=inv_out)
model.compile(optimizer='adam',
              loss=keras.losses.binary_crossentropy,
              metrics=['acc'])

model.summary()
keras.utils.plot_model(model, './model.png', show_shapes=True)


#%%
class MGene(keras.utils.Sequence):
    def __init__(self, batch_size: int, invite: pd.DataFrame):
        self.batch_size = batch_size
        self.invite = invite
        self.length = len(invite)
        self.true_len_rate = len(
            invite[invite['is_answer'] == 1]) / self.length
Example #10
        def transform_1D_data_to_reverse_dist(
                data,
                new_sample_ratio=False,
                return_same_sized_combined_dist=True,
                bins=30,
                imba_f=1.2,
                visualization=True):
            from keras import Input, Sequential, Model
            from keras.layers import Dense
            from keras.optimizers import Adam
            from keras.callbacks import EarlyStopping
            import matplotlib.pyplot as plt
            # instead of making rare events have the same standing as frequent events, we make
            # rare events even more common than normal ones; the imba factor controls how strongly
            # the distribution is tilted towards rare events (rare > normal).

            # if no_of_new_samples is not specified, it is computed as the number of new samples
            # required to fill up the remaining area of the uniform dist (think of it as the
            # unfilled area of a rectangle).
            if (new_sample_ratio == 0 and new_sample_ratio is not False) or imba_f == 0:
                return data

            latent_dim = 1
            feature_count = len(data[0])
            enc_input = Input(shape=(feature_count, ))

            encoder = Sequential()
            encoder.add(Dense(100, input_shape=(feature_count, )))
            encoder.add(Dense(latent_dim))

            decoder = Sequential()
            decoder.add(Dense(100, input_shape=(latent_dim, )))
            decoder.add(Dense(feature_count))

            final = Model(enc_input, decoder(encoder(enc_input)))
            final.compile(optimizer=Adam(lr=1e-4), loss="mean_squared_error")

            np.random.shuffle(data)

            final.fit(
                x=np.asarray(data),
                y=np.asarray(data),
                batch_size=int(len(data) / 10),
                callbacks=[EarlyStopping(monitor='loss', min_delta=0.00001)],
                epochs=500)

            latent_values = encoder.predict(data)

            if visualization:
                plt.figure('Original latent values histogram')
                plt.hist(latent_values, bins=bins)

            if bins > len(latent_values):
                bins = int(len(latent_values) / 2)
            count, ranges = np.histogram(latent_values, bins=bins)

            no_of_new_samples = 0
            if not new_sample_ratio:
                no_of_new_samples = np.sum(np.max(count) - count)
            else:
                no_of_new_samples = int(len(data) * new_sample_ratio)

            bins_probability_table = [
                np.power(x, imba_f)
                for x in np.rint(max(count) - count) / max(count)
            ]
            bins_probability_table /= np.max(bins_probability_table)

            new_latent_values = []

            while (True):
                for i in range(len(bins_probability_table)):
                    bin_rng = [ranges[i], ranges[i + 1]]
                    bins_prob = bins_probability_table[i]
                    if np.random.rand() < bins_prob:
                        new_synth_latent = np.random.rand() * (
                            bin_rng[1] - bin_rng[0]) + bin_rng[0]
                        new_latent_values.append([new_synth_latent])
                    if len(new_latent_values) >= no_of_new_samples:
                        break
                if len(new_latent_values) >= no_of_new_samples:
                    break

            # for debugging
            if len(new_latent_values) == 0:
                return data
            new_synth_data = decoder.predict(np.asarray(new_latent_values))

            if visualization:
                plt.figure('New latent values histogram')
                plt.hist(np.asarray(new_latent_values), bins=bins)

                plt.figure('Combined latent values histogram')
                combined_latent_values = np.concatenate(
                    (np.asarray(new_latent_values), latent_values))

                plt.hist(combined_latent_values, bins=bins)
                plt.show()

            # count_, ranges_ = np.histogram(new_latent_values, bins=bins)

            if return_same_sized_combined_dist == True:
                resampled_data = np.concatenate((data, new_synth_data))
                np.random.shuffle(resampled_data)
                resampled_data = resampled_data[:len(data)]

                # for debugging
                # debugging_latent_v = encoder.predict(resampled_data)
                # plt.hist(debugging_latent_v, bins=bins)
                # plt.show()

                return resampled_data
            return new_synth_data  # newly synthesized samples, decoded back to feature space
Example #11
        def transform_1D_samples_using_DOPE(
                data,
                return_same_sized_combined_dist=True,
                new_sample_ratio=0.3,
                no_of_std=3,
                visualization=False):
            from keras import Input, Sequential, Model
            from keras.layers import Dense
            from keras.optimizers import Adam
            from keras.callbacks import EarlyStopping
            import matplotlib.pyplot as plt
            from scipy.stats import chi

            if new_sample_ratio == 0 or no_of_std == 0:
                return data

            latent_dim = 1
            no_of_new_samples = int(len(data) * new_sample_ratio)
            feature_count = len(data[0])
            enc_input = Input(shape=(feature_count, ))

            encoder = Sequential()
            encoder.add(Dense(100, input_shape=(feature_count, )))
            encoder.add(Dense(latent_dim))

            decoder = Sequential()
            decoder.add(Dense(100, input_shape=(latent_dim, )))
            decoder.add(Dense(feature_count))

            final = Model(enc_input, decoder(encoder(enc_input)))
            final.compile(optimizer=Adam(lr=1e-4), loss="mean_squared_error")

            np.random.shuffle(data)

            final.fit(
                x=np.asarray(data),
                y=np.asarray(data),
                batch_size=int(len(data) / 10),
                callbacks=[EarlyStopping(monitor='loss', min_delta=0.00001)],
                epochs=500)

            latent_values = encoder.predict(data)

            if visualization:
                # for debugging of distribution of latent_values
                plt.figure('Latent value distribution')
                plt.hist(latent_values, bins=30)
                plt.show()

            center = np.mean(latent_values, axis=0)
            std = np.std(latent_values, axis=0)
            chi_std = chi.std(2, 0, np.linalg.norm(std))

            # x - mean
            # Caveat: the following line assumes the latent values are already Gaussian
            # distributed and uses them directly.
            dist = np.linalg.norm(latent_values - center,
                                  axis=1)  # row-wise L2 norm

            if visualization:
                # for debugging of distribution
                plt.figure('L1 norm distribution')
                plt.hist(dist, bins=30)
                plt.show()

            for i, el in enumerate(dist):
                dist[i] = 0. if el > no_of_std * chi_std else dist[i]

            if visualization:
                # for debugging of distribution
                plt.figure('L1 norm distribution after std filtering')
                plt.hist(list(filter(lambda x: x > 0, dist)), bins=30)
                plt.show()

            threshold = sorted(dist)[int(len(dist) *
                                         0.9)]  # this is cutting too much

            dist = [0. if x < threshold else x for x in dist]

            if visualization:
                # for debugging of distribution
                plt.figure(
                    'L1 norm distribution after std & threshold filtering')
                plt.hist(list(filter(lambda x: x > 0, dist)), bins=30)
                plt.show()

            dist /= np.sum(dist)

            synth_latent = []
            for i in range(no_of_new_samples):
                # choose an ele from 1st argv, given that 1st argv has prob dist in p
                choice = np.random.choice(np.arange(len(dist)), p=dist)

                a = latent_values[choice]
                latent_copy = np.concatenate(
                    (latent_values[:choice], latent_values[choice + 1:]))
                latent_copy -= a
                latent_copy = np.linalg.norm(latent_copy,
                                             axis=1)  # row-wise L2 norm
                b = np.argmin(latent_copy)
                if b >= choice:
                    b += 1
                b = latent_values[b]
                scale = np.random.rand()
                c = scale * (a - b) + b
                synth_latent.append(c)

            new_latent_values = np.concatenate(
                (latent_values, np.asarray(synth_latent)))

            new_data = decoder.predict(np.asarray(synth_latent))
            if return_same_sized_combined_dist:
                resampled_data = np.concatenate((data, new_data))
                np.random.shuffle(resampled_data)
                return resampled_data[:len(data)]
            return new_data
class CharacterTagger:
    """
    A class for character-based neural morphological tagger
    """
    def __init__(self,
                 reverse=False,
                 word_rnn="cnn",
                 min_char_count=1,
                 char_embeddings_size=16,
                 char_conv_layers=1,
                 char_window_size=5,
                 char_filters=None,
                 char_filter_multiple=25,
                 char_highway_layers=1,
                 conv_dropout=0.0,
                 highway_dropout=0.0,
                 intermediate_dropout=0.0,
                 lstm_dropout=0.0,
                 word_lstm_layers=1,
                 word_lstm_units=128,
                 word_dropout=0.0,
                 regularizer=None,
                 batch_size=16,
                 validation_split=0.2,
                 nepochs=25,
                 min_prob=0.01,
                 max_diff=2.0,
                 callbacks=None,
                 verbose=1):
        self.reverse = reverse
        self.word_rnn = word_rnn
        self.min_char_count = min_char_count
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        self.regularizer = regularizer
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.nepochs = nepochs
        self.min_prob = min_prob
        self.max_diff = max_diff
        self.callbacks = callbacks
        self.verbose = verbose
        self.initialize()

    def initialize(self):
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(
                self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError(
                "There should be the same number of window sizes and filter sizes"
            )
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units
                                    ] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError(
                "There should be the same number of lstm layer units and lstm layers"
            )
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)

    def to_json(self, outfile, model_file, lm_file=None):
        info = dict()
        if lm_file is not None:
            info["lm_file"] = lm_file
        # model_file = os.path.abspath(model_file)
        for (attr, val) in inspect.getmembers(self):
            if not (attr.startswith("__") or inspect.ismethod(val)
                    or isinstance(getattr(CharacterTagger, attr, None),
                                  property) or isinstance(val, np.ndarray)
                    or isinstance(val, Vocabulary) or attr.isupper()
                    or attr in ["callbacks", "model_", "regularizer"]):
                info[attr] = val
            elif isinstance(val, Vocabulary):
                info[attr] = val.jsonize()
            elif isinstance(val, np.ndarray):
                val = val.tolist()
                info[attr] = val
            elif attr == "model_":
                info["dump_file"] = model_file
                self.model_.save_weights(model_file)
            elif attr == "callbacks":
                for callback in val:
                    if isinstance(callback, EarlyStopping):
                        info["early_stopping_callback"] = {
                            "patience": callback.patience,
                            "monitor": callback.monitor
                        }
                    elif isinstance(callback, ModelCheckpoint):
                        info["model_checkpoint_callback"] =\
                            {key: getattr(callback, key) for key in ["monitor", "filepath"]}
                    elif isinstance(callback, ReduceLROnPlateau):
                        info["LR_callback"] =\
                            {key: getattr(callback, key) for key in
                             ["monitor", "factor", "patience", "cooldown", "epsilon"]}
            elif attr.endswith("regularizer"):
                if val is not None:
                    info[attr] = float(val.l2)
        with open(outfile, "w", encoding="utf8") as fout:
            json.dump(info, fout)

    @property
    def symbols_number_(self):
        return self.symbols_.symbols_number_

    @property
    def tags_number_(self):
        return self.tags_.symbols_number_

    def transform(self,
                  data,
                  labels=None,
                  pad=True,
                  return_indexes=True,
                  buckets_number=None,
                  bucket_size=None,
                  join_buckets=True):
        lengths = [len(x) + 2 for x in data]
        if pad:
            indexes, level_lengths = make_bucket_indexes(
                lengths,
                buckets_number=buckets_number,
                bucket_size=bucket_size,
                join_buckets=join_buckets)
        else:
            indexes = [[i] for i in range(len(data))]
            level_lengths = lengths
        X = [None] * len(data)
        for bucket_indexes, bucket_length in zip(indexes, level_lengths):
            for i in bucket_indexes:
                sent = data[i] if not self.reverse else data[i][::-1]
                X[i] = [
                    self._make_sent_vector(sent, bucket_length=bucket_length)
                ]
                if labels is not None:
                    tags = labels[i] if not self.reverse else labels[i][::-1]
                    X[i].append(
                        self._make_tags_vector(tags,
                                               bucket_length=bucket_length))
        if return_indexes:
            return X, indexes
        else:
            return X

    def _make_sent_vector(self, sent, bucket_length=None):
        if bucket_length is None:
            bucket_length = len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH + 2),
                          dtype=np.int32)
        for i, word in enumerate(sent):
            answer[i, 0] = BEGIN
            m = min(len(word), MAX_WORD_LENGTH)
            for j, x in enumerate(word[-m:]):
                answer[i, j + 1] = self.symbols_.toidx(x)
            answer[i, m + 1] = END
            answer[i, m + 2:] = PAD
        return answer

    def _make_tags_vector(self, tags, bucket_length=None, func=None):
        m = len(tags)
        if bucket_length is None:
            bucket_length = m
        answer = np.zeros(shape=(bucket_length, ), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags_.toidx(tag) if func is None else func(tag)
        return answer

    def train(self,
              data,
              labels,
              dev_data=None,
              dev_labels=None,
              symbol_vocabulary_file=None,
              tags_vocabulary_file=None,
              lm_file=None,
              model_file=None,
              save_file=None):
        """
        Trains the tagger on data :data: with labels :labels:

        data: list of lists of sequences, a list of sentences
        labels: list of lists of strs,
            a list of sequences of tags, each tag is a feature-value structure
        :return:
        """
        if symbol_vocabulary_file is None:
            self.symbols_ = Vocabulary(
                character=True, min_count=self.min_char_count).train(data)
        else:
            self.symbols_ = vocabulary_from_json(symbol_vocabulary_file,
                                                 use_features=False)
        if tags_vocabulary_file is None:
            self.tags_ = FeatureVocabulary(character=False).train(labels)
        else:
            with open(tags_vocabulary_file, "r", encoding="utf8") as fin:
                tags_info = json.load(fin)
            self.tags_ = vocabulary_from_json(tags_info, use_features=True)
        if self.verbose > 0:
            print("{} characters, {} tags".format(self.symbols_number_,
                                                  self.tags_number_))
        X_train, indexes_by_buckets = self.transform(data,
                                                     labels,
                                                     buckets_number=10)
        if dev_data is not None:
            X_dev, dev_indexes_by_buckets =\
                self.transform(dev_data, dev_labels, bucket_size=BUCKET_SIZE)
        else:
            X_dev, dev_indexes_by_buckets = [None] * 2
        self.build()
        if save_file is not None and model_file is not None:
            self.to_json(save_file, model_file, lm_file)
        self._train_on_data(X_train,
                            indexes_by_buckets,
                            X_dev,
                            dev_indexes_by_buckets,
                            model_file=model_file)
        return self

    def _train_on_data(self,
                       X,
                       indexes_by_buckets,
                       X_dev=None,
                       dev_indexes_by_buckets=None,
                       model_file=None):
        if X_dev is None:
            X_dev, dev_indexes_by_buckets = X, []
            validation_split = self.validation_split
        else:
            validation_split = 0.0
        train_indexes_by_buckets = []
        for curr_indexes in indexes_by_buckets:
            np.random.shuffle(curr_indexes)
            if validation_split != 0.0:
                train_bucket_size = int(
                    (1.0 - self.validation_split) * len(curr_indexes))
                train_indexes_by_buckets.append(
                    curr_indexes[:train_bucket_size])
                dev_indexes_by_buckets.append(curr_indexes[train_bucket_size:])
            else:
                train_indexes_by_buckets.append(curr_indexes)
        if model_file is not None:
            callback = ModelCheckpoint(model_file,
                                       monitor="val_acc",
                                       save_weights_only=True,
                                       save_best_only=True)
            if self.callbacks is not None:
                self.callbacks.append(callback)
            else:
                self.callbacks = [callback]
        train_steps = sum((1 + (len(x) - 1) // self.batch_size)
                          for x in train_indexes_by_buckets)
        dev_steps = len(dev_indexes_by_buckets)
        train_gen = generate_data(X,
                                  train_indexes_by_buckets,
                                  self.tags_number_,
                                  self.batch_size,
                                  use_last=False)
        dev_gen = generate_data(X_dev,
                                dev_indexes_by_buckets,
                                self.tags_number_,
                                use_last=False,
                                shuffle=False)
        self.model_.fit_generator(train_gen,
                                  steps_per_epoch=train_steps,
                                  epochs=self.nepochs,
                                  callbacks=self.callbacks,
                                  validation_data=dev_gen,
                                  validation_steps=dev_steps,
                                  verbose=1)
        if model_file is not None:
            self.model_.load_weights(model_file)
        return self

    def predict(self, data, labels=None, return_probs=False):
        X_test, indexes_by_buckets =\
            self.transform(data, labels=labels, bucket_size=BUCKET_SIZE)
        answer, probs = [None] * len(data), [None] * len(data)
        for k, (X_curr, bucket_indexes) in enumerate(
                zip(X_test[::-1], indexes_by_buckets[::-1])):
            X_curr = [
                np.array([X_test[i][j] for i in bucket_indexes])
                for j in range(len(X_test[0]) - int(labels is not None))
            ]
            bucket_probs = self.model_.predict(X_curr, batch_size=256)
            bucket_labels = np.argmax(bucket_probs, axis=-1)
            for curr_labels, curr_probs, index in\
                    zip(bucket_labels, bucket_probs, bucket_indexes):
                curr_labels = curr_labels[:len(data[index])]
                curr_labels = [
                    self.tags_.symbols_[label] for label in curr_labels
                ]
                answer[index], probs[
                    index] = curr_labels, curr_probs[:len(data[index])]
        return (answer, probs) if return_probs else answer

    def score(self, data, labels):
        X_test, indexes_by_buckets = self.transform(data,
                                                    labels,
                                                    bucket_size=BUCKET_SIZE)
        probs = [None] * len(data)
        for k, (X_curr, bucket_indexes) in enumerate(
                zip(X_test[::-1], indexes_by_buckets[::-1])):
            X_curr = [
                np.array([X_test[i][j] for i in bucket_indexes])
                for j in range(len(X_test[0]) - 1)
            ]
            y_curr = [np.array(X_test[i][-1]) for i in bucket_indexes]
            bucket_probs = self.model_.predict(X_curr, batch_size=256)
            for curr_labels, curr_probs, index in zip(y_curr, bucket_probs,
                                                      bucket_indexes):
                L = len(data[index])
                probs[index] = curr_probs[np.arange(L), curr_labels[:L]]
        return probs

    def build(self):
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH + 2),
                               dtype="int32")
        inputs = [word_inputs]
        word_outputs = self.build_word_cnn(word_inputs)
        outputs, lstm_outputs = self.build_basic_network(word_outputs)
        compile_args = {
            "optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
            "loss": "categorical_crossentropy",
            "metrics": ["accuracy"]
        }
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            print(self.model_.summary())
        return self

    def build_word_cnn(self, inputs):
        # inputs = kl.Input(shape=(MAX_WORD_LENGTH,), dtype="int32")
        inputs = kl.Lambda(kb.one_hot,
                           arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) +
                           (self.symbols_number_, ))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size,
                                   use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size,
                                               self.char_filters):
            curr_output = char_embeddings
            curr_filters_number = (min(self.char_filter_multiple *
                                       window_size, 200) if
                                   filters_number is None else filters_number)
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(
                    curr_filters_number, (1, window_size),
                    padding="same",
                    activation="relu",
                    data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same",
                                    activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        highway_input = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(
                self.intermediate_dropout)(highway_input)
        for i in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def build_basic_network(self, word_outputs):
        """
        Creates the basic network architecture,
        transforming word embeddings to intermediate outputs
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers - 1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j],
                        return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[-1],
                    return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(kl.Dense(
            self.tags_number_,
            activation="softmax",
            activity_regularizer=self.regularizer),
                                         name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs
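

# Hypothetical end-to-end usage of CharacterTagger; train_sents (a list of
# tokenized sentences, i.e. lists of words) and train_tags (the matching lists
# of tag strings) are assumed names, not taken from the original source.
tagger = CharacterTagger(word_lstm_units=128, nepochs=5)
tagger.train(train_sents, train_tags,
             model_file="tagger_weights.hdf5", save_file="tagger.json")
predicted_tags = tagger.predict(train_sents[:10])
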
# Flatten feature map to a 1-dim tensor
x = layers.Flatten()(x)

# Create a fully connected layer with ReLU activation and 512 hidden units
x = layers.Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Create output layer with a single node and sigmoid activation
output = layers.Dense(1, activation='sigmoid')(x)

# Configure and compile the model
model = Model(img_input, output)
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.001),
              metrics=['acc'])

history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=30,
                              validation_data=validation_generator,
                              validation_steps=50,
                              verbose=2)

# Retrieve a list of accuracy results on training and test data
# sets for each training epoch
acc = history.history['acc']
val_acc = history.history['val_acc']

# Retrieve a list of list results on training and test data
# sets for each training epoch
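
# Fragment from a CycleGAN-style training routine: it assumes discriminatorA,
# discriminatorB, generatorAToB, generatorBToA, the generated*/reconstructed*
# tensors, inputA, inputB, common_optimizer, batch_size and epochs are all
# defined in the enclosing scope.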
        # Make both of the discriminator networks non-trainable
        discriminatorA.trainable = False
        discriminatorB.trainable = False

        probsA = discriminatorA(generatedA)
        probsB = discriminatorB(generatedB)

        adversarial_model = Model(inputs=[inputA, inputB],
                                  outputs=[
                                      probsA, probsB, reconstructedA,
                                      reconstructedB, generatedAId,
                                      generatedBId
                                  ])
        adversarial_model.compile(
            loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
            loss_weights=[1, 1, 10.0, 10.0, 1.0, 1.0],
            optimizer=common_optimizer)

        tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()),
                                  write_images=True,
                                  write_grads=True,
                                  write_graph=True)
        tensorboard.set_model(generatorAToB)
        tensorboard.set_model(generatorBToA)
        tensorboard.set_model(discriminatorA)
        tensorboard.set_model(discriminatorB)

        real_labels = np.ones((batch_size, 7, 7, 1))
        fake_labels = np.zeros((batch_size, 7, 7, 1))

        for epoch in range(epochs):
Example #15
import logging

import keras
from keras import layers, models, Model
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, GlobalAveragePooling2D, Dropout
from keras.preprocessing.image import ImageDataGenerator

X_size = 75
Y_size = 75

base_model = InceptionV3(include_top=False,
                         input_shape=(X_size, Y_size, 3),
                         classes=52)

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.7)(x)
predictions = Dense(52, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = datagen.flow_from_directory('./trafficSignsHW/trainFULL',
                                              target_size=(X_size, Y_size),
                                              batch_size=32,
                                              class_mode='categorical')
model.compile(keras.optimizers.Adam(),
              'categorical_crossentropy',
              metrics=['accuracy'])
model.fit_generator(train_generator, steps_per_epoch=20, epochs=32)
model.save("my_model.h5")
Example #16
    def build(self,
              sentence_length,
              word_length,
              target_label_dims,
              word_vocab,
              word_vocab_size,
              char_vocab_size,
              word_embedding_dims=100,
              char_embedding_dims=25,
              word_lstm_dims=25,
              tagger_lstm_dims=100,
              tagger_fc_dims=100,
              dropout=0.2,
              external_embedding_model=None):
        """
        Build a NERCRF model

        Args:
            sentence_length (int): max sentence length
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab (dict): word to int dictionary
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            word_lstm_dims (int): character LSTM feature extractor output dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            tagger_fc_dims (int): output fully-connected layer size
            dropout (float): dropout rate
            external_embedding_model (str): path to external word embedding model
        """
        # build word input
        words_input = Input(shape=(sentence_length,), name='words_input')

        if external_embedding_model is not None:
            # load and prepare external word embedding
            external_emb, ext_emb_size = load_word_embeddings(external_embedding_model)

            embedding_matrix = np.zeros((word_vocab_size, ext_emb_size))
            for word, i in word_vocab.items():
                embedding_vector = external_emb.get(word.lower())
                if embedding_vector is not None:
                    # words not found in embedding index will be all-zeros.
                    embedding_matrix[i] = embedding_vector

            # load pre-trained word embeddings into an Embedding layer
            # note that we set trainable = False so as to keep the embeddings fixed
            embedding_layer = Embedding(word_vocab_size,
                                        ext_emb_size,
                                        weights=[embedding_matrix],
                                        input_length=sentence_length,
                                        trainable=False)
        else:
            # learn embeddings ourselves
            embedding_layer = Embedding(word_vocab_size, word_embedding_dims,
                                        input_length=sentence_length)

        word_embeddings = embedding_layer(words_input)
        word_embeddings = Dropout(dropout)(word_embeddings)

        # create word character embeddings
        word_chars_input = Input(shape=(sentence_length, word_length), name='word_chars_input')
        char_embedding_layer = Embedding(char_vocab_size, char_embedding_dims,
                                         input_length=word_length)
        char_embeddings = TimeDistributed(char_embedding_layer)(word_chars_input)
        char_embeddings = TimeDistributed(Bidirectional(LSTM(word_lstm_dims)))(char_embeddings)
        char_embeddings = Dropout(dropout)(char_embeddings)

        # create the final feature vectors
        features = concatenate([word_embeddings, char_embeddings], axis=-1)

        # encode using a bi-lstm
        bilstm = Bidirectional(LSTM(tagger_lstm_dims, return_sequences=True))(features)
        bilstm = Dropout(dropout)(bilstm)
        after_lstm_hidden = Dense(tagger_fc_dims)(bilstm)

        # classify the dense vectors
        crf = CRF(target_label_dims, sparse_target=False)
        predictions = crf(after_lstm_hidden)

        # compile the model
        model = Model(inputs=[words_input, word_chars_input], outputs=predictions)
        model.compile(loss=crf.loss_function,
                      optimizer='adam',
                      metrics=[crf.accuracy])
        self.model = model
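
# --- Usage sketch (not part of the original build method) ---
# A minimal illustration of the input shapes the word+char NER model above expects.
# `sentence_length`, `word_length` and the vocabulary sizes below are made-up values;
# the point is the (batch, sentence) and (batch, sentence, word) index layout.
import numpy as np

sentence_length, word_length = 30, 12
batch = 4
words = np.random.randint(1, 100, size=(batch, sentence_length))                    # word indices
word_chars = np.random.randint(1, 50, size=(batch, sentence_length, word_length))   # char indices per word
# With a built instance `ner`, predictions would contain one label distribution per token:
# tag_probs = ner.model.predict([words, word_chars])   # shape (batch, sentence_length, target_label_dims)
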
def train(hps, epochs, save_interval=200):
    half_batch = int(hps.batch_size / 2)
    dataset, shape = data.load_dataset(hps)
    # loss values for further plotting

    model = mb.CapsuleGANModel(hps, shape)
    discriminator = model.build_discriminator()
    generator = model.build_generator()
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=Adam(hps.learning_rate, hps.beta_1,
                                         hps.beta_2, hps.epsilon),
                          metrics=['accuracy'])
    generator.compile(loss='binary_crossentropy',
                      optimizer=Adam(hps.learning_rate, hps.beta_1, hps.beta_2,
                                     hps.epsilon))

    z = Input(shape=(100, ))
    img = generator(z)
    discriminator.trainable = False
    valid = discriminator(img)
    combined = Model(z, valid)
    combined.compile(loss='binary_crossentropy',
                     optimizer=Adam(hps.learning_rate, hps.beta_1, hps.beta_2,
                                    hps.epsilon))
    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # select a random half batch of images
        idx = np.random.randint(0, dataset.shape[0], half_batch)
        imgs = dataset[idx]

        noise = np.random.normal(0, 1, (half_batch, 100))

        # generate a half batch of new images
        gen_imgs = generator.predict(noise)

        # train the discriminator by feeding both real and fake (generated) images one by one
        d_loss_real = discriminator.train_on_batch(
            imgs,
            np.ones((half_batch, 1)) * 0.9)  # 0.9 for label smoothing
        d_loss_fake = discriminator.train_on_batch(gen_imgs,
                                                   np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        noise = np.random.normal(0, 1, (hps.batch_size, 100))

        # the generator wants the discriminator to label the generated samples
        # as valid (ones)
        valid_y = np.array([1] * hps.batch_size)

        # train the generator
        g_loss = combined.train_on_batch(noise, np.ones((hps.batch_size, 1)))

        # Plot the progress
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[1], g_loss))
        model.D_L_REAL.append(d_loss_real)
        model.D_L_FAKE.append(d_loss_fake)
        model.D_L.append(d_loss)
        model.D_ACC.append(d_loss[1])
        model.G_L.append(g_loss)

        # if at save interval => save generated image samples
        if epoch % (5 * save_interval) == 0:
            su.save_imgs(hps.module, generator, epoch, hps)
        if epoch % (10 * save_interval) == 0:
            generator.save(
                os.path.join(hps.model_dir,
                             hps.module + '_gen_model_{}.h5'.format(epoch)))
            discriminator.save(
                os.path.join(hps.model_dir,
                             hps.module + '_dis_model_{}.h5'.format(epoch)))
        # if epoch % (15*save_interval) == 0:
        #     # joblib.dump(model, "model_{}.pkl".format(epoch))
        #     with open("model_{}.json".format(epoch), 'w') as f:
        #         ujson.dump(model, f)
        #     f.close()
    plt.plot(model.D_L)
    plt.title('Discriminator results')
    plt.xlabel('Epochs')
    plt.ylabel('Discriminator Loss (blue), Discriminator Accuracy (orange)')
    plt.legend(['Discriminator Loss', 'Discriminator Accuracy'])
    su.save_fig("{}_DL".format(hps.module))

    plt.plot(model.G_L)
    plt.title('Generator results')
    plt.xlabel('Epochs')
    plt.ylabel('Generator Loss (blue)')
    plt.legend(['Generator Loss'])
    su.save_fig("{}_GL".format(hps.module))
def build_model(vectors, shape, settings):
    max_length, nr_hidden, nr_class = shape

    input1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
    input2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")

    # embeddings (projected)
    embed = create_embedding(vectors, max_length, nr_hidden)

    a = embed(input1)
    b = embed(input2)

    # step 1: attend
    F = create_feedforward(nr_hidden)
    att_weights = layers.dot([F(a), F(b)], axes=-1)

    G = create_feedforward(nr_hidden)

    if settings["entail_dir"] == "both":
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        alpha = layers.dot([norm_weights_a, a], axes=1)
        beta = layers.dot([norm_weights_b, b], axes=1)

        # step 2: compare
        comp1 = layers.concatenate([a, beta])
        comp2 = layers.concatenate([b, alpha])
        v1 = layers.TimeDistributed(G)(comp1)
        v2 = layers.TimeDistributed(G)(comp2)

        # step 3: aggregate
        v1_sum = layers.Lambda(sum_word)(v1)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = layers.concatenate([v1_sum, v2_sum])

    elif settings["entail_dir"] == "left":
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        alpha = layers.dot([norm_weights_a, a], axes=1)
        comp2 = layers.concatenate([b, alpha])
        v2 = layers.TimeDistributed(G)(comp2)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = v2_sum

    else:
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        beta = layers.dot([norm_weights_b, b], axes=1)
        comp1 = layers.concatenate([a, beta])
        v1 = layers.TimeDistributed(G)(comp1)
        v1_sum = layers.Lambda(sum_word)(v1)
        concat = v1_sum

    H = create_feedforward(nr_hidden)
    out = H(concat)
    out = layers.Dense(nr_class, activation="softmax")(out)

    model = Model([input1, input2], out)

    model.compile(
        optimizer=optimizers.Adam(lr=settings["lr"]),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model
Beispiel #19
0
    def construct_model(self):
        """
            Construct the :math:`1`-st order and :math:`0`-th order models, which approximate the
            :math:`U_1(x, C(x))` and :math:`U_0(x)` utilities respectively. For each pair of objects
            :math:`x_i, x_j \in Q` we construct a :class:`CmpNetCore` with weight sharing to approximate a
            pairwise matrix. The entry at index :math:`(i, j)` of this matrix corresponds to :math:`U_1(x_i, x_j)`
            and measures how favorable it is to choose :math:`x_i` over :math:`x_j`. Using this matrix we compute
            the Borda score of each object to obtain :math:`U_1(x, C(x))`. For the :math:`0`-th order model we
            construct :math:`\lvert Q \rvert` sequential networks with shared weights to evaluate :math:`U_0(x)`
            for each object in the query set :math:`Q`. The output nodes use a linear activation.

            Returns
            -------
            model: keras :class:`Model`
                Neural network to learn the FETA utility score
        """
        def create_input_lambda(i):
            return Lambda(lambda x: x[:, i])

        if self._use_zeroth_model:
            self.logger.debug('Create 0th order model')
            zeroth_order_outputs = []
            inputs = []
            for i in range(self.n_objects):
                x = create_input_lambda(i)(self.input_layer)
                inputs.append(x)
                for hidden in self.hidden_layers_zeroth:
                    x = hidden(x)
                zeroth_order_outputs.append(self.output_node_zeroth(x))
            zeroth_order_scores = concatenate(zeroth_order_outputs)
            self.logger.debug('0th order model finished')
        self.logger.debug('Create 1st order model')
        outputs = [list() for _ in range(self.n_objects)]
        for i, j in combinations(range(self.n_objects), 2):
            if self._use_zeroth_model:
                x1 = inputs[i]
                x2 = inputs[j]
            else:
                x1 = create_input_lambda(i)(self.input_layer)
                x2 = create_input_lambda(j)(self.input_layer)
            x1x2 = concatenate([x1, x2])
            x2x1 = concatenate([x2, x1])

            for hidden in self.hidden_layers:
                x1x2 = hidden(x1x2)
                x2x1 = hidden(x2x1)

            merged_left = concatenate([x1x2, x2x1])
            merged_right = concatenate([x2x1, x1x2])

            n_g = self.output_node(merged_left)
            n_l = self.output_node(merged_right)

            outputs[i].append(n_g)
            outputs[j].append(n_l)
        # convert rows of pairwise matrix to keras layers:
        outputs = [concatenate(x) for x in outputs]
        # compute utility scores:
        sum_func = lambda s: K.mean(s, axis=1, keepdims=True)
        scores = [Lambda(sum_func)(x) for x in outputs]
        scores = concatenate(scores)
        self.logger.debug('1st order model finished')
        if self._use_zeroth_model:
            scores = add([scores, zeroth_order_scores])
        model = Model(inputs=self.input_layer, outputs=scores)
        self.logger.debug('Compiling complete model...')
        model.compile(loss=self.loss_function,
                      optimizer=self.optimizer,
                      metrics=self.metrics)
        return model
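
# --- Illustration of the score aggregation described in the docstring above ---
# (a standalone numpy sketch, not the class's own code). Given a hypothetical pairwise
# utility matrix U1[i, j] and zeroth-order utilities U0[i], the score of object i is the
# mean of its n-1 pairwise utilities, shifted by U0[i], mirroring the Lambda(K.mean)
# layers and the final add() in construct_model.
import numpy as np

U1 = np.array([[0.0, 0.8, 0.6],
               [0.2, 0.0, 0.7],
               [0.4, 0.3, 0.0]])          # U1[i, j]: how favorable i is over j (made-up numbers)
U0 = np.array([0.1, -0.2, 0.05])          # zeroth-order utilities (made-up numbers)

n = U1.shape[0]
first_order = U1.sum(axis=1) / (n - 1)    # mean over the n-1 opponents of each object
scores = first_order + U0
print(scores)                             # higher score = more preferred object
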
vocabulary_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(
    num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction,
                            income_prediction, gender_prediction])
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.0,
                            'gender': 10.0})
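
# --- Usage sketch for the named multi-output model above (illustrative only) ---
# Because the outputs are named ('age', 'income', 'gender'), the targets can be passed
# to fit() as a dict keyed by those names. The arrays below are random stand-ins;
# max_len = 500 is chosen so the stacked Conv1D/MaxPooling1D layers keep enough timesteps.
import numpy as np

num_samples, max_len = 32, 500
posts = np.random.randint(1, vocabulary_size, size=(num_samples, max_len))
targets = {
    'age': np.random.uniform(18, 80, size=(num_samples, 1)).astype('float32'),
    'income': np.eye(num_income_groups)[np.random.randint(num_income_groups, size=num_samples)],
    'gender': np.random.randint(0, 2, size=(num_samples, 1)).astype('float32'),
}
model.fit(posts, targets, epochs=1, batch_size=8)
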
Beispiel #21
0
X = np.random.randint(10, size=(n_samples, dx, dy))
y_true = np.ones((n_samples, dx, dout))

# X[2, 0] = mask_value
# X[3, 1] = mask_value

sample_weight = np.ones_like(y_true)
# sample_weight[2, 0] = 0
# sample_weight[3, 1] = 0
sample_weight[0, 0] = 0

inp = Input(shape=(dx, dy))
dense = TimeDistributed(Dense(dout))(inp)
model = Model(inputs=inp, outputs=dense)
model.summary()
model.compile(optimizer="rmsprop", loss="mae", sample_weight_mode="temporal")
set_model_weights_to_unity(model)

y_pred = model.predict(X, verbose=0)
unmasked_loss = mae(y_true, y_pred, mask=False)
masked_loss = mae(y_true, y_pred, mask=True)
weighted_loss = mae(y_true, y_pred, mask=False, weights=sample_weight)
keras_loss = model.evaluate(X, y_true, verbose=0)
keras_loss_weighted = model.evaluate(X,
                                     y_true,
                                     sample_weight=sample_weight[..., 0],
                                     verbose=0)
print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
print(f"weighted loss: {weighted_loss}")
print(f"evaluate with Keras: {keras_loss}")
model = Dropout(droprate)(
    model
)  # Randomly drop a fraction of activations so the network does not learn the noise in the data.

# Fully connected final layer
model = Dense(num_classes)(
    model)  # Map the extracted features to one output node per class.
model = Activation('softmax')(
    model
)  # Multi-class classification: softmax makes the class probabilities sum to 1.

val = Model(inputs, model)

#compile model using accuracy to measure model performance
val.compile(loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.RMSprop(),
            metrics=['accuracy'])

#describe the layers
val.summary()

# define path to save model
model_path = Path + 'fm_cnn_BN16.h5'

# prepare callbacks
callbacks = [
    EarlyStopping(monitor='val_acc', patience=10, mode='max', verbose=1),
    ModelCheckpoint(model_path,
                    monitor='val_acc',
                    save_best_only=True,
                    mode='max',
Beispiel #23
0
model = layers.Dense(128)(model)
model = layers.Dense(64)(model)
model = layers.Dense(4)(model)

output = layers.Activation('softmax')(model) 

model = Model(input,output)

model.summary()


callback_list = [keras.callbacks.EarlyStopping(monitor = 'val_acc', patience = 5), 
                 keras.callbacks.ModelCheckpoint(filepath='ResNet18.h5', monitor = 'val_loss', save_best_only = True)]

model.compile(loss = 'sparse_categorical_crossentropy', 
              optimizer = 'adam',
              metrics = ['acc'])
             
import time  # reset the timer before every training run
start = time.time()
history = model.fit(train_image, train_label, epochs=100, callbacks=callback_list, validation_data=(test_image, test_label))
elapsed = time.time() - start  # avoid shadowing the time module
print("Elapsed time (seconds): {}".format(elapsed))
print("Total number of parameters: {}".format(sum([arr.flatten().shape[0] for arr in model.get_weights()])))

# Check with plots whether the model trained well.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
Beispiel #24
0
crf = CRF(len(label.index) + 1, learn_mode='marginal')(gru_kata)

preds = Dense(len(label.index) + 1, activation='softmax')(gru_kata)

print "Model Choice:"
model_choice = 1 # input('Enter 1 for CRF or 2 for Dense layer: ')

model = Model(inputs=[sequence_input, sequence_input_c], outputs=[crf])
if model_choice == 2:
    model = Model(inputs=[sequence_input, sequence_input_c], outputs=[preds])

optimizer = 'adam' # raw_input('Enter optimizer (default rmsprop): ')
loss = 'binary_crossentropy' # raw_input('Enter loss function (default categorical_crossentropy): ')
model.summary()
model.compile(loss=loss,
              optimizer=optimizer,
              metrics=['acc'])

load_m = False
"""
Training
"""
epoch = int(input('Enter number of epochs: '))
batch = int(input('Enter number of batch size: '))

model.fit([np.array(x_train.padded), np.array(x_train_char)],
          [np.array(y_encoded)],
          epochs=epoch, batch_size=batch)

"""
Converting text data to int using index
    true_positives = K.cast(true_ones, K.floatx())
    true_positive_count = K.sum(true_positives)
    label_positive_count = K.sum(y_true)
    recall = true_positive_count / label_positive_count
    return recall


def precision_m(y_true, y_pred):
    y_true = K.round(y_true)
    y_pred = K.round(y_pred)
    pair_sum = tf.add(y_true, y_pred)
    true_ones = K.equal(pair_sum, 2.)
    true_positives = K.cast(true_ones, K.floatx())
    true_positive_count = K.sum(true_positives)
    pred_positive_count = K.sum(y_pred)
    precision = true_positive_count / pred_positive_count
    return precision


def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall))


# Another way to define your optimizer
adam = Adam(lr=0.001)
# We add metrics to get more results you want to see
model = Model(inputs="your inputs")
model.compile(optimizer=adam, loss="mean_squared_error", metrics=['categorical_accuracy', recall_m, precision_m, f1_m])
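
# --- Quick numeric check of the metric functions above (illustrative) ---
# Assumes the same `keras.backend as K` / `tensorflow as tf` imports used by
# precision_m / recall_m / f1_m are already in scope.
check_true = K.constant([[1., 0., 1.], [0., 1., 0.]])
check_pred = K.constant([[1., 0., 0.], [0., 1., 1.]])
# 2 true positives, 3 predicted positives, 3 actual positives -> all three metrics ~0.667
print(K.eval(precision_m(check_true, check_pred)))
print(K.eval(recall_m(check_true, check_pred)))
print(K.eval(f1_m(check_true, check_pred)))
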
    D_out_11, D_out_12, D_out_13, D_out_14, D_out_21, D_out_22, D_out_23,
    D_out_24, D_out_31, D_out_32, D_out_33, D_out_34, D_out_41, D_out_42,
    D_out_43, D_out_44
])

GAN = Model(inputs=[G_input],
            outputs=[generated_image, GAN_output],
            name="GAN")
GAN_loss = [laplacian_loss, 'binary_crossentropy']
opt_GAN = Adam(lr=lr_schedule(0, G_inital_lr, G_decay_factor, G_decay_period),
               beta_1=0.9,
               beta_2=0.999,
               epsilon=1e-08)
loss_weights = [1, 0.005]
GAN.compile(loss=GAN_loss,
            loss_weights=loss_weights,
            optimizer=opt_GAN,
            metrics={'model_1': mae_on_first_channel})
GAN.summary()

# training start here
real_val = 1.0
fake_val = 0.0

# can load pretrained models here, not necessary
# D.load_weights('save/pre_D.hdf5')
G.load_weights('save/formalin_g_G_500.hdf5')

for iteration in range(0, num_iters, 1):
    # train D until D can distinguish real and generated images
    lr_D = lr_schedule(iteration, D_inital_lr, D_decay_factor, D_decay_period)
    K.set_value(D.optimizer.lr, lr_D)
Beispiel #27
0
    embedding = Embedding(vocab_size,
                          embedding_vector_size,
                          input_length=1,
                          name='embedding',
                          weights=model_mat_skip_gram)
    target = embedding(input_target)
    target = Reshape((embedding_vector_size, 1))(target)
    context = embedding(input_context)
    context = Reshape((embedding_vector_size, 1))(context)

    # setup a cosine similarity operation which will be output in a secondary model
    # similarity = merge([target, context], mode='cos', dot_axes=0)
    similarity = dot([target, context], axes=1, normalize=True)

    # now perform the dot product operation to get a similarity measure
    dot_product = dot([target, context], axes=1)
    dot_product = Reshape((1, ))(dot_product)
    # add the sigmoid output layer
    output = Dense(1, activation='sigmoid')(dot_product)
    # create the primary training model
    model = Model(input=[input_target, input_context], output=output)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.summary()

    # create a secondary validation model to run our similarity checks during training
    validation_model = Model(input=[input_target, input_context],
                             output=similarity)

    callb = SimilarityCallback(t)
    callb.run_sim()
Beispiel #28
0
xtest = sequence.pad_sequences(xtest, maxlen=maxima_longitud)

# Model creation
entrada = Input(shape=(maxima_longitud, ))
x = Embedding(maximas_caracteristicas,
              tamano_embedding)(entrada)  # Embedding layer for text input.
x = LSTM(tamano_embedding, return_sequences=True, activation='relu')(
    x)  # return_sequences returns the LSTM state at every timestep.
x = Flatten()(x)  # Flatten to one dimension
x = Dense(1,
          activation="sigmoid",
          kernel_initializer='zeros',
          bias_initializer='zeros')(x)
modelo = Model(inputs=entrada, outputs=x)
modelo.compile(loss='binary_crossentropy',
               optimizer='adam',
               metrics=['binary_accuracy'])
modelo.summary()

# Binary classification: positive or negative

# Training
# Callback to save the best model across epochs.
checkpoint = ModelCheckpoint('deteccion_texto.h5',
                             monitor='val_binary_accuracy',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode='auto')
history = modelo.fit(xentrenamiento,
                     yentrenamiento,
Beispiel #29
0
    descriptionEmbeddings,
    #input_length=MAX_DESC_SEQUENCE_LENGTH,
    mask_zero=True)(descriptionBranchI)
descriptionBranch = SpatialDropout1D(rate=0.2)(
    descriptionBranch)  #Masks the same embedding element for all tokens
descriptionBranch = BatchNormalization()(descriptionBranch)
descriptionBranch = Dropout(0.2)(descriptionBranch)
descriptionBranch = LSTM(units=30)(descriptionBranch)
descriptionBranch = BatchNormalization()(descriptionBranch)
descriptionBranch = Dropout(0.2, name="description")(descriptionBranch)
descriptionBranchO = Dense(len(set(classes)),
                           activation='softmax')(descriptionBranch)

descriptionModel = Model(inputs=descriptionBranchI, outputs=descriptionBranchO)
descriptionModel.compile(loss='sparse_categorical_crossentropy',
                         optimizer='adam',
                         metrics=['accuracy'])
start = time.time()
descriptionHistory = descriptionModel.fit(trainDescription,
                                          classes,
                                          epochs=nb_epoch,
                                          batch_size=batch_size,
                                          verbose=verbosity,
                                          validation_split=validation_split,
                                          callbacks=callbacks)
print("descriptionBranch finished after " +
      str(datetime.timedelta(seconds=round(time.time() - start))))
descriptionModel.save(modelPath + 'descriptionBranchNorm.h5')

#####################
#2a.) Link Model for Domain
Beispiel #30
0
    model.summary()

# model = Sequential()
# model.add(xception)

    

# model.add(layers.Dense(1000, activation='relu'))
# model.add(layers.Dense(1000, activation='relu'))
# model.add(Dropout(0.5))

# model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy', 
        optimizer='adam', 
        metrics = ['accuracy'])
    
    
    loops = 1
    for i in range(loops):
        print ('\n\nEPOCH SET {}'.format(i))
        nb_epochs = 2
        history = model.fit_generator(
            train_generator,
            steps_per_epoch = train_generator.samples // batch_size,
            validation_data = validation_generator, 
            validation_steps = validation_generator.samples // batch_size,
            epochs = nb_epochs)
        
    name = 'Xception_places_200_FE'
Beispiel #31
0
    val_dir,
    target_size=(IM_WIDTH, IM_HEIGHT),
    batch_size=batch_size,
    class_mode='categorical')

inception_model = InceptionResNetV2(include_top=False)
x = GlobalAveragePooling2D(name='avg_pool')(inception_model.output)
x = Dense(nb_classes, activation='softmax', name='predictions')(x)
model = Model(inception_model.input, x)

# model = load_model('boxes.h5')

for layer in model.layers:
    layer.trainable = False
model.layers[-1].trainable = True
model.layers[-2].trainable = True

checkpoint = ModelCheckpoint("boxes_trained_epoch_{epoch}.h5",
                             monitor='val_loss',
                             save_weights_only=False,
                             save_best_only=True)

model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit_generator(train_generator,
                    validation_data=validation_generator,
                    epochs=nb_epoch,
                    callbacks=[checkpoint])
Beispiel #32
0
#conv2 = Conv1D(128, 3, activation='tanh')(max_1)
#max_2 = MaxPooling1D(3)(conv2)
question_out_1 = Flatten()(question_dmax_2)
#out_1 = LSTM(128)(max_1)

merged_vector = merge([relation_out_1, question_out_1], mode='concat')  # good
dense_1 = Dense(128, activation='relu')(merged_vector)
dense_2 = Dense(128, activation='relu')(dense_1)
dense_3 = Dense(128, activation='relu')(dense_2)

predictions = Dense(1, activation='sigmoid')(dense_3)
#predictions = Dense(len(labels_index), activation='softmax')(merged_vector)

model = Model(input=[tweet_relation, tweet_ques], output=predictions)
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit([rela_train, ques_train],
          label_train,
          nb_epoch=10,
          batch_size=20,
          verbose=1,
          shuffle=True)
json_string = model.to_json()  # json_string = model.get_config()
open('my_model_architecture.json', 'w').write(json_string)
model.save_weights('my_model_weights.h5')

score = model.evaluate([rela_train, ques_train], label_train, verbose=0)
print('train score:', score[0])
print('train accuracy:', score[1])
Beispiel #33
0
z = layers.MaxPooling2D((3, 3))(z)
z = layers.Conv2D(128, (1, 1), padding='same')(z)
z = layers.ReLU()(z)
z = layers.Conv2D(64, (1, 1))(z)
z = layers.LeakyReLU(alpha=0.3)(z)
z = layers.Conv2D(32, (1, 1))(z)
z = layers.ReLU()(z)
z = layers.Flatten()(z)
# The weights of the following layer are recorded
z = layers.Dense(32, kernel_regularizer=l1(0.001))(z)
z = layers.ReLU()(z)
model_output_1 = layers.Dense(1, activation='sigmoid')(z)
model = Model(input_1, model_output_1)
model.summary()
model.compile(loss=['binary_crossentropy'],
              optimizer=optimizers.Nadam(lr=1e-2),
              metrics=['acc'])
path = os.path.join(os.getcwd(), 'logs/')
callbacks_list = [keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                    factor=0.2,
                                                    patience=5)]

STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=40,
    callbacks=callbacks_list,
    validation_data=validation_generator,
Beispiel #34
0
class PolicyValueNetwork:
    """ AlphaZero Residual-CNN """
    def __init__(self, model_file=None):

        # Build Network Architecture
        input_shape = Board().encoded_states().shape  # (6, 15, 15)
        inputs = Input(input_shape)

        shared_net = Sequential([
            *ConvBlock(32, input_shape=input_shape),
            *ConvBlock(64),
            *ConvBlock(128)
        ], "shared_net")

        policy_head = Sequential([
            shared_net,
            *ConvBlock(4, (1, 1), "relu"),
            Flatten(),
            Dense(Game["board_size"], kernel_regularizer=l2()),
            Activation("softmax")
        ], "policy_head")

        value_head = Sequential([
            shared_net,
            *ConvBlock(2, (1, 1), "relu"),
            Flatten(),
            Dense(64, activation="relu", kernel_regularizer=l2()),
            Dense(1, kernel_regularizer=l2()),
            Activation("tanh")
        ], "value_head")

        self.model = Model(
            inputs,
            [value_head(inputs), policy_head(inputs)]
        )

        if model_file is not None:
            self.restore_model(model_file)

    def compile(self, opt):
        """
        Optimization and Loss definition
        """
        self.model.compile(
            optimizer=sgd(),
            loss=["mse", "categorical_crossentropy"]
        )

    def eval_state(self, state):
        """
        Evaluate a board state.
        """
        vp = self.model.predict_on_batch(state.encoded_states()[np.newaxis, :])
        # format to (float, np.array((255,1),dtype=float)) structure
        return vp[0][0][0], vp[1][0]

    def train_step(self, optimizer):
        """
        One Network Tranning step.
        """
        opt = self.model.optimizer
        K.set_value(opt.lr, optimizer["lr"])
        K.set_value(opt.momentum, optimizer["momentum"])
        # loss = self.model.train_on_batch(inputs, [winner, probs])
        # return loss

    def save_model(self, filename):
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        if not os.path.exists(base_path):
            os.mkdir(base_path)
        self.model.save_weights("{}/{}.h5".format(base_path, filename))

    def restore_model(self, filename):
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        if os.path.exists("{}/{}.h5".format(base_path, filename)):
            self.model.load_weights("{}/{}.h5".format(base_path, filename))
# Let's use the 'mixed7' layer as the input to our model
last_layer = pre_trained_model.get_layer('mixed7')
print("last later output shape: ", last_layer.output_shape)
last_output = last_layer.output # this is the input to our own model


# building our own model to on top of last_layer
x = layers.Flatten()(last_output) # flattening output layer to 1-dim
x = layers.Dense(units=1024, activation='relu')(x)
x = layers.Dropout(rate=0.2)(x)
x = layers.Dense(units=1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)
model.compile(optimizer=RMSprop(lr=0.0001),
              loss='binary_crossentropy',
              metrics=['acc']
              )

# now for the data

base_dir = 'utils/cats_and_dogs_filtered'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

train_cats_filenames = os.listdir(train_cats_dir)
train_dogs_filenames = os.listdir(train_dogs_dir)
Beispiel #36
0
class CharacterTagger:

    """A class for character-based neural morphological tagger

    Parameters:
        symbols: character vocabulary
        tags: morphological tags vocabulary
        word_rnn: the type of character-level network (only `cnn` implemented)
        char_embeddings_size: the size of character embeddings
        char_conv_layers: the number of convolutional layers on character level
        char_window_size: the width of convolutional filter (filters).
            It can be a list if several parallel filters are applied, for example, [2, 3, 4, 5].
        char_filters: the number of convolutional filters for each window width.
            It can be a number, a list (when there are several windows of different width
            on a single convolution layer), a list of lists, if there
            are more than 1 convolution layers, or **None**.
            If **None**, a layer with width **width** contains
            min(**char_filter_multiple** * **width**, 200) filters.

        char_filter_multiple: the ratio between filters number and window width
        char_highway_layers: the number of highway layers on character level
        conv_dropout: the ratio of dropout between convolutional layers
        highway_dropout: the ratio of dropout between highway layers
        intermediate_dropout: the ratio of dropout between convolutional
            and highway layers on character level
        lstm_dropout: dropout ratio in word-level LSTM
        word_vectorizers: list of parameters for additional word-level vectorizers,
            for each vectorizer it stores a pair of vectorizer dimension and
            the dimension of the corresponding word embedding
        word_lstm_layers: the number of word-level LSTM layers
        word_lstm_units: hidden dimensions of word-level LSTMs
        word_dropout: the ratio of dropout before word level (it is applied to word embeddings)
        regularizer: l2 regularization parameter
        verbose: the level of verbosity
    """
    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        self.symbols = symbols
        self.tags = tags
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        self.word_vectorizers = word_vectorizers  # a list of additional vectorizer dimensions
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.regularizer = regularizer
        self.verbose = verbose
        self._initialize()
        self.build()

    def _initialize(self):
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")
        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            log.info("{} symbols, {} tags in CharacterTagger".format(self.symbols_number_, self.tags_number_))

    @property
    def symbols_number_(self) -> int:
        """Character vocabulary size
        """
        return len(self.symbols)

    @property
    def tags_number_(self) -> int:
        """Tag vocabulary size
        """
        return len(self.tags)

    def build(self):
        """Builds the network using Keras.
        """
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        inputs = [word_inputs]
        word_outputs = self._build_word_cnn(word_inputs)
        if len(self.word_vectorizers) > 0:
            additional_word_inputs = [kl.Input(shape=(None, input_dim), dtype="float32")
                                      for input_dim, dense_dim in self.word_vectorizers]
            inputs.extend(additional_word_inputs)
            additional_word_embeddings = [kl.Dense(dense_dim)(additional_word_inputs[i])
                                          for i, (_, dense_dim) in enumerate(self.word_vectorizers)]
            word_outputs = kl.Concatenate()([word_outputs] + additional_word_embeddings)
        outputs, lstm_outputs = self._build_basic_network(word_outputs)
        compile_args = {"optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
                        "loss": "categorical_crossentropy", "metrics": ["accuracy"]}
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Builds word-level network
        """
        inputs = kl.Lambda(kb.one_hot, arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size, use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size, self.char_filters):
            curr_output = char_embeddings
            curr_filters_number = (min(self.char_filter_multiple * window_size, 200)
                                   if filters_number is None else filters_number)
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                        padding="same", activation="relu",
                                        data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same", activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        highway_input = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(self.intermediate_dropout)(highway_input)
        for i in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def _build_basic_network(self, word_outputs):
        """
        Creates the basic network architecture,
        transforming word embeddings to intermediate outputs
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers-1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j], return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[-1], return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(
                kl.Dense(self.tags_number_, activation="softmax",
                         activity_regularizer=self.regularizer),
                name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        data, additional_data = data[0], data[1:]
        L = max(len(x) for x in data)
        X = np.array([self._make_sent_vector(x, L) for x in data])
        X = [X] + [np.array(x) for x in additional_data]
        if labels is not None:
            Y = np.array([self._make_tags_vector(y, L) for y in labels])
            if transform_to_one_hot:
                Y = to_one_hot(Y, len(self.tags))
            return X, Y
        else:
            return X

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Trains model on a single batch

        Args:
            data: a batch of word sequences
            labels: a batch of correct tag sequences
        Returns:
            the trained model
        """
        X, Y = self._transform_batch(data, labels)
        self.model_.train_on_batch(X, Y)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """
        Makes predictions on a single batch

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: whether to return tag indexes in vocabulary or tags themselves

        Returns:
            a batch of label sequences
        """
        X = self._transform_batch(data)
        objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
        Y = self.model_.predict_on_batch(X)
        labels = np.argmax(Y, axis=-1)
        answer: List[List[str]] = [None] * objects_number
        for i, (elem, length) in enumerate(zip(labels, lengths)):
            elem = elem[:length]
            answer[i] = elem if return_indexes else self.tags.idxs2toks(elem)
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray:
        """Transforms a sentence to Numpy array, which will be the network input.

        Args:
            sent: input sentence
            bucket_length: the width of the bucket

        Returns:
            A 3d array, answer[i][j][k] contains the index of k-th letter
            in j-th word of i-th input sentence.
        """
        bucket_length = bucket_length or len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH+2), dtype=np.int32)
        for i, word in enumerate(sent):
            answer[i, 0] = self.tags.tok2idx("BEGIN")
            m = min(len(word), MAX_WORD_LENGTH)
            for j, x in enumerate(word[-m:]):
                answer[i, j+1] = self.symbols.tok2idx(x)
            answer[i, m+1] = self.tags.tok2idx("END")
            answer[i, m+2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Transforms a sentence of tags to Numpy array, which will be the network target.

        Args:
            tags: input sentence of tags
            bucket_length: the width of the bucket

        Returns:
            A 2d array, answer[i][j] contains the index of j-th tag in i-th input sentence.
        """
        bucket_length = bucket_length or len(tags)
        answer = np.zeros(shape=(bucket_length,), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Saves model weights to a file

        Args:
            outfile: file with model weights (other model components should be given in config)
        """
        self.model_.save_weights(outfile)

    def load(self, infile) -> None:
        """Loads model weights from a file

        Args:
            infile: file to load model weights from
        """
        self.model_.load_weights(infile)
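
# --- Standalone sketch of the character layout built by _make_sent_vector above ---
# (for illustration only; uses plain dicts as stand-in vocabularies instead of the
# DefaultVocabulary objects the class expects, and a made-up max word length).
import numpy as np

max_word_length = 7
char_idx = {c: i + 4 for i, c in enumerate("abcdefghijklmnopqrstuvwxyz")}
BEGIN, END, PAD, UNK = 1, 2, 0, 3

def word_row(word):
    row = np.full(max_word_length + 2, PAD, dtype=np.int32)
    row[0] = BEGIN
    m = min(len(word), max_word_length)
    for j, ch in enumerate(word[-m:]):          # like the class, keep the last m characters
        row[j + 1] = char_idx.get(ch, UNK)
    row[m + 1] = END                            # remaining positions stay PAD
    return row

print(word_row("cat"))   # [BEGIN, c, a, t, END, PAD, PAD, PAD, PAD]
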
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(multi)
incept_1 = inception_module(max_pool, 72, 64)
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(incept_1)
incept_2 = inception_module(max_pool, 128, 96)
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(incept_2)
output = Flatten()(max_pool)
output = Dense(512, activation='relu')(output)
output = Dense(20, activation='softmax')(output)

model = Model(inputs=input_spectrum, outputs=output)

print(model.summary())
adam = Adam()

model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(train_x,
                    train_y,
                    epochs=1,
                    batch_size=256,
                    validation_data=(val_x, val_y))

plot_accuracy(history)
plot_confusion_matrix(model, val_x, labeled_val_y)

pred_test_y = np.argmax(model.predict(test_x), axis=1)
np.save('results.npy', pred_test_y)
    def build(self,
              sentence_length,
              word_length,
              num_labels,
              num_intent_labels,
              word_vocab_size,
              char_vocab_size,
              word_emb_dims=100,
              char_emb_dims=25,
              char_lstm_dims=25,
              tagger_lstm_dims=100,
              dropout=0.2,
              embedding_matrix=None):
        """
        Build a model

        Args:
            sentence_length (int): max sentence length
            word_length (int): max word length (in characters)
            num_labels (int): number of slot labels
            num_intent_labels (int): number of intent classes
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_emb_dims (int, optional): word embedding dimensions
            char_emb_dims (int, optional): character embedding dimensions
            char_lstm_dims (int, optional): character feature LSTM hidden size
            tagger_lstm_dims (int, optional): tagger LSTM hidden size
            dropout (float, optional): dropout rate
            embedding_matrix (dict, optional): external word embedding dictionary
        """
        if embedding_matrix is not None:
            # load pre-trained word embeddings into an Embedding layer
            # (trainable=True here, so the pre-trained vectors are fine-tuned during training)
            embedding_layer = Embedding(word_vocab_size,
                                        word_emb_dims,
                                        weights=[embedding_matrix],
                                        input_length=sentence_length,
                                        trainable=True,
                                        name='word_embedding_layer')
        else:
            # learn embeddings ourselves
            embedding_layer = Embedding(word_vocab_size, word_emb_dims,
                                        input_length=sentence_length,
                                        name='word_embedding_layer')

        # create word embedding input and embedding layer
        words_input = Input(shape=(sentence_length,), name='words_input')
        word_embeddings = embedding_layer(words_input)
        word_embeddings = Dropout(dropout)(word_embeddings)

        # create word character input and embeddings layer
        word_chars_input = Input(shape=(sentence_length, word_length), name='word_chars_input')
        char_embedding_layer = Embedding(char_vocab_size, char_emb_dims,
                                         input_length=word_length, name='char_embedding_layer')
        # apply embedding to each word
        char_embeddings = TimeDistributed(char_embedding_layer)(word_chars_input)
        # feed dense char vectors into BiLSTM
        char_embeddings = TimeDistributed(Bidirectional(LSTM(char_lstm_dims)))(char_embeddings)
        char_embeddings = Dropout(dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = Bidirectional(
            LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
        first_lstm_out = first_bilstm_layer(word_embeddings)

        lstm_y_sequence = first_lstm_out[:1][0]  # save y states of the LSTM layer
        states = first_lstm_out[1:]
        hf, cf, hb, cb = states  # extract last hidden states
        h_state = concatenate([hf, hb], axis=-1)
        intent_out = Dense(num_intent_labels, activation='softmax',
                           name='intent_classifier_output')(h_state)

        # create the 2nd feature vectors
        combined_features = concatenate([lstm_y_sequence, char_embeddings], axis=-1)

        # 2nd BiLSTM layer for label classification
        second_bilstm_layer = Bidirectional(
                LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
        second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF
        crf = CRF(num_labels, sparse_target=False)
        labels_out = crf(second_bilstm_layer)

        # compile the model
        model = Model(inputs=[words_input, word_chars_input],
                      outputs=[intent_out, labels_out])

        # define losses and metrics
        loss_f = {'intent_classifier_output': 'categorical_crossentropy',
                  'crf_1': crf.loss_function}
        metrics = {'intent_classifier_output': 'categorical_accuracy',
                   'crf_1': crf.accuracy}

        model.compile(loss=loss_f,
                      optimizer='adam',
                      metrics=metrics)
        self.model = model
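
# --- Usage sketch for the two-output intent/slot model above (illustrative only) ---
# The shapes and sizes below are made up; the point is that fit() receives the word and
# character inputs as a list and one-hot targets for both output heads.
import numpy as np

batch, sent_len, word_len = 8, 30, 12
num_intent_labels, num_labels = 5, 10
words = np.random.randint(1, 100, size=(batch, sent_len))
word_chars = np.random.randint(1, 50, size=(batch, sent_len, word_len))
intent_y = np.eye(num_intent_labels)[np.random.randint(num_intent_labels, size=batch)]
slot_y = np.eye(num_labels)[np.random.randint(num_labels, size=(batch, sent_len))]
# With a built instance `m`:
# m.model.fit([words, word_chars], [intent_y, slot_y], batch_size=batch, epochs=1)
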
class NNClassifier(ClassifierMixin):
    """
    Neural network classifier; implements the same interface as the scikit-learn models so it can be used interchangeably
    """

    # noinspection PyTypeChecker
    def __init__(self, **kwargs: Dict[str, Union[int, str, float]]):
        """initializes the Neural Network classifier

        :param kwargs: configuration containing the predictive_model parameters, encoding and training parameters

        """
        self._n_hidden_layers = int(kwargs['n_hidden_layers'])
        self._n_hidden_units = int(kwargs['n_hidden_units'])
        self._activation = str(kwargs['activation'])
        self._n_epochs = int(kwargs['n_epochs'])
        self._encoding = str(kwargs['encoding'])
        self._dropout_rate = float(kwargs['dropout_rate'])
        self._is_binary_classifier = bool(kwargs['is_binary_classifier'])
        self._encoding_parser = EncodingParser(self._encoding, self._is_binary_classifier,
                                               task=PredictiveModels.CLASSIFICATION.value)
        self._model = None

    def fit(self, train_data: DataFrame, targets: ndarray) -> None:
        """creates and fits the predictive_model

        first the encoded data is parsed, then the predictive_model created and then trained

        :param train_data: encoded training dataset
        :param targets: encoded target dataset

        """
        targets = DataFrame(targets, columns=['label'])
        train_data = self._encoding_parser.parse_training_dataset(train_data)
        targets = self._encoding_parser.parse_targets(targets)

        model_inputs = Input(train_data.shape[1:])
        predicted = model_inputs

        if self._encoding in ['simpleIndex', 'complex', 'lastPayload']:
            predicted = Flatten()(predicted)

        for _ in range(self._n_hidden_layers):
            predicted = Dense(self._n_hidden_units, activation=self._activation)(predicted)
            predicted = Dropout(self._dropout_rate)(predicted)

        if self._is_binary_classifier:
            predicted = Dense(1, activation='sigmoid')(predicted)
        else:
            predicted = Dense(targets.shape[1], activation='softmax')(predicted)
        self._model = Model(model_inputs, predicted)

        if self._is_binary_classifier:
            self._model.compile(loss='binary_crossentropy', optimizer='adam')
        else:
            self._model.compile(loss='categorical_crossentropy', optimizer='adam')

        self._model.fit(train_data, targets, epochs=self._n_epochs)

    def predict(self, test_data: DataFrame) -> ndarray:
        """returns predictive_model predictions

        parses the encoded test dataset, then returns the predictive_model predictions

        :param test_data: encoded test dataset
        :return: predictive_model predictions

        """
        test_data = self._encoding_parser.parse_testing_dataset(test_data)

        predictions = self._model.predict(test_data)
        if self._is_binary_classifier:
            predictions = predictions.astype(bool)
        else:
            predictions = np.argmax(predictions, -1)
        return predictions

    def predict_proba(self, test_data: DataFrame) -> ndarray:
        """returns the classification probability

        parses the test dataset and returns the raw prediction probabilities of the predictive_model

        :param test_data: encoded test dataset
        :return: predictive_model prediction probabilities

        """
        test_data = self._encoding_parser.parse_testing_dataset(test_data)

        predictions = self._model.predict(test_data)
        if self._is_binary_classifier:
            predictions = np.max(predictions, -1)
            predictions = np.vstack((1 - predictions, predictions)).T
        return predictions

    def reset(self) -> None:
        """
class EncDecIntentModel(IntentExtractionModel):
    """
    Encoder Decoder Deep LSTM Tagger Model
    """

    def __init__(self):
        super(EncDecIntentModel, self).__init__()

    def build(self,
              sentence_length,
              vocab_size,
              tag_labels,
              token_emb_size=100,
              encoder_depth=1,
              decoder_depth=1,
              lstm_hidden_size=100,
              encoder_dropout=0.5,
              decoder_dropout=0.5,
              emb_model_path=None):
        """
        Build the model

        Args:
            sentence_length (int): max sentence length
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            token_emb_size (int, optional): token embedding vector size
            encoder_depth (int, optional): number of encoder LSTM layers
            decoder_depth (int, optional): number of decoder LSTM layers
            lstm_hidden_size (int, optional): LSTM layers hidden size
            encoder_dropout (float, optional): encoder dropout
            decoder_dropout (float, optional): decoder dropout
            emb_model_path (str, optional): external embedding model path
        """
        tokens_input, token_emb = self._create_input_embed(sentence_length,
                                                           emb_model_path is not None,
                                                           token_emb_size,
                                                           vocab_size)
        benc_in = token_emb
        assert encoder_depth > 0, 'Encoder depth must be > 0'
        for i in range(encoder_depth):
            bencoder = LSTM(lstm_hidden_size, return_sequences=True, return_state=True,
                            go_backwards=True, dropout=encoder_dropout,
                            name='encoder_blstm_{}'.format(i))(benc_in)
            benc_in = bencoder[0]
        b_states = bencoder[1:]
        benc_h, bene_c = b_states

        decoder_inputs = token_emb
        assert decoder_depth > 0, 'Decoder depth must be > 0'
        for i in range(decoder_depth):
            decoder = LSTM(lstm_hidden_size, return_sequences=True,
                           name='decoder_lstm_{}'.format(i))(decoder_inputs,
                                                             initial_state=[benc_h,
                                                                            bene_c])
            decoder_inputs = decoder
        decoder_outputs = Dropout(decoder_dropout)(decoder)
        decoder_predictions = TimeDistributed(
                Dense(tag_labels, activation='softmax'),
                name='decoder_classifier')(decoder_outputs)

        self.model = Model(tokens_input, decoder_predictions)
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                           metrics=['categorical_accuracy'])
# %%
from keras import Input, layers
from keras import Model

input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)
model = Model(input_tensor, output_tensor)
model.summary()

# %%
import numpy as np
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

x_train = np.random.random((1000, 64))
y_train = np.random.random((1000, 10))

model.fit(x_train, y_train, epochs=10, batch_size=128)
score = model.evaluate(x_train, y_train)