Ejemplo n.º 1
0
def resnet_50(input_shape):
    """Build a headless ResNet-50 and load pretrained weights into it.

    The network is the stem (7x7 convolution, batch norm, ReLU, max pool)
    followed by stages 2-5 of ``conv_block`` / ``identity_block`` units.
    The no-top ResNet50 weight file is fetched with ``get_file`` and
    loaded by layer name.

    When ``input_shape[-1]`` is greater than 3 the stem convolution is
    created under the name ``conv1_changed`` so that the by-name load is
    not expected to match it.  Its kernel is then assembled by hand: the
    first three input channels receive the ``conv1`` kernel of a stock
    3-channel ``ResNet50``, the remaining channels keep their freshly
    initialised values, and the stock ``conv1`` bias is reused.  The layer
    is renamed to ``conv1`` afterwards.

    Args:
        input_shape: Shape passed to ``Input``; its last entry is read as
            the number of input channels.

    Returns:
        A ``Model`` mapping the input tensor to the stage-5 output.
    """
    img_input = Input(input_shape)
    has_extra_channels = input_shape[-1] > 3

    # Build exactly one stem convolution.  Previously a 'conv1' layer was
    # always created and then discarded whenever extra channels were present.
    stem_name = 'conv1_changed' if has_extra_channels else 'conv1'
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name=stem_name)(img_input)
    x = BatchNormalization(name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    print("Loading pretrained weights for Resnet50...")
    weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                            resnet50_padding.WEIGHTS_PATH_NO_TOP,
                            cache_subdir='models',
                            md5_hash='a268eb855778b3df3c7506639542a6af')
    model = Model(img_input, x)
    model.load_weights(weights_path, by_name=True)

    if has_extra_channels:
        print("Loading weights for conv1 layer separately for the first 3 channels")
        stem = model.get_layer('conv1_changed')
        resnet_ori = ResNet50(include_top=False, input_shape=(224, 224, 3))
        # get_weights() of the stock stem: index 0 is the kernel, index 1 the bias.
        pretrained = resnet_ori.get_layer("conv1").get_weights()

        conv1_weights = np.zeros((7, 7, input_shape[-1], 64), dtype="float32")
        conv1_weights[:, :, :3, :] = pretrained[0]
        # Channels beyond the third keep the layer's own (random) initialisation.
        conv1_weights[:, :, 3:, :] = stem.get_weights()[0][:, :, 3:, :]
        stem.set_weights((conv1_weights, pretrained[1]))
        stem.name = 'conv1'

    return model
# Gather the validation image file names (3650 is forwarded to the helper).
Validation_file_names = get_image_file_names(Validation_dir, 3650)

# Callbacks: write a checkpoint every epoch and stop early when the
# monitored validation accuracy stops improving.
checkpoint = ModelCheckpoint(Models_filepath,
                             monitor='val_acc',
                             save_best_only=False,
                             verbose=1)
early_stopping = EarlyStopping(monitor='val_acc',
                               mode='auto',
                               patience=EarlyStopping_patience)

# Resume from a previously saved set of weights when one is on disk.
previous_weights = "./Models/weights-resnet-network-01-0.44.hdf5"
if os.path.exists(previous_weights):
    model.load_weights(previous_weights)
    print("Check point loaded!")

# Start training: both the training and validation batches come from the
# same batch generator helper, fed with different file lists.
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
train_batches = get_train_batch(Trainning_file_names, Batch_size, img_W, img_H)
validation_batches = get_train_batch(Validation_file_names, Batch_size, img_W,
                                     img_H)
history = model.fit_generator(
    generator=train_batches,
    steps_per_epoch=Steps_per_epoch,
    epochs=Epochs,
    validation_data=validation_batches,
    validation_steps=Val_Steps_per_epoch,
    callbacks=[checkpoint, early_stopping],
    verbose=1)
Ejemplo n.º 3
0
def VGGUnet(n_classes,
            input_height=416,
            input_width=608,
            vgg16NoTopWeights=None):
    """Build a U-Net style segmentation model on a VGG16-shaped encoder.

    The encoder is the five VGG16 convolution blocks (layer names
    ``block{N}_conv{M}`` / ``block{N}_pool``).  The decoder starts from the
    block-4 pooled features and, three times, upsamples by 2 and
    concatenates the pooled features of blocks 3, 2 and 1 before a padded
    3x3 convolution and batch normalisation.  The per-pixel class scores
    are flattened to ``(height * width, n_classes)`` and passed through a
    softmax.

    Args:
        n_classes: Number of output channels of the final convolution.
        input_height: Input height; must be a multiple of 32.
        input_width: Input width; must be a multiple of 32.
        vgg16NoTopWeights: Optional weights file loaded into the encoder.

    Returns:
        The ``Model``, with ``outputWidth`` and ``outputHeight`` attributes
        set from the shape of the final convolution output.
    """
    assert input_height % 32 == 0
    assert input_width % 32 == 0

    IMAGE_ORDERING = 'channels_first'
    if IMAGE_ORDERING == 'channels_first':
        concat_axis, input_shape = 1, (3, input_height, input_width)
    elif IMAGE_ORDERING == 'channels_last':
        concat_axis, input_shape = 3, (input_height, input_width, 3)
    else:
        raise Exception('Unexpected IMAGE_ORDERING')

    # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
    img_input = Input(shape=input_shape)

    # Encoder: (block number, filters, number of 3x3 convolutions).
    encoder_plan = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3),
                    (5, 512, 3))
    pooled = {}
    x = img_input
    for block_id, filters, conv_count in encoder_plan:
        for conv_id in range(1, conv_count + 1):
            x = Conv2D(filters, (3, 3),
                       activation='relu',
                       padding='same',
                       name='block%d_conv%d' % (block_id, conv_id),
                       data_format=IMAGE_ORDERING)(x)
        x = MaxPooling2D((2, 2),
                         strides=(2, 2),
                         name='block%d_pool' % block_id,
                         data_format=IMAGE_ORDERING)(x)
        pooled[block_id] = x

    if vgg16NoTopWeights:
        # The temporary model shares the encoder layers, so loading into it
        # fills the layers used by the final model.
        Model(img_input, x).load_weights(vgg16NoTopWeights)

    def _pad_conv_bn(tensor, filters):
        # Zero-pad by one pixel, apply an unpadded 3x3 convolution, then
        # batch-normalise.
        # NOTE(review): BatchNormalization() is created with its default
        # axis while tensors here are channels-first -- confirm that is the
        # intended normalisation axis.
        tensor = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING)(tensor)
        tensor = Conv2D(filters, (3, 3),
                        padding='valid',
                        data_format=IMAGE_ORDERING)(tensor)
        return BatchNormalization()(tensor)

    # Decoder: start from block 4 (block 5 only matters for weight loading).
    o = _pad_conv_bn(pooled[4], 512)
    for filters, skip in ((256, pooled[3]), (128, pooled[2]), (64, pooled[1])):
        o = UpSampling2D((2, 2), data_format=IMAGE_ORDERING)(o)
        o = concatenate([o, skip], axis=concat_axis)
        o = _pad_conv_bn(o, filters)

    o = Conv2D(n_classes, (3, 3), padding='same',
               data_format=IMAGE_ORDERING)(o)

    # Probe the spatial size of the score map (indices 2 and 3 of the shape).
    o_shape = Model(img_input, o).output_shape
    outputHeight, outputWidth = o_shape[2], o_shape[3]

    o = Reshape((n_classes, outputHeight * outputWidth))(o)
    o = Permute((2, 1))(o)
    o = Activation('softmax')(o)

    model = Model(img_input, o)
    model.outputWidth = outputWidth
    model.outputHeight = outputHeight

    return model
Ejemplo n.º 4
0
def vgg16_2d(image_rows=256,
             image_cols=256,
             input_channels=3,
             train_encoder=True):
    """Build the VGG16 convolutional base and load its no-top weights.

    The model consists of the five VGG16 blocks (layer names
    ``block{N}_conv{M}`` / ``block{N}_pool``) and ends at ``block5_pool``.
    The weight file is fetched with ``utils.get_file`` from
    ``WEIGHTS_PATH_NO_TOP`` and loaded into the model.

    Args:
        image_rows: Input height.
        image_cols: Input width.
        input_channels: Number of input channels.
            NOTE(review): the downloaded weights presumably expect 3
            channels -- confirm before using another value.
        train_encoder: When false, every layer of the returned model is
            marked non-trainable.  This argument used to be ignored.

    Returns:
        The ``Model`` named ``'vgg16'`` with the weights loaded.
    """
    inputs = layers.Input((image_rows, image_cols, input_channels))

    # (block number, filters, number of 3x3 convolutions)
    block_plan = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3),
                  (5, 512, 3))
    x = inputs
    for block_id, filters, conv_count in block_plan:
        for conv_id in range(1, conv_count + 1):
            x = layers.Conv2D(filters, (3, 3),
                              activation='relu',
                              padding='same',
                              name='block%d_conv%d' % (block_id, conv_id))(x)
        x = layers.MaxPooling2D((2, 2),
                                strides=(2, 2),
                                name='block%d_pool' % block_id)(x)
    x_output = x

    model = Model(inputs=[inputs], outputs=[x_output], name='vgg16')

    # Honour the caller's request to freeze the encoder; done before the
    # summary so the printed trainable-parameter count reflects it.
    if not train_encoder:
        for layer in model.layers:
            layer.trainable = False

    model.summary()

    weights_path = utils.get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        file_hash='6d6bbae143d832006294945121d1f1fc')

    model.load_weights(weights_path)

    return model
Ejemplo n.º 5
0
    # Env
    env = Environment(max_steps=1000)

    # Model Definition
    if agent_type == "keras-rl":
        the_input = Input((1, ) + env.render().shape)
        flatten = Flatten()(the_input)
        x = Dense(256, activation='relu')(flatten)
        x = Dense(1024, activation='relu')(x)
        x = Dense(1024, activation='relu')(x)
        x = Dense(1024, activation='relu')(x)
        x = Dense(196, activation='linear')(x)
        
        model = Model(inputs=[the_input], outputs=[x])
        model.load_weights('pretrained.h5')
        model.compile(optimizer='adam', loss='mse')
        agent_spec["model"] = model
  

        # Agent Init
        agent = agent_class(**agent_spec)
        print("Starting experiment for %s." % name)

        # Agent Train
        agent.compile(Adam(lr=1e-2), metrics=['mse'])
        history = agent.fit(env, nb_steps=EPISODES*150, nb_max_episode_steps=1000, visualize=False, verbose=2)


        # Fetch Train Summary
        summary_step = history.history["nb_episode_steps"][:EPISODES]
# Text classifier: embedding -> spatial dropout -> Capsule layer -> vector
# length per class.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = Input(shape=(max_word_length,))
# Vocabulary size is the tokenizer's word index plus one; embeddings are
# 128-dimensional.
embedding = Embedding(len(tokenizer.word_index) + 1, 128)(input)
embedding = SpatialDropout1D(0.2)(embedding)
# NOTE(review): the meaning of the positional Capsule arguments (8, 10, True)
# is defined by the Capsule class, which is not visible here.
capsule = Capsule(num_classes, 8, 10, True)(embedding)
# Square root of the sum of squares over axis 2, i.e. the Euclidean length
# of each capsule output along that axis; declared output shape is
# (num_classes,).
output = Lambda(lambda x : K.sqrt(K.sum(K.square(x), 2)), output_shape=(num_classes, ))(capsule)
model = Model(inputs=input, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

model_weight_file = './model_capsule.h5'
model_file = './model_capsule.model'
# Stop once val_loss has not improved for 5 epochs; the checkpoint callback
# keeps only the best weights (weights only, not the full model).
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_checkpoint = ModelCheckpoint(model_weight_file, save_best_only=True, save_weights_only=True)
model.fit(x_train_word_index,
          y_train_index,
          batch_size=8,
          epochs=1000,
          verbose=2,
          callbacks=[early_stopping, model_checkpoint],
          validation_data=(x_dev_word_index, y_dev_index),
          shuffle=True)

# Reload the checkpointed weights before saving the full model and
# evaluating on the test split.
model.load_weights(model_weight_file)
model.save(model_file)
evaluate = model.evaluate(x_test_word_index, y_test_index, batch_size=8, verbose=2)
# evaluate[0] is the loss and evaluate[1] the compiled metric (accuracy).
print('loss value=' + str(evaluate[0]))
print('metrics value=' + str(evaluate[1]))

# Recorded output of a previous run:
# loss value=0.7480950128464472
# metrics value=0.7619047609586564
Ejemplo n.º 7
0
# Combine the two sentence-pair feature tensors (built earlier in the
# script, outside this excerpt) into one tensor.
concatenation = concatenate([abs_x_minus_y, x_mult_y])

# Flatten the combined features to a 600-element vector per sample.
fcnn_input = Reshape((600, ))(concatenation)

# Single softmax layer with one unit per entry of a score vector.
# NOTE(review): `input_shape` is passed although the layer is applied to an
# existing tensor -- presumably redundant; confirm.
fcnn_layer_one = Dense(len(scores[0]),
                       input_shape=(600, ),
                       activation='softmax')(fcnn_input)
model = Model(inputs=[sent1_input, sent2_input], outputs=[fcnn_layer_one])

print(model.summary())

# Resume from the last saved weights when the file exists.
filepath = path + 'lstm_weights.last.hdf5'
exists = os.path.isfile(filepath)
if exists:
    model.load_weights(filepath)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Save weights only, to the same file the script resumes from;
# save_best_only=False means the file is rewritten regardless of val_acc.
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=False,
                             save_weights_only=True,
                             mode='auto')

model.fit([data1, data2],
          scores,
          validation_data=([valid1, valid2], valid_scores),
# Classifier head that sits on top of a (7, 7, 512) feature map: flatten,
# two ReLU dense layers and a softmax prediction layer.
dense_input = Input(shape=(7, 7, 512))
dense_output = dense_input
for head_layer in (
        Flatten(name='flatten'),
        Dense(dense_layer_1, activation='relu', name='fc1'),
        Dense(dense_layer_2, activation='relu', name='fc2'),
        Dense(num_classes, activation='softmax', name='predictions'),
):
    dense_output = head_layer(dense_output)

top_model = Model(inputs=dense_input, outputs=dense_output, name='top_model')

# from: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# Fine-tuning needs to start from a fully-trained classifier, top classifier
# included, so the head's trained weights are loaded first.
top_model.load_weights(top_model_weights_path)

# Stack the head on the output of the VGG16 'block5_pool' layer to obtain
# one end-to-end model.
block5_pool = vgg16.get_layer('block5_pool').output
full_output = top_model(block5_pool)
full_model = Model(inputs=vgg16.input, outputs=full_output)

# Freeze the first 15 layers (up to the last conv block) so their weights
# are not updated.
# WARNING: this may not be applicable for Inception V3
for layer in full_model.layers[:15]:
    layer.trainable = False

# Verify things look as expected
full_model.summary()
Ejemplo n.º 9
0
import cv2
from keras.applications import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras import Model


IMG_PATH = ['./chest_xray_images/normal/15268.jpg', '0']
IMG_SHAPE = (320, 320, 3)

# Load one image as grayscale, resize it to the first two entries of
# IMG_SHAPE, scale it to [0, 1] and repeat the single channel three times so
# it matches the 3-channel network input.
test_img = load_img(path=IMG_PATH[0], color_mode='grayscale')
test_img = img_to_array(img=test_img, data_format='channels_last')
test_img = cv2.resize(test_img, dsize=IMG_SHAPE[:2], interpolation=cv2.INTER_NEAREST)
test_img = np.expand_dims(test_img, axis=-1)
test_img = test_img.astype(np.uint8)
test_img = test_img / 255.
test_img = np.concatenate((test_img, test_img, test_img), axis=-1)
print('external image(s) shape:', test_img.shape)

# DenseNet121 backbone without its classification top and without
# pretrained weights; the weights come from the checkpoint loaded below.
backbone = DenseNet121(include_top=False, weights=None, input_shape=(320, 320, 3))
backbone_out = backbone.output
gap = GlobalAveragePooling2D(name='pooling_layer')(backbone_out)
# The model is compiled with binary_crossentropy, which scores each of the
# 14 outputs as an independent probability.  A softmax would force them to
# sum to one, so the output activation is sigmoid.
output = Dense(units=14, activation='sigmoid', name='output_layer')(gap)
chexnet_model = Model(inputs=backbone.input, outputs=output)
chexnet_model.summary()

chexnet_model.load_weights('C:/Users/Arman/Desktop/Covid19-Detection/checkpoints/CheXNet/CheXNet_v0.3.0.h5')
chexnet_model.compile(optimizer='adam', loss='binary_crossentropy')

# Persist the full model (architecture + weights) and run a sanity-check
# prediction on the single prepared image.
chexnet_model.save(filepath='./checkpoints/CheXNet/CheXNet_model.hdf5')
print('sample prediction: \n', chexnet_model.predict(np.expand_dims(test_img, axis=0)))
Ejemplo n.º 10
0
                batch_size=64,
                epochs=50,
                verbose=2,
                callbacks=[checkpoint])

        # get the most recent file in the job directory which happens to be the last best model
        best_model_file = get_most_recent_file(job_dir)

        # save current model so that the training can be resumed later
        vae.save(job_dir + '/model.h5')
    else:
        # get the most recent file in the job directory which happens to be the last best model
        best_model_file = get_most_recent_file(job_dir)

    # load weights from the best model
    vae.load_weights(best_model_file)
    print('loaded weights from', best_model_file)

    encoder = Model(in_, z_mu)

    train_xhat = encoder.predict(train_x)
    test_xhat = encoder.predict(test_x)

    if latent_dim in [1, 2]:
        test_xhat_nonfraud = test_xhat[test_y == 0]
        test_xhat_fraud = test_xhat[test_y == 1]
        if latent_dim == 1:
            plt.scatter(test_xhat_nonfraud,
                        np.zeros_like(test_xhat_nonfraud),
                        color='b',
                        alpha=0.25,
Ejemplo n.º 11
0
def train_model(data, topic, PROCESSED_DIR, SEED_FOLDER, **kwargs):
    """Train the knowledge-augmented sentence classifier and predict labels.

    The model embeds the sentence tokens and the knowledge paths, reduces
    every path to one vector with three stacked LSTMs, attends over the
    reduced paths with ``attention_knowledge``, concatenates the result with
    the word embeddings and classifies with a bidirectional LSTM followed by
    dropout and a softmax layer.  The best weights according to the
    configured monitor are checkpointed during training and reloaded before
    predicting.

    Args:
        data: Mapping with the keys ``X_train``/``X_dev``/``X_test``,
            ``kX_train``/``kX_dev``/``kX_test`` and
            ``y_train``/``y_dev``/``y_test``.
        topic: String used as part of the checkpoint file name.
        PROCESSED_DIR: Path prefix of the ``.npy`` embedding matrices.
        SEED_FOLDER: Path prefix of the checkpoint file.
        **kwargs: Must contain ``model_settings``, a mapping with the keys
            read at the top of the function body.

    Returns:
        A pair of lists: the argmax class index for every test sample and
        for every dev sample.
    """
    def func(x):
        # For each sentence position, run the three path LSTMs (defined
        # later in the enclosing function and resolved when this is called)
        # over that position's slice, then stack the results on axis 1.
        liste = []
        for i in range(sent_len):
            temp = TimeDistributed(paths_lstm_1)(
                x[:, i, :, :, :]
            )  # [bs, max_paths, max_path_len, emb_dim] * sent_len
            temp = TimeDistributed(paths_lstm_2)(
                temp)  # [bs, max_paths, max_path_len, emb_dim] * sent_len
            temp = TimeDistributed(paths_lstm_last)(
                temp)  # [bs, max_paths, max_path_len, emb_dim] * sent_len
            liste.append(temp)
        stacked = K.stack(liste, axis=1)
        return stacked

    # Hyper-parameters and run settings, all read from kwargs['model_settings'].
    dropout = kwargs['model_settings']["dropout"]
    lstm_size = kwargs['model_settings']["lstm_size"]
    monitor = kwargs['model_settings']["monitor"]
    batch_size = kwargs['model_settings']["batch_size"]
    epochs = kwargs['model_settings']["epochs"]
    learning_rate = kwargs['model_settings']["learning_rate"]
    train_embeddings = kwargs['model_settings']["train_embeddings"]
    # model file eg: 'results/only_sub_and_inst/model_runs/EvLSTM/seed_0/death_penalty_threelabel_crossdomain_monitor-f1_macro_do-0.3_lsize-32_bs-32_epochs-20_lr-0.001_trainemb-False_kl-only_sub_and_inst'
    model_file = SEED_FOLDER + topic + "_" + kwargs['model_settings'][
        "model_file_suffix"]
    seed = kwargs['model_settings']['current_seed']

    # clear default graph (new model now)
    #tf.reset_default_graph()

    # set configs for memory usage and reproducibility: https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # NOTE(review): `rn` is presumably the stdlib `random` module -- confirm
    # against the file's imports.
    rn.seed(seed)
    # NOTE(review): `config` is not referenced again in this function, so
    # these GPU options do not appear to be applied anywhere -- confirm.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # NOTE(review): second call to np.random.seed with the same seed.
    np.random.seed(seed)
    #graph_level_seed = seed
    # NOTE(review): operation_level_seed is assigned but not used below.
    operation_level_seed = seed
    #tf.set_random_seed(graph_level_seed)

    # load embeddings
    emb_sents = np.load(PROCESSED_DIR + "index_to_vec_we" +
                        kwargs['model_settings']['word_embeddings'][1] +
                        ".npy")
    emb_knowledge = np.load(PROCESSED_DIR + "index_to_vec_kge" +
                            kwargs['model_settings']['kg_embeddings'][1] +
                            ".npy")

    # load data
    X_train, X_dev, X_test = data["X_train"], data["X_dev"], data[
        "X_test"]  # [samples, sent_len]
    kX_train, kX_dev, kX_test = data["kX_train"], data["kX_dev"], data[
        "kX_test"]  # [samples, sent_len, max_concepts]
    y_train, y_dev, y_test = data["y_train"], data["y_dev"], data["y_test"]
    # NOTE(review): val_y_non_one_hot is computed but not used below.
    val_y_non_one_hot = [np.argmax(pred) for pred in y_dev]

    # some constants
    sent_len = X_train.shape[1]
    max_paths = kX_train.shape[2]
    max_path_len = kX_train.shape[3]
    num_labels = y_train.shape[1]
    # Attention size defaults to the word-embedding dimension.
    attention_size = kwargs['model_settings'].get('attention_size',
                                                  emb_sents.shape[1])

    ############################
    #   KNOWLEDGE PROCESSING   #
    ############################

    # input for all concepts of a sentence
    sentence_inputs = Input(shape=(sent_len, ),
                            dtype='int32',
                            name="sentence_inputs")
    knowledge_inputs = Input(shape=(
        sent_len,
        max_paths,
        max_path_len,
    ),
                             dtype='int32',
                             name="knowledge_inputs")

    # Both embedding layers are initialised from the loaded matrices and
    # treat index 0 as padding (mask_zero=True).
    emb_knowledge_ids = Embedding(
        emb_knowledge.shape[0],
        emb_knowledge.shape[1],
        mask_zero=True,
        weights=[emb_knowledge],
        trainable=train_embeddings)(
            knowledge_inputs)  # [samples, sent_len, max_concepts, kge_dim]

    embedded_word_ids = Embedding(
        emb_sents.shape[0],
        emb_sents.shape[1],
        mask_zero=True,
        weights=[emb_sents],
        trainable=train_embeddings,
        input_length=sent_len)(sentence_inputs)  # [samples, sent_len, we_dim]

    # function that reduces the paths to a single vector => from there on, model is equal to the shallow model
    # in: [bs, sent_len, max_concepts, max_path_len, kge_dim]
    # out: (sent_len, max_paths, lstm_size) per sample, as declared in the
    # Lambda's output_shape below
    paths_lstm_1 = LSTM(lstm_size, return_sequences=True
                        )  # first path LSTM, keeps the full sequence
    paths_lstm_2 = LSTM(lstm_size, return_sequences=True
                        )  # second path LSTM, keeps the full sequence
    paths_lstm_last = LSTM(
        lstm_size)  # final path LSTM, reduces each path to one vector

    reduce_paths_to_vector = Lambda(
        func, output_shape=(sent_len, max_paths, lstm_size))(emb_knowledge_ids)

    # Attend over the reduced paths, conditioned on the word embeddings.
    # NOTE(review): exact semantics of attention_knowledge's arguments are
    # defined outside this function.
    attended_knowledge = attention_knowledge(
        embedded_word_ids,
        None,
        attention_size,
        return_alphas=False,
        summed_up=True)(reduce_paths_to_vector)

    # Join word embeddings and attended knowledge along the last axis.
    concat_sequences = Lambda(lambda x: tf.concat([x[0], x[1]], axis=-1))(
        [embedded_word_ids, attended_knowledge])

    # define bilstm + dropout
    sent_bilstm = Bidirectional(LSTM(lstm_size))(concat_sequences)
    sent_bilstm_dropout = Dropout(dropout)(sent_bilstm)

    output_layer = Dense(num_labels, activation='softmax')(sent_bilstm_dropout)

    model = Model(inputs=[sentence_inputs, knowledge_inputs],
                  outputs=output_layer)

    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    #e = EarlyStopping(monitor=monitor, mode='auto')
    # Checkpoint only the best weights according to `monitor`.
    e = ModelCheckpoint(model_file,
                        monitor=monitor,
                        verbose=0,
                        save_best_only=True,
                        save_weights_only=True,
                        mode='auto',
                        period=1)
    model.fit([X_train, kX_train],
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=([X_dev, kX_dev], y_dev),
              callbacks=[e],
              verbose=1)
    # Restore the checkpointed weights before predicting.
    model.load_weights(model_file)

    y_pred_test = model.predict([X_test, kX_test], verbose=False)
    y_pred_dev = model.predict([X_dev, kX_dev], verbose=False)

    # Convert class probabilities to class indices: (test, dev).
    return [np.argmax(pred)
            for pred in y_pred_test], [np.argmax(pred) for pred in y_pred_dev]
    def execute(self, kf: datasets.KFoldedDataSet, model: keras.Model,
                ec: ExecutionConfig):
        """Run this training stage for one fold.

        Optionally unfreezes the encoder and recompiles the model, loads
        initial weights, assembles the callback list (metrics CSV logger,
        best-weights checkpoint, result drawing), handles resuming from an
        earlier run and finally delegates to ``kf.trainOnFold``.

        Args:
            kf: Folded dataset whose ``trainOnFold`` performs the training.
            model: Model to train; wrapped with ``multi_gpu_model`` when
                more than one GPU is configured.
            ec: Execution config providing the fold, stage, subsample and
                the metrics/weights paths.

        Returns:
            None.  Returns early, without training, when no epochs remain.
        """
        stage_settings = self.dict
        if 'unfreeze_encoder' in stage_settings and stage_settings['unfreeze_encoder']:
            set_trainable(model)
        if self.loss or self.lr:
            optimizer = self.cfg.createOptimizer(self.lr)
            self.cfg.compile(model, optimizer, self.loss)

        callbacks = [] + self.cfg.callbacks
        if self.initial_weights is not None:
            model.load_weights(self.initial_weights)

        # NOTE(review): both keys REPLACE the callback list built so far;
        # 'extra_callbacks' looks like it was meant to extend it -- confirm.
        for key in ('callbacks', 'extra_callbacks'):
            if key in stage_settings:
                callbacks = configloader.parse("callbacks", stage_settings[key])

        # Epoch to resume from; -1 means there is nothing to resume.
        kepoch = maxEpoch(ec.metricsPath()) if self.cfg.resume else -1
        if kepoch == -1:
            # Fresh run: start a new metrics log from epoch 0.
            kepoch = 0
            callbacks.append(CSVLogger(ec.metricsPath()))
        else:
            self.epochs = self.epochs - kepoch
            if os.path.exists(ec.weightsPath()):
                model.load_weights(ec.weightsPath())
            callbacks.append(
                CSVLogger(ec.metricsPath(), append=True, start=kepoch))

        # Keep only the best weights according to the primary metric; the
        # checkpoint class depends on the number of configured GPUs.
        md = self.cfg.primary_metric_mode
        if self.cfg.gpus > 1:
            checkpoint_cls = alt.AltModelCheckpoint
        else:
            checkpoint_cls = keras.callbacks.ModelCheckpoint
        callbacks.append(
            checkpoint_cls(ec.weightsPath(),
                           save_best_only=True,
                           monitor=self.cfg.primary_metric,
                           mode=md,
                           verbose=1))

        callbacks.append(
            DrawResults(self.cfg,
                        kf,
                        ec.fold,
                        ec.stage,
                        negatives=self.negatives))
        if self.cfg.showDataExamples:
            callbacks.append(
                DrawResults(self.cfg,
                            kf,
                            ec.fold,
                            ec.stage,
                            negatives=self.negatives,
                            train=True))

        # NOTE(review): when resuming, self.epochs was already reduced by
        # kepoch above, so kepoch is subtracted a second time here --
        # confirm this is intended.
        remaining_epochs = self.epochs - kepoch
        if remaining_epochs == 0:
            return

        if self.cfg.gpus > 1:
            model = multi_gpu_model(model, self.cfg.gpus, True, True)
        kf.trainOnFold(ec.fold,
                       model,
                       callbacks,
                       remaining_epochs,
                       self.negatives,
                       subsample=ec.subsample,
                       validation_negatives=self.validation_negatives)
Ejemplo n.º 13
0
from keras.applications import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras import Model

IMG_PATH = ['./chest_xray_images/normal/15268.jpg', '0']
IMG_SHAPE = (320, 320, 3)

# Read one image as grayscale and resize it to the first two entries of
# IMG_SHAPE.
test_img = img_to_array(img=load_img(path=IMG_PATH[0], color_mode='grayscale'),
                        data_format='channels_last')
test_img = cv2.resize(test_img,
                      interpolation=cv2.INTER_NEAREST,
                      dsize=IMG_SHAPE[:2])
# Add a trailing channel axis, truncate to 8-bit values and scale to [0, 1].
test_img = test_img[..., np.newaxis]
test_img = test_img.astype(np.uint8) / 255.
# Repeat the single channel three times to match the 3-channel model input.
test_img = np.concatenate([test_img] * 3, axis=-1)
print('external image(s) shape:', test_img.shape)

# DenseNet121 backbone (no top, no pretrained weights) followed by global
# average pooling and a 14-unit sigmoid output layer.
backbone = DenseNet121(weights=None,
                       include_top=False,
                       input_shape=IMG_SHAPE)
backbone_out = backbone.output
gap = GlobalAveragePooling2D(name='pooling_layer')(backbone_out)
output = Dense(units=14, activation='sigmoid', name='output_layer')(gap)
predictor = Model(inputs=backbone.input, outputs=output)
print(predictor.summary())

# Load the trained weights, then predict on a batch holding the one image.
predictor.load_weights(
    'C:/Users/Arman/Desktop/Covid19-Detection/checkpoints/CheXNet/CheXNet_v0.3.0.h5'
)
print(predictor.predict(test_img[np.newaxis, ...]))
Ejemplo n.º 14
0
class HybridModel(object):
    """Keras classifier fusing two token-sequence branches with a dense feature branch.

    The network takes a char-id sequence, a term-id sequence and a vector of
    numeric features. Each sequence goes through embeddings and parallel
    Conv1D layers with global max and average pooling; the pooled features
    are concatenated with a small dense projection of the numeric features
    and classified by a softmax layer named ``'y'``.

    Attributes:
        name: Default path used for saving and loading weights.
        use_multi_task: Whether the auxiliary-output training model is used.
        model: Compiled single-output model, used for evaluation/prediction.
        train_model: Compiled four-output model sharing the same layers; only
            set when ``use_multi_task`` is true.
    """

    def __init__(self,
                 C=4,
                 V=40000,
                 MAX_LEN=600,
                 MAX_LEN_TERM=300,
                 NUM_FEAT=8,
                 char_embed_matrix=None,
                 term_embed_matrix=None,
                 use_multi_task=False,
                 name='hybridmodel.h5',
                 PE=False):
        """Build and compile the model(s).

        Args:
            C: Number of output classes.
            V: Vocabulary size used when no embedding matrix is supplied.
            MAX_LEN: Length of the char-id input sequence.
            MAX_LEN_TERM: Length of the term-id input sequence.
            NUM_FEAT: Size of the numeric feature vector.
            char_embed_matrix: Optional pretrained embedding matrix for the
                char branch; when given it is used twice, once frozen and
                once trainable, and both embeddings are concatenated.
            term_embed_matrix: Same as above for the term branch.
            use_multi_task: Also build ``train_model`` with one auxiliary
                softmax head per branch (``y1``, ``y2``, ``y3``).
            name: Default weights file path.
            PE: Add two extra int32 inputs (``PE_char_in``, ``PE_term_in``)
                whose 32-dim embeddings are concatenated to each sequence
                (presumably position indices -- TODO confirm with callers).
        """
        # Experiment notes kept from the original author (the scores are
        # presumably validation accuracy -- TODO confirm):
        #   +bn2 0.975, +bn1 0.986
        #   +bn1, max+avg pool 0.987
        #   squeeze embedding (128) 0.985, (64 + conv64) 0.983
        #   removing the sub-network dense layers 0.987,
        #   squeeze embedding + relu 0.985
        #   conv 64 0.987, conv 128 0.988
        self.name = name
        self.use_multi_task = use_multi_task
        kss = [2, 3, 4, 5]

        char_input, echar_input, hs = self._text_branch(
            MAX_LEN, V, char_embed_matrix, kss, PE, 'PE_char_in', 'PEchar')
        if self.use_multi_task:
            # Auxiliary head on the char branch alone.
            y1 = Dense(C, activation='softmax', name='y1')(hs)

        input_term, eterm_input, hsterm = self._text_branch(
            MAX_LEN_TERM, V, term_embed_matrix, kss, PE, 'PE_term_in',
            'PEterm')

        input_feat = Input(shape=(NUM_FEAT, ), dtype='float32')
        hfeat = Dense(8, activation='relu')(input_feat)

        fused = Concatenate()([hs, hsterm, hfeat])
        fused = BatchNormalization()(fused)
        z = Dense(128, activation='relu')(fused)
        z = Dense(C, activation='softmax', name='y')(z)

        model_inputs = [char_input, input_term, input_feat]
        if PE:
            model_inputs += [echar_input, eterm_input]
        model = Model(list(model_inputs), z)
        opt = Adagrad(lr=0.005)
        model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
        self.model = model
        if self.use_multi_task:
            y2 = Dense(C, activation='softmax', name='y2')(hsterm)
            y3 = Dense(C, activation='softmax', name='y3')(hfeat)
            self.train_model = Model(list(model_inputs), [z, y1, y2, y3])
            self.train_model.compile(opt,
                                     'categorical_crossentropy',
                                     metrics=['acc'])

    @staticmethod
    def _text_branch(seq_len, vocab_size, embed_matrix, kernel_sizes, use_pe,
                     pe_input_name, pe_embed_name):
        """Build one sequence branch; return (token_input, pe_input_or_None, features)."""
        tokens = Input(shape=(seq_len, ), dtype='int32')
        # mask_zero=True is not used: the CNN layers do not support masking.
        if embed_matrix is None:
            x = Embedding(vocab_size, 32)(tokens)
        else:
            frozen = Embedding(embed_matrix.shape[0],
                               embed_matrix.shape[1],
                               weights=[embed_matrix],
                               trainable=False)
            tuned = Embedding(embed_matrix.shape[0],
                              embed_matrix.shape[1],
                              weights=[embed_matrix],
                              trainable=True)
            x_frozen = frozen(tokens)
            x_tuned = tuned(tokens)
            x = Concatenate()([x_frozen, x_tuned])
        pe_input = None
        if use_pe:
            pe_input = Input(shape=(seq_len, ),
                             dtype='int32',
                             name=pe_input_name)
            pe_embedded = Embedding(seq_len, 32, name=pe_embed_name)(pe_input)
            x = Concatenate()([x, pe_embedded])
        pooled = []
        for ks in kernel_sizes:
            h = Conv1D(128, ks, activation='relu', padding='same')(x)
            pooled.append(GlobalMaxPool1D()(h))
            pooled.append(GlobalAveragePooling1D()(h))
        return tokens, pe_input, Concatenate()(pooled)

    def load_weights(self, name=None):
        """Load weights from ``name`` (default: ``self.name``).

        Weights are loaded into ``train_model`` when multi-task training is
        enabled, otherwise into ``model``.
        """
        save_path = self.name if name is None else name
        target = self.train_model if self.use_multi_task else self.model
        target.load_weights(save_path)

    def train(self, x, y, x_val, y_val, x_ts, y_ts):
        """Fit the model, then evaluate it on the test split.

        Training runs for up to 20 epochs with early stopping and a
        best-only checkpoint written to ``self.name``. The test split is
        evaluated twice: with the weights left by the last epoch, and again
        after reloading the checkpoint. A confusion matrix is printed last.
        """
        early_stop = EarlyStopping(min_delta=0.01, patience=2)
        save_best = ModelCheckpoint(self.name, save_best_only=True)
        callbacks = [early_stop, save_best]
        if self.use_multi_task:
            # Every head is trained against the same labels.
            # validation_data is a tuple, as documented by Keras.
            self.train_model.fit(
                x, [y, y, y, y],
                validation_data=(x_val, [y_val, y_val, y_val, y_val]),
                batch_size=128,
                epochs=20,
                callbacks=callbacks)
        else:
            self.model.fit(x,
                           y,
                           validation_data=(x_val, y_val),
                           batch_size=128,
                           epochs=20,
                           callbacks=callbacks)

        metric = self.model.evaluate(x_ts, y_ts)
        print(metric)
        self.load_weights()
        metric = self.model.evaluate(x_ts, y_ts, batch_size=512)
        print(metric)
        y_pred = self.model.predict(x_ts, batch_size=512)

        cnf_matrix = confusion_matrix(convert_y(y_ts), convert_y(y_pred))
        print(cnf_matrix)

    def test(self, x, ids, out_file):
        """Predict labels for ``x`` and write ``id,label`` lines to ``out_file``."""
        labels = ['人类作者', '自动摘要', '机器作者', '机器翻译']
        y_pred = self.model.predict(x, batch_size=512)
        y_pred = convert_y(y_pred)
        with open(out_file, 'w', encoding='utf-8') as fout:
            for sample_id, yi in zip(ids, y_pred):
                fout.write('{},{}\n'.format(sample_id, labels[yi]))
        print('done.')

    def predict(self, x):
        """Return the raw output of ``self.model.predict`` (batch size 512)."""
        return self.model.predict(x, batch_size=512)

    def error_analysis(self, x_ts, y_ts, texts, start_index):
        """Write every misclassified sample to ``error.txt``.

        For each mismatch the text ``texts[start_index + i]`` is written
        together with the expected and the predicted label.
        """
        labels = ['人类作者', '自动摘要', '机器作者', '机器翻译']
        y_pred = self.model.predict(x_ts, batch_size=512)
        y_ts, y_pred = convert_y(y_ts), convert_y(y_pred)
        # Explicit utf-8: the report contains non-ASCII text and must not
        # depend on the platform default encoding (matches test()).
        with open('error.txt', 'w', encoding='utf-8') as fout:
            for i in range(y_ts.shape[0]):
                if y_ts[i] != y_pred[i]:
                    fout.write('*****\n{}\n正确标签:{}   分类标签:{}\n'.format(
                        texts[start_index + i], labels[y_ts[i]],
                        labels[y_pred[i]]))
        print('output error done.')
Ejemplo n.º 15
0
def vgg_16_cbcnn(input_shape,
                 no_classes,
                 bilinear_output_dim,
                 sum_pool=True,
                 weight_decay_constant=5e-4,
                 multi_label=False,
                 weights_path=None):
    """Build a VGG-16 convolutional stack followed by a compact bilinear merge.

    Args:
        input_shape: Shape passed to the ``spectr_input`` input layer.
        no_classes: Number of units in the final dense layer.
        bilinear_output_dim: Channel dimension declared for the merged output.
        sum_pool: When true, sum the merged tensor over axes 1 and 2.
        weight_decay_constant: L2 factor applied to every conv/dense kernel.
        multi_label: Use a sigmoid output instead of softmax.
        weights_path: Optional weights file, loaded by layer name.

    Returns:
        The assembled (uncompiled) model.
    """
    l2_reg = regularizers.l2(weight_decay_constant)

    img_input = Input(shape=input_shape, name='spectr_input')

    # (filters, number of 3x3 convolutions) for blocks 1..5.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    final_block = len(block_specs)

    x = img_input
    for block_no, (filters, n_convs) in enumerate(block_specs, start=1):
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3),
                       activation='relu',
                       padding='same',
                       name='block{}_conv{}'.format(block_no, conv_no),
                       kernel_regularizer=l2_reg)(x)
        # The last block is not pooled; its feature map feeds the merge.
        if block_no != final_block:
            x = MaxPooling2D((2, 2),
                             strides=(2, 2),
                             name='block{}_pool'.format(block_no))(x)

    # Compact bilinear merge of the feature map with itself. The declared
    # output keeps the two leading non-batch dimensions and swaps the last
    # one for bilinear_output_dim.
    feature_dims = x.get_shape().as_list()[1:]
    merged_shape = (feature_dims[0], feature_dims[1], bilinear_output_dim)
    x = merge([x, x],
              mode=compact_bilinear,
              name='compact_bilinear',
              output_shape=merged_shape)

    if sum_pool:
        # Global sum pooling over axes 1 and 2 (channels-last layout).
        x = Lambda(lambda t: K.sum(t, axis=[1, 2]))(x)

    # Signed square root followed by L2 normalisation along the last axis.
    x = Lambda(lambda t: K.sign(t) * K.sqrt(K.abs(t)))(x)
    x = Lambda(lambda t: K.l2_normalize(t, axis=-1))(x)

    # The layer keeps the name 'softmax_layer' for both activations.
    final_activation = 'sigmoid' if multi_label else 'softmax'
    x = Dense(no_classes,
              activation=final_activation,
              name='softmax_layer',
              kernel_regularizer=l2_reg)(x)

    model = Model(inputs=[img_input], outputs=[x])
    if weights_path:
        model.load_weights(weights_path, by_name=True)
    return model
def get_model(pre_weight, input_size):
    """Build and compile a VGG-16 style classifier with a 1000-way softmax.

    Args:
        pre_weight: Path to a weights file; loaded only if the path exists.
        input_size: Shape passed to the input layer.

    Returns:
        The model compiled with Adam and categorical cross-entropy.
    """
    inputs = Input(input_size)

    # Five conv stages, each followed by 2x2 max pooling:
    # (filters, number of 3x3 convolutions).
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    x = inputs
    for filters, n_convs in stages:
        for _ in range(n_convs):
            x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
        x = MaxPool2D((2, 2), strides=(2, 2))(x)

    # Classifier head: flatten, two 4096-unit layers, softmax output.
    x = Flatten()(x)
    for units in (4096, 4096):
        x = Dense(units, activation='relu')(x)
    output = Dense(1000, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=output)
    model.summary()

    if os.path.exists(pre_weight):
        print('exist')
        model.load_weights(pre_weight)

    model.compile(optimizer=Adam(lr=1e-4, decay=0.5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
Ejemplo n.º 17
0
# x = BatchNormalization(axis=-1)(x)
# x = Dense(120,activation='relu')(x)
# x = BatchNormalization(axis=-1)(x)
# x = Dense(60,activation='relu')(x)
# x = BatchNormalization(axis=-1)(x)
# x = Dense(30,activation='relu')(x)
# x = BatchNormalization(axis=-1)(x)
# x = Dense(10,activation='relu')(x)
# x = BatchNormalization(axis=-1)(x)
# Two-unit sigmoid head on top of the tensor `x` built earlier in the script.
x = Dense(2, activation='sigmoid')(x)
final_model = Model(input=input, output=x)
final_model.compile(optimizer='adam',
                    loss='mean_squared_logarithmic_error',
                    metrics=['accuracy'])
final_model.summary()
final_model.load_weights('keras_models/weights.best.Inceptionv3.hdf5')

# Predict one sample at a time: `prd` keeps both output units per sample,
# `predictions` keeps only the unit at index 1.
prd = [
    final_model.predict(np.expand_dims(tensor, axis=0))[0]
    for tensor in test_tensors
]
predictions = [sample_output[1] for sample_output in prd]

print(log_loss(test_targets, np.array(prd)))

# One CSV row per test file: file name up to its first dot, then the score.
with open('kera_data/upload_data/result.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'label'])
    for i, test_file in enumerate(test_files):
        writer.writerow(
            [os.path.basename(test_file).split('.')[0], predictions[i]])
Ejemplo n.º 18
0
# Three-input model: the main input plus the two position inputs.
model = Model([input, input_pos_x, input_pos_y], x)
model.compile(loss=loss, optimizer=OPTIMIZER(lr=learning_rate, decay=0.1))
model.summary()

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# NOTE(review): lr_reducer is created but not added to `callbacks` below;
# only the exponential decay schedule is active. Confirm this is intended.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
train_logger = CSVLogger(log_save_path)

# Decay the learning rate by 0.94 every 2.4 epochs' worth of batches.
num_epochs_per_decay = 2.4
step_decay = len(train_gen) * num_epochs_per_decay
lr_exp = LearningRateExponentialDecay(0.94, step_decay)
callbacks = [checkpoint, lr_exp, train_logger]

# Best-effort resume: a missing or incompatible checkpoint is reported and
# training starts from the freshly initialised weights.
try:
    model.load_weights(model_save_path)
except Exception as e:
    print(e)
else:
    print('Loading pretrain_weights!')

# Each epoch covers one eighth of the training generator. Keras expects an
# integer step count, so the fraction is rounded up explicitly rather than
# passed as a float.
steps_per_epoch = int(np.ceil(len(train_gen) / 8))

print('Using real-time data augmentation.')
model.fit_generator(train_gen,
                    validation_data=val_gen,
                    validation_steps=len(val_gen),
                    epochs=epochs, workers=1,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=callbacks)


Ejemplo n.º 19
0
def VGG19(input_shape, include_top=True,
          weights='imagenet',
          pooling=None,
          classes=1000,
          final_activation='sigmoid',
          **kwargs):
    """Build a VGG19 network from the project's initialising layer wrappers.

    Args:
        input_shape: Shape passed to the input layer.
        include_top: Append the flatten / fc1 / fc2 / predictions head.
        weights: ``'imagenet'`` downloads and loads the matching ImageNet
            weights file; any other value loads nothing.
        pooling: With ``include_top=False``, ``'avg'`` or ``'max'`` adds the
            corresponding global pooling layer.
        classes: Number of units in the predictions layer.
        final_activation: Activation of the predictions layer.
        **kwargs: Accepted but not used by this function.

    Returns:
        The model, named ``'vgg19'``.
    """
    img_input = Input(input_shape)

    # (filters, number of 3x3 convolutions) for blocks 1..5.
    block_specs = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))
    x = img_input
    for block_no, (filters, n_convs) in enumerate(block_specs, start=1):
        for conv_no in range(1, n_convs + 1):
            conv_kwargs = dict(activation='relu',
                               padding='same',
                               name='block%d_conv%d' % (block_no, conv_no))
            # Only the very first convolution sets its bias initializer.
            if block_no == 1 and conv_no == 1:
                conv_kwargs['bias_initializer'] = 'zero'
            x = Conv2D_Initialize(filters, (3, 3), **conv_kwargs)(x)
        x = layers.MaxPooling2D((2, 2),
                                strides=(2, 2),
                                name='block%d_pool' % block_no)(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        for fc_name in ('fc1', 'fc2'):
            x = Dense_Initialize(4096, activation='relu', name=fc_name)(x)
        x = Dense_Initialize(classes,
                             activation=final_activation,
                             name='predictions')(x)
    elif pooling == 'avg':
        x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = layers.GlobalMaxPooling2D()(x)

    # Fetch the weights file before the model is assembled.
    weights_path = None
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='cbe5617147190e668d6c5d5026f83318')
        else:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='253f8cb515780f3b799900260a226db6')

    model = Model(img_input, x, name='vgg19')
    # weights_path is only set when weights == 'imagenet'. Layers whose
    # shapes do not match the file are skipped rather than raising.
    if weights_path:
        model.load_weights(weights_path, by_name=True, skip_mismatch=True)
    return model
Ejemplo n.º 20
0
# Project the decoder outputs through the dense output layer.
output = dec_dense(dec_outputs)

# Training model: encoder and decoder token inputs in, dense output out.
model = Model(inputs=[enc_inputs, dec_inputs], outputs=output)
model.compile(loss='categorical_crossentropy', optimizer=RMSprop())
model.summary()

# Training is skipped here; weights from a previous run are restored instead.
# model.fit([encoder_input_data, decoder_input_data], decoder_output_data)
path_to_weight = "chatbot_seq2seq_v3.h5"
model.load_weights(path_to_weight)

# set up our evaluation step:
def make_inference_models():
    """Build and print the decoder model used for step-wise inference.

    The decoder takes the decoder token input plus explicit initial states
    (two 200-dim inputs) and outputs the dense projection together with the
    updated states. As written here, the function only prints the model
    summary and returns nothing.
    """
    # Externally supplied initial states, in the order (h, c).
    state_inputs = [Input(shape=(200,)), Input(shape=(200,))]
    lstm_out, next_h, next_c = dec_lstm(dec_embedding,
                                        initial_state=state_inputs)
    projected = dec_dense(lstm_out)
    dec_model = Model(inputs=[dec_inputs] + state_inputs,
                      outputs=[projected] + [next_h, next_c])
    print('Inference decoder:')
    dec_model.summary()
Ejemplo n.º 21
0
def main(args):
    """Train a speaker classifier or score enrolled speakers.

    When ``args.mode_type`` starts with ``'train'``, an SE-ResNet with a
    softmax head is trained and the best checkpoint (by ``val_loss``) is
    written to ``<MODEL_DIR>/aishell/best.h5``. Otherwise two pretrained
    models are loaded, their distance matrices are blended with weights
    0.3 / 0.7, and the identification score is printed.
    """
    typeName = args.mode_type
    if typeName.startswith('train'):
        # Checkpoints are written to <MODEL_DIR>/aishell, so create the whole
        # path up front instead of only MODEL_DIR.
        os.makedirs(f'{c.MODEL_DIR}/aishell', exist_ok=True)
        train_dataset, val_dataset = CreateDataset(args, split_ratio=0.1)
        nclass = len(set(train_dataset[1]))
        print("nclass = ",nclass)
        labels_to_id = Map_label_to_dict(labels=train_dataset[1])
        # load the model
        model = models.SE_ResNet(c.INPUT_SHPE)
        # model = models.Deep_speaker_model(c.INPUT_SHPE)
        # add softmax layer
        x = model.output
        x = Dense(nclass, activation='softmax', name='softmax')(x)
        model = Model(model.input, x)

        # Load pretrained weights: candidates are the .h5 files directly
        # under TRAIN_DEV_SET.
        filenames = list(glob.iglob(c.TRAIN_DEV_SET + "/*.h5"))
        if filenames:
            # Take the number after the first '-' and the following '_' in
            # the file name (for names like 'acc_0.707-eer_0.292' that is the
            # EER value). It is parsed from the base name and compared as a
            # float, so directory names and string ordering cannot affect
            # the choice.
            scores = [
                float(os.path.splitext(os.path.basename(f))[0]
                      .split("-")[1].split("_")[1])
                for f in filenames
            ]
            optimal_model_index = scores.index(min(scores))
            # NOTE(review): `filenames` holds paths under TRAIN_DEV_SET, yet
            # the weights are loaded relative to <MODEL_DIR>/aishell --
            # confirm which directory the checkpoints really live in.
            model.load_weights(f'{c.MODEL_DIR}/aishell/{filenames[optimal_model_index]}')

        # train model
        sgd = optimizers.SGD(lr=c.LEARN_RATE,momentum=0.9)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,
                        metrics=['accuracy'])
        model.fit_generator(Batch_generator(train_dataset, labels_to_id, c.BATCH_SIZE, nclass),
                            steps_per_epoch=len(train_dataset[0])//c.BATCH_SIZE, epochs=30,
                            validation_data=load_validation_data(
                                val_dataset, labels_to_id, nclass),
                            validation_steps=len(val_dataset[0])//c.BATCH_SIZE,
                            callbacks=[
            ModelCheckpoint(f'{c.MODEL_DIR}/aishell/best.h5',
                            monitor='val_loss', save_best_only=True, mode='min'),
            ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=10,mode='min'),
            EarlyStopping(monitor='val_loss', patience=10),
        ])

    else:
        test_dataset, enroll_dataset = CreateDataset(args,split_ratio=0,target=c.TARGET)
        # load weights (by_name takes a boolean, not the string 'True')
        model_se = models.SE_ResNet(c.INPUT_SHPE)
        model_se.load_weights(f'{c.MODEL_DIR}/aishell/seresnet/acc_0.707-eer_0.292.h5', by_name=True)

        model_dp = models.Deep_speaker_model(c.INPUT_SHPE)
        model_dp.load_weights(f'{c.MODEL_DIR}/aishell/deepspeaker/acc_0.685-eer_0.313.h5', by_name=True)
        # load all data
        print("loading data...")
        (enroll_x, enroll_y) = load_all_data(enroll_dataset, 'enroll')
        (test_x, test_y) = load_all_data(test_dataset, 'test')

        def distance_of_model(model):
            """Return the enroll-vs-test distances computed from `model`'s outputs."""
            enroll_pre = np.squeeze(model.predict(enroll_x))
            test_pre = np.squeeze(model.predict(test_x))
            distances = caculate_distance(enroll_dataset, enroll_pre, test_pre)
            return distances

        distances_dp = distance_of_model(model_dp)
        distances_se = distance_of_model(model_se)
        # Blend the two normalised distance matrices, weighting SE-ResNet higher.
        distances = 0.3*normalization_frames(distances_dp) + 0.7*normalization_frames(distances_se)

        # speaker identification
        test_y_pre = speaker_identification(enroll_dataset, distances, enroll_y)
        # compute result
        result = compute_result(test_y_pre, test_y)
        score = sum(result)/len(result)
        print(f"score={score}")
Ejemplo n.º 22
0
# Fit each generator on its own split (presumably Keras ImageDataGenerator
# instances -- confirm where train_gen / valid_gen are created).
train_gen.fit(x_train)
valid_gen.fit(x_valid)

filename = "cancer_classification.h5"
# Keep only the best full model (not just weights) according to val_acc.
checkpoint = ModelCheckpoint(filename,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode='auto',
                             period=1)
# early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

# validation_steps=validation_size//batch_size

# Train on augmented batches produced by the generators.
# NOTE(review): the call mixes legacy keywords (samples_per_epoch,
# nb_val_samples) with steps_per_epoch; which one wins depends on the Keras
# version, so the arguments are passed through unchanged.
model_final.fit_generator(
    train_gen.flow(x_train, y_train, batch_size=32),
    samples_per_epoch=nb_train_samples,
    epochs=epochs,
    validation_data=valid_gen.flow(x_valid, y_valid),
    nb_val_samples=nb_validation_samples,
    callbacks=[checkpoint],
    steps_per_epoch=len(x_train) / 32)

# Restore the checkpointed weights before predicting.
model_final.load_weights(filename)

# One predict() call per element of x_test, stacked into a single array.
predictions = np.asarray([model_final.predict(feature) for feature in x_test])
print(predictions.shape)

print(predictions[0])
def train_model(data, topic, PROCESSED_DIR, SEED_FOLDER, **kwargs):
    """Train the topic-gated BiLSTM classifier and predict on dev and test.

    Args:
        data: Dict with ``X_train``/``X_dev``/``X_test`` and
            ``y_train``/``y_dev``/``y_test``. The keys ``X_topic_train``,
            ``X_topic_dev`` and ``X_topic_test`` are added to it in place.
        topic: Topic name; split on ``'_'`` to look up its word embeddings.
        PROCESSED_DIR: Path prefix of the vocabulary pickle and the
            embedding matrix.
        SEED_FOLDER: Path prefix of the checkpoint file.
        **kwargs: Must contain ``model_settings``, a dict with the keys read
            below (dropout, lstm_size, monitor, batch_size, epochs,
            learning_rate, train_embeddings, model_file_suffix, current_seed,
            word_embeddings).

    Returns:
        ``(test_predictions, val_predictions)`` as lists of argmax class
        indices, computed from the best checkpoint according to ``monitor``.
    """
    settings = kwargs['model_settings']
    dropout = settings["dropout"]
    lstm_size = settings["lstm_size"]
    monitor = settings["monitor"]
    batch_size = settings["batch_size"]
    epochs = settings["epochs"]
    learning_rate = settings["learning_rate"]
    train_embeddings = settings["train_embeddings"]
    # Both switches are fixed here, so the function always returns argmax
    # predictions.
    return_probs = False
    return_model = False
    model_file = SEED_FOLDER+topic+"_"+settings["model_file_suffix"]
    seed = settings['current_seed']

    # set configs for memory usage and reproducibility: https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    rn.seed(seed)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # NOTE(review): the TensorFlow graph seed is the constant 1, not `seed`.
    graph_level_seed = 1
    tf.set_random_seed(graph_level_seed)
    sess = tf.Session(config=config)
    K.set_session(sess)

    # load the word-embedding vocabulary
    vocab_we = load_from_pickle(PROCESSED_DIR+"vocab_we.pkl")

    # load word embeddings
    embeddings_lookup = np.load(PROCESSED_DIR + "index_to_vec_we"+settings['word_embeddings'][1]+".npy")

    # load data
    X_train, X_dev, X_test = data["X_train"], data["X_dev"], data["X_test"]
    y_train, y_dev, y_test = data["y_train"], data["y_dev"], data["y_test"]

    # The topic vector is the same for every sample and every split, so it is
    # computed once and repeated (the original recomputed it for each split).
    topic_vector = get_avg_embedding(topic.split('_'), embeddings_lookup, vocab_we)
    data['X_topic_train'] = [topic_vector] * len(data['X_train'])
    data['X_topic_dev'] = [topic_vector] * len(data['X_dev'])
    data['X_topic_test'] = [topic_vector] * len(data['X_test'])

    X_topic_train, X_topic_dev, X_topic_test = data["X_topic_train"], data["X_topic_dev"], data["X_topic_test"]

    # some constants
    sent_len = X_train.shape[1]
    num_labels = y_train.shape[1]

    sentence_input = Input(shape=(sent_len,), dtype='int32', name="text_input")
    gate_vector_input = Input(shape=(300,), dtype='float32', name="gate_vectors_each_sentence")
    embedded_layer = Embedding(embeddings_lookup.shape[0], embeddings_lookup.shape[1], mask_zero=True,
                               trainable=train_embeddings, input_length=sent_len,
                               weights=[embeddings_lookup])(sentence_input)

    # The custom LSTM receives the embedded sentence and the topic vector.
    bilstm_layer = Bidirectional(custom_LSTM_fo(lstm_size))([embedded_layer, gate_vector_input])

    dropout_layer = Dropout(dropout)(bilstm_layer)
    output_layer = Dense(num_labels, activation='softmax')(dropout_layer)
    # `outputs=` matches the `inputs=` keyword; `output=` is the deprecated name.
    model = Model(inputs=[sentence_input, gate_vector_input], outputs=output_layer)

    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

    # Keep only the best weights according to `monitor`, then reload them so
    # predictions come from the best epoch rather than the last one.
    #e = EarlyStopping(monitor=monitor, mode='auto')
    e = ModelCheckpoint(model_file, monitor=monitor, verbose=0, save_best_only=True, save_weights_only=True,
                        mode='auto', period=1)
    model.fit([X_train, X_topic_train], y_train, batch_size=batch_size, epochs=epochs,
              validation_data=([X_dev, X_topic_dev], y_dev), callbacks=[e], verbose=1)
    model.load_weights(model_file)

    if return_model:
        return model

    test_predictions = model.predict([X_test, X_topic_test], verbose=False)
    val_predictions = model.predict([X_dev, X_topic_dev], verbose=False)
    if not return_probs:
        test_predictions = [np.argmax(pred) for pred in test_predictions]
        val_predictions = [np.argmax(pred) for pred in val_predictions]
    return test_predictions, val_predictions
Ejemplo n.º 24
0
def main():
    """Train the combined MLP + CNN join estimator and print its test errors.

    The network has two inputs: the leading numeric columns of the join
    table (MLP branch) and the ``ds_bops_histogram`` arrays (CNN branch).
    It is fitted on 80% of the small-dataset joins. The R2 score and the
    mean / standard deviation of the absolute relative error are then
    printed for the held-out small-dataset joins and for all large-dataset
    joins.
    """
    print('Training the join cardinality estimator')
    is_train = True
    num_rows, num_columns = 16, 16
    # Column to predict ('join_selectivity' was the commented-out alternative).
    target = 'mbr_tests_selectivity'

    def load_joins(features_csv, join_results_csv, histograms_dir):
        # Load the dataset descriptors first, then the joins and histograms.
        descriptors = datasets.load_datasets_feature(features_csv)
        return datasets.load_join_data(descriptors, join_results_csv,
                                       histograms_dir, num_rows, num_columns)

    def report(estimator, inputs, expected):
        # Print R2 and the mean/std of |prediction - truth| / truth.
        predicted = estimator.predict(inputs)
        print('r2 score: {}'.format(r2_score(expected, predicted)))
        relative_error = np.abs((predicted.flatten() - expected) / expected)
        print('mean = {}, std = {}'.format(np.mean(relative_error),
                                           np.std(relative_error)))

    (join_data, ds1_histograms, ds2_histograms, ds_all_histogram,
     ds_bops_histogram) = load_joins(
         'data/spatial_descriptors/spatial_descriptors_small_datasets.csv',
         'data/join_results/join_results_small_datasets_no_bit.csv',
         'data/histograms/small_datasets')

    # One 80/20 split applied consistently to the table and every histogram
    # set; only the attribute table and the "bops" histograms are used below.
    (train_attributes, test_attributes,
     _ds1_train, _ds1_test,
     _ds2_train, _ds2_test,
     _all_train, _all_test,
     bops_train, bops_test) = train_test_split(
         join_data,
         ds1_histograms,
         ds2_histograms,
         ds_all_histogram,
         ds_bops_histogram,
         test_size=0.20,
         random_state=42)

    # The feature columns are the integer-labelled ones at the front of the
    # table; the last 10 columns are excluded from the model input.
    num_features = len(train_attributes.columns) - 10
    feature_columns = list(range(num_features))
    X_train = train_attributes[feature_columns].to_numpy()
    X_test = test_attributes[feature_columns].to_numpy()
    y_train = train_attributes[target]
    y_test = test_attributes[target]

    # Two-branch network: MLP on the attributes, CNN on one histogram.
    mlp = models.create_mlp(X_train.shape[1], regress=False)
    cnn1 = models.create_cnn(num_rows, num_columns, 1, regress=False)
    merged = concatenate([mlp.output, cnn1.output])
    head = Dense(4, activation="relu")(merged)
    head = Dense(1, activation="linear")(head)
    model = Model(inputs=[mlp.input, cnn1.input], outputs=head)

    EPOCHS = 40
    LR = 1e-2
    opt = Adam(lr=LR, decay=LR / EPOCHS)
    model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

    if is_train:
        print("[INFO] training model...")
        model.fit([X_train, bops_train],
                  y_train,
                  validation_data=([X_test, bops_test], y_test),
                  epochs=EPOCHS,
                  batch_size=256)

        # Persist both the full model and the bare weights.
        model.save('trained_models/model.h5')
        model.save_weights('trained_models/model_weights.h5')
    else:
        model = keras.models.load_model('trained_models/model.h5')
        model.load_weights('trained_models/model_weights.h5')

    print('Test on small datasets')
    report(model, [X_test, bops_test], y_test)

    print('Test on large datasets')
    join_data, _, _, _, ds_bops_histogram = load_joins(
        'data/spatial_descriptors/spatial_descriptors_large_datasets.csv',
        'data/join_results/join_results_large_datasets_no_bit.csv',
        'data/histograms/large_datasets')

    X_test = join_data[feature_columns].to_numpy()
    y_test = join_data[target]
    report(model, [X_test, ds_bops_histogram], y_test)
Ejemplo n.º 25
0
class NoteTaggerLSTMTrain(NoteTaggerModelTrain):
    def __init__(self,
                 lstm_config_path,
                 data,
                 text_column_name,
                 outcome_column_name,
                 window_size,
                 model_save_path,
                 model_name='lstm',
                 word_tags=constants.TAGS,
                 stride_length=None,
                 grid_search=False):
        """
        Implements the NoteTaggerModelTrain class for a bidirectional LSTM model. Most
        Arguments and Keyword Arguments are inherited from the parent class

        Arguments:
            lstm_config_path (str): path to json file with the LSTM configuration
                parameters. This class reads the keys `embedding_path`, `word_to_index`,
                `model` (`lstm_dropout`, `lstm_layers`), `compile`, `callbacks`
                (`early_stopping`, `checkpoints`) and `training` from it.
        """

        super().__init__(model_name=model_name,
                         data=data,
                         text_column_name=text_column_name,
                         outcome_column_name=outcome_column_name,
                         window_size=window_size,
                         model_save_path=model_save_path,
                         word_tags=word_tags,
                         stride_length=stride_length,
                         grid_search=grid_search)

        # load configuration file
        with open(lstm_config_path, 'r') as f:
            self._config["model_params"] = json.load(f)

        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point `embedding_path` at files from a trusted source.
        with open(self._config['model_params']['embedding_path'],
                  'rb') as embedding_file:
            self._embedding_layer = pickle.load(embedding_file)

        with open(self._config['model_params']['word_to_index'],
                  'r') as word_to_index_file:
            self._word_to_index = json.load(word_to_index_file)

        # build and compile the LSTM network
        self._create_model()

    def _create_model(self):
        """
        Builds and compiles the network and stores it in `self._model`.

        Layout: integer input of length `window_size * 2` -> unpickled embedding layer ->
        dropout -> one bidirectional LSTM per entry of `lstm_layers` -> single unit with
        sigmoid activation.
        """
        model_params = self._config['model_params']['model']
        lstm_sizes = model_params['lstm_layers']

        input_layer = layers.Input(
            shape=(self._config["notetagger_params"]['window_size'] * 2, ),
            name='input_layer')
        model_layer = self._embedding_layer(input_layer)
        model_layer = layers.Dropout(model_params['lstm_dropout'])(model_layer)
        for i, lstm_layer in enumerate(lstm_sizes):
            # every LSTM but the last must emit the full sequence so the
            # next LSTM in the stack has a sequence to consume
            return_sequences = i < len(lstm_sizes) - 1
            model_layer = layers.Bidirectional(
                layers.LSTM(lstm_layer,
                            return_sequences=return_sequences,
                            name='lstm_layer_{}'.format(i)))(model_layer)
        dense_layer = layers.Dense(1, name='dense_layer')(model_layer)
        output_layer = layers.Activation('sigmoid',
                                         name='activation_layer')(dense_layer)
        self._model = Model(input_layer, output_layer)
        self._model.compile(**self._config['model_params']['compile'])
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only appended a stray "None" line.
        self._model.summary()

    def _token_to_index(self, tokenized_data):
        """
        Converts tokenized text into a zero-padded array of vocabulary indices.

        Arguments:
            tokenized_data (Pandas DataFrame): data with a `tokenized_text` column holding
                lists of tokens

        Returns:
            X (array): one row per record; tokens missing from the vocabulary map to the
                index of 'unk', and rows are right-padded with 0 up to `window_size * 2`.
                Rows longer than that are not truncated.
        """
        unk_token = self._word_to_index['unk']
        indexed_data = tokenized_data['tokenized_text'].map(
            lambda tokens:
            [self._word_to_index.get(token, unk_token) for token in tokens])
        max_size = self._config["notetagger_params"]['window_size'] * 2
        padded_data = indexed_data.map(lambda tokens: tokens + [0] *
                                       (max_size - len(tokens)))
        X = np.array(padded_data.tolist())
        return X

    def _process_text(self, raw_data):
        """
        Takes in a dataframe with raw note text and outcomes, tokenizes the text with the
        parent class tokenizer and converts the tokens to padded vocabulary indices

        Arguments:
            raw_data (Pandas DataFrame): data with a raw text column and outcome column

        Returns:
            X_train (array): Array with training features
            y_train (array): Array with training outcomes
        """
        tokenized_data = self._tokenize_text(raw_data=raw_data)
        X_train = self._token_to_index(tokenized_data=tokenized_data)
        y_train = self._get_outcome_value(data=tokenized_data)
        return X_train, y_train

    @staticmethod
    def _checkpoint_epoch(file_name):
        """
        Returns the epoch number encoded at the start of a checkpoint file name
        ('{epoch}-{val_loss}.hdf5'), or -1 when the name does not start with a number.
        """
        prefix = file_name.split('-', 1)[0]
        return int(prefix) if prefix.isdigit() else -1

    def _create_saved_model(self):
        """
        Creates and saves a `NoteTaggerTrainedLSTM` class object with the necessary
        components

        Raises:
            IndexError: if the checkpoint directory contains no files
        """
        print("Saving Model")

        # initialize trained LSTM class
        self._trained_model = NoteTaggerTrainedLSTM(
            window_size=self._config["notetagger_params"]['window_size'],
            word_tags=self._config["notetagger_params"]['word_tags'],
            stride_length=self._config["notetagger_params"]['stride_length'],
            model_config=self._config['model_params'])

        # set word_to_index
        self._trained_model._word_to_index = self._word_to_index

        # Load the most recent checkpoint. os.listdir() returns names in
        # arbitrary order, so taking its last element could pick any file;
        # select by the epoch number in the file name instead.
        # NOTE(review): the latest checkpoint is the best one only if the
        # configured ModelCheckpoint uses save_best_only - confirm in config.
        checkpoint_files = os.listdir(self._checkpoints_save_dir)
        if not checkpoint_files:
            raise IndexError('no checkpoint files found in {}'.format(
                self._checkpoints_save_dir))
        latest_checkpoint = max(
            checkpoint_files,
            key=lambda name: (self._checkpoint_epoch(name), name))
        best_model_weights = os.path.join(self._checkpoints_save_dir,
                                          latest_checkpoint)
        self._model.load_weights(best_model_weights)
        self._trained_model._model = self._model

        # save model to pickle file
        with open(self._model_save_file, 'wb') as outfile:
            pickle.dump(self._trained_model, outfile)

    def train_model(self, validation_data=None, store_result=True):
        """
        Trains the model through the parent class, with early stopping and per-epoch
        checkpoints written to a temporary directory that is removed afterwards

        Keyword Arguments:
            validation_data: forwarded unchanged to the parent `train_model`
            store_result (bool): forwarded unchanged to the parent `train_model`
        """

        with tempfile.TemporaryDirectory() as temp_dir:

            model_callbacks = [
                EarlyStopping(**self._config['model_params']['callbacks']
                              ['early_stopping']),
                ModelCheckpoint(
                    filepath=os.path.join(temp_dir,
                                          '{epoch:02d}-{val_loss:.4f}.hdf5'),
                    **self._config['model_params']['callbacks']['checkpoints'])
            ]

            # remembered so _create_saved_model can find the checkpoints
            # while the temporary directory still exists
            self._checkpoints_save_dir = temp_dir

            super().train_model(validation_data=validation_data,
                                store_result=store_result,
                                callbacks=model_callbacks,
                                **self._config['model_params']['training'])
Ejemplo n.º 26
0
# Batch generators with random flips and rotations of up to 90 degrees.
# The same augmentation is configured for the training and the test data.
trdata = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
)
traindata = trdata.flow(x=X_train, y=y_train)
tsdata = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
)
testdata = tsdata.flow(x=X_test, y=y_test)

from keras.callbacks import ModelCheckpoint, EarlyStopping

# Keep the full model with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(
    "ieeercnn_vgg16_1.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)
# Stop once validation loss has not improved for 100 consecutive epochs.
early = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=100,
    verbose=1,
    mode='auto',
)
hist = model_final.fit_generator(
    generator=traindata,
    steps_per_epoch=100,
    epochs=1000,
    validation_data=testdata,
    validation_steps=2,
    callbacks=[checkpoint, early],
)

# Store the final weights and the complete model in separate files.
model_final.save_weights('model_final_weights.h5')
model_final.save('model_final_architecure.h5')

## read_file
'''
model_final.load_weights('model_final_weights.h5')

img = cv2.imread('images/IMG_6975.jpg')
ss.setBaseImage(img)
ss.switchToSelectiveSearchFast()
ssresults = ss.process()
imout = img.copy()
for e,result in enumerate(ssresults):
    if e < 2000:
        x,y,w,h = result
        timage = imout[y:y+h,x:x+w]
        resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
        img = np.expand_dims(resized, axis=0)
        out= model_final.predict(img)
        
Ejemplo n.º 27
0
def main():
    """Train (or reload) an LSTM encoder-decoder and print sample decodings.

    The data returned by ``get_data()`` is one-hot encoded, a seq2seq model
    is built, and it is either trained for 5 epochs (saving the loss curves
    and the weights) or restored from the weights file if that file already
    exists. Separate encoder/decoder inference models sharing the trained
    layers are then built, and sample 10 is decoded and printed next to its
    reference sequences, followed by predictions for growing decoder
    prefixes.
    """
    num_encoder_tokens = 26
    num_decoder_tokens = 130
    latent_dim = 128
    # Single source of truth for the weights file: the original checked for
    # 's2s_note.h5' but then loaded 's2s_fariz.h5', so a run after training
    # would look for a file that training never wrote.
    weights_path = 's2s_note.h5'

    encoder_input_data, decoder_input_data, decoder_target_data = get_data()
    encoder_input_data = to_categorical(encoder_input_data,
                                        num_classes=num_encoder_tokens)
    decoder_input_data = to_categorical(decoder_input_data,
                                        num_classes=num_decoder_tokens)
    decoder_target_data = to_categorical(decoder_target_data,
                                         num_classes=num_decoder_tokens)

    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the
    # return states in the training model, but we will use them in inference.
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                         initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model that will turn
    # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # Run training
    if not os.path.exists(weights_path):
        optimizer = Adam(clipnorm=1.0)
        # `metrics` must be a list (or dict); a bare string is rejected by
        # several Keras versions.
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['categorical_accuracy'])

        history = model.fit([encoder_input_data, decoder_input_data],
                            decoder_target_data,
                            batch_size=32,
                            epochs=5,
                            validation_split=0.1)

        plt.plot(range(len(history.history['loss'])),
                 history.history['loss'],
                 label='train loss')
        plt.plot(range(len(history.history['val_loss'])),
                 history.history['val_loss'],
                 label='validation loss')
        # The curves carry labels, which are only drawn once a legend exists.
        plt.legend()

        plt.savefig('loss_train_test.png')

        # Save model
        model.save_weights(weights_path)

    else:
        model.load_weights(weights_path)

    # Define sampling models
    encoder_model = Model(encoder_inputs, encoder_states)

    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_state_input_c = Input(shape=(latent_dim, ))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoder_outputs] + decoder_states)

    def decode(input_seq, beam_width=3):
        # Beam search for beam_width > 1, otherwise the plain decoder.
        if beam_width > 1:
            candidate_list, _ = beam_search(input_seq,
                                            encoder_model,
                                            decoder_model,
                                            num_decoder_tokens,
                                            beam_width=beam_width)
        else:
            candidate_list = decode_sequence(input_seq, encoder_model,
                                             decoder_model, num_decoder_tokens)

        return candidate_list

    # Decode one fixed sample and show it next to its reference sequences.
    ind = 10
    input_seq = np.expand_dims(encoder_input_data[ind], axis=0)
    beam_width = 1
    res = decode(input_seq, beam_width=beam_width)
    if beam_width == 1:
        print(res)
    else:
        for r in res:
            print("res: ", r)
            print(len(r))
    print(np.argmax(decoder_input_data, axis=-1)[ind][:40])
    print(np.argmax(decoder_target_data, axis=-1)[ind][:40])

    # Teacher-forced predictions for decoder prefixes of length 1, 3, ..., 13.
    for i in range(1, 15, 2):
        res2 = model.predict([
            np.expand_dims(encoder_input_data[ind], axis=0),
            np.expand_dims(decoder_input_data[ind][:i], axis=0)
        ])
        print("Input: {}".format(
            np.argmax(decoder_input_data[ind][:i], axis=-1)))
        print("Output: {}".format(np.argmax(res2, axis=-1)))
        print()
Ejemplo n.º 28
0
class CharacterTagger:
    """
    A class for character-based neural morphological tagger

    Words are encoded by convolutions over their characters followed by
    highway layers; the resulting word vectors (optionally concatenated with
    projected external word vectors) are passed through bidirectional LSTMs
    and a per-word softmax over the tags.
    """
    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 reverse=False,
                 word_rnn="cnn",
                 char_embeddings_size=16,
                 char_conv_layers=1,
                 char_window_size=5,
                 char_filters=None,
                 char_filter_multiple=25,
                 char_highway_layers=1,
                 conv_dropout=0.0,
                 highway_dropout=0.0,
                 intermediate_dropout=0.0,
                 lstm_dropout=0.0,
                 word_vectorizers=None,
                 word_lstm_layers=1,
                 word_lstm_units=128,
                 word_dropout=0.0,
                 regularizer=None,
                 verbose=1):
        """
        Stores the hyperparameters, normalizes them and builds the model.

        symbols: character vocabulary
        tags: tag vocabulary
        char_window_size: int or list of ints, convolution widths
        char_filters: None, int or list, number of filters per width;
            None means min(char_filter_multiple * width, 200)
        word_vectorizers: list of (input_dim, dense_dim) pairs describing
            additional per-word inputs and the size they are projected to
        word_lstm_units: int or list with one entry per LSTM layer
        regularizer: if not None, coefficient of the l2 activity
            regularizer on the output layer
        verbose: if positive, the model summary is logged after building
        """
        self.symbols = symbols
        self.tags = tags
        self.reverse = reverse
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        self.word_vectorizers = word_vectorizers  # a list of additional vectorizer dimensions
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.regularizer = regularizer
        self.verbose = verbose
        self.initialize()
        log.info("{} symbols, {} tags in CharacterTagger".format(
            self.symbols_number_, self.tags_number_))
        self.build()

    def initialize(self):
        """
        Brings scalar hyperparameters to list form and validates their lengths.

        Raises ValueError when the number of window sizes differs from the
        number of filter counts, or the number of LSTM unit counts differs
        from the number of LSTM layers.
        """
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(
                self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError(
                "There should be the same number of window sizes and filter sizes"
            )
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units
                                    ] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError(
                "There should be the same number of lstm layer units and lstm layers"
            )
        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)

    @property
    def symbols_number_(self):
        """Size of the character vocabulary."""
        return len(self.symbols)

    @property
    def tags_number_(self):
        """Size of the tag vocabulary."""
        return len(self.tags)

    def build(self):
        """
        Builds and compiles the Keras model and stores it in self.model_

        The first model input holds character indexes with
        MAX_WORD_LENGTH + 2 positions per word; one extra float input is
        added for every entry of self.word_vectorizers.
        """
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH + 2),
                               dtype="int32")
        inputs = [word_inputs]
        word_outputs = self.build_word_cnn(word_inputs)
        if len(self.word_vectorizers) > 0:
            additional_word_inputs = [
                kl.Input(shape=(None, input_dim), dtype="float32")
                for input_dim, _ in self.word_vectorizers
            ]
            inputs.extend(additional_word_inputs)
            # project every additional input to its configured dense size
            additional_word_embeddings = [
                kl.Dense(dense_dim)(additional_word_inputs[i])
                for i, (_, dense_dim) in enumerate(self.word_vectorizers)
            ]
            word_outputs = kl.Concatenate()([word_outputs] +
                                            additional_word_embeddings)
        outputs, _ = self.build_basic_network(word_outputs)
        compile_args = {
            "optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
            "loss": "categorical_crossentropy",
            "metrics": ["accuracy"]
        }
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            # summary() returns None (it only prints), so logging its return
            # value produced the text "None". Capture the lines through
            # print_fn and log them as one record instead.
            summary_lines = []
            self.model_.summary(print_fn=summary_lines.append)
            log.info("\n".join(summary_lines))
        return self

    def build_word_cnn(self, inputs):
        """
        Builds the character-level word encoder

        inputs: tensor of character indexes
        -----------------------------------------
        answer: tensor with one vector per word, obtained by one-hot
            encoding the characters, a bias-free dense character embedding,
            convolutions for every window size, max over the character axis
            and highway layers
        """
        # inputs = kl.Input(shape=(MAX_WORD_LENGTH,), dtype="int32")
        inputs = kl.Lambda(kb.one_hot,
                           arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) +
                           (self.symbols_number_, ))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size,
                                   use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size,
                                               self.char_filters):
            curr_output = char_embeddings
            # default filter count grows with the window size, capped at 200
            curr_filters_number = (min(self.char_filter_multiple *
                                       window_size, 200) if
                                   filters_number is None else filters_number)
            # all convolution layers but the last may be followed by dropout
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(
                    curr_filters_number, (1, window_size),
                    padding="same",
                    activation="relu",
                    data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same",
                                    activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        # max over the next-to-last axis (character positions)
        highway_input = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(
                self.intermediate_dropout)(highway_input)
        # all highway layers but the last may be followed by dropout
        for i in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def build_basic_network(self, word_outputs):
        """
        Creates the basic network architecture,
        transforming word embeddings to intermediate outputs

        word_outputs: tensor of word vectors
        -----------------------------------------
        answer: a pair (tag probabilities for every word, outputs of the
            last bidirectional LSTM layer)
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers - 1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j],
                        return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[-1],
                    return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(kl.Dense(
            self.tags_number_,
            activation="softmax",
            activity_regularizer=self.regularizer),
                                         name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """
        Converts a batch of sentences (and optionally tags) to network inputs

        data: a batch of word sequences; when word vectorizers are used,
            data[0] is the batch of word sequences and data[1:] are the
            additional inputs
        labels: a batch of tag sequences or None
        transform_to_one_hot: whether to one-hot encode the tag indexes
        -----------------------------------------
        answer: X if labels is None, otherwise the pair (X, Y); all
            sentences are padded to the longest sentence of the batch
        """
        if len(self.word_vectorizers) > 0:
            data, additional_data = data[0], data[1:]
        L = max(len(x) for x in data)
        X = np.array([self._make_sent_vector(x, L) for x in data])
        if len(self.word_vectorizers) > 0:
            X = [X] + [np.array(x) for x in additional_data]
        if labels is not None:
            Y = np.array([self._make_tags_vector(y, L) for y in labels])
            if transform_to_one_hot:
                Y = to_one_hot(Y, len(self.tags))
            return X, Y
        else:
            return X

    def train_on_batch(self, data, labels):
        """
        Trains model on a single batch

        data: a batch of word sequences
        labels: a batch of correct tag sequences
        -----------------------------------------
        answer: the value returned by the Keras train_on_batch call
        """
        X, Y = self._transform_batch(data, labels)
        # TO_DO: add weights to deal with padded instances
        return self.model_.train_on_batch(X, Y)

    def predict_on_batch(self, data: List, return_indexes=False):
        """
        Makes predictions on a single batch

        data: a batch of word sequences,
        return_indexes: whether to return tag indexes instead of tags
        -----------------------------------------
        answer: a batch of label sequences
        """
        X = self._transform_batch(data)
        if len(self.word_vectorizers) > 0:
            objects_number, lengths = len(
                X[0]), [len(elem) for elem in data[0]]
        else:
            objects_number, lengths = len(X), [len(elem) for elem in data]
        Y = self.model_.predict_on_batch(X)
        labels = np.argmax(Y, axis=-1)
        answer: List[List[str]] = [None] * objects_number
        for i, (elem, length) in enumerate(zip(labels, lengths)):
            # drop the predictions made for padding positions
            elem = elem[:length]
            answer[i] = elem if return_indexes else self.tags.idxs2toks(elem)
        return answer

    def _make_sent_vector(self, sent, bucket_length=None):
        """
        Encodes a sentence as a matrix of character indexes

        sent: a sequence of words
        bucket_length: number of rows in the answer, len(sent) by default
        -----------------------------------------
        answer: an int32 array of shape (bucket_length, MAX_WORD_LENGTH + 2);
            row i holds BEGIN, the last MAX_WORD_LENGTH characters of word i,
            END and PAD up to the end of the row; rows beyond the sentence
            stay zero
        """
        bucket_length = bucket_length or len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH + 2),
                          dtype=np.int32)
        # NOTE(review): BEGIN/END/PAD are looked up in the tag vocabulary
        # while the characters use the symbol vocabulary - confirm that the
        # special indexes agree between the two vocabularies.
        for i, word in enumerate(sent):
            answer[i, 0] = self.tags.tok2idx("BEGIN")
            m = min(len(word), MAX_WORD_LENGTH)
            for j, x in enumerate(word[-m:]):
                answer[i, j + 1] = self.symbols.tok2idx(x)
            answer[i, m + 1] = self.tags.tok2idx("END")
            answer[i, m + 2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None):
        """
        Encodes a tag sequence as a vector of tag indexes

        tags: a sequence of tags
        bucket_length: length of the answer, len(tags) by default
        -----------------------------------------
        answer: an int32 array of length bucket_length, zero after the
            last tag
        """
        bucket_length = bucket_length or len(tags)
        answer = np.zeros(shape=(bucket_length, ), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile):
        """
        outfile: file with model weights (other model components should be given in config)
        """
        self.model_.save_weights(outfile)

    def load(self, infile):
        """
        infile: file with model weights to load into the built model
        """
        self.model_.load_weights(infile)
Ejemplo n.º 29
0
class ModelOrientation:
    """
    The model must find the displacement applied to the rectangle in the
    otherwise empty image. To do so it is given the optical flow between
    the previous image and the current one.
    """
    def __init__(self, img_hau, img_lar, rect_x, rect_y, rect_hau, rect_lar):
        """
        Create the blank image, draw the initial rectangle and build the model.

        Args:
            img_hau: image height (first axis of the image array).
            img_lar: image width (second axis of the image array).
            rect_x, rect_y, rect_hau, rect_lar: passed unchanged to
                ``Rectangle`` together with the blank image.
        """
        self.hau = img_hau
        self.lar = img_lar
        self.img = np.zeros((img_hau, img_lar, 3), dtype="uint8")

        self.w_img2 = copy.deepcopy(self.img)

        self.rect = Rectangle(self.img, rect_x, rect_y, rect_hau, rect_lar)
        self.w_img2 = self.rect.draw(self.w_img2)
        self.create_model()

    def create_model(self):
        """
        Build and compile the CNN regressor and store it in ``self.model``.

        Input: the 2-channel optical flow of shape (hau, lar, 2).
        Output: 2 linear units (the x and y displacement). Weights are
        restored from ``./weights.hdf5`` when that file exists.
        """
        img_input = layers.Input(shape=(self.hau, self.lar, 2))

        # NOTE(review): the original also created Conv2D(16, 3) on img_input,
        # but its output was overwritten by the next layer (also applied to
        # img_input), so it never was part of the model. The dead layer is
        # dropped here; the effective architecture is unchanged, so an
        # existing weights.hdf5 still loads. If a 16 -> 32 -> 64 stack was
        # intended, chain the layers and retrain from scratch.
        x = layers.Conv2D(32, 3, activation='relu')(img_input)
        x = layers.Conv2D(64, 3, activation='relu')(x)
        x = layers.MaxPooling2D(2)(x)
        x = layers.Dropout(0.5)(x)

        # Flatten feature map to a 1-dim tensor so we can add fully connected layers
        x = layers.Flatten()(x)

        # Two fully connected ReLU layers with dropout in between
        x = layers.Dense(50, activation='relu')(x)
        x = layers.Dropout(0.5)(x)
        x = layers.Dense(50, activation='relu')(x)
        # Output layer: two nodes with linear activation (regression)
        output = layers.Dense(2, activation='linear')(x)

        # Create model:
        self.model = Model(img_input, output)

        self.model.compile(loss='mean_squared_error', optimizer='adam')

        if os.path.isfile("./weights.hdf5"):
            self.model.load_weights('./weights.hdf5')

        # summary() prints by itself and returns None; print(summary())
        # added a stray "None" line.
        self.model.summary()

    def fit(self, steps, size_of_training):
        """
        Generate ``steps`` samples, training periodically on the buffer.

        Args:
            steps: number of samples to generate.
            size_of_training: capacity of the sample buffer; training is
                triggered when the step index is a positive multiple of
                ``size_of_training / 10``.
        """
        self.prvs = cv.cvtColor(self.w_img2, cv.COLOR_BGR2GRAY)

        # buffers holding the training data
        self.features = np.zeros((size_of_training, self.hau * self.lar * 2))
        self.targets = np.zeros((size_of_training, 2))

        for i in tqdm(range(steps)):

            if i % (size_of_training / 10) == 0 and i > 0:
                # train the model on the data created so far
                self.train(i, size_of_training)

            self.create_data(i, size_of_training)

    def create_data(self, i, size_of_training):
        """
        Create one sample, overwriting the oldest one once the buffer is full.

        The rectangle is moved by a random offset of 0 to 3 cells per axis,
        the optical flow between the previous and the new frame is stored
        as the features and the applied (x, y) offset as the target.
        """
        # by how many cells the rectangle is moved
        longx = random.randint(0, 3)
        longy = random.randint(0, 3)

        # direction of the move on each axis
        dirx = random.choice([-1, 1])
        diry = random.choice([-1, 1])

        # reverse the direction if the rectangle would leave the image
        # NOTE(review): x is checked against img.shape[0] and y against
        # img.shape[1]; whether that matches Rectangle's axis convention
        # cannot be told from here - confirm for non-square images.
        if self.rect.x + longx * dirx < 0 or self.rect.x + longx * dirx + self.rect.lar > self.img.shape[
                0]:
            dirx *= -1

        longx *= dirx

        # reverse the direction if the rectangle would leave the image
        if self.rect.y + longy * diry < 0 or self.rect.y + longy * diry + self.rect.hau > self.img.shape[
                1]:
            diry *= -1

        longy *= diry

        self.rect.move(longx, longy)

        w_img2 = copy.deepcopy(self.img)
        w_img2 = self.rect.draw(w_img2)

        # named next_gray so the builtin next() is not shadowed
        next_gray = cv.cvtColor(w_img2, cv.COLOR_BGR2GRAY)

        flow = cv.calcOpticalFlowFarneback(self.prvs, next_gray, None, 0.5, 3,
                                           15, 3, 5, 1.2, 0)

        # the buffer is circular: slot i % size_of_training is reused
        self.features[i % size_of_training, :] = flow.flatten()
        self.targets[i % size_of_training, :] = [longx, longy]

        self.prvs = next_gray

    def train(self, i, size_of_training):
        """
        Fit the model on the buffered samples and print 10 sample predictions.

        Args:
            i: current step; while it is smaller than ``size_of_training``
                only the first ``i`` samples are used.
            size_of_training: capacity of the sample buffer.
        """
        print("==========================================================")
        print("Step: ", i)

        # Reshape into a local 4-D array instead of rebinding self.features,
        # so the flat buffer stays intact even if fit() raises.
        features = np.reshape(self.features,
                              (size_of_training, self.hau, self.lar, 2))
        print("----------------")
        print("Train")

        self.model_checkpoint = ModelCheckpoint('weights.hdf5',
                                                monitor='loss',
                                                verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)

        if i < size_of_training:
            # the buffer is not full yet: use only the filled part
            self.model.fit(features[:i],
                           self.targets[:i],
                           shuffle=True,
                           callbacks=[self.model_checkpoint])
        else:
            self.model.fit(features,
                           self.targets,
                           shuffle=True,
                           callbacks=[self.model_checkpoint])

        print("----------------")
        print("Test")

        for j in range(10):
            [[predx, predy]] = self.model.predict(np.array([features[j, :, :]]))
            predx = round(predx)
            predy = round(predy)

            print(
                "Loss: ",
                np.linalg.norm([
                    predx - self.targets[j, 0], predy - self.targets[j, 1]
                ]), "  / Pred: ", predx, predy, " / Diff: ",
                self.targets[j, 0] - predx, self.targets[j, 1] - predy)

        print()
Ejemplo n.º 30
0
def SegNet(nClasses, input_height, input_width):
    """Build a SegNet-style encoder/decoder segmentation model.

    The encoder consists of five convolutional blocks whose layer names follow
    the VGG16 naming (``block1_conv1`` ... ``block5_conv3``); each block ends
    with a ``MaxPoolingWithArgmax2D`` layer that also returns a pooling mask.
    The decoder walks the masks in reverse order through ``MaxUnpooling2D``
    followed by Conv-BatchNorm-ReLU stacks.

    Pretrained VGG16 weights are fetched with ``get_file`` and loaded into the
    encoder by layer name, so calling this function may download a file.

    Args:
        nClasses: number of filters of the final 1x1 convolution, i.e. the
            size of the softmax axis.
        input_height: input image height, must be a multiple of 32.
        input_width: input image width, must be a multiple of 32.

    Returns:
        A Keras ``Model`` taking ``(input_height, input_width, 3)`` images and
        producing a softmax over ``nClasses`` on a tensor reshaped to
        ``(-1, nClasses)`` per sample.

    Raises:
        AssertionError: if either spatial dimension is not divisible by 32.
    """
    # Five pooling stages follow; 32 == 2**5 (presumably each pooling halves
    # the resolution - confirm against MaxPoolingWithArgmax2D defaults).
    assert input_height % 32 == 0
    assert input_width % 32 == 0

    img_input = Input(shape=(input_height, input_width, 3))

    # Encoder: (filters, number of 3x3 convolutions) for blocks 1..5.
    encoder_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    x = img_input
    pool_masks = []
    for block_no, (filters, conv_count) in enumerate(encoder_blocks, start=1):
        for conv_no in range(1, conv_count + 1):
            x = layers.Conv2D(filters, (3, 3),
                              activation='relu',
                              padding='same',
                              name='block%d_conv%d' % (block_no, conv_no))(x)
        x, mask = MaxPoolingWithArgmax2D(name='block%d_pool' % block_no)(x)
        pool_masks.append(mask)

    vgg_encoder = Model(inputs=img_input, outputs=x)

    # Load the pretrained VGG16 weights. load_weights() needs a local file,
    # so the archive is downloaded (and cached) first. The TensorFlow-ordered
    # "notop" file is used because the input is channels-last and the encoder
    # has no dense layers.
    weights_path = get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        'https://github.com/fchollet/deep-learning-models/releases/download/'
        'v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        cache_subdir='models')
    vgg_encoder.load_weights(weights_path, by_name=True)

    # Decoder: one stage per pooling mask, newest mask first. Each tuple lists
    # the filter counts of the Conv-BN-ReLU stack applied after unpooling.
    decoder_stages = (
        (512, 512, 512),
        (512, 512, 256),
        (256, 256, 128),
        (128, 64),
        (64,),
    )
    y = x
    for mask, stage_filters in zip(reversed(pool_masks), decoder_stages):
        y = MaxUnpooling2D()([y, mask])
        for filters in stage_filters:
            y = Conv2D(filters, (3, 3), padding="same")(y)
            y = BatchNormalization()(y)
            y = Activation("relu")(y)

    # Per-position class scores.
    # NOTE(review): a ReLU is applied to the scores before the softmax; this
    # is kept exactly as in the original architecture.
    y = Conv2D(nClasses, (1, 1), padding="same")(y)
    y = BatchNormalization()(y)
    y = Activation("relu")(y)

    y = Reshape((-1, nClasses))(y)
    y = Activation("softmax")(y)

    model = Model(inputs=img_input, outputs=y)
    return model
Ejemplo n.º 31
0
class CharacterTagger:

    """Neural morphological tagger that derives word vectors from characters.

    Parameters:
        symbols: vocabulary of characters
        tags: vocabulary of morphological tags
        word_rnn: kind of character-level network (`cnn` is the only one built here)
        char_embeddings_size: dimension of the character embeddings
        char_conv_layers: how many convolutions are stacked on character level
        char_window_size: convolution filter width; a list such as [2, 3, 4, 5]
            creates several parallel branches, one per width.
        char_filters: how many filters each window width gets.
            Either a single number (shared by all widths), a list with one
            entry per window width, or **None**.
            With **None**, a branch of width **width** gets
            min(**char_filter_multiple** * **width**, 200) filters.
            NOTE(review): the original description also mentioned a list of
            lists for several convolution layers; nothing in this class
            handles that form.

        char_filter_multiple: filters-per-unit-of-width ratio used when
            **char_filters** is **None**
        char_highway_layers: how many highway layers follow the convolutions
        conv_dropout: dropout ratio between stacked convolutions
        highway_dropout: dropout ratio between highway layers
        intermediate_dropout: dropout ratio between the convolutional part
            and the highway part
        lstm_dropout: dropout ratio inside the word-level LSTMs
        word_vectorizers: settings of additional word-level inputs; every item
            is a pair (input dimension, dimension of the dense projection)
        word_lstm_layers: how many word-level LSTM layers are stacked
        word_lstm_units: hidden sizes of the word-level LSTMs
        word_dropout: dropout ratio applied to word vectors before the LSTMs
        regularizer: l2 regularization coefficient
        verbose: verbosity level
    """
    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        """Store the hyperparameters, normalize them and build the model."""
        # Vocabularies.
        self.tags = tags
        self.symbols = symbols

        # Character-level encoder.
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_conv_layers = char_conv_layers
        self.char_highway_layers = char_highway_layers

        # Dropout ratios.
        self.conv_dropout = conv_dropout
        self.intermediate_dropout = intermediate_dropout
        self.highway_dropout = highway_dropout
        self.word_dropout = word_dropout
        self.lstm_dropout = lstm_dropout

        # Word-level network; word_vectorizers holds (input dim, dense dim) pairs.
        self.word_vectorizers = word_vectorizers
        self.word_lstm_units = word_lstm_units
        self.word_lstm_layers = word_lstm_layers

        self.regularizer = regularizer
        self.verbose = verbose

        self._initialize()
        self.build()

    def _initialize(self):
        """Bring the stored hyperparameters to their canonical list form.

        Raises:
            ValueError: if the number of filter settings differs from the
                number of window sizes, or the number of LSTM sizes differs
                from ``word_lstm_layers``.
        """
        window_sizes = self.char_window_size
        if isinstance(window_sizes, int):
            window_sizes = [window_sizes]
        self.char_window_size = window_sizes

        filters = self.char_filters
        if filters is None or isinstance(filters, int):
            # One shared setting is repeated for every window width.
            filters = [filters] * len(window_sizes)
        self.char_filters = filters
        if len(window_sizes) != len(filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")

        lstm_units = self.word_lstm_units
        if isinstance(lstm_units, int):
            lstm_units = [lstm_units] * self.word_lstm_layers
        self.word_lstm_units = lstm_units
        if len(lstm_units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")

        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            # The float coefficient is replaced by a regularizer object.
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            log.info("{} symbols, {} tags in CharacterTagger".format(self.symbols_number_, self.tags_number_))

    @property
    def symbols_number_(self) -> int:
        """Number of entries in the character vocabulary."""
        return len(self.symbols)

    @property
    def tags_number_(self) -> int:
        """Number of entries in the tag vocabulary."""
        return len(self.tags)

    def build(self):
        """Assemble and compile the Keras model, stored in ``self.model_``.

        Returns:
            the tagger itself
        """
        char_input = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        model_inputs = [char_input]
        word_vectors = self._build_word_cnn(char_input)
        if len(self.word_vectorizers) > 0:
            # All additional inputs are created first, then their projections.
            extra_inputs = []
            for input_dim, _ in self.word_vectorizers:
                extra_inputs.append(kl.Input(shape=(None, input_dim), dtype="float32"))
            model_inputs.extend(extra_inputs)
            extra_embeddings = []
            for extra_input, (_, dense_dim) in zip(extra_inputs, self.word_vectorizers):
                extra_embeddings.append(kl.Dense(dense_dim)(extra_input))
            word_vectors = kl.Concatenate()([word_vectors] + extra_embeddings)
        tag_probabilities, _ = self._build_basic_network(word_vectors)
        optimizer = ko.nadam(lr=0.002, clipnorm=5.0)
        self.model_ = Model(model_inputs, tag_probabilities)
        self.model_.compile(optimizer=optimizer,
                            loss="categorical_crossentropy",
                            metrics=["accuracy"])
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Build the character-level part that turns each word into a vector.

        Characters are one-hot encoded, embedded by a bias-free dense layer,
        convolved along the character axis (one branch per window width),
        max-pooled over that axis and passed through highway layers.
        """
        one_hot_chars = kl.Lambda(kb.one_hot, arguments={"num_classes": self.symbols_number_},
                                  output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size, use_bias=False)(one_hot_chars)

        def char_conv(filters, width):
            # Kernel height 1: the convolution only slides along characters.
            return kl.Conv2D(filters, (1, width),
                             padding="same", activation="relu",
                             data_format="channels_last")

        branches = []
        self.char_output_dim_ = 0
        for width, requested_filters in zip(self.char_window_size, self.char_filters):
            if requested_filters is None:
                branch_filters = min(self.char_filter_multiple * width, 200)
            else:
                branch_filters = requested_filters
            branch = char_embeddings
            # Every convolution except the last may be followed by dropout.
            for _ in range(self.char_conv_layers - 1):
                branch = char_conv(branch_filters, width)(branch)
                if self.conv_dropout > 0.0:
                    branch = kl.Dropout(self.conv_dropout)(branch)
            branch = char_conv(branch_filters, width)(branch)
            branches.append(branch)
            self.char_output_dim_ += branch_filters

        conv_output = kl.Concatenate(axis=-1)(branches) if len(branches) > 1 else branches[0]

        # Max over the character axis leaves one vector per word.
        word_vectors = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            word_vectors = kl.Dropout(self.intermediate_dropout)(word_vectors)
        # Every highway layer except the last may be followed by dropout.
        for _ in range(self.char_highway_layers - 1):
            word_vectors = Highway(activation="relu")(word_vectors)
            if self.highway_dropout > 0.0:
                word_vectors = kl.Dropout(self.highway_dropout)(word_vectors)
        return Highway(activation="relu")(word_vectors)

    def _build_basic_network(self, word_outputs):
        """Build the word-level part: bidirectional LSTMs and the tag softmax.

        Returns:
            a pair (tag probabilities, output of the last LSTM layer)
        """
        def bidirectional_lstm(units):
            return kl.Bidirectional(
                kl.LSTM(units, return_sequences=True, dropout=self.lstm_dropout))

        hidden = word_outputs
        if self.word_dropout > 0.0:
            hidden = kl.Dropout(self.word_dropout)(hidden)
        for layer_no in range(self.word_lstm_layers-1):
            hidden = bidirectional_lstm(self.word_lstm_units[layer_no])(hidden)
        hidden = bidirectional_lstm(self.word_lstm_units[-1])(hidden)
        tag_layer = kl.Dense(self.tags_number_, activation="softmax",
                             activity_regularizer=self.regularizer)
        tag_probabilities = kl.TimeDistributed(tag_layer, name="p")(hidden)
        return tag_probabilities, hidden

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Convert a raw batch to network inputs (and targets when labels are given).

        ``data[0]`` holds the sentences, ``data[1:]`` the additional inputs.
        All sentences are padded to the length of the longest one.
        """
        sentences, extra_features = data[0], data[1:]
        longest = max(len(sentence) for sentence in sentences)
        char_indexes = np.array([self._make_sent_vector(sentence, longest)
                                 for sentence in sentences])
        network_inputs = [char_indexes] + [np.array(feature) for feature in extra_features]
        if labels is None:
            return network_inputs
        targets = np.array([self._make_tags_vector(sentence_tags, longest)
                            for sentence_tags in labels])
        if transform_to_one_hot:
            targets = to_one_hot(targets, len(self.tags))
        return network_inputs, targets

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Run one training update on a single batch.

        Args:
            data: a batch of word sequences (plus additional inputs, if any)
            labels: a batch of correct tag sequences
        Returns:
            nothing; ``self.model_`` is updated in place
        """
        network_inputs, targets = self._transform_batch(data, labels)
        self.model_.train_on_batch(network_inputs, targets)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """
        Predict tags for a single batch.

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: if set, tag indexes are returned instead of tags

        Returns:
            one label sequence per sentence, cut to the sentence length
        """
        network_inputs = self._transform_batch(data)
        sentence_lengths = [len(sentence) for sentence in data[0]]
        probabilities = self.model_.predict_on_batch(network_inputs)
        best_indexes = np.argmax(probabilities, axis=-1)
        answer = []
        for row, length in zip(best_indexes, sentence_lengths):
            row = row[:length]  # drop predictions made for padding positions
            answer.append(row if return_indexes else self.tags.idxs2toks(row))
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray:
        """Encode one sentence as an integer matrix for the network input.

        Args:
            sent: input sentence (a sequence of words)
            bucket_length: number of rows to allocate; defaults to len(sent)

        Returns:
            A 2d array of shape (bucket_length, MAX_WORD_LENGTH+2);
            answer[i][0] is the BEGIN index, answer[i][j+1] is the index of
            the j-th kept letter of the i-th word, followed by END and PAD.
        """
        rows = bucket_length or len(sent)
        answer = np.zeros(shape=(rows, MAX_WORD_LENGTH+2), dtype=np.int32)
        for row, word in enumerate(sent):
            # NOTE(review): the BEGIN/END/PAD indexes are looked up in the tag
            # vocabulary although they are written next to character indexes;
            # presumably both vocabularies share these entries - confirm.
            answer[row, 0] = self.tags.tok2idx("BEGIN")
            kept = min(len(word), MAX_WORD_LENGTH)
            # Words longer than MAX_WORD_LENGTH keep only their last letters.
            for column, symbol in enumerate(word[-kept:], start=1):
                answer[row, column] = self.symbols.tok2idx(symbol)
            answer[row, kept+1] = self.tags.tok2idx("END")
            answer[row, kept+2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Encode one tag sequence as an integer vector for the network target.

        Args:
            tags: tags of a single sentence
            bucket_length: length of the vector to allocate; defaults to len(tags)

        Returns:
            A 1d array, answer[i] is the index of the i-th tag;
            positions beyond the sentence stay 0.
        """
        size = bucket_length or len(tags)
        answer = np.zeros(shape=(size,), dtype=np.int32)
        for position, tag in enumerate(tags):
            answer[position] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Write the model weights to a file.

        Args:
            outfile: destination for the weights (only weights are written)
        """
        self.model_.save_weights(outfile)

    def load(self, infile) -> None:
        """Read the model weights from a file.

        Args:
            infile: file the weights are loaded from
        """
        self.model_.load_weights(infile)
Ejemplo n.º 32
0
class CharacterTagger:

    """Neural morphological tagger that derives word vectors from characters.

    Parameters:
        symbols: vocabulary of characters
        tags: vocabulary of morphological tags
        word_rnn: kind of character-level network (`cnn` is the only one built here)
        char_embeddings_size: dimension of the character embeddings
        char_conv_layers: how many convolutions are stacked on character level
        char_window_size: convolution filter width; a list such as [2, 3, 4, 5]
            creates several parallel branches, one per width.
        char_filters: how many filters each window width gets.
            Either a single number (shared by all widths), a list with one
            entry per window width, or **None**.
            With **None**, a branch of width **width** gets
            min(**char_filter_multiple** * **width**, 200) filters.
            NOTE(review): the original description also mentioned a list of
            lists for several convolution layers; nothing in this class
            handles that form.

        char_filter_multiple: filters-per-unit-of-width ratio used when
            **char_filters** is **None**
        char_highway_layers: how many highway layers follow the convolutions
        conv_dropout: dropout ratio between stacked convolutions
        highway_dropout: dropout ratio between highway layers
        intermediate_dropout: dropout ratio between the convolutional part
            and the highway part
        lstm_dropout: dropout ratio inside the word-level LSTMs
        word_vectorizers: settings of additional word-level inputs; every item
            is a pair (input dimension, dimension of the dense projection)
        word_lstm_layers: how many word-level LSTM layers are stacked
        word_lstm_units: hidden sizes of the word-level LSTMs
        word_dropout: dropout ratio applied to word vectors before the LSTMs
        regularizer: l2 regularization coefficient
        verbose: verbosity level
    """
    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        """Store the hyperparameters, normalize them and build the model."""
        # Vocabularies.
        self.tags = tags
        self.symbols = symbols

        # Character-level encoder.
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_conv_layers = char_conv_layers
        self.char_highway_layers = char_highway_layers

        # Dropout ratios.
        self.conv_dropout = conv_dropout
        self.intermediate_dropout = intermediate_dropout
        self.highway_dropout = highway_dropout
        self.word_dropout = word_dropout
        self.lstm_dropout = lstm_dropout

        # Word-level network; word_vectorizers holds (input dim, dense dim) pairs.
        self.word_vectorizers = word_vectorizers
        self.word_lstm_units = word_lstm_units
        self.word_lstm_layers = word_lstm_layers

        self.regularizer = regularizer
        self.verbose = verbose

        self._initialize()
        self.build()

    def _initialize(self):
        """Bring the stored hyperparameters to their canonical list form.

        Raises:
            ValueError: if the number of filter settings differs from the
                number of window sizes, or the number of LSTM sizes differs
                from ``word_lstm_layers``.
        """
        window_sizes = self.char_window_size
        if isinstance(window_sizes, int):
            window_sizes = [window_sizes]
        self.char_window_size = window_sizes

        filters = self.char_filters
        if filters is None or isinstance(filters, int):
            # One shared setting is repeated for every window width.
            filters = [filters] * len(window_sizes)
        self.char_filters = filters
        if len(window_sizes) != len(filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")

        lstm_units = self.word_lstm_units
        if isinstance(lstm_units, int):
            lstm_units = [lstm_units] * self.word_lstm_layers
        self.word_lstm_units = lstm_units
        if len(lstm_units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")

        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            # The float coefficient is replaced by a regularizer object.
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            log.info("{} symbols, {} tags in CharacterTagger".format(self.symbols_number_, self.tags_number_))

    @property
    def symbols_number_(self) -> int:
        """Number of entries in the character vocabulary."""
        return len(self.symbols)

    @property
    def tags_number_(self) -> int:
        """Number of entries in the tag vocabulary."""
        return len(self.tags)

    def build(self):
        """Assemble and compile the Keras model, stored in ``self.model_``.

        Returns:
            the tagger itself
        """
        char_input = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        model_inputs = [char_input]
        word_vectors = self._build_word_cnn(char_input)
        if len(self.word_vectorizers) > 0:
            # All additional inputs are created first, then their projections.
            extra_inputs = []
            for input_dim, _ in self.word_vectorizers:
                extra_inputs.append(kl.Input(shape=(None, input_dim), dtype="float32"))
            model_inputs.extend(extra_inputs)
            extra_embeddings = []
            for extra_input, (_, dense_dim) in zip(extra_inputs, self.word_vectorizers):
                extra_embeddings.append(kl.Dense(dense_dim)(extra_input))
            word_vectors = kl.Concatenate()([word_vectors] + extra_embeddings)
        tag_probabilities, _ = self._build_basic_network(word_vectors)
        optimizer = ko.nadam(lr=0.002, clipnorm=5.0)
        self.model_ = Model(model_inputs, tag_probabilities)
        self.model_.compile(optimizer=optimizer,
                            loss="categorical_crossentropy",
                            metrics=["accuracy"])
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Build the character-level part that turns each word into a vector.

        Characters are one-hot encoded, embedded by a bias-free dense layer,
        convolved along the character axis (one branch per window width),
        max-pooled over that axis and passed through highway layers.
        """
        one_hot_chars = kl.Lambda(kb.one_hot, arguments={"num_classes": self.symbols_number_},
                                  output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size, use_bias=False)(one_hot_chars)

        def char_conv(filters, width):
            # Kernel height 1: the convolution only slides along characters.
            return kl.Conv2D(filters, (1, width),
                             padding="same", activation="relu",
                             data_format="channels_last")

        branches = []
        self.char_output_dim_ = 0
        for width, requested_filters in zip(self.char_window_size, self.char_filters):
            if requested_filters is None:
                branch_filters = min(self.char_filter_multiple * width, 200)
            else:
                branch_filters = requested_filters
            branch = char_embeddings
            # Every convolution except the last may be followed by dropout.
            for _ in range(self.char_conv_layers - 1):
                branch = char_conv(branch_filters, width)(branch)
                if self.conv_dropout > 0.0:
                    branch = kl.Dropout(self.conv_dropout)(branch)
            branch = char_conv(branch_filters, width)(branch)
            branches.append(branch)
            self.char_output_dim_ += branch_filters

        conv_output = kl.Concatenate(axis=-1)(branches) if len(branches) > 1 else branches[0]

        # Max over the character axis leaves one vector per word.
        word_vectors = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            word_vectors = kl.Dropout(self.intermediate_dropout)(word_vectors)
        # Every highway layer except the last may be followed by dropout.
        for _ in range(self.char_highway_layers - 1):
            word_vectors = Highway(activation="relu")(word_vectors)
            if self.highway_dropout > 0.0:
                word_vectors = kl.Dropout(self.highway_dropout)(word_vectors)
        return Highway(activation="relu")(word_vectors)

    def _build_basic_network(self, word_outputs):
        """Build the word-level part: bidirectional LSTMs and the tag softmax.

        Returns:
            a pair (tag probabilities, output of the last LSTM layer)
        """
        def bidirectional_lstm(units):
            return kl.Bidirectional(
                kl.LSTM(units, return_sequences=True, dropout=self.lstm_dropout))

        hidden = word_outputs
        if self.word_dropout > 0.0:
            hidden = kl.Dropout(self.word_dropout)(hidden)
        for layer_no in range(self.word_lstm_layers-1):
            hidden = bidirectional_lstm(self.word_lstm_units[layer_no])(hidden)
        hidden = bidirectional_lstm(self.word_lstm_units[-1])(hidden)
        tag_layer = kl.Dense(self.tags_number_, activation="softmax",
                             activity_regularizer=self.regularizer)
        tag_probabilities = kl.TimeDistributed(tag_layer, name="p")(hidden)
        return tag_probabilities, hidden

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Convert a raw batch to network inputs (and targets when labels are given).

        ``data[0]`` holds the sentences, ``data[1:]`` the additional inputs.
        All sentences are padded to the length of the longest one.
        """
        sentences, extra_features = data[0], data[1:]
        longest = max(len(sentence) for sentence in sentences)
        char_indexes = np.array([self._make_sent_vector(sentence, longest)
                                 for sentence in sentences])
        network_inputs = [char_indexes] + [np.array(feature) for feature in extra_features]
        if labels is None:
            return network_inputs
        targets = np.array([self._make_tags_vector(sentence_tags, longest)
                            for sentence_tags in labels])
        if transform_to_one_hot:
            targets = to_one_hot(targets, len(self.tags))
        return network_inputs, targets

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Run one training update on a single batch.

        Args:
            data: a batch of word sequences (plus additional inputs, if any)
            labels: a batch of correct tag sequences
        Returns:
            nothing; ``self.model_`` is updated in place
        """
        network_inputs, targets = self._transform_batch(data, labels)
        self.model_.train_on_batch(network_inputs, targets)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """
        Predict tags for a single batch.

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: if set, tag indexes are returned instead of tags

        Returns:
            one label sequence per sentence, cut to the sentence length
        """
        network_inputs = self._transform_batch(data)
        sentence_lengths = [len(sentence) for sentence in data[0]]
        probabilities = self.model_.predict_on_batch(network_inputs)
        best_indexes = np.argmax(probabilities, axis=-1)
        answer = []
        for row, length in zip(best_indexes, sentence_lengths):
            row = row[:length]  # drop predictions made for padding positions
            answer.append(row if return_indexes else self.tags.idxs2toks(row))
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray:
        """Encode one sentence as an integer matrix for the network input.

        Args:
            sent: input sentence (a sequence of words)
            bucket_length: number of rows to allocate; defaults to len(sent)

        Returns:
            A 2d array of shape (bucket_length, MAX_WORD_LENGTH+2);
            answer[i][0] is the BEGIN index, answer[i][j+1] is the index of
            the j-th kept letter of the i-th word, followed by END and PAD.
        """
        rows = bucket_length or len(sent)
        answer = np.zeros(shape=(rows, MAX_WORD_LENGTH+2), dtype=np.int32)
        for row, word in enumerate(sent):
            # NOTE(review): the BEGIN/END/PAD indexes are looked up in the tag
            # vocabulary although they are written next to character indexes;
            # presumably both vocabularies share these entries - confirm.
            answer[row, 0] = self.tags.tok2idx("BEGIN")
            kept = min(len(word), MAX_WORD_LENGTH)
            # Words longer than MAX_WORD_LENGTH keep only their last letters.
            for column, symbol in enumerate(word[-kept:], start=1):
                answer[row, column] = self.symbols.tok2idx(symbol)
            answer[row, kept+1] = self.tags.tok2idx("END")
            answer[row, kept+2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Encode one tag sequence as an integer vector for the network target.

        Args:
            tags: tags of a single sentence
            bucket_length: length of the vector to allocate; defaults to len(tags)

        Returns:
            A 1d array, answer[i] is the index of the i-th tag;
            positions beyond the sentence stay 0.
        """
        size = bucket_length or len(tags)
        answer = np.zeros(shape=(size,), dtype=np.int32)
        for position, tag in enumerate(tags):
            answer[position] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Write the model weights to a file.

        Args:
            outfile: destination for the weights (only weights are written)
        """
        self.model_.save_weights(outfile)

    def load(self, infile) -> None:
        """Read the model weights from a file.

        Args:
            infile: file the weights are loaded from
        """
        self.model_.load_weights(infile)
Ejemplo n.º 33
0
class PolicyValueNetwork:
    """Two-headed policy/value network (labelled "AlphaZero Residual-CNN" by its author).

    A shared stack of ``ConvBlock`` layers feeds two heads:

    * a value head ending in a single ``tanh`` unit, and
    * a policy head ending in a softmax over ``Game["board_size"]`` units.

    The combined model outputs ``[value, policy]`` in that order.
    """

    def __init__(self, model_file=None):
        """Build the network and optionally restore previously saved weights.

        Args:
            model_file: base name (no directory, no ``.h5`` suffix) of a
                weights file to load through ``restore_model``; ``None``
                skips loading.
        """
        # Build Network Architecture
        input_shape = Board().encoded_states().shape  # (6, 15, 15)
        inputs = Input(input_shape)

        # The same ``shared_net`` instance is the first layer of both heads,
        # so the heads operate on one common convolutional trunk.
        shared_net = Sequential([
            *ConvBlock(32, input_shape=input_shape),
            *ConvBlock(64),
            *ConvBlock(128)
        ], "shared_net")

        policy_head = Sequential([
            shared_net,
            *ConvBlock(4, (1, 1), "relu"),
            Flatten(),
            Dense(Game["board_size"], kernel_regularizer=l2()),
            Activation("softmax")
        ], "policy_head")

        value_head = Sequential([
            shared_net,
            *ConvBlock(2, (1, 1), "relu"),
            Flatten(),
            Dense(64, activation="relu", kernel_regularizer=l2()),
            Dense(1, kernel_regularizer=l2()),
            Activation("tanh")
        ], "value_head")

        # Output order (value, policy) must match the loss order in
        # ``compile`` and the indexing in ``eval_state``.
        self.model = Model(
            inputs,
            [value_head(inputs), policy_head(inputs)]
        )

        if model_file is not None:
            self.restore_model(model_file)

    @staticmethod
    def _weights_dir():
        """Return the directory that holds the Keras weight files."""
        return "{}/keras".format(TRAINING_CONFIG["model_path"])

    def compile(self, opt):
        """Define the optimizer and the losses of the model.

        The value output is trained with mean squared error and the policy
        output with categorical cross-entropy.

        Args:
            opt: currently unused; the model is always compiled with
                ``sgd()``. Learning rate and momentum are set later by
                ``train_step``.
                NOTE(review): confirm whether ``opt`` was meant to configure
                the optimizer here.
        """
        self.model.compile(
            optimizer=sgd(),
            loss=["mse", "categorical_crossentropy"]
        )

    def eval_state(self, state):
        """Evaluate a single board state.

        Args:
            state: object whose ``encoded_states()`` returns the network
                input for one position (without the batch axis).

        Returns:
            A ``(value, policy)`` pair: the scalar output of the value head
            and the policy head's output vector for this state.
        """
        # Add a leading batch axis so the single state forms a batch of one.
        vp = self.model.predict_on_batch(state.encoded_states()[np.newaxis, :])
        # vp[0] is the value batch, vp[1] the policy batch; strip the batch
        # axis (and the singleton value axis) to get (float, 1d array).
        return vp[0][0][0], vp[1][0]

    def train_step(self, optimizer):
        """Prepare the optimizer for one network training step.

        Only the learning rate and momentum of the compiled optimizer are
        updated; no gradient step is performed and nothing is returned.

        Args:
            optimizer: mapping with the keys ``"lr"`` and ``"momentum"``.
        """
        opt = self.model.optimizer
        K.set_value(opt.lr, optimizer["lr"])
        K.set_value(opt.momentum, optimizer["momentum"])
        # TODO: the actual update is disabled in this version; the draft was
        #     loss = self.model.train_on_batch(inputs, [winner, probs])
        #     return loss
        # and it needs the training batch passed in before it can be enabled.

    def save_model(self, filename):
        """Save the model weights as ``<model_path>/keras/<filename>.h5``.

        Args:
            filename: base name of the weights file (without ``.h5``).
        """
        base_path = self._weights_dir()
        # makedirs(exist_ok=True) also creates a missing ``model_path`` parent
        # and does not fail if the directory appears between check and create.
        os.makedirs(base_path, exist_ok=True)
        self.model.save_weights("{}/{}.h5".format(base_path, filename))

    def restore_model(self, filename):
        """Load weights from ``<model_path>/keras/<filename>.h5`` if present.

        A missing file is ignored on purpose, so a fresh network can be
        created with the name of a checkpoint that does not exist yet.

        Args:
            filename: base name of the weights file (without ``.h5``).
        """
        weights_file = "{}/{}.h5".format(self._weights_dir(), filename)
        if os.path.exists(weights_file):
            self.model.load_weights(weights_file)