Beispiel #1
0
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the CNN + two-layer bidirectional GRU OCR network and train it.

    The network is trained with a CTC loss computed inside a Lambda layer.

    Args:
        run_name: sub-directory of OUTPUT_DIR used for this run's weight files
            (also handed to VizCallback).
        start_epoch: epoch to resume from; when greater than zero the weights
            file written for epoch ``start_epoch - 1`` is loaded first.
        stop_epoch: value passed to ``fit_generator`` as ``epochs``.
        img_w: width of the generated word images.
    """
    # Data parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))
    train_words = words_per_epoch - val_words

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32
    downsample = pool_size**2
    act = 'relu'

    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, img_w, img_h) if channels_first else (img_w, img_h, 1)

    wordlist_archive = get_file(
        'wordlists.tgz',
        origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
        untar=True)
    fdir = os.path.dirname(wordlist_archive)

    # The generator receives the number of training words as ``val_split``.
    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=downsample,
        val_split=train_words)

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Two identical conv + max-pool stages: conv1/max1 then conv2/max2.
    inner = input_data
    for stage in ('1', '2'):
        inner = Conv2D(conv_filters,
                       kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv' + stage)(inner)
        inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                             name='max' + stage)(inner)

    # Collapse the pooled feature map into (time steps, features) for the RNN.
    conv_to_rnn_dims = (img_w // downsample,
                        (img_h // downsample) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    def _gru(layer_name, tensor, backwards=False):
        """Apply one sequence-returning GRU of ``rnn_size`` units."""
        return GRU(rnn_size,
                   return_sequences=True,
                   go_backwards=backwards,
                   kernel_initializer='he_normal',
                   name=layer_name)(tensor)

    # Two layers of bidirectional GRUs: the first pair is summed, the second
    # pair is concatenated.
    gru_1 = _gru('gru1', inner)
    gru_1b = _gru('gru1_b', inner, backwards=True)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = _gru('gru2', gru1_merged)
    gru_2b = _gru('gru2_b', gru1_merged, backwards=True)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    ctc_inputs = [y_pred, labels, input_length, label_length]

    # The CTC loss needs extra inputs, so it is computed in a Lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')(ctc_inputs)

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # The 'ctc' layer already outputs the loss, so the compiled loss simply
    # passes the prediction through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR, run_name,
                                   'weights%02d.h5' % (start_epoch - 1))
        model.load_weights(weight_file)

    # captures output of softmax so we can decode the output during
    # visualization
    test_func = K.function([input_data], [y_pred])
    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=train_words // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch)
Beispiel #2
0
def clipped_relu(x):
    """Apply Keras' ReLU activation to ``x`` with ``max_value=20``."""
    relu = keras.activations.relu
    return relu(x, max_value=20)


# Size of the last axis of 'main_input' (features per time step).
NB_FREQUENCIES = 161

# Four model inputs: the feature sequence plus the three extra tensors that
# are forwarded to the CTC loss layer below.
inputs = Input(shape=(None, NB_FREQUENCIES), name='main_input')
labels = Input(shape=(None, ), name='labels')
input_length = Input(shape=(1, ), name='input_length')
label_length = Input(shape=(1, ), name='label_length')

# Three per-time-step dense layers using the clipped ReLU activation.
h1 = TimeDistributed(Dense(128, activation=clipped_relu))(inputs)
h2 = TimeDistributed(Dense(128, activation=clipped_relu))(h1)
h3 = TimeDistributed(Dense(128, activation=clipped_relu))(h2)

# One GRU reads the sequence backwards, the other forwards.
# NOTE(review): Keras documents go_backwards=True as returning the reversed
# sequence; confirm whether `lb` should be re-reversed before it is summed
# with `lf`.
lb = GRU(128, go_backwards=True, return_sequences=True)(h3)
lf = GRU(128, return_sequences=True)(h3)

h4 = Add()([lb, lf])  # add the two layers

# Per-time-step 29-way softmax, exposed as the 'aux_output' layer.
h5 = TimeDistributed(Dense(128, activation=clipped_relu))(h4)
h6 = TimeDistributed(Dense(29, activation='softmax'), name='aux_output')(h5)

# The loss is produced inside the graph by a Lambda layer wrapping
# ctc_loss_lambda (defined elsewhere).
loss_out = Lambda(ctc_loss_lambda, output_shape=(1, ),
                  name='main_output')([h6, labels, input_length, label_length])

# The model returns both the loss tensor and the softmax activations.
model = keras.models.Model(inputs=[inputs, labels, input_length, label_length],
                           outputs=[loss_out, h6])
model.summary()

if b:
Beispiel #3
0
    nominator = K.sum((y_true * (1 - K.round(K.clip(y_pred, 0, 1))) +
                       (1 - y_true) * K.round(K.clip(y_pred, 0, 1))))
    return (nominator / denominator)


# Hyper-parameters.
# NOTE(review): DROPOUT and LR are only used in the saved file name below;
# the GRU layers hard-code dropout=0.25 and the optimizer is the string
# 'adam' with no learning rate passed. Confirm which values are intended.
VALIDATION_SPLIT = 0.2  # ratio for split of training data and test data
NUM_EPOCHS = 2  # number of epochs the network is trained
DROPOUT = 0.2
#REGULARIZATION = 0.1
BATCH_SIZE = 64
LR = 0.005

# Embedding -> two stacked GRUs -> sigmoid output with one unit per column
# of `labels`.
model = Sequential()
model.add(
    Embedding(MAX_NUM_WORDS, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
# The first GRU returns the full sequence so the second GRU can consume it.
model.add(GRU(128, dropout=0.25, return_sequences=True))
model.add(GRU(128, dropout=0.25))
model.add(Dense(labels.shape[1], activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[hamming, f1, precision, recall])

# Train, holding out VALIDATION_SPLIT of the training data for validation.
history = model.fit(train_data,
                    train_labels,
                    class_weight=class_weight,
                    validation_split=VALIDATION_SPLIT,
                    epochs=NUM_EPOCHS,
                    batch_size=BATCH_SIZE)

# Save the model under a name that encodes the hyper-parameter constants.
model.save("models/model_EP_%s_DO_%s_BAT_%s_LR_%s.h5" %
           (str(NUM_EPOCHS), str(DROPOUT), str(BATCH_SIZE), str(LR)))
Beispiel #4
0
    # Pick the top-k accuracy metric for this value of i (1 -> top-2, ...,
    # 4 -> top-5).
    # NOTE(review): no branch is visible here for other values of i; confirm
    # top_k_acc is assigned before this point in those cases.
    if(i == 1):
        top_k_acc = top_2_categorical_accuracy
    if(i == 2):
        top_k_acc = top_3_categorical_accuracy
    if(i == 3):
        top_k_acc = top_4_categorical_accuracy
    if(i == 4):
        top_k_acc = top_5_categorical_accuracy
        # variable setup
    group_train_text = group1_train_text
    group_train_label = group1_train_label
    group_test_text = group1_test_text
    group_test_label = group1_test_label

    # "Global" model: GRU(32) -> Dropout -> Dense(256, relu) -> Dropout ->
    # 18-way softmax over inputs of shape (8, 18).
    global_first_inputs = Input(shape = (8,18,))
    global_inputs = GRU(units = 32,return_sequences=False)(global_first_inputs)
    global_inputs = Dropout(0.35)(global_inputs)
    global_inputs = Dense(units=256,activation='relu')(global_inputs)
    global_inputs = Dropout(0.35)(global_inputs)
    global_outputs = Dense(units = 18,activation='softmax')(global_inputs)
    global_model = Model(inputs = global_first_inputs , outputs = global_outputs )

    # "Group" model: same layer stack as the global model, built as a
    # separate network with its own layers.
    group_first_inputs = Input(shape = (8,18,))
    group_inputs = GRU(units = 32,return_sequences=False)(group_first_inputs)
    group_inputs = Dropout(0.35)(group_inputs)
    group_inputs = Dense(units=256,activation='relu')(group_inputs)
    group_inputs = Dropout(0.35)(group_inputs)
    group_outputs = Dense(units = 18,activation='softmax')(group_inputs)
    group_model = Model(inputs = group_first_inputs , outputs = group_outputs)

    global_model.summary()
Beispiel #5
0
    def build(self):
        """Assemble the network and store every intermediate tensor on self.

        The graph is three Conv3D stages, two bidirectional GRU layers, a
        self-attention layer and a softmax output, wrapped in a model whose
        output is the CTC loss. Reads ``img_c``, ``frames_n``, ``img_w``,
        ``img_h``, ``output_size`` and ``absolute_max_string_len`` from self
        and sets ``self.model``.
        """
        # Axis order of the input depends on the backend's image data format.
        if K.image_data_format() == 'channels_first':
            input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
        else:
            input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input',
                                shape=input_shape,
                                dtype='float32')

        # Stage 1: zero-pad -> Conv3D(32, stride (1, 2, 2)) -> batch norm ->
        # ReLU -> spatial dropout -> max-pool over the last two pooled axes.
        self.zero1 = ZeroPadding3D(padding=(1, 2, 2),
                                   name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5),
                            strides=(1, 2, 2),
                            kernel_initializer='he_normal',
                            name='conv1')(self.zero1)
        self.batc1 = BatchNormalization(name='batc1')(self.conv1)
        self.actv1 = Activation('relu', name='actv1')(self.batc1)
        self.drop1 = SpatialDropout3D(0.5)(self.actv1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2),
                                  strides=(1, 2, 2),
                                  name='max1')(self.drop1)

        # Stage 2: same layout with 64 filters and unit convolution stride.
        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.maxp1)
        self.conv2 = Conv3D(64, (3, 5, 5),
                            strides=(1, 1, 1),
                            kernel_initializer='he_normal',
                            name='conv2')(self.zero2)
        self.batc2 = BatchNormalization(name='batc2')(self.conv2)
        self.actv2 = Activation('relu', name='actv2')(self.batc2)
        self.drop2 = SpatialDropout3D(0.5)(self.actv2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2),
                                  strides=(1, 2, 2),
                                  name='max2')(self.drop2)

        # Stage 3: 96 filters with a 3x3x3 kernel and matching padding of 1.
        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.maxp2)
        self.conv3 = Conv3D(96, (3, 3, 3),
                            strides=(1, 1, 1),
                            kernel_initializer='he_normal',
                            name='conv3')(self.zero3)
        self.batc3 = BatchNormalization(name='batc3')(self.conv3)
        self.actv3 = Activation('relu', name='actv3')(self.batc3)
        self.drop3 = SpatialDropout3D(0.5)(self.actv3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2),
                                  strides=(1, 2, 2),
                                  name='max3')(self.drop3)

        # Flatten each step of the first non-batch axis into a feature vector
        # so the recurrent layers receive a sequence.
        self.resh1 = TimeDistributed(Flatten())(self.maxp3)

        # Two stacked bidirectional GRUs; forward and backward outputs are
        # concatenated.
        self.gru_1 = Bidirectional(GRU(256,
                                       return_sequences=True,
                                       kernel_initializer='Orthogonal',
                                       name='gru1'),
                                   merge_mode='concat')(self.resh1)
        self.gru_2 = Bidirectional(GRU(256,
                                       return_sequences=True,
                                       kernel_initializer='Orthogonal',
                                       name='gru2'),
                                   merge_mode='concat')(self.gru_1)
        # Multiplicative self-attention over the GRU outputs, with L2 kernel,
        # L1 bias and attention regularisation.
        self.attn = SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            kernel_regularizer=keras.regularizers.l2(1e-4),
            bias_regularizer=keras.regularizers.l1(1e-4),
            attention_regularizer_weight=1e-4,
            name='Attention')(self.gru_2)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size,
                            kernel_initializer='he_normal',
                            name='dense1')(self.attn)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        # Extra inputs that are passed to the CTC loss together with y_pred.
        self.labels = Input(name='the_labels',
                            shape=[self.absolute_max_string_len],
                            dtype='float32')
        self.input_length = Input(name='input_length',
                                  shape=[1],
                                  dtype='int64')
        self.label_length = Input(name='label_length',
                                  shape=[1],
                                  dtype='int64')

        # CTC is defined elsewhere; it receives the layer name and the four
        # tensors and its result is used as the model output.
        self.loss_out = CTC(
            'ctc',
            [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[
            self.input_data, self.labels, self.input_length, self.label_length
        ],
                           outputs=self.loss_out)
def evaluate_fold(fold_ix, use_pretrained_embedding, bi_directional, num_rnns,
                  merge_mode, hidden_size):
    """Train a GRU sequence tagger on one fold and return its predictions.

    Training runs one epoch at a time for at most 30 epochs and stops once the
    dev-set micro F1 has failed to improve ``patience`` (3) times in a row.
    The weights from the best dev epoch are restored before predicting.

    Args:
        fold_ix: key into fold2training_data / fold2dev_data / fold2test_data.
        use_pretrained_embedding: initialise the embedding layer from
            get_embedding_matrix() instead of using a fresh embedding.
        bi_directional: wrap every GRU in Bidirectional.
        num_rnns: number of stacked recurrent layers.
        merge_mode: merge mode passed to Bidirectional.
        hidden_size: number of units in each GRU.

    Returns:
        Tuple ``(train_predictions_by_tag, test_predictions_by_tag,
        train_ys_by_tag, test_ys_by_tag)``.
    """
    if not use_pretrained_embedding:
        embedding_layer = Embedding(max_features,
                                    embedding_size,
                                    input_length=maxlen,
                                    trainable=True,
                                    mask_zero=True)
    else:
        embedding_matrix = get_embedding_matrix(unique_words,
                                                generator,
                                                max_features,
                                                init='uniform',
                                                unit_length=False)
        embedding_layer = Embedding(max_features,
                                    EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=maxlen,
                                    trainable=True,
                                    mask_zero=True)

    def rnn_layer_fact():
        """Create one recurrent layer, bidirectional when requested."""
        recurrent = GRU(hidden_size,
                        return_sequences=True,
                        consume_less="cpu")
        if bi_directional:
            return Bidirectional(recurrent, merge_mode=merge_mode)
        return recurrent

    model = Sequential()
    model.add(embedding_layer)
    for _ in range(num_rnns):
        model.add(rnn_layer_fact())

    # Per-time-step projection to out_size classes followed by softmax.
    model.add(TimeDistributedDense(out_size))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  sample_weight_mode="temporal")

    train_fold = fold2training_data[fold_ix]
    X_train, y_train, train_ys_by_tag, seq_len_train = train_fold
    X_dev, _y_dev, dev_ys_by_tag, seq_len_dev = fold2dev_data[fold_ix]
    X_test, _y_test, test_ys_by_tag, seq_len_test = fold2test_data[fold_ix]

    # Early-stopping state; the -1 sentinel sits below any score seen so far.
    f1_scores = [-1]
    stale_epochs = 0
    patience = 3
    best_weights = None
    epochs_per_fit = 1

    for epoch in range(30):
        print("{ts}: Epoch={epoch}".format(ts=get_ts(), epoch=epoch))
        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  nb_epoch=epochs_per_fit,
                  validation_split=0.0,
                  verbose=0)
        micro_metrics, _ = score_predictions(model, X_dev, dev_ys_by_tag,
                                             seq_len_dev)
        print(micro_metrics)

        current_f1 = micro_metrics.f1_score
        improved = not (current_f1 <= max(f1_scores))
        if improved:
            stale_epochs = 0
            best_weights = model.get_weights()
        else:
            stale_epochs += 1

        f1_scores.append(current_f1)
        if stale_epochs >= patience:
            break

    # Restore the weights of the best dev epoch before predicting.
    model.set_weights(best_weights)
    train_predictions_by_tag = get_predictions(model, X_train, train_ys_by_tag,
                                               seq_len_train)
    test_predictions_by_tag = get_predictions(model, X_test, test_ys_by_tag,
                                              seq_len_test)
    return (train_predictions_by_tag, test_predictions_by_tag,
            train_ys_by_tag, test_ys_by_tag)
Beispiel #7
0
# Project C and T (defined earlier) to 32 units each and sum them.
C_1    = Dense(32, activation='relu')(C);
C_2    = Dense(32, activation='relu')(T);
C_     = Add()([C_1,C_2]);
R_S    = Dense(DIM_Gs, activation='relu')(C_); # the output GFT;
#------ I(V,l,G_s[t],G_s[t-1]) ----
# Project C_, Gs_in and Gs_in_ to DIM_hm units, sum them, then map the sum to
# DIM_Gi units.
C_i    = Dense(DIM_hm, activation='relu')(C_);
S_i    = Dense(DIM_hm, activation='relu')(Gs_in);
S_i_   = Dense(DIM_hm, activation='relu')(Gs_in_);
G_i    = Add()([C_i,S_i,S_i_]);
G_i    = Dense(32, activation='relu')(G_i);
I_G    = Dense(DIM_Gi, activation='relu')(G_i);
#------ GRU for Pi(G_s,G_i,C*) -------
# Embed Gs_in and Gi_in, run each through its own GRU, then project and sum.
G_S    = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_Gs, input_length=ACT_STEPS,name = 'emb1')(Gs_in);
G_I    = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_Gi, input_length=ACT_STEPS,name = 'emb2')(Gi_in);
# L      = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_COM, input_length=ACT_STEPS,name = 'emb3')(l_in);
G_S    = GRU(units=DIM_ha)(G_S);
G_I    = GRU(units=DIM_ha)(G_I);
G_S    = Dense(ACT_OUT_DIM, activation='relu')(G_S);
G_I    = Dense(ACT_OUT_DIM, activation='relu')(G_I);
h_a    = Add()([G_S,G_I]);
# l_in goes through a Dense projection (not the commented-out embedding) and
# is added to the combined GRU features.
L      = Dense(ACT_OUT_DIM, activation='relu')(l_in);
h_a    = Dense(ACT_OUT_DIM, activation='relu')(h_a);
h_a    = Add()([h_a,L]);
# Two separate output heads computed from the same hidden tensor h_a.
A_out  = Dense(ACT_OUT_DIM, activation='relu')(h_a);
Q_out  = Dense(ACT_OUT_DIM, activation='relu')(h_a);



# NOTE(review): o_in is listed as a model input but is not used by any layer
# in the lines visible here; confirm it is connected to the outputs elsewhere.
model  = Model(inputs=[Gs_in,Gi_in,l_in,o_in,Gs_in_], outputs=[A_out,R_S,I_G,Q_out]);
<<<<<<< HEAD
sgd    = optimizers.SGD(lr=0.00001, decay=0.0, momentum=0.4, nesterov=True);
Beispiel #8
0
    'reset_states': reset_states,
    'num_layers': num_layers,
    'classes': class_string
}
# Name the run after the current Unix time (whole seconds) plus the
# hyper-parameters stored in `fd`.
t = str(int(round(time.time())))
model_name = t + '-num_layers_%(num_layers)s_maxlen_%(input_length)s_lstmsize_%(lstm_size)s_trainsize_%(trainsize)s_testsize_%(testsize)s_classes_%(classes)s' % fd

# Each run gets its own sub-directory under the incoming model_path.
model_path = model_path + model_name + '/'
if not os.path.exists(model_path):
    os.makedirs(model_path)

# Define an input sequence and process it.
# Although the variables are named lstm_*, the recurrent layers are GRUs.
inputs = Input(shape=(None, input_dim))
lstm_outputs = inputs
# All layers except the last return full sequences so they can be stacked.
for layer_no in range(num_layers - 1):
    lstm_outputs = GRU(lstm_size, return_state=False,
                       return_sequences=True)(lstm_outputs)
#last layer, that does not return sequences
lstm_outputs = GRU(lstm_size, return_state=False,
                   return_sequences=False)(lstm_outputs)

dense = Dense(num_classes, activation='softmax')
outputs = dense(lstm_outputs)

model = Model(inputs, outputs)
# Replace the optimizer name with a configured optimizer instance; any other
# value of `optimizer` is passed to compile() unchanged.
if optimizer == 'RMS': optimizer = RMSprop(lr=learning_rate)
if optimizer == 'Adam': optimizer = Adam(lr=learning_rate)
# NOTE(review): a softmax output is combined with binary_crossentropy while
# categorical_crossentropy is commented out; confirm this is intentional.
#loss = 'categorical_crossentropy'
loss = 'binary_crossentropy'
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# NOTE(review): this prints the return value of model.summary() in addition
# to whatever summary() prints itself.
print(model.summary())
Beispiel #9
0
def NN_model(args, training=True):
    """Build a CNN backbone + two-layer bidirectional GRU recogniser with CTC.

    Args:
        args: object whose ``model`` attribute names the backbone. Supported
            values are 'densenet121', 'resnet18', 'resnet18_2222',
            'resnet18_2222_64', 'resnet34', 'resnet50' and 'resnet101'.
        training: when true, build the model whose output is the CTC loss;
            otherwise build the model that outputs the softmax predictions.

    Returns:
        When ``training`` is true, a tuple ``(model, conv_shape[1])``: the
        loss model and the size of the sequence axis fed to the RNN.
        Otherwise a single ``Model`` mapping the image input to ``y_pred``.

    Raises:
        TypeError: if ``args.model`` is not a supported backbone name.
    """
    image_shape = (N_COL, N_ROW, 3)

    # ResnetBuilder factory method for each supported resnet variant.
    resnet_builders = {
        'resnet18': 'build_resnet_18',
        'resnet18_2222': 'build_resnet_18_2222',
        'resnet18_2222_64': 'build_resnet_18_2222_start_from64',
        'resnet34': 'build_resnet_34',
        'resnet50': 'build_resnet_50',
        'resnet101': 'build_resnet_101',
    }

    if args.model == 'densenet121':
        from keras.applications.densenet import DenseNet121
        input_tensor = Input(shape=image_shape)
        base_model = DenseNet121(input_shape=image_shape,
                                 include_top=False,
                                 weights='imagenet',
                                 input_tensor=input_tensor,
                                 pooling=None)
    elif args.model in resnet_builders:
        import resnet
        # num_outputs is passed as a placeholder because include_top=False.
        NOT_CARE = 1
        build_backbone = getattr(resnet.ResnetBuilder,
                                 resnet_builders[args.model])
        base_model = build_backbone(input_shape=image_shape,
                                    num_outputs=NOT_CARE,
                                    include_top=False)
    else:
        raise TypeError('model should be in the list of the supported model!')

    print('Input col: ', N_COL)
    print('Input row: ', N_ROW)

    x = base_model.output
    # CNN to RNN: swap axes 1 and 2 of the feature map so the axis that
    # becomes the RNN sequence axis comes first.
    x = Lambda(lambda t: K.permute_dimensions(t, (0, 2, 1, 3)))(x)
    conv_shape = x.get_shape()  # shape after the permutation
    print('conv_shape', conv_shape)
    # Merge the last two axes into one feature vector per sequence step.
    x = Reshape(target_shape=(int(conv_shape[1]),
                              int(conv_shape[2] * conv_shape[3])),
                name='reshape')(x)
    x = Dense(para.dense_size,
              activation='relu',
              kernel_initializer='he_normal',
              name='dense1')(x)

    def _gru(layer_name, tensor, backwards=False):
        """Apply one sequence-returning GRU of ``para.rnn_size`` units.

        Uses the Keras 2 ``kernel_initializer`` keyword (formerly ``init``)
        so the recurrent layers match the Dense layers in this function.
        """
        return GRU(para.rnn_size,
                   return_sequences=True,
                   go_backwards=backwards,
                   kernel_initializer='he_normal',
                   name=layer_name)(tensor)

    # First bidirectional layer: forward and backward outputs are summed.
    gru_1 = _gru('gru1', x)
    gru_1b = _gru('gru1_b', x, backwards=True)
    gru1_merged = add([gru_1, gru_1b])
    gru1_merged = BatchNormalization()(gru1_merged)

    # Second bidirectional layer: outputs are concatenated.
    gru_2 = _gru('gru2', gru1_merged)
    gru_2b = _gru('gru2_b', gru1_merged, backwards=True)
    gru2_merged = concatenate([gru_2, gru_2b])
    gru2_merged = BatchNormalization()(gru2_merged)

    # Per-step class scores followed by softmax.
    inner = Dense(para.num_classes,
                  kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        return Model(inputs=[base_model.input], outputs=y_pred)

    labels = Input(name='the_labels',
                   shape=[para.max_text_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss needs extra inputs, so it is computed in a Lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length,
                                   label_length])

    return Model(
        inputs=[base_model.input, labels, input_length, label_length],
        outputs=loss_out), conv_shape[1]
Beispiel #10
0
 def build(input_shape=(32, None, 1),
           rnn_unit=256,
           num_classes=5990,
           max_string_len=10):
     """Build a CNN + bidirectional GRU recogniser and its CTC training model.

     Args:
         input_shape: shape of one input image, excluding the batch axis.
         rnn_unit: number of units in each GRU.
         num_classes: size of the softmax output.
         max_string_len: length of the label input.

     Returns:
         Tuple ``(base_model, model)``: ``base_model`` maps the image to the
         per-step softmax output; ``model`` additionally takes the label,
         sequence length and label length and outputs the CTC loss.
     """
     # Named image_in rather than `input` to avoid shadowing the builtin.
     image_in = Input(shape=input_shape, name='the_input')
     m = Conv2D(64,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv1')(image_in)
     m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m)
     m = Conv2D(128,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv2')(m)
     m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m)
     m = Conv2D(256,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv3')(m)
     m = Conv2D(256,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv4')(m)
     # From here on pooling only reduces the first spatial axis.
     m = MaxPooling2D(pool_size=(2, 1),
                      strides=(2, 1),
                      padding='valid',
                      name='pool3')(m)
     m = Conv2D(512,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv5')(m)
     m = BatchNormalization(axis=3)(m)
     m = Conv2D(512,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv6')(m)
     m = BatchNormalization(axis=3)(m)
     m = MaxPooling2D(pool_size=(2, 1),
                      strides=(2, 1),
                      padding='valid',
                      name='pool4')(m)
     m = Conv2D(512,
                kernel_size=(2, 2),
                activation='relu',
                padding='valid',
                name='conv7')(m)
     # Swap the two spatial axes, then flatten each step into a feature
     # vector for the recurrent layers.
     m = Permute((2, 1, 3), name='permute')(m)
     m = TimeDistributed(Flatten(), name='timedistrib')(m)
     # The layers are GRUs; the 'blstm*' layer names are left unchanged.
     m = Bidirectional(GRU(rnn_unit,
                           return_sequences=True,
                           implementation=2),
                       name='blstm1')(m)
     m = Bidirectional(GRU(rnn_unit,
                           return_sequences=True,
                           implementation=2),
                       name='blstm2')(m)
     y_pred = Dense(num_classes, name='blstm2_out', activation='softmax')(m)
     base_model = Model(inputs=image_in, outputs=y_pred)

     label = Input(name='label', shape=[max_string_len], dtype='int64')
     seq_length = Input(name='seq_length', shape=[1], dtype='int64')
     label_length = Input(name='label_length', shape=[1], dtype='int64')
     # NOTE(review): the tensors are passed as [label, y_pred, ...]; confirm
     # this matches the argument order ctc_lambda_func unpacks.
     loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
         [label, y_pred, seq_length, label_length])
     # Use the same inputs=/outputs= keywords as base_model above instead of
     # the legacy input=/output= spelling.
     model = Model(inputs=[image_in, label, seq_length, label_length],
                   outputs=[loss_out])
     model.summary()
     return base_model, model
    def training(self):
        """Train a stacked-GRU model on the dataset named by ``sys.argv[1]``.

        Reads ``<PATH>/data/<name>/<name>.csv`` through
        ``pre_processing_for_data``, fits the model, then writes a JSON
        summary to ``<PATH>/data/<name>/<name>.out`` and saves the model to
        ``<PATH>/data/<name>/<model_name>/model.h5``.

        The training data is also used as validation and evaluation data.
        """
        # init network configuration
        self.setColumns()

        # Directory holding this dataset's input and output files.
        data_dir = self.PATH + '/data/' + sys.argv[1] + '/'

        # preprocessing data
        X_train, Y_train = self.pre_processing_for_data(
            str(data_dir + sys.argv[1] + '.csv'))

        # create model
        model = Sequential()
        # input layer
        model.add(
            GRU(output_dim=int(self.hidden_unit[0]),
                return_sequences=True,
                input_shape=(self.timesteps, self.data_dim)))
        # hidden layers: only the last one collapses the sequence, and only
        # the sequence-returning ones are followed by dropout
        last_layer = self.hidden_layer - 1
        for i in range(0, self.hidden_layer):
            if i == last_layer:
                model.add(GRU(int(self.hidden_unit[i])))
            else:
                model.add(GRU(int(self.hidden_unit[i]), return_sequences=True))
                model.add(Dropout(float(self.dropout[i])))
        # output layer
        model.add(Dense(len(Y_train[0]), activation=self.in_activation))
        # set cost function, optimizer, metrics
        model.compile(loss=self.loss_function,
                      optimizer=rmsprop(lr=self.learning_rate),
                      metrics=['accuracy'])
        # do training
        model.fit(X_train,
                  Y_train,
                  batch_size=self.batch_size,
                  nb_epoch=self.epoch,
                  validation_data=(X_train, Y_train))

        score = model.evaluate(X_train, Y_train, verbose=0)
        model_name = 'train_' + time.strftime("%Y%m_%d_%H_%M",
                                              time.localtime())

        summary = {
            'ntb': {
                'model_name': model_name,
                'samples': self.samples,
                'score': score[0],
                'accuracy': score[1],
            },
        }
        # Open the report only once the results exist, and inside ``with`` so
        # the handle is closed even if serialisation fails.
        with open(data_dir + sys.argv[1] + '.out', 'w') as out:
            json.dump(summary, out, separators=(',', ':'))

        # save model
        model_dir = data_dir + model_name
        if not path.exists(model_dir):
            mkdir(model_dir)
        model.save(model_dir + '/' + 'model' + '.h5')
Beispiel #12
0
                                      test_size=int(len(X) * 0.2),
                                      shuffle=False)

# Training

# Network and training hyper-parameters: one input feature per time step,
# 20 hidden units, one output value.
n_in = 1
n_hidden = 20
n_out = 1
epochs = 10
batch_size = 10

# A single GRU layer followed by a linear output layer.
model = Sequential()
#model.add(SimpleRNN(n_hidden, input_shape=(inputlen, n_in), kernel_initializer='random_normal'))
model.add(
    GRU(n_hidden,
        input_shape=(inputlen, n_in),
        kernel_initializer='random_normal'))
model.add(Dense(n_out, kernel_initializer='random_normal'))
model.add(Activation('linear'))
model.compile(loss='mean_squared_error',
              optimizer=Adam(lr=0.01, beta_1=0.9, beta_2=0.999))
model.fit(x,
          y,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(val_x, val_y))

# Prediction

in_ = x[:1]  # first element of x, a (1,20,1) array
# Placeholder entries, one per input time step.
predicted = [None for _ in range(inputlen)]
Beispiel #13
0
def train(load=None):
    """Build the CNN + bidirectional-GRU CTC network, then train or load it.

    The graph is sized from the module-level ``IMG_W``, ``IMG_H``,
    ``LETTERS`` and ``MAX_OUT_LEN`` settings; training data is read from
    ``DATA_PATH``.

    Args:
        load: Falsy to train a fresh model for one epoch and save it under
            ``./models``. Otherwise a value accepted by ``load_model``
            (e.g. a file path) naming a previously saved model, which is
            loaded and compiled instead of training.

    Returns:
        The compiled Keras model (freshly trained, or loaded from ``load``).
    """
    import os  # local import: only needed to create the output directory

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, IMG_W, IMG_H)
    else:
        input_shape = (IMG_W, IMG_H, 1)

    batch_size = 32
    # Two 2x2 poolings shrink the time axis by pool_size**2.
    downsample_factor = pool_size**2
    # One class per letter plus one extra class (the CTC blank).
    output_size = len(LETTERS) + 1
    if not load:
        tiger_train = TextImageGenerator(join(DATA_PATH, 'train'), batch_size,
                                         downsample_factor)
        tiger_val = TextImageGenerator(join(DATA_PATH, 'val'), batch_size,
                                       downsample_factor)
        print(tiger_train.n)
        print(tiger_val.n)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # Collapse (height, channels) into one feature axis so that the width
    # axis becomes the RNN time axis.
    conv_to_rnn_dims = (IMG_W // (pool_size**2),
                        (IMG_H // (pool_size**2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(output_size, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    # Print the image -> softmax sub-network for inspection.
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[MAX_OUT_LEN], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    if load:
        model = load_model(load, compile=False)
        print('Model loaded from file.')
    else:
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=sgd,
                  metrics=['accuracy'])

    if not load:
        # Create a TensorBoard instance with the path to the logs directory
        tensorboard = TensorBoard(log_dir='logs/{}'.format(time()),
                                  batch_size=batch_size,
                                  update_freq=128)

        model.fit_generator(generator=tiger_train.next_batch(),
                            steps_per_epoch=tiger_train.n,
                            epochs=1,
                            validation_data=tiger_val.next_batch(),
                            validation_steps=tiger_val.n,
                            callbacks=[tensorboard])

        # save model and architecture to single file
        models_dir = './models'
        # Create the target directory up front so a finished training run
        # is not lost to a missing-directory error at save time.
        os.makedirs(models_dir, exist_ok=True)
        modelName = join(
            models_dir, "model-" + str(datetime.datetime.utcnow()).replace(
                ' ', '_').replace(':', '-').replace('.', '-') + ".h5")
        model.save(modelName)
        print("Saved model to disk:%s\n" % modelName)

    return model
Beispiel #14
0
        graph.add_node(TimeDistributedMaxPooling2D(pool_size=(SENTENCE_LENGTH -
                                                              n + 1, 1)),
                       name='maxpool{}gram'.format(n),
                       input='conv{}gram'.format(n))

        graph.add_node(Dropout(0.15),
                       name='dropout{}gram'.format(n),
                       input='maxpool{}gram'.format(n))

        graph.add_node(TimeDistributedFlatten(),
                       name='flatten{}gram'.format(n),
                       input='dropout{}gram'.format(n))

    # Add a forward and a backward GRU (72 units each) that both read every
    # per-n-gram 'flatten{n}gram' node, joined with concat_axis=-1.
    log('Adding bi-directional GRU')
    graph.add_node(GRU(72),
                   name='gru_forwards',
                   inputs=['flatten{}gram'.format(n) for n in NGRAMS],
                   concat_axis=-1)
    graph.add_node(GRU(72, go_backwards=True),
                   name='gru_backwards',
                   inputs=['flatten{}gram'.format(n) for n in NGRAMS],
                   concat_axis=-1)
    # graph.add_node(GRU(16), name='gru', input='flatten4gram')

    # NOTE(review): not read within this fragment; presumably toggles an
    # extra fully connected layer further down -- confirm.
    ADDITIONAL_FC = True

    # Dropout over the two GRU directions. How the two inputs are merged is
    # left to add_node's default -- TODO confirm.
    graph.add_node(Dropout(0.7),
                   name='gru_dropout',
                   inputs=['gru_forwards', 'gru_backwards'])
Beispiel #15
0
def test_attention_mm1(batch_size, word_embed_size, sent_embed_size,
                       doc_embed_size, vocab_size, max_words, max_sents,
                       num_classes, should_fit_model):
    """Smoke-test a two-document classifier joined by ``AttentionMM``.

    Each document (left and right) is a ``(max_sents, max_words)`` grid of
    token ids. Sentences are embedded and encoded by a bidirectional GRU,
    the sentence vectors are encoded by a second bidirectional GRU, and the
    two document encodings are combined by
    ``custom_attn.AttentionMM("concat")`` before a small softmax classifier.

    Args:
        batch_size: Batch size for fitting; ``2 * batch_size`` random
            samples are generated.
        word_embed_size: Width of the word embedding.
        sent_embed_size: Units of the sentence-level GRU (per direction).
        doc_embed_size: Units of the document-level GRU (per direction).
        vocab_size: Number of rows in the embedding matrix.
        max_words: Tokens per sentence.
        max_sents: Sentences per document.
        num_classes: Number of output classes.
        should_fit_model: When truthy, compile the model and fit it for one
            epoch on random data.

    Returns:
        None. The model summary is printed as a side effect.
    """
    # The same random embedding matrix initialises both embedding layers.
    E = np.random.random((vocab_size, word_embed_size))

    # LHS sentence
    sent_in_left = Input(shape=(max_words,), dtype="int32")
    sent_emb_left = Embedding(input_dim=vocab_size,
                              output_dim=word_embed_size,
                              mask_zero=True,
                              weights=[E])(sent_in_left)
    sent_enc_left = Bidirectional(GRU(sent_embed_size,
                                      return_sequences=False))(sent_emb_left)

    sent_model_left = Model(inputs=sent_in_left, outputs=sent_enc_left)

    # RHS sentence
    sent_in_right = Input(shape=(max_words,), dtype="int32")
    sent_emb_right = Embedding(input_dim=vocab_size,
                               output_dim=word_embed_size,
                               mask_zero=True,
                               weights=[E])(sent_in_right)
    sent_enc_right = Bidirectional(GRU(sent_embed_size,
                                       return_sequences=False))(sent_emb_right)

    sent_model_right = Model(inputs=sent_in_right, outputs=sent_enc_right)

    # LHS document: run the sentence encoder over every sentence.
    doc_in_left = Input(shape=(max_sents, max_words), dtype="int32")

    doc_emb_left = TimeDistributed(sent_model_left)(doc_in_left)

    doc_enc_left = Bidirectional(GRU(doc_embed_size,
                                     return_sequences=True))(doc_emb_left)

    # RHS document
    doc_in_right = Input(shape=(max_sents, max_words), dtype="int32")

    doc_emb_right = TimeDistributed(sent_model_right)(doc_in_right)

    doc_enc_right = Bidirectional(GRU(doc_embed_size,
                                      return_sequences=True))(doc_emb_right)

    # attention
    doc_att = custom_attn.AttentionMM("concat")([doc_enc_left, doc_enc_right])

    # prediction
    fc1_dropout = Dropout(0.2)(doc_att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.2)(fc1)
    doc_pred = Dense(num_classes, activation="softmax")(fc2_dropout)

    model = Model(inputs=[doc_in_left, doc_in_right], outputs=doc_pred)
    model.summary()

    if should_fit_model:
        n_samples = batch_size * 2
        # Draw integer token ids. Floats from np.random.random() all fall in
        # [0, 1) and would truncate to id 0, which mask_zero=True treats as
        # padding, so the model would be fitted on fully masked input.
        Xleft = np.random.randint(0, vocab_size,
                                  (n_samples, max_sents, max_words))
        Xright = np.random.randint(0, vocab_size,
                                   (n_samples, max_sents, max_words))
        y = np.random.randint(0, num_classes, n_samples)
        Y = np_utils.to_categorical(y, num_classes=num_classes)
        model.compile(optimizer="adam", loss="categorical_crossentropy")
        model.fit([Xleft, Xright], Y, batch_size=batch_size, epochs=1)
Beispiel #16
0
def create_model(Vocabulary_size, X_max_len, n_phonetic_features, n1, n2, n3, n4, n5, n6, HIDDEN_DIM, LAYER_NUM):
    """Build the multi-task model: six tag classifiers plus a seq2seq decoder.

    A shared embedding feeds the current word and its four right / four left
    context words through width-4 and width-5 convolutions. The pooled
    feature maps are concatenated, encoded by a bidirectional GRU, merged
    with the phonetic features and passed to six softmax heads. A separate
    GRU encoder/decoder with dot-product attention over the current word
    produces a per-timestep distribution over the vocabulary.

    Reads the module-level ``EMBEDDING_DIM``, ``no_filters`` and
    ``rnn_output_size``.

    Args:
        Vocabulary_size: Number of symbols; sizes both embedding layers and
            the decoder's softmax output.
        X_max_len: Length of every word input sequence.
        n_phonetic_features: Width of the phonetic feature vector input.
        n1, n2, n3, n4, n5, n6: Class counts of the six softmax heads
            ``output1`` .. ``output6``.
        HIDDEN_DIM: Units of the two dense layers before the heads.
        LAYER_NUM: Unused; kept for interface compatibility.

    Returns:
        An uncompiled Keras ``Model`` with 11 inputs and 7 outputs (the
        decoder output first, then ``output1`` .. ``output6``).
    """
    current_word = Input(shape=(X_max_len,), dtype='float32', name='input1')  # for encoder (shared)
    decoder_input = Input(shape=(X_max_len,), dtype='float32', name='input3')  # for decoder -- attention
    right_word1 = Input(shape=(X_max_len,), dtype='float32', name='input4')
    right_word2 = Input(shape=(X_max_len,), dtype='float32', name='input5')
    right_word3 = Input(shape=(X_max_len,), dtype='float32', name='input6')
    right_word4 = Input(shape=(X_max_len,), dtype='float32', name='input7')
    left_word1 = Input(shape=(X_max_len,), dtype='float32', name='input8')
    left_word2 = Input(shape=(X_max_len,), dtype='float32', name='input9')
    left_word3 = Input(shape=(X_max_len,), dtype='float32', name='input10')
    left_word4 = Input(shape=(X_max_len,), dtype='float32', name='input11')
    phonetic_input = Input(shape=(n_phonetic_features,), dtype='float32', name='input12')

    # One embedding layer shared by the current word and all context words.
    emb_layer1 = Embedding(Vocabulary_size, EMBEDDING_DIM,
                           input_length=X_max_len,
                           mask_zero=False, name='Embedding')

    list_of_inputs = [current_word, right_word1, right_word2, right_word3, right_word4,
                      left_word1, left_word2, left_word3, left_word4]

    list_of_embeddings = [emb_layer1(i) for i in list_of_inputs]

    list_of_embeddings = [Dropout(0.50, name='drop1_' + str(i))(j) for i, j in
                          enumerate(list_of_embeddings)]

    list_of_embeddings = [GaussianNoise(0.05, name='noise1_' + str(i))(j) for i, j in
                          enumerate(list_of_embeddings)]

    # Width-4 convolutions; max- and average-pooled maps are summed per word.
    conv4s = [Conv1D(filters=no_filters,
                     kernel_size=4, padding='valid', activation='relu',
                     strides=1, name='conv4_' + str(i))(j)
              for i, j in enumerate(list_of_embeddings)]

    maxPool4 = [MaxPooling1D(name='max4_' + str(i))(j) for i, j in enumerate(conv4s)]
    avgPool4 = [AveragePooling1D(name='avg4_' + str(i))(j) for i, j in enumerate(conv4s)]

    pool4s = [add([i, j], name='merge_conv4_' + str(k))
              for k, (i, j) in enumerate(zip(maxPool4, avgPool4))]

    # Width-5 convolutions, pooled and merged the same way.
    conv5s = [Conv1D(filters=no_filters,
                     kernel_size=5,
                     padding='valid',
                     activation='relu',
                     strides=1, name='conv5_' + str(i))(j)
              for i, j in enumerate(list_of_embeddings)]

    maxPool5 = [MaxPooling1D(name='max5_' + str(i))(j) for i, j in enumerate(conv5s)]
    avgPool5 = [AveragePooling1D(name='avg5_' + str(i))(j) for i, j in enumerate(conv5s)]

    pool5s = [add([i, j], name='merge_conv5_' + str(k))
              for k, (i, j) in enumerate(zip(maxPool5, avgPool5))]

    mergedPools = pool4s + pool5s

    concat = concatenate(mergedPools, name='main_merge')

    x = Dropout(0.15, name='drop_single1')(concat)
    # Feed the dropout output into the GRU. Previously the GRU read `concat`
    # directly, which silently left `drop_single1` out of the model.
    x = Bidirectional(GRU(rnn_output_size), name='bidirec1')(x)

    total_features = [x, phonetic_input]
    concat2 = concatenate(total_features, name='phonetic_merging')

    x = Dense(HIDDEN_DIM, activation='relu', kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)
    x = Dense(HIDDEN_DIM, kernel_initializer='he_normal', activation='tanh',
              kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    out1 = Dense(n1, kernel_initializer='he_normal', activation='softmax', name='output1')(x)
    out2 = Dense(n2, kernel_initializer='he_normal', activation='softmax', name='output2')(x)
    out3 = Dense(n3, kernel_initializer='he_normal', activation='softmax', name='output3')(x)
    out4 = Dense(n4, kernel_initializer='he_normal', activation='softmax', name='output4')(x)
    out5 = Dense(n5, kernel_initializer='he_normal', activation='softmax', name='output5')(x)
    out6 = Dense(n6, kernel_initializer='he_normal', activation='softmax', name='output6')(x)

    # Luong et al. 2015 attention model
    emb_layer = Embedding(Vocabulary_size, EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True, name='Embedding_for_seq2seq')

    current_word_embedding = emb_layer(list_of_inputs[0])

    # `state` is returned because return_state=True but is not used; the
    # decoder is seeded from the last encoder timestep instead.
    encoder, state = GRU(rnn_output_size, return_sequences=True, unroll=True, return_state=True, name='encoder')(current_word_embedding)
    encoder_last = encoder[:, -1, :]

    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size, return_sequences=True, unroll=True, name='decoder')(decoder, initial_state=[encoder_last])

    # Attention weights: decoder-vs-encoder dot products, softmax-normalised.
    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)

    # Context vectors: attention-weighted sums of the encoder outputs.
    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder], name='concatenate')

    outputs = TimeDistributed(Dense(64, activation='tanh'), name='td1')(decoder_combined_context)
    outputs = TimeDistributed(Dense(Vocabulary_size, activation='softmax'), name='td2')(outputs)

    all_inputs = [
        current_word, decoder_input, right_word1, right_word2,
        right_word3, right_word4, left_word1,
        left_word2, left_word3, left_word4, phonetic_input
    ]
    all_outputs = [outputs, out1, out2, out3, out4, out5, out6]

    return Model(inputs=all_inputs, outputs=all_outputs)
Beispiel #17
0
               kernel_initializer='he_normal',
               name='conv3')(inner)
# inner = BatchNormalization()(inner)
# inner = Dropout(0.2)(inner)

conv_to_rnn_dims = (img_w // (pool_size**2),
                    (img_h // (pool_size**2)) * conv_filters)
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

# cuts down input size going into RNN:
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size,
            return_sequences=True,
            kernel_initializer='he_normal',
            name='gru1')(inner)
gru_1b = GRU(rnn_size,
             return_sequences=True,
             go_backwards=True,
             kernel_initializer='he_normal',
             name='gru1_b')(inner)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size,
            return_sequences=True,
            kernel_initializer='he_normal',
            name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size,
             return_sequences=True,
             go_backwards=True,
             kernel_initializer='he_normal',
# Prepare the test labels: reshape to a single column, then encode.
# NOTE(review): `ohe` is fitted again here on the test labels
# (fit_transform); presumably it is a one-hot encoder already fitted on the
# training labels, in which case `transform` would be expected -- confirm.
one_hots_test = np.reshape(one_hots_test, (-1, 1))
one_hots_test = ohe.fit_transform(one_hots_test).A

# Shuffle the training samples and their labels with the same permutation.
p = np.random.permutation(len(train_x))
train_x = np.array(train_x)[p]
one_hots_train = one_hots_train[p]

# Flatten everything after the second axis so each sample is 2-D.
train_x = train_x.reshape([train_x.shape[0], train_x.shape[1], -1])
test_x = np.array(test_x)
test_x = test_x.reshape([test_x.shape[0], test_x.shape[1], -1])

# Per-sample shape expected by the first recurrent layer.
input_shape = (train_x.shape[1], train_x.shape[2])

print('Build RNN Model:')
model= Sequential()
model.add(GRU(32,input_shape = input_shape,activation = 'relu',return_sequences = False ) )
model.add(Dense(64, init='normal', activation='relu'))
model.add(Dropout(0.3))
# model.add(Dense(128, init='normal', activation='relu'))
# model.add(Dropout(0.3))
# model.add(Dense(32, init='normal', activation='relu'))
# model.add(Dropout(0.3))
# Five-way softmax output.
model.add(Dense(5, activation='softmax'))

# training
his=LossHistory()

# NOTE(review): mean squared error is paired with a softmax output here;
# a cross-entropy loss may have been intended -- confirm.
model.compile(loss='mean_squared_error', optimizer=Adam(1e-4), metrics=['accuracy'])

X_train = train_x
Y_train = one_hots_train
Beispiel #19
0
# Persist the preprocessing metadata under ./data via the __pickleStuff helper.
metaData = {
    "maxLength": maxLength,
    "vocab_size": vocab_size,
    "output_dimen": output_dimen,
    "sentiment_tag": sentiment_tag
}
__pickleStuff("./data/meta_sentiment_chinese.p", metaData)

# build model and train
embedding_dim = 256

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=maxLength))
# Each input would have a size of (maxLength x 256) and each of these 256 sized vectors are fed into the GRU layer one at a time.
# All the intermediate outputs are collected and then passed on to the second GRU layer.
# NOTE(review): dropout=0.9 drops 90% of the layer inputs, which is unusually
# aggressive -- confirm this value is intended.
model.add(GRU(256, dropout=0.9, return_sequences=True))
# Using the intermediate outputs, we pass them to another GRU layer and collect the final output only this time
model.add(GRU(256, dropout=0.9))
# The output is then sent to a fully connected layer that would give us our final output_dim classes
model.add(Dense(output_dimen, activation='softmax'))
# TensorBoard callback logging the graph and images to ./Graph/sentiment_chinese.
tbCallBack = TensorBoard(log_dir='./Graph/sentiment_chinese',
                         histogram_freq=0,
                         write_graph=True,
                         write_images=True)
# We use the adam optimizer instead of standard SGD since it converges much faster
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
model.fit(totalX,
          totalY,
Beispiel #20
0
def negative_samples(input_length, input_dim, output_length, output_dim,
                     hidden_dim, ns_amount, learning_rate, drop_rate):
    """Assemble and compile the query/response similarity model.

    A query sequence, a reference response and ``ns_amount`` additional
    responses are weighted (``adding_weight``), masked and encoded by
    bidirectional GRUs. The model output is the normalised dot product of
    the query and reference encodings; the compiled loss is the tensor
    produced by ``loss_c`` over all similarities and ignores the
    ``y_true`` / ``y_pred`` arguments Keras passes in.

    Args:
        input_length: Timesteps of the query input.
        input_dim: Features per query timestep.
        output_length: Timesteps of each response input.
        output_dim: Features per response timestep.
        hidden_dim: Units of each GRU.
        ns_amount: Number of additional responses stacked on the last axis
            of the third model input.
        learning_rate: Learning rate passed to ``adam``.
        drop_rate: Dropout rate applied to every encoder output.

    Returns:
        The compiled Keras ``Model``.
    """
    response_shape = (output_length, output_dim)

    query_in = Input(shape=(input_length, input_dim))
    reference_in = Input(shape=response_shape)
    reference_weight_in = Input(shape=(1, ))
    sample_weights_in = Input(shape=(1, ns_amount))

    # One weight tensor per additional response; nothing to split for zero.
    sample_weight_parts = (
        Lambda(lambda x: tf.split(x, num_or_size_splits=ns_amount, axis=2))(
            sample_weights_in) if ns_amount != 0 else [])

    weighted_reference = adding_weight(output_length, output_dim)(
        [reference_in, reference_weight_in])

    samples_in = Input(shape=(output_length, output_dim, ns_amount))
    sample_parts = (
        Lambda(lambda x: tf.split(x, num_or_size_splits=ns_amount, axis=3))(
            samples_in) if ns_amount != 0 else [])

    if ns_amount == 1:
        # A single split comes back unwrapped; wrap it so indexing works.
        sample_parts = [sample_parts]
        sample_weight_parts = [sample_weight_parts]

    # Weight and mask each additional response.
    masked_samples = []
    for idx in range(ns_amount):
        sample_parts[idx] = Reshape(response_shape)(sample_parts[idx])
        sample_weight_parts[idx] = Reshape((1, ))(sample_weight_parts[idx])

        weighting_layer = adding_weight(output_length, output_dim)
        weighted_sample = weighting_layer(
            [sample_parts[idx], sample_weight_parts[idx]])
        masking_layer = Masking(mask_value=0., input_shape=response_shape)
        masked_samples.append(masking_layer(weighted_sample))

    masked_query = Masking(mask_value=0.,
                           input_shape=(input_length, input_dim))(query_in)
    masked_reference = Masking(mask_value=0.,
                               input_shape=response_shape)(weighted_reference)

    # Query encoder.
    encoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional1")
    query_vec = encoder(masked_query)
    query_vec = Dropout(rate=drop_rate, name="dropout1")(query_vec)

    # Response encoder, shared by the reference and all additional responses.
    decoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional2")
    reference_vec = decoder(masked_reference)
    reference_vec = Dropout(rate=drop_rate, name="dropout2")(reference_vec)

    sample_vecs = []
    for masked_sample in masked_samples:
        encoded_sample = decoder(masked_sample)
        sample_vecs.append(Dropout(rate=drop_rate)(encoded_sample))

    # Similarity to the reference first, then to each additional response.
    similarities = [
        Dot(axes=1, normalize=True)([query_vec, reference_vec])
    ]
    for sample_vec in sample_vecs:
        similarities.append(
            Dot(axes=1, normalize=True)([query_vec, sample_vec]))

    loss_data = Lambda(lambda x: loss_c(x))(similarities)

    model_inputs = [
        query_in, reference_in, samples_in, reference_weight_in,
        sample_weights_in
    ]
    model = Model(model_inputs, similarities[0])
    optimizer = adam(lr=learning_rate)
    model.compile(optimizer=optimizer,
                  loss=lambda y_true, y_pred: loss_data)
    return model
Beispiel #21
0
print(vocabSize, embeddingSize)


# Document input: one row of word ids per sentence.
x_in = Input( shape = ( numSentencesPerDoc, numWordsPerSentence ) , name='Input' )
# Frozen (trainable=False) embedding initialised from embWeights.
embLayer = Embedding( input_dim=embWeights.shape[0], output_dim=embWeights.shape[1], weights=[embWeights]
                      ,mask_zero=False , trainable=False, embeddings_regularizer=regularizers.l2(0.0000001)
                      , input_length=numWordsPerSentence, name='Embedding' )

sent_vecs = []

# Shape helpers: append a trailing axis / drop axis 1.
extraDimLayer = Lambda(lambda x: K.expand_dims(x), name='extraDimForConvo')
squeezeSecondLayer = Lambda(lambda x: K.squeeze(x, 1), name='squeezeLayer')

 
# Word-level bidirectional GRU; both directions are concatenated.
biRnn_Layer = Bidirectional(GRU(WORD_GRU_NUM,  return_sequences=True, bias_regularizer=regularizers.l2(eta)
                           ,kernel_regularizer=regularizers.l2(eta),recurrent_regularizer=regularizers.l2(eta)
                           ,dropout=dr, recurrent_dropout=dropWordRnnOut, unroll=True), merge_mode='concat')
              
# Twice the GRU width, matching the concatenated forward/backward outputs.
CONTEXT_DIM = 2*WORD_GRU_NUM                  
att_layer1 = Dense(CONTEXT_DIM, use_bias=True, activation='tanh')
att_layer2 = Dense(1, use_bias=False)

# The layers above are created once, outside the per-sentence loop.
for i in range(numSentencesPerDoc):
    
    # Select sentence i from the document input.
    # NOTE(review): the lambda closes over the loop variable `i`; it is
    # applied immediately here, but re-evaluating the layer later (e.g. after
    # reloading the model) would see a different `i` -- confirm.
    x_pop = Lambda(lambda x: x[:,i], output_shape=(numWordsPerSentence, ) , name='convert_shape_'+'sentence'+str(i))( x_in )
    
    emb = embLayer(x_pop)
    emb = Dropout(dropWordEmb)(emb)
    
   
    
Beispiel #22
0
    def build_model(self):
        """Build the GRU encoder-decoder with Luong-style global attention.

        Follows https://arxiv.org/abs/1508.04025. Uses ``self.input_length``,
        ``self.output_length``, ``self.num_vocab``, ``self.embedding_dim``
        and ``self.num_units``. As side effects the model summary is printed
        and a diagram is written to ``seq2seq_attention_model_plot.png``.

        Returns:
            An uncompiled Keras ``Model`` mapping ``[enc_input, dec_input]``
            to a softmax over the vocabulary for every decoder timestep.
        """
        # ---- encoder (fig1) ----
        enc_in = Input(shape=(self.input_length, ),
                       dtype='int32',
                       name='enc_input')
        enc_embedding = Embedding(input_dim=self.num_vocab,
                                  output_dim=self.embedding_dim,
                                  input_length=self.input_length,
                                  trainable=True,
                                  name='enc_embedding')
        enc_embedded = enc_embedding(enc_in)
        # encoded: per-timestep outputs; state: final hidden state.
        encoded, state = GRU(units=self.num_units,
                             return_sequences=True,
                             return_state=True,
                             name='enc_GRU')(enc_embedded)

        # ---- decoder (fig2) ----
        dec_in = Input(shape=(self.output_length, ),
                       dtype='int32',
                       name='dec_input')
        dec_embedding = Embedding(input_dim=self.num_vocab,
                                  output_dim=self.embedding_dim,
                                  input_length=self.output_length,
                                  trainable=True,
                                  name='dec_embedding')
        # share weights with encoder embedding layer
        # NOTE(review): this assignment happens before dec_embedding is
        # called for the first time; if the layer creates its own weight on
        # that first call the sharing is lost -- confirm it takes effect.
        dec_embedding.embeddings = enc_embedding.embeddings
        dec_embedded = dec_embedding(dec_in)
        # The decoder starts from the encoder's final state.
        decoded = GRU(units=self.num_units,
                      return_sequences=True,
                      name='dec_GRU')(dec_embedded, initial_state=state)

        # Luong's global attention.
        # Repeat every decoder output once per encoder position:
        # (batch, output_length, input_length, num_units).
        repeat_dec = TimeDistributed(RepeatVector(self.input_length),
                                     name='repeat_dec')
        rep_decoded = repeat_dec(decoded)

        # ---- annotations (fig3) ----
        # Project the encoder outputs, repeat them once per decoder position
        # and permute to the same axis order as rep_decoded.
        annotation_layer = TimeDistributed(Dense(units=self.num_units),
                                           name='annotation_layer')
        annotation = annotation_layer(encoded)
        repeat_enc = TimeDistributed(RepeatVector(self.output_length),
                                     name='repeat_enc')
        rep_annotation = repeat_enc(annotation)
        rep_annotation = Permute(
            (2, 1, 3),
            input_shape=(self.input_length, self.output_length,
                         self.num_units),
            name='permute_rep_annotation')(rep_annotation)

        # ---- attention weights (fig4) ----
        # Score = sum over the feature axis of the element-wise product,
        # normalised with a softmax over the encoder positions (axis 2).
        attention_mul = Multiply(name='attention_mul')
        elem_score = attention_mul([rep_decoded, rep_annotation])
        score = Lambda(lambda x: K.sum(x, axis=3, keepdims=True),
                       name='score')(elem_score)
        attention_weight = Lambda(lambda x: softmax(x, axis=2),
                                  name='attention_weight')(score)
        context_mul = Multiply(name='context_mul')

        # ---- context vectors (fig5) ----
        # Weight the (unprojected) encoder outputs by the attention weights
        # and sum over the encoder positions.
        rep_encoded = repeat_enc(encoded)
        rep_encoded = Permute((2, 1, 3),
                              input_shape=(self.input_length,
                                           self.output_length, self.num_units),
                              name='permute_rep_encoded')(rep_encoded)
        elem_context = context_mul([rep_encoded, attention_weight])
        context = Lambda(lambda x: K.sum(x, axis=2),
                         name='context')(elem_context)
        # Join each decoder output with its context vector.
        dec_and_att = Lambda(lambda x: K.concatenate([x[0], x[1]], axis=-1),
                             name='dec_att_concat')([decoded, context])

        # ---- full connection and output (fig6) ----
        fc1 = TimeDistributed(Dense(units=self.num_units * 2),
                              name='fc1')(dec_and_att)
        fc1_activated = Activation('tanh')(fc1)
        fc2 = TimeDistributed(Dense(units=self.num_vocab),
                              name='fc2')(fc1_activated)
        preds = Activation('softmax', name='softmax')(fc2)

        model = Model([enc_in, dec_in], preds)
        model.summary()
        plot_model(model,
                   to_file='seq2seq_attention_model_plot.png',
                   show_shapes=True,
                   show_layer_names=True)
        return model
Beispiel #23
0
def get_Model(training):
    """Build the CRNN (VGG-style CNN + two bidirectional GRU layers) for CTC.

    Reads the module-level ``img_w``, ``img_h``, ``num_classes`` and
    ``max_text_len``. The shape comments below assume a 128x64 input.

    Args:
        training: When truthy, return the four-input training model whose
            output is the CTC loss computed by ``ctc_lambda_func``.
            Otherwise return the inference model mapping an image to the
            per-timestep softmax activations.

    Returns:
        A Keras ``Model`` (uncompiled).
    """
    input_shape = (img_w, img_h, 1)  # (128, 64, 1)

    # Make Network
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3),
                   padding='same',
                   name='conv1',
                   kernel_initializer='he_normal')(
                       inputs)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max1')(inner)  # (None,64, 32, 64)

    inner = Conv2D(128, (3, 3),
                   padding='same',
                   name='conv2',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3),
                   padding='same',
                   name='conv3',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3),
                   padding='same',
                   name='conv4',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    # From here on only the height axis is pooled; the width (time) axis
    # keeps its length.
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3),
                   padding='same',
                   name='conv5',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same',
                   name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2),
                   padding='same',
                   kernel_initializer='he_normal',
                   name='con7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    # Width is halved by max1 and max2 (// 4); height is halved by all four
    # poolings (// 16); the last conv has 512 channels. For a 128x64 input
    # this is the previously hard-coded (32, 2048).
    rnn_steps = img_w // 4
    rnn_features = (img_h // 16) * 512
    inner = Reshape(target_shape=(rnn_steps, rnn_features),
                    name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64,
                  activation='relu',
                  kernel_initializer='he_normal',
                  name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    gru_1 = GRU(256,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)  # (None, 32, 256)
    gru_1b = GRU(256,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])  # (None, 32, 256)
    gru1_merged = BatchNormalization()(gru1_merged)
    gru_2 = GRU(256,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(256,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])  # (None, 32, 512)
    gru2_merged = BatchNormalization()(gru2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)  # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        # Inference needs only image -> softmax; skip the CTC branch.
        return Model(inputs=[inputs], outputs=y_pred)

    labels = Input(name='the_labels', shape=[max_text_len],
                   dtype='float32')  # (None, max_text_len)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length,
                                   label_length])  # (None, 1)

    return Model(inputs=[inputs, labels, input_length, label_length],
                 outputs=loss_out)
Beispiel #24
0
def trainGestureRNN(numLayers,
                    numNodesPerLayer,
                    useGRU,
                    batchSize,
                    numEpochs,
                    learningRate,
                    l1Reg,
                    l2Reg,
                    dropoutI,
                    dropoutH,
                    sequences,
                    classes,
                    trainRange,
                    valRange,
                    testRange,
                    numClasses,
                    numObservations,
                    numSequences,
                    numFeatures,
                    modelFile,
                    callbacks=None,
                    outDirectory='',
                    trainMode='continue'):
    """
    Build or reload a recurrent gesture classifier, train it, and score it.

    The network is a Masking layer, an optional input Dropout, ``numLayers``
    GRU or LSTM layers (each optionally followed by Dropout) and a
    time-distributed softmax Dense layer. It is compiled with RMSprop and
    categorical cross-entropy using temporal sample weights.

    Args:
        numLayers: number of recurrent layers to stack.
        numNodesPerLayer: output size of every recurrent layer.
        useGRU: truthy selects GRU layers, otherwise LSTM layers are used.
        batchSize: batch size passed to ``model.fit``.
        numEpochs: number of epochs passed to ``model.fit``.
        learningRate: learning rate of the RMSprop optimizer.
        l1Reg: only validated to be non-negative; it is not applied to any
            layer.
        l2Reg: L2 penalty given to every recurrent layer and the output layer.
        dropoutI: dropout rate after the masking layer (0 disables it).
        dropoutH: dropout rate after each recurrent layer (0 disables it).
        sequences: 3-D array whose first two axes are swapped before use.
        classes: 2-D integer array indexed ``[observation, sequence]``;
            negative entries mark timesteps that receive zero sample weight.
        trainRange: indices of the training sequences.
        valRange: indices of the validation sequences.
        testRange: indices of the test sequences.
        numClasses: size of the one-hot class encoding and of the output layer.
        numObservations: number of timesteps per sequence.
        numSequences: number of sequences.
        numFeatures: number of features per timestep.
        modelFile: base name; the architecture is stored in
            ``outDirectory + 'Keras' + modelFile + '.json'`` and the weights in
            the same name with ``'_Weights.h5'`` appended.
        callbacks: optional list of extra Keras callbacks. The list is not
            modified; a ModelCheckpoint is added to a copy.
        outDirectory: optional directory prefix for the model files.
        trainMode: case-insensitive. 'continue' reloads the model when both
            files exist, 'overwrite' always builds a fresh one, 'skip'
            performs no training and saves nothing.

    Returns:
        A 5-tuple ``(completedEpochs, testLoss, frameAccuracy,
        sequenceAccuracy, finalFrameAccuracy)``. If training is interrupted
        and the user declines test statistics, ``(0, nan, nan, nan, nan)``.

    Raises:
        ValueError: for an unknown ``trainMode`` or a negative regularization
            parameter.
    """
    trainModes = ('continue', 'overwrite', 'skip')

    # Normalise once so the case-insensitive validation and every later
    # comparison agree (otherwise 'Skip' would pass validation yet train).
    trainMode = trainMode.lower()
    if trainMode not in trainModes:
        raise ValueError(
            "Parameter 'trainMode' must be one of 'continue', 'overwrite', or 'skip'"
        )

    if dropoutI < 0 or dropoutH < 0 or l2Reg < 0 or l1Reg < 0:
        raise ValueError('Regularization parameters must be non-negative.')

    # Use the platform separator rather than a hard-coded backslash.
    if outDirectory:
        outDirectory = outDirectory + os.sep
    else:
        outDirectory = ''

    # Class labels must be made into binary (one-hot) arrays.
    binaryClasses = np.zeros((numObservations, numSequences, numClasses))
    # Sample weights tell the cost function which timesteps to ignore.
    sampleWeights = np.ones((numObservations, numSequences))
    for i in range(numObservations):
        for j in range(numSequences):
            if classes[i, j] >= 0:
                binaryClasses[i, j, classes[i, j]] = 1
            else:
                sampleWeights[i, j] = 0
    # Put the sequence axis first so the ranges below select whole sequences.
    sequences = sequences.transpose((1, 0, 2))
    binaryClasses = binaryClasses.transpose((1, 0, 2))
    sampleWeights = sampleWeights.T

    trainData = [
        sequences[trainRange, :, :], binaryClasses[trainRange, :, :],
        sampleWeights[trainRange, :]
    ]
    valData = [
        sequences[valRange, :, :], binaryClasses[valRange, :, :],
        sampleWeights[valRange, :]
    ]
    testData = [
        sequences[testRange, :, :], binaryClasses[testRange, :, :],
        sampleWeights[testRange, :]
    ]

    modelFile = outDirectory + 'Keras' + modelFile
    weightsFile = modelFile + '_Weights'
    jsonPath = modelFile + '.json'
    weightsPath = weightsFile + '.h5'
    completedEpochs = 0
    haveSavedModel = os.path.isfile(jsonPath) and os.path.isfile(weightsPath)
    if trainMode == 'overwrite' or not haveSavedModel:
        model = Sequential()
        # Masking layer indicates dummy (all-zero) timesteps.
        model.add(Masking(0, input_shape=(numObservations, numFeatures)))
        if dropoutI:
            model.add(Dropout(dropoutI))
        recurrentLayer = GRU if useGRU else LSTM
        for _ in range(numLayers):
            model.add(
                recurrentLayer(output_dim=numNodesPerLayer,
                               return_sequences=True,
                               W_regularizer=l2(l2Reg)))
            if dropoutH:
                model.add(Dropout(dropoutH))
        model.add(
            TimeDistributed(
                Dense(output_dim=numClasses,
                      activation='softmax',
                      W_regularizer=l2(l2Reg))))
    else:
        # Text mode and a context manager: the JSON is text and the handle
        # must not be left open.
        with open(jsonPath, 'r') as jsonFile:
            model = model_from_json(jsonFile.read())
        model.load_weights(weightsPath)

    # Compile model and training objective function.
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=learningRate),
                  sample_weight_mode='temporal',
                  metrics=['accuracy'])
    checkp = [ModelCheckpoint(weightsPath, save_best_only=True)]
    # Build a new list so the caller's callback list is never mutated.
    if callbacks is None:
        callbacks = checkp
    else:
        callbacks = list(callbacks) + checkp
    try:
        if trainMode != 'skip':
            history = model.fit(x=trainData[0],
                                y=trainData[1],
                                sample_weight=trainData[2],
                                validation_data=valData,
                                batch_size=batchSize,
                                nb_epoch=numEpochs,
                                callbacks=callbacks,
                                verbose=2)
            completedEpochs = len(history.history['loss'])
    except KeyboardInterrupt:
        if not queryUser('Training interrupted. Compute test statistics?'):
            # Same arity as the normal return so callers can always unpack
            # five values.
            nan = float('nan')
            return 0, nan, nan, nan, nan
    # Retrieve the best weights based upon validation set loss.
    if os.path.isfile(weightsPath):
        model.load_weights(weightsPath)
    scores = model.test_on_batch(x=testData[0],
                                 y=testData[1],
                                 sample_weight=testData[2])
    predictedClasses = model.predict_classes(x=testData[0])
    testClasses = classes[:, testRange].T
    # Replace the Keras accuracy with the project's own metrics.
    scores[1] = accuracy(testClasses, predictedClasses)
    scores.append(balancedAccuracy(testClasses, predictedClasses))
    scores.append(
        weightedAccuracy(testClasses, predictedClasses, forgetFactor=0))
    print(
        "Test loss of %.5f\nFrame-wise accuracy of %.5f\nSequence-wise accuracy of %.5f\nFinal frame accuracy of %0.5f"
        % (scores[0], scores[1], scores[2], scores[3]))
    if trainMode != 'skip':
        with open(jsonPath, 'w') as jsonFile:
            jsonFile.write(model.to_json())
        model.save_weights(weightsPath, overwrite=True)
        print('Model and weights saved to %s and %s.' %
              (jsonPath, weightsPath))
    return completedEpochs, scores[0], scores[1], scores[2], scores[3]
Beispiel #25
0
    # Report the shapes of the training and validation inputs.
    print("train_X shape", X_train.shape)
    print("valid_X shape", X_valid.shape)
    # print("target shape", y_train.shape)
    # print("training size:", len(train_inputs['X']), 'validation', len(valid_inputs['X']), 'test size:', len(test_inputs['X']) )
    # print("sum sizes", len(train_inputs['X']) + len(valid_inputs['X']) + len(test_inputs['X']))

    ## Build the recurrent model: a single GRU layer followed by a one-unit
    ## Dense output (no convolutional layers are added below).
    # NOTE(review): GRU is not among the names imported here; presumably it is
    # imported elsewhere in the file -- confirm.
    from keras.models import Model, Sequential
    from keras.layers import Conv1D, Dense, Flatten
    from keras.callbacks import EarlyStopping, ModelCheckpoint

    LATENT_DIM = 5
    BATCH_SIZE = 32

    model = Sequential()
    # Each input sample is time_step_lag timesteps with one feature.
    model.add(GRU(LATENT_DIM, input_shape=(time_step_lag, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.summary()
    # Early-stopping callback keyed on validation loss, patience of 5 epochs.
    earlystop = EarlyStopping(monitor='val_loss', patience=5)

    # Prepare the test inputs and targets
    X_test = test_inputs['X']
    y1_test = test_inputs['target_load']
    # presumably maps the scaled targets back to original units -- verify y_scaler
    y1_test = y_scaler.inverse_transform(y1_test)

    if not os.path.exists(output_dir + '/original_' + predict_component +
                          '_lag' + str(time_step_lag) + '.csv'):
        np.savetxt(output_dir + '/original_' + predict_component + '_lag' +
                   str(time_step_lag) + '.csv',
                   y1_test,
Beispiel #26
0
		shape = list(input_shape)
		assert len(shape)== 3
		outshape = [None, shape[2]]
		return tuple(outshape)
	def mean_along_time(x):
		"""Return the backend mean of `x` over axis 1 (presumably the time axis)."""
		# The Keras backend function is `mean`; `K.means` does not exist.
		return K.mean(x,axis=1)

	def sum_one(x):
		"""Sum `x` over its last axis, keeping that axis with length one."""
		total = x.sum(keepdims=True, axis=-1)
		return total
	def sum_one_output_shape(input_shape):
		"""Return the output shape `(None, 1)` for a two-dimensional input shape."""
		rank = len(list(input_shape))
		assert rank == 2
		return (None, 1)

	# Forward and backward GRU layers applied to the question input below;
	# with return_sequences = False each yields a single vector per sample.
	shared_GRU =  GRU(output_dim = dim_gru, return_sequences = False, input_shape = (maxlen,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')
	shared_backGRU = GRU(output_dim = dim_gru,go_backwards=True, return_sequences = False, input_shape = (maxlen,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')

	# Passage branch: forward and backward GRUs emit one vector per timestep
	# and their outputs are concatenated.
	pass_input = Input(shape=(maxlen_pass,dim_glove), dtype='float32', name='pass_input')
	pass_gru = GRU(output_dim = dim_gru, dropout_W=args.dropout, return_sequences = True, input_shape = (maxlen_pass,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')(pass_input) # maxlen_pass, dim_gru
	pass_backgru = GRU(output_dim = dim_gru, dropout_W=args.dropout ,go_backwards=True, return_sequences = True, input_shape = (maxlen_pass,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')(pass_input) # maxlen_pass, dim_gru
	pass_con = merge([pass_gru,pass_backgru],mode='concat') # maxlen_pass, 2*dim_gru

	# Question branch: encode with the two GRUs above, concatenate, then repeat
	# the vector maxlen_pass times so it can be multiplied element-wise with
	# the passage encoding.
	ques_input = Input(shape=(maxlen,dim_glove), dtype='float32', name='ques_input')
	gru_out = shared_GRU(ques_input)
	backgru_out = shared_backGRU(ques_input)
	ques_con = merge([gru_out,backgru_out],mode='concat') # , 2*dim_gru
	repeat_ques = RepeatVector(maxlen_pass)(ques_con) # maxlen_pass, 2*dim_gru
	mul_ques_pass = merge([pass_con,repeat_ques],mode='mul') # maxlen_pass, 2*dim_gru
	# Swap the two non-batch axes of the product.
	permute_qp_mul = Permute((2,1))(mul_ques_pass) # 2*dim_gru, maxlen_pass
	# Disabled alternative: cosine merge of question and passage encodings.
	#cos_ques_pass = merge([ques_con,pass_con],mode='cos',dot_axes=[1,2]) # ,maxlen_pass
Beispiel #27
0
        i += 1

# Pad sentences that are shorter than the maximum length
X = sequence.pad_sequences(X, maxlen=MAX_SENTENCE_LENGTH)
# Split the data into a training set and a test set (80/20)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
# Build the model
EMBEDDING_SIZE = 128
HIDDEN_LAYER_SIZE = 64
BATCH_SIZE = 32
NUM_EPOCHS = 10
model = Sequential()
# Add the Embedding layer
model.add(Embedding(vocab_size, EMBEDDING_SIZE,input_length=MAX_SENTENCE_LENGTH))
# Add the GRU layer
model.add(GRU(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1))
model.add(Activation("sigmoid"))
# binary_crossentropy: loss for two-class (binary) classification
model.compile(loss="binary_crossentropy", optimizer="adam",metrics=["accuracy"])

# Train the model (the test split is also used as validation data)
model.fit(Xtrain, ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS,validation_data=(Xtest, ytest))

# Evaluate and predict
score, acc = model.evaluate(Xtest, ytest, batch_size=BATCH_SIZE)
print("\nTest score: %.3f, accuracy: %.3f" % (score, acc))
# Header row; the three labels read "prediction", "ground truth", "sentence"
print('{}   {}      {}'.format('預測','真實','句子'))
# Pick five random test samples and reshape each to a batch of one
for i in range(5):
    idx = np.random.randint(len(Xtest))
    xtest = Xtest[idx].reshape(1,MAX_SENTENCE_LENGTH)
Beispiel #28
0
def train(model_file):
    """
    Train a neural network to take speech as input and produce gesture as an output

    The training arrays are loaded from DATA_DIR and split 90/10 into
    training and validation sets. The network is three time-distributed Dense
    blocks, a GRU block and a linear Dense output, trained with Adam on mean
    squared error. The trained model is saved to ``model_file`` and the loss
    curves are saved as a PNG image.

    Args:
        model_file: file to store the model; the loss plot is written to the
            same path with its extension replaced by ``.png``

    Returns:
        None
    """
    import os

    # Get the data
    X = np.load(DATA_DIR + '/X_train.npy')

    if ENCODED:

        # If we learn speech-representation mapping we use encoded motion as output
        Y = np.load(DATA_DIR + '/' + str(N_OUTPUT) + '/Y_train_encoded.npy')

        # Correct the sizes
        train_size = min(X.shape[0], Y.shape[0])
        X = X[:train_size]
        Y = Y[:train_size]

    else:
        Y = np.load(DATA_DIR + '/Y_train.npy')

    N_train = int(len(X) * 0.9)
    N_validation = len(X) - N_train

    # Split on training and validation
    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X, Y, test_size=N_validation)

    # Define Keras model

    model = Sequential()
    model.add(
        TimeDistributed(Dense(N_HIDDEN), input_shape=(N_CONTEXT, N_INPUT)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.1))

    model.add(TimeDistributed(Dense(N_HIDDEN)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.1))

    model.add(TimeDistributed(Dense(N_HIDDEN)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.1))

    # The GRU collapses the context window into a single vector.
    model.add(GRU(N_HIDDEN, return_sequences=False))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.1))

    model.add(Dense(N_OUTPUT))
    model.add(Activation('linear'))

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()

    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    hist = model.fit(X_train,
                     Y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     validation_data=(X_validation, Y_validation))

    model.save(model_file)

    # Save convergence results into an image
    pyplot.plot(hist.history['loss'], linewidth=3, label='train')
    pyplot.plot(hist.history['val_loss'], linewidth=3, label='valid')
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel('epoch')
    pyplot.ylabel('loss')
    # Swap only the extension: a substring replace would also rewrite 'hdf5'
    # inside directory names and would leave e.g. '.h5' paths unchanged.
    pyplot.savefig(os.path.splitext(model_file)[0] + '.png')
Beispiel #29
0
# Record the run configuration through f.write
# (assumes f is an already-open writable text file -- confirm)
f.write('Batch size: ' + str(batch_size) + '\n')
f.write('Num outtuples: ' + str(num_outtuples) + '\n')
f.write('Max num of letters: ' + str(features_per_sample) + '\n')
f.write('Num epochs: ' + str(num_epochs) + '\n')

# Create the model of RNN
# NOTE(review): input_shape is not referenced again in the visible code; the
# GRU layers below spell out the same tuple themselves.
input_shape = (num_outtuples, features_per_sample)
model = Sequential()
# Masking adds a padding and a special vector to ignore the padding values.
# (Currently disabled.)
#model.add(Masking(input_shape = input_shape, mask_value = 0.0))
# Embedding with vocabulary size max_ord_value and 500-dimensional vectors,
# for inputs of length features_per_sample
model.add(Embedding(max_ord_value, 500, input_length=features_per_sample))
# GRU is the main RNN layer
# NOTE(review): these GRUs are not the first layer of the model, so their
# input_shape arguments are presumably ignored -- confirm against Keras docs.
model.add(
    GRU(256,
        return_sequences=True,
        input_shape=(num_outtuples, features_per_sample)))
model.add(
    GRU(512,
        return_sequences=False,
        input_shape=(num_outtuples, features_per_sample)))
# Fully connected layer with 1 neuron output
model.add(Dense(1))
# Final output value between 0 and 1 as probability
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Write
Beispiel #30
0
    def test_gru_benchmark(self):
        try:
            import lasagne
            from keras.layers.recurrent import GRU
        except:
            print('\n This test require lasagne and keras.')
            return
        np.random.seed(12082518)
        X = np.random.rand(32, 12, 13)
        g1 = nnet.GRU((None, 12, 13), hidden_info=8,
            resetgate=nnet.Gate(),
            updategate=nnet.Gate(),
            hidden_update=nnet.Gate(nonlinearity=T.tanh),
            batch_norm=False,
            dropoutW=None, dropoutU=None)

        f1 = T.function(g1.input_var, outputs=g1(True))
        x1 = f1(X)[0]

        g2 = GRU(output_dim=8, input_shape=(12, 13),
                activation=T.tanh, inner_activation=T.sigmoid,
                dropout_W=None, dropout_U=None,
                return_sequences=True)
        g2.set_weights(g1.get_params_value(True, True))
        f2 = T.function([g2.get_input(True)],
            outputs=g2.get_output(True))
        x2 = f2(X)

        l_in = lasagne.layers.InputLayer(shape=(None, 12, 13))
        l = lasagne.layers.GRULayer(l_in, num_units=8)
        lasagne.layers.set_all_param_values(l,
            g1.get_params_value(True, True) + [T.np_constant((1, 8))])
        f3 = T.function([l_in.input_var],
            outputs=lasagne.layers.get_output(l, deterministic=False))
        x3 = f3(X)

        print('Odin - Keras:   ', np.sum(np.abs(x1 - x2)))
        print('Odin - Lasagne: ', np.sum(np.abs(x1 - x3)))
        print('Keras - Lasagne:', np.sum(np.abs(x2 - x3)))
        self.assertAlmostEqual(np.sum(np.abs(x1 - x3)), 0.)
        # print(g1.get_params(True, True))
        # p1 = g1.get_params_value(True, True)
        # print(g2.get_params()[0])
        # p2 = [T.get_value(i) for i in g2.get_params()[0]]
        # print([np.sum(np.abs(i - j)) for i, j in zip(p1, p2)])

        print()
        time.sleep(1)
        start = time.time()
        for i in xrange(12):
            f1(X)
        print('Odin GRU speed:', (time.time() - start) / 12)
        time.sleep(1)

        start = time.time()
        for i in xrange(12):
            f3(X)
        print('Lasagne GRU speed:', (time.time() - start) / 12)
        time.sleep(1)

        start = time.time()
        for i in xrange(12):
            f2(X)
        print('Keras GRU speed:', (time.time() - start) / 12)
Beispiel #31
0
'''
model building
'''
input_tensor = Input((width, height, 3))
x = input_tensor
# Three convolutional stages: two 3x3 convolutions with 32 filters, batch
# normalization over the last axis, then 2x2 max pooling.
for i in range(3):
    x = Conv2D(32, (3, 3), activation="relu")(x)
    x = Conv2D(32, (3, 3), activation="relu")(x)
    #BatchNormalization()
    x = BatchNormalization(axis=-1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Merge the last two axes of the convolution output into one feature axis so
# that axis 1 serves as the sequence axis for the recurrent layers.
conv_shape = x.get_shape()
x = Reshape(target_shape=(int(conv_shape[1]), int(conv_shape[2]*conv_shape[3])))(x)
x = Dense(32, activation='relu')(x)
# First forward/backward GRU pair; the two outputs are summed.
gru_1 = GRU(opts.rnn_size, return_sequences=True, kernel_initializer="he_normal", name="gru1")(x)
gru_1b = GRU(opts.rnn_size, go_backwards=True, kernel_initializer="he_normal", name="gru1_b", return_sequences=True)(x)
gru1_merged = add([gru_1, gru_1b])
# Second forward/backward GRU pair; the two outputs are concatenated.
gru_2 = GRU(opts.rnn_size, return_sequences=True, kernel_initializer="he_normal", name="gru2")(gru1_merged)
gru_2b = GRU(opts.rnn_size, go_backwards=True, kernel_initializer="he_normal", 
        name="gru2_b", return_sequences=True)(gru1_merged)
x = concatenate([gru_2, gru_2b])
x = Dropout(0.25)(x)
# Per-timestep softmax over n_class+1 outputs
# (the extra output is presumably the CTC blank label -- confirm).
x = Dense(n_class+1, activation="softmax", kernel_initializer="he_normal")(x)
# Model mapping the image input to the per-timestep softmax output.
base_model = Model(inputs=input_tensor, outputs=x)
# Extra inputs consumed by the CTC loss lambda below.
labels = Input(name='the_labels', shape=[n_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# The loss is computed inside the graph by a Lambda layer wrapping ctc_lambda_func.
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), 
                                  name='ctc')([x, labels, input_length, label_length])