def create_model(Vocabulary_size, X_max_len, n_phonetic_features, n1, n2, n3,
                 n4, n5, n6, HIDDEN_DIM, LAYER_NUM):
    """Build the joint multi-task tagger / attention seq2seq Keras model.

    The current word and its four right and four left neighbours share one
    embedding and are passed through dropout, Gaussian noise and two Conv1D
    branches (kernel sizes 4 and 5) whose max- and average-pooled outputs
    are summed.  All pooled branches are concatenated, regularised with
    dropout, fed to a bidirectional GRU, joined with the phonetic feature
    vector and pushed through two dense layers into six softmax heads.

    A second path embeds the current word with a separate masking embedding,
    encodes it with a GRU, decodes ``decoder_input`` with another GRU seeded
    from the encoder's last time step, and applies dot-product attention
    followed by two time-distributed dense layers.

    Relies on the module-level names ``EMBEDDING_DIM``, ``no_filters`` and
    ``rnn_output_size``.

    Args:
        Vocabulary_size: Input dimension of both embeddings and size of the
            sequence softmax.
        X_max_len: Length of every word-level input sequence.
        n_phonetic_features: Width of the phonetic feature input.
        n1, n2, n3, n4, n5, n6: Number of classes of the six softmax heads.
        HIDDEN_DIM: Units of the two dense layers before the heads.
        LAYER_NUM: Accepted for interface compatibility; not used here.

    Returns:
        An uncompiled ``Model`` whose inputs are ordered
        ``[current, decoder, right1..4, left1..4, phonetic]`` and whose
        outputs are ``[sequence_output, out1, ..., out6]``.
    """

    def word_input(name):
        # Every word-level input has the same shape and dtype.
        return Input(shape=(X_max_len, ), dtype='float32', name=name)

    current_word = word_input('input1')  # for encoder (shared)
    decoder_input = word_input('input3')  # for decoder -- attention
    right_words = [word_input('input' + str(k)) for k in range(4, 8)]
    left_words = [word_input('input' + str(k)) for k in range(8, 12)]
    phonetic_input = Input(shape=(n_phonetic_features, ),
                           dtype='float32',
                           name='input12')

    emb_layer1 = Embedding(Vocabulary_size,
                           EMBEDDING_DIM,
                           input_length=X_max_len,
                           mask_zero=False,
                           name='Embedding')

    list_of_inputs = [current_word] + right_words + left_words

    # One shared embedding, then per-input dropout and Gaussian noise.
    list_of_embeddings = [emb_layer1(word) for word in list_of_inputs]
    list_of_embeddings = [
        Dropout(0.50, name='drop1_' + str(i))(emb)
        for i, emb in enumerate(list_of_embeddings)
    ]
    list_of_embeddings = [
        GaussianNoise(0.05, name='noise1_' + str(i))(emb)
        for i, emb in enumerate(list_of_embeddings)
    ]

    def pooled_conv_branches(kernel_size):
        # Conv1D over every embedded input, then max-pool + avg-pool summed.
        tag = str(kernel_size)
        convs = [
            Conv1D(filters=no_filters,
                   kernel_size=kernel_size,
                   padding='valid',
                   activation='relu',
                   strides=1,
                   name='conv' + tag + '_' + str(i))(emb)
            for i, emb in enumerate(list_of_embeddings)
        ]
        max_pooled = [
            MaxPooling1D(name='max' + tag + '_' + str(i))(conv)
            for i, conv in enumerate(convs)
        ]
        avg_pooled = [
            AveragePooling1D(name='avg' + tag + '_' + str(i))(conv)
            for i, conv in enumerate(convs)
        ]
        return [
            add([mx, av], name='merge_conv' + tag + '_' + str(i))
            for i, (mx, av) in enumerate(zip(max_pooled, avg_pooled))
        ]

    mergedPools = pooled_conv_branches(4) + pooled_conv_branches(5)

    concat = concatenate(mergedPools, name='main_merge')

    x = Dropout(0.15, name='drop_single1')(concat)
    # Feed the dropout output (not the raw concat) so 'drop_single1' is
    # actually part of the graph.
    x = Bidirectional(GRU(rnn_output_size), name='bidirec1')(x)

    concat2 = concatenate([x, phonetic_input], name='phonetic_merging')

    x = Dense(HIDDEN_DIM,
              activation='relu',
              kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)
    x = Dense(HIDDEN_DIM,
              kernel_initializer='he_normal',
              activation='tanh',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    # Six classification heads sharing the same trunk.
    task_outputs = [
        Dense(n_classes,
              kernel_initializer='he_normal',
              activation='softmax',
              name='output' + str(k))(x)
        for k, n_classes in enumerate((n1, n2, n3, n4, n5, n6), start=1)
    ]

    # Luong et al. 2015 attention model
    emb_layer = Embedding(Vocabulary_size,
                          EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True,
                          name='Embedding_for_seq2seq')

    current_word_embedding = emb_layer(list_of_inputs[0])

    encoder, state = GRU(rnn_output_size,
                         return_sequences=True,
                         unroll=True,
                         return_state=True,
                         name='encoder')(current_word_embedding)
    # The decoder is seeded from the encoder output at the last time step;
    # the returned ``state`` is not used.
    encoder_last = encoder[:, -1, :]

    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size,
                  return_sequences=True,
                  unroll=True,
                  name='decoder')(decoder, initial_state=[encoder_last])

    # Attention weights: decoder steps scored against encoder steps.
    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)

    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder],
                                           name='concatenate')

    outputs = TimeDistributed(Dense(64, activation='tanh'),
                              name='td1')(decoder_combined_context)
    outputs = TimeDistributed(Dense(Vocabulary_size, activation='softmax'),
                              name='td2')(outputs)

    all_inputs = ([current_word, decoder_input] + right_words + left_words +
                  [phonetic_input])
    all_outputs = [outputs] + task_outputs

    return Model(inputs=all_inputs, outputs=all_outputs)
Example #2
0
    def build(self):
        """Build and compile the tagging network selected by ``self.arch``.

        Supported architectures are the single-layer ``'lstm'``, ``'rnn'``
        and ``'gru'``, their bidirectional variants ``'blstm'``, ``'brnn'``
        and ``'bgru'``, and the two-layer stacks ``'2lstm'``, ``'2rnn'`` and
        ``'2gru'``.  The optimizer is chosen by ``self.update_f``.  The
        compiled model is stored in ``self.model`` and its summary printed.

        An unknown optimizer or architecture writes a message to stderr and
        terminates the process with a non-zero exit status.
        """
        raw_current = Input(shape=(self.time_length,), dtype='int32')
        if len(self.embeddings_matrix) == 0:
            # No pre-trained vectors: learn the embedding from scratch.
            embedding = Embedding(input_dim=len(self.word2idx),
                                  output_dim=self.embedding_size,
                                  input_length=self.time_length)
        else:
            # Pre-trained vectors are loaded and kept frozen.
            embedding = Embedding(len(self.embeddings_matrix),
                                  self.embedding_size,
                                  weights=[self.embeddings_matrix],
                                  trainable=False)
        current = embedding(raw_current)

        # set optimizer
        if self.update_f == 'sgd':
            opt_func = SGD(lr=self.learning_rate,
                           momentum=self.momentum, decay=self.decay_rate)
        elif self.update_f == 'rmsprop':
            opt_func = RMSprop(lr=self.learning_rate,
                               rho=self.rho, epsilon=self.smooth_eps)
        elif self.update_f == 'adagrad':
            opt_func = Adagrad(lr=self.learning_rate, epsilon=self.smooth_eps)
        elif self.update_f == 'adadelta':
            opt_func = Adadelta(lr=self.learning_rate,
                                rho=self.rho, epsilon=self.smooth_eps)
        elif self.update_f == 'adam':
            opt_func = Adam(lr=self.learning_rate, beta_1=self.beta1,
                            beta_2=self.beta2, epsilon=self.smooth_eps)
        elif self.update_f == 'adamax':
            opt_func = Adamax(lr=self.learning_rate, beta_1=self.beta1,
                              beta_2=self.beta2, epsilon=self.smooth_eps)
        else:
            sys.stderr.write("Invalid optimizer.\n")
            sys.exit(1)

        rnn_classes = {'lstm': LSTM, 'rnn': SimpleRNN, 'gru': GRU}

        # Vanilla RNN (LSTM, SimpleRNN, GRU)
        # Bidirectional-RNN (LSTM, SimpleRNN, GRU)
        if self.arch in ('lstm', 'rnn', 'gru', 'blstm', 'brnn', 'bgru'):
            bidirectional = self.arch.startswith('b')
            rnn_cls = rnn_classes[self.arch[1:] if bidirectional
                                  else self.arch]

            forward = rnn_cls(self.hidden_size,
                              return_sequences=True,
                              activation=self.activation,
                              kernel_initializer=self.init_type)(current)
            if bidirectional:
                # NOTE(review): go_backwards=True returns the sequence in
                # reversed time order and it is concatenated as-is, so step t
                # of ``forward`` is paired with step T-1-t of ``backward``.
                # Confirm this is intended.
                backward = rnn_cls(self.hidden_size,
                                   return_sequences=True,
                                   activation=self.activation,
                                   go_backwards=True,
                                   kernel_initializer=self.init_type)(current)
                tagger = layers.concatenate([forward, backward])
            else:
                tagger = forward

            if self.dropout:
                tagger = Dropout(self.dropout_ratio)(tagger)
            prediction = TimeDistributed(
                Dense(self.output_vocab_size, activation='softmax'))(tagger)
            self.model = Model(inputs=raw_current, outputs=prediction)
            self.model.compile(
                loss='categorical_crossentropy', optimizer=opt_func)

        # 2-Stacked Layered RNN (LSTM, SimpleRNN, GRU)
        elif self.arch in ('2lstm', '2rnn', '2gru'):
            rnn_cls = rnn_classes[self.arch[1:]]
            model = Sequential()
            model.add(embedding)
            # First recurrent layer consumes embeddings, second consumes the
            # hidden states of the first.
            basic_model = rnn_cls(self.hidden_size, return_sequences=True,
                                  input_shape=(self.time_length,
                                               self.embedding_size),
                                  kernel_initializer=self.init_type,
                                  activation=self.activation)
            stack_model = rnn_cls(self.hidden_size, return_sequences=True,
                                  input_shape=(self.time_length,
                                               self.hidden_size),
                                  kernel_initializer=self.init_type,
                                  activation=self.activation)
            model.add(basic_model)
            if self.dropout:
                model.add(Dropout(self.dropout_ratio))
            model.add(stack_model)
            model.add(TimeDistributed(Dense(self.output_vocab_size)))
            model.add(Activation('softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=opt_func)
            self.model = model
        else:
            sys.stderr.write("Invalid arch.\n")
            sys.exit(1)

        # save model descriptions
        self.model.summary()
def make_model(img_w, img_h, output_size, absolute_max_string_len):
    """Assemble the CNN + two-level GRU recogniser and its CTC training graph.

    Two convolution/max-pooling stages feed a reshape into a
    (time, features) sequence, followed by two pairs of forward/backward
    GRUs and a softmax over ``output_size`` classes.  The CTC loss is
    computed inside a ``Lambda`` layer so it can receive the label and
    length inputs.

    Args:
        img_w: Input image width.
        img_h: Input image height.
        output_size: Number of units of the final softmax layer.
        absolute_max_string_len: Length of the ``the_labels`` input.

    Returns:
        Tuple ``(model, model_p, input_data, y_pred)`` where ``model`` is
        the training model ending in the CTC loss layer and ``model_p`` maps
        the image input directly to ``y_pred``.
    """
    mnist_cnn_variant = "https://keras.io/examples/mnist_cnn/"
    type_of_model = "original"  # switch to mnist_cnn_variant to try it
    use_mnist_cnn = type_of_model == mnist_cnn_variant

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    rnn_size = 512
    pool_size = 2
    act = 'relu'

    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, img_w, img_h) if channels_first else (img_w, img_h, 1)
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    def conv_then_pool(tensor, filters, conv_name, pool_name):
        # One 'same'-padded convolution followed by square max pooling.
        tensor = Conv2D(filters, kernel_size, padding='same',
                        activation=act, kernel_initializer='he_normal',
                        name=conv_name)(tensor)
        return MaxPooling2D(pool_size=(pool_size, pool_size),
                            name=pool_name)(tensor)

    if use_mnist_cnn:
        stages = [(32, 'conv1', 'max1'), (64, 'conv2', 'max2')]
        # The reshape below must account for the wider last convolution.
        conv_filters = 64
    else:
        pool_tag = 'pool' + str(pool_size) + "by" + str(pool_size)
        stages = [
            (conv_filters, 'afilter' + str(conv_filters), 'a' + pool_tag),
            (conv_filters, 'bfilter' + str(conv_filters), 'b' + pool_tag),
        ]

    inner = input_data
    for filters, conv_name, pool_name in stages:
        inner = conv_then_pool(inner, filters, conv_name, pool_name)
    # Both variants drop a quarter of the units after the second pooling.
    inner = Dropout(0.25)(inner)

    # Max pooling is applied twice, so each spatial side shrinks by
    # pool_size ** 2.
    downsample = pool_size ** 2
    conv_to_rnn_dims = (img_w // downsample,
                        (img_h // downsample) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # No dense reduction is applied before the recurrent layers here.
    if use_mnist_cnn:
        inner = Dropout(0.5)(inner)

    # First recurrent level: forward and backward GRU outputs are summed.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])

    # Second recurrent level: forward and backward outputs are concatenated.
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(output_size, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)

    # This intermediate model is useful for predictions without the CTC
    # inputs.
    model_p = Model(inputs=input_data, outputs=y_pred)

    labels = Input(name='the_labels', shape=[absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss needs the labels and both lengths in addition to the
    # predictions, so it is wrapped in a Lambda layer taking all four.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_drop_first_2.ctc_lambda_func, output_shape=(1,),
                      name='ctc')(ctc_inputs)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    return (model, model_p, input_data, y_pred)
Example #4
0
# Target-side vocabulary: index 0 is reserved for padding, real words are
# numbered from 1 in descending frequency order.
t_vocabsize = len(t_wordfreqs) + 1
t_word2index = {
    word: rank
    for rank, (word, _) in enumerate(
        t_wordfreqs.most_common(T_MAX_FEATURES), start=1)
}
t_word2index['PAD'] = 0
t_index2word = {index: word for word, index in t_word2index.items()}

# Hold out the last 30% of the sentence pairs for testing.  The split point
# is computed explicitly because ``seq[0:-0]`` is empty, which would leave no
# training data whenever ``test_size`` rounds down to zero.
test_size = int(0.3 * len(s_sents))
s_split = len(s_sents) - test_size
t_split = len(t_sents) - test_size
s_sents_train, s_sents_test = s_sents[:s_split], s_sents[s_split:]
t_sents_train, t_sents_test = t_sents[:t_split], t_sents[t_split:]
train_gen = generate_batch(s_sents_train, s_word2index, t_sents_train,
                           t_word2index, BATCH_SIZE, MAX_SEQLEN)
test_gen = generate_batch(s_sents_test, s_word2index, t_sents_test,
                          t_word2index, BATCH_SIZE, MAX_SEQLEN)
print(len(s_sents_train), len(s_sents_test))

# Encoder GRU -> repeated summary vector -> decoder GRU -> per-step softmax
# over the target vocabulary.
model = Sequential()
model.add(Embedding(input_dim=s_vocabsize, output_dim=EMBED_SIZE,
                    input_length=MAX_SEQLEN))
model.add(SpatialDropout1D(0.2))
model.add(GRU(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2))
model.add(RepeatVector(MAX_SEQLEN))
model.add(GRU(HIDDEN_SIZE, return_sequences=True))
model.add(TimeDistributed(Dense(t_vocabsize)))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# Number of generator batches per pass over each split.
num_train_samples = len(s_sents_train) // BATCH_SIZE
num_test_samples = len(s_sents_test) // BATCH_SIZE

hist_acc, hist_val_acc, hist_loss, hist_val_loss = [], [], [], []
for i in range(NUM_ITERATIONS):
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=num_train_samples,
                                  epochs=NUM_EPOCHS,
                                  validation_data=test_gen,
                                  validation_steps=num_test_samples)
    hist_acc.extend(history.history['acc'])
    hist_val_acc.extend(history.history['val_acc'])
    # Record the losses too so all four history lists stay in step.
    hist_loss.extend(history.history['loss'])
    hist_val_loss.extend(history.history['val_loss'])
Example #5
0
def RNet(vocab_size,
         vocab_init=None,
         hdim=150,
         dropout=0.1,
         p_length=None,
         q_length=None,
         w2vec=300,
         char_level_embeddings=False):
    """Assemble the R-Net style reading-comprehension model.

    Passage and question token ids are embedded with a frozen, masking
    embedding, each encoded by three stacked bidirectional GRUs, then passed
    through gated attention, self matching, question pooling and a two-step
    pointer RNN whose two steps are sliced into the start and end outputs.

    Args:
        vocab_size: Input dimension of the embeddings.
        vocab_init: Optional initial embedding weights; when ``None`` the
            embedding is created without explicit weights.
        hdim: Hidden size used throughout the network.
        dropout: Dropout rate of the encoder GRUs and of the pooled question.
        p_length: Passage input length.
        q_length: Question input length.
        w2vec: Embedding output dimension.
        char_level_embeddings: Accepted but not used by this function.

    Returns:
        A ``Model`` taking ``[Passage, Question]`` followed by all shared
        weights, and producing ``[answer_start, answer_end]``.  The model
        summary is printed before returning.
    """
    hidden = hdim

    P_vecs = Input(shape=[p_length], name='Passage')
    Q_vecs = Input(shape=[q_length], name='Question')

    # (name, size) of every shared weight, in the order they are created and
    # later appended to the model inputs.
    weight_specs = [
        ('v', (hidden, 1)),
        ('WQ_u', (2 * hidden, hidden)),
        ('WP_u', (2 * hidden, hidden)),
        ('WP_v', (hidden, hidden)),
        ('W_g1', (2 * hidden, 2 * hidden)),
        ('W_g2', (hidden, hidden)),
        ('WP_h', (2 * hidden, hidden)),
        ('Wa_h', (2 * hidden, hidden)),
        ('WQ_v', (2 * hidden, hidden)),
        ('WPP_v', (hidden, hidden)),
        ('VQ_r', (hidden, hidden)),
        ('vv', (4 * hidden, 2 * hidden)),
        ('vv2', (2 * hidden, hidden)),
    ]
    shared_weights = []
    sw = {}
    for weight_name, weight_size in weight_specs:
        weight = SharedWeight(size=weight_size, name=weight_name)
        shared_weights.append(weight)
        sw[weight_name] = weight

    # Frozen, masking embedding; initial weights are passed only if given.
    embedding_kwargs = {'trainable': False, 'mask_zero': True}
    if vocab_init is not None:
        embedding_kwargs['weights'] = [vocab_init]
    em = Embedding(vocab_size, w2vec, **embedding_kwargs)

    uP = em(P_vecs)
    uQ = em(Q_vecs)

    def encode(sequence):
        # Three stacked bidirectional GRU layers.
        for _ in range(3):
            sequence = Bidirectional(
                GRU(units=hidden, return_sequences=True,
                    dropout=dropout))(sequence)
        return sequence

    uP = encode(uP)
    uQ = encode(uQ)

    ## Gated Attn
    cell = GatedAttnGRUCell(units=hidden)
    gated_constants = (sw['WP_u'], uQ, sw['WQ_u'], sw['WP_v'], sw['v'],
                       sw['W_g1'], sw['vv'])

    vP = RNN(cell, return_sequences=True)(
        uP, constants=list(gated_constants))
    vP_back = RNN(cell, return_sequences=True, go_backwards=True)(
        uP, constants=list(gated_constants))
    vP = Concatenate()([vP, vP_back])
    vP = GRU(units=hidden, return_sequences=True)(vP)

    ## Self Match
    cell2 = SelfMatchGRUCell(units=hidden)
    match_constants = (vP, sw['W_g2'], sw['WPP_v'], sw['WP_v'], sw['v'],
                       sw['vv2'])

    hP = RNN(cell2, return_sequences=True)(
        vP, constants=list(match_constants))
    hP_back = RNN(cell2, return_sequences=True, go_backwards=True)(
        vP, constants=list(match_constants))
    hP = Concatenate()([hP, hP_back])

    ## Question Pooling
    rQ = QuestionPooling()(
        [uQ, sw['WQ_u'], sw['WQ_v'], sw['v'], sw['VQ_r']])
    rQ = Dropout(rate=dropout, name='rQ')(rQ)

    # Two-step input sequence for the pointer RNN, built from a frozen
    # embedding of the passage ids.
    fake_em = Embedding(vocab_size, 2 * hidden, trainable=False)(P_vecs)
    fake_input = GlobalMaxPooling1D()(fake_em)
    fake_input = RepeatVector(n=2, name='fake_input')(fake_input)

    ## Pointer
    cell3 = PointerCell(units=2 * hidden)
    Ptr = RNN(cell3, return_sequences=True)(
        fake_input,
        initial_state=[rQ],
        constants=[hP, sw['WP_h'], sw['Wa_h'], sw['v']])

    # Step 0 of the pointer output is the start, step 1 the end.
    answer_start = Slice(0, name='answer_start')(Ptr)
    answer_end = Slice(1, name='answer_end')(Ptr)

    model = Model(inputs=[P_vecs, Q_vecs] + shared_weights,
                  outputs=[answer_start, answer_end])

    model.summary()

    return model
Example #6
0
def build_model(opts, verbose=False):
    """Build and compile the attention-based three-class classifier graph.

    The input sequence is embedded, run through a forward and a backward
    GRU, and the merged, dropout-regularised states are split by the
    external helpers ``get_H_n`` and ``get_Y``.  An attention distribution
    ``alpha`` over ``Y`` is combined by ``get_R`` into ``r``, which is mixed
    with ``h_n`` through a tanh layer and classified with a 3-way softmax.

    Args:
        opts: Options object providing ``lstm_units``, ``xmaxlen``,
            ``ymaxlen``, ``max_features``, ``emb`` and ``lr``.
        verbose: When true, the model summary is printed a second time.

    Returns:
        The compiled ``Graph`` model with a single output named ``'output'``.
    """
    model = Graph()
    k = 2 * opts.lstm_units  # width of the merged forward+backward states
    L = opts.xmaxlen
    N = opts.xmaxlen + opts.ymaxlen + 1  # for delim
    print("x len %s total len %s" % (L, N))

    model.add_input(name='input', input_shape=(N, ), dtype=int)
    model.add_node(Embedding(opts.max_features, opts.emb, input_length=N),
                   name='emb',
                   input='input')
    model.add_node(Dropout(0.1), name='d_emb', input='emb')
    model.add_node(GRU(opts.lstm_units, return_sequences=True),
                   name='forward',
                   input='d_emb')
    model.add_node(GRU(opts.lstm_units,
                       return_sequences=True,
                       go_backwards=True),
                   name='backward',
                   input='d_emb')

    model.add_node(Dropout(0.1),
                   name='dropout',
                   inputs=['forward', 'backward'])
    # get_H_n / get_Y are defined elsewhere; their declared output shapes
    # are (k,) and (L, k) respectively.
    model.add_node(Lambda(get_H_n, output_shape=(k, )),
                   name='h_n',
                   input='dropout')
    model.add_node(Lambda(get_Y, output_shape=(L, k)),
                   name='Y',
                   input='dropout')

    # Attention scores: M = tanh(W_y * Y + repeat(W_h * h_n)).
    model.add_node(Dense(k, W_regularizer=l2(0.01)), name='Wh_n', input='h_n')
    model.add_node(RepeatVector(L), name='Wh_n_cross_e', input='Wh_n')
    model.add_node(TimeDistributedDense(k, W_regularizer=l2(0.01)),
                   name='WY',
                   input='Y')
    model.add_node(Activation('tanh'),
                   name='M',
                   inputs=['Wh_n_cross_e', 'WY'],
                   merge_mode='sum')
    model.add_node(TimeDistributedDense(1, activation='softmax'),
                   name='alpha',
                   input='M')

    # Attention-weighted representation r, then h* = tanh(W_r r + W_h h_n).
    model.add_node(Lambda(get_R, output_shape=(k, 1)),
                   name='_r',
                   inputs=['Y', 'alpha'],
                   merge_mode='join')
    model.add_node(Reshape((k, )), name='r', input='_r')
    model.add_node(Dense(k, W_regularizer=l2(0.01)), name='Wr', input='r')
    model.add_node(Dense(k, W_regularizer=l2(0.01)), name='Wh', input='h_n')
    model.add_node(Activation('tanh'),
                   name='h_star',
                   inputs=['Wr', 'Wh'],
                   merge_mode='sum')

    model.add_node(Dense(3, activation='softmax'), name='out', input='h_star')
    model.add_output(name='output', input='out')

    # NOTE(review): the summary is printed unconditionally and again when
    # ``verbose`` is set; kept as-is to preserve existing output.
    model.summary()
    if verbose:
        model.summary()

    # Use the learning rate of the ``opts`` argument rather than a global
    # ``options`` object, so the function works with whatever it is given.
    model.compile(loss={'output': 'categorical_crossentropy'},
                  optimizer=Adam(opts.lr))
    return model
def main(parameters=DEF_PRMS,
         weight_file='weights.h5',
         model_file='model.json'):
    """Build, compile and train the SMILES sequence autoencoder.

    Reads the training SMILES strings from ``CONFIGS[TRAIN_SET]['file']``,
    one-hot encodes them, assembles a ``Sequential`` encoder / (optional
    VAE) / decoder network according to ``parameters``, writes the model
    architecture to ``model_file`` as JSON and fits the model to reproduce
    its input, checkpointing the best weights to ``weight_file``.

    Args:
        parameters: Hyper-parameter mapping; keys missing from it are filled
            in from ``DEF_PRMS``.  The mapping passed in is not modified.
        weight_file: Path of the weights checkpoint.  With the VAE enabled a
            second checkpoint is written to ``'annealed_' + weight_file``.
        model_file: Path the model JSON is written to.

    Raises:
        ValueError: If ``OPTIM`` is not ``'adam'``, ``'rmsprop'`` or
            ``'sgd'``, or if the VAE is enabled but no layer named
            ``'variationaldense'`` exists in the model.
    """
    # Work on a copy so neither the caller's dict nor DEF_PRMS is mutated.
    parameters = dict(parameters)
    for key in DEF_PRMS:
        if key not in parameters:
            parameters[key] = DEF_PRMS[key]
        # Normalise floats and (single-element) arrays to plain floats.
        if isinstance(parameters[key], (float, np.ndarray)):
            parameters[key] = float(parameters[key])

    def no_schedule(x):
        return float(1)

    def sigmoid_schedule(x, slope=1., start=parameters['vae_annealer_start']):
        return float(1 / (1. + np.exp(slope * (start - float(x)))))

    # NOTE(review): start/end are captured but not reported in this function.
    start = time.time()
    with open(CONFIGS[TRAIN_SET]['file'], 'r') as f:
        smiles = [line.strip() for line in f]

    print('Training set size is {}'.format(len(smiles)))
    # Convert each string once and drop those smile_convert rejects.
    converted = (smile_convert(smile) for smile in smiles)
    smiles = [smile for smile in converted if smile]
    print('Training set size is {}, after filtering to max length of {}'.format(len(smiles), MAX_LEN))
    shuffle(smiles)

    # One-hot encode: X[sample, position, character index] = 1.
    X = np.zeros((len(smiles), MAX_LEN, nchars), dtype=np.float32)
    for i, smile in enumerate(smiles):
        for t, char in enumerate(smile):
            X[i, t, char_indices[char]] = 1

    model = Sequential()

    def add_batchnorm(flag):
        # Append a batch-normalisation layer when ``parameters[flag]`` is set.
        if parameters[flag]:
            model.add(BatchNormalization(mode=0, axis=-1))

    ## Convolutions
    if parameters['do_conv_encoder']:
        model.add(Convolution1D(int(parameters['conv_dim_depth'] *
                                    parameters['conv_d_growth_factor']),
                                int(parameters['conv_dim_width'] *
                                    parameters['conv_w_growth_factor']),
                                batch_input_shape=(parameters['batch_size'], MAX_LEN, nchars),
                                activation=parameters['conv_activation']))
        add_batchnorm('batchnorm_conv')
        if parameters['average_pooling']:
            model.add(AveragePooling1D())

        for j in range(parameters['conv_depth'] - 1):
            model.add(Convolution1D(int(parameters['conv_dim_depth'] *
                                        parameters['conv_d_growth_factor']**(j + 1)),
                                    int(parameters['conv_dim_width'] *
                                        parameters['conv_w_growth_factor']**(j + 1)),
                                    activation=parameters['conv_activation']))
            add_batchnorm('batchnorm_conv')
            if parameters['average_pooling']:
                model.add(AveragePooling1D())

        if parameters['do_extra_gru']:
            model.add(GRU(parameters['recurrent_dim'],
                          return_sequences=False,
                          activation=parameters['rnn_activation']))
        else:
            model.add(Flatten())

    else:
        for k in range(parameters['gru_depth'] - 1):
            model.add(GRU(parameters['recurrent_dim'], return_sequences=True,
                          batch_input_shape=(parameters['batch_size'], MAX_LEN, nchars),
                          activation=parameters['rnn_activation']))
            add_batchnorm('batchnorm_gru')

        model.add(GRU(parameters['recurrent_dim'],
                      return_sequences=False,
                      activation=parameters['rnn_activation']))
        add_batchnorm('batchnorm_gru')

    ## Middle layers
    for i in range(parameters['middle_layer']):
        model.add(Dense(int(parameters['hidden_dim'] *
                            parameters['hg_growth_factor']**(parameters['middle_layer'] - i)),
                        activation=parameters['activation']))
        add_batchnorm('batchnorm_mid')

    ## Variational AE
    if parameters['do_vae']:
        model.add(VAE(parameters['hidden_dim'], batch_size=parameters['batch_size'],
                      activation=parameters['vae_activation'],
                      prior_logsigma=0))
        add_batchnorm('batchnorm_vae')

    if DOUBLE_HG:
        for i in range(parameters['middle_layer']):
            model.add(Dense(int(parameters['hidden_dim'] *
                                parameters['hg_growth_factor']**(i)),
                            activation=parameters['activation']))
            add_batchnorm('batchnorm_mid')

    if REPEAT_VECTOR:
        model.add(RepeatVector(MAX_LEN))

    ## Recurrent for writeout
    for k in range(parameters['gru_depth'] - 1):
        model.add(GRU(parameters['recurrent_dim'], return_sequences=True,
                      activation=parameters['rnn_activation']))
        add_batchnorm('batchnorm_gru')

    model.add(TerminalGRU(nchars, return_sequences=True,
                          activation='softmax',
                          temperature=TEMP,
                          dropout_U=parameters['tgru_dropout']))

    # Only Adam has a ``beta_1`` argument.  SGD's equivalent is ``momentum``;
    # RMSprop has no momentum-like argument (its decay parameter is ``rho``),
    # so only the learning rate is passed there.
    if OPTIM == 'adam':
        optim = Adam(lr=parameters['lr'], beta_1=parameters['momentum'])
    elif OPTIM == 'rmsprop':
        optim = RMSprop(lr=parameters['lr'])
    elif OPTIM == 'sgd':
        optim = SGD(lr=parameters['lr'], momentum=parameters['momentum'])
    else:
        raise ValueError('Unsupported OPTIM: %r' % (OPTIM,))

    model.compile(loss=LOSS, optimizer=optim)

    # SAVE
    json_string = model.to_json()
    with open(model_file, 'w') as json_out:
        json_out.write(json_string)

    # CALLBACK
    smile_checker = CheckMolecule()

    cbk = ModelCheckpoint(weight_file,
                          save_best_only=True)

    callbacks = [smile_checker, cbk]
    if parameters['do_vae']:
        # Locate the VAE layer (last match wins) for the weight annealer.
        vae_index = None
        for i, layer in enumerate(model.layers):
            if layer.name == 'variationaldense':
                vae_index = i
        if vae_index is None:
            raise ValueError("no layer named 'variationaldense' in the model")

        vae_schedule = VAEWeightAnnealer(sigmoid_schedule,
                                         vae_index,
                                         )
        anneal_epoch = parameters['vae_annealer_start']
        weights_start = anneal_epoch + int(min(VAE_WEIGHTS_START, 0.25 * anneal_epoch))

        cbk_post_VAE = CheckpointPostAnnealing('annealed_' + weight_file,
                                               save_best_only=True,
                                               monitor='val_acc',
                                               start_epoch=weights_start,
                                               verbose=1)
        callbacks = [smile_checker, vae_schedule, cbk, cbk_post_VAE]

    # Autoencoder training: the input is also the target.
    model.fit(X, X, batch_size=parameters['batch_size'],
              nb_epoch=parameters['epochs'],
              callbacks=callbacks,
              validation_split=VAL_SPLIT,
              show_accuracy=True)

    end = time.time()
Example #8
0
def train2gru(img_w, img_h, path_data, load, path_name_model, epoch_cnt, lrate,
              ibatchsize):
    """Build a conv + stacked-GRU CTC network, optionally train it, and save it.

    The network is two Conv2D/Dropout/MaxPooling2D stages, a Reshape and a
    Dense layer, followed by two levels of paired forward/backward GRUs.  The
    outputs of the second pair are concatenated and projected to a softmax.
    The CTC loss is produced inside the graph by a Lambda layer wrapping
    ``ctc_lambda_func``.

    Args:
        img_w: Image width; first spatial dimension of the input tensor.
        img_h: Image height; second spatial dimension of the input tensor.
        path_data: Data location handed to ``TextImageGenerator``.
        load: When truthy the model is read from ``./tmp_model.h5`` and no
            training is run; otherwise a fresh model is built and trained.
        path_name_model: Path prefix; ``<prefix>.json`` (architecture) and
            ``<prefix>.h5`` (weights) are written.
        epoch_cnt: Number of epochs passed to ``fit_generator``.
        lrate: Learning rate of the SGD optimizer.
        ibatchsize: Batch size given to both data generators.

    Returns:
        The compiled Keras model.
    """
    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    batch_size = ibatchsize
    downsample_factor = pool_size**2
    act = 'relu'

    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, img_w, img_h) if channels_first else (img_w, img_h, 1)

    # Training and validation batch generators.
    tiger_train = TextImageGenerator(path_data, 'train', img_w, img_h,
                                     batch_size, downsample_factor)
    tiger_train.build_data()
    tiger_val = TextImageGenerator(path_data, 'val', img_w, img_h, batch_size,
                                   downsample_factor)
    tiger_val.build_data()

    # Keyword arguments shared by the repeated layer types.
    conv_opts = dict(padding='same',
                     activation=act,
                     kernel_initializer='he_normal')
    gru_opts = dict(return_sequences=True, kernel_initializer='he_normal')
    pool_window = (pool_size, pool_size)

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Convolutional front end; the Dropout layers were added by "evrim".
    features = Conv2D(conv_filters, kernel_size, name='conv1',
                      **conv_opts)(input_data)
    features = Dropout(0.2)(features)
    features = MaxPooling2D(pool_size=pool_window, name='max1')(features)
    features = Conv2D(conv_filters, kernel_size, name='conv2',
                      **conv_opts)(features)
    features = Dropout(0.2)(features)
    features = MaxPooling2D(pool_size=pool_window, name='max2')(features)

    conv_to_rnn_dims = (img_w // downsample_factor,
                        (img_h // downsample_factor) * conv_filters)
    features = Reshape(target_shape=conv_to_rnn_dims,
                       name='reshape')(features)

    # Cuts down input size going into the RNN.
    features = Dense(time_dense_size, activation=act,
                     name='dense1')(features)
    features = Dropout(0.2)(features)

    # Two levels of bidirectional GRUs (forward + go_backwards pairs).
    gru_1 = GRU(rnn_size, name='gru1', **gru_opts)(features)
    gru_1b = GRU(rnn_size, go_backwards=True, name='gru1_b',
                 **gru_opts)(features)
    gru1_merged = add([gru_1, gru_1b])

    gru_2 = GRU(rnn_size, name='gru2', **gru_opts)(gru1_merged)
    gru_2b = GRU(rnn_size, go_backwards=True, name='gru2_b',
                 **gru_opts)(gru1_merged)

    # A third GRU level (gru3 / gru3_b fed from add([gru_2, gru_2b]) and a
    # 'dense3' projection) was tried by "evrim" and is currently disabled.

    # Transforms RNN output to character activations.
    char_scores = Dense(tiger_train.get_output_size(),
                        kernel_initializer='he_normal',
                        name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(char_scores)

    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[tiger_train.max_text_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters,
    # so the CTC loss is implemented in a Lambda layer.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')(ctc_inputs)

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=lrate, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    if not load:
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)
    else:
        model = load_model('./tmp_model.h5', compile=False)

    # The loss calculation happens inside the 'ctc' layer, so the compiled
    # loss simply passes that layer's output through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=sgd,
                  metrics=['accuracy'])

    if not load:
        # Captures output of softmax so it can be decoded during visualization.
        test_func = K.function([input_data], [y_pred])

        history = model.fit_generator(generator=tiger_train.next_batch(),
                                      steps_per_epoch=tiger_train.n,
                                      epochs=epoch_cnt,
                                      validation_data=tiger_val.next_batch(),
                                      validation_steps=tiger_val.n)

    # Save the architecture as JSON and the weights as HDF5.
    model_json = model.to_json()
    with open(path_name_model + ".json", "w") as json_file:
        json_file.write(model_json)

    model.save_weights(path_name_model + ".h5")

    print("Saved" + path_name_model + " model to disk")

    # model.save(path_name_model + ".h5") did not work:
    # RuntimeError: Unable to create attribute (object header message is too large)

    return model
Example #9
0
                   name='conv1')(input_data)
# Second half of the convolutional front end: pool, convolve again, pool again.
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation='relu', kernel_initializer='he_normal',
                   name='conv2')(inner)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

# Two poolings shrink each spatial axis by pool_size ** 2; the second axis and
# the filter axis are folded together into one feature dimension.
conv_to_rnn_dims = (width // (pool_size ** 2), (height // (pool_size ** 2)) * conv_filters)
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

# cuts down input size going into RNN:
inner = Dense(time_dense_size, activation='relu', name='dense1')(inner)

# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
# Each level pairs a forward GRU with a go_backwards GRU on the same input.
gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
# First level is merged by element-wise addition ...
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

# transforms RNN output to character activations:
# ... while the second level is concatenated; the Dense layer has
# len(characters) + 1 outputs (presumably the extra one is the CTC blank -- confirm).
inner = Dense(len(characters)+1, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
y_pred = Activation('softmax', name='softmax')(inner)
# Print the layer table of the prediction-only model.
Model(inputs=input_data, outputs=y_pred).summary()

# Extra inputs consumed by the CTC loss; label sequences are fixed at length 7.
labels = Input(name='the_labels', shape=[7], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
Example #10
0
# Build one word-level encoder + attention branch per sentence of the document.
for i in range(numSentencesPerDoc):

    # Slice sentence `i` out of the document tensor.  The loop index is bound
    # as a default argument: a plain closure looks `i` up lazily, so any later
    # re-invocation of the layer function (e.g. when the layer is called again
    # after the loop has finished) would slice the LAST sentence instead.
    x_pop = Lambda(lambda x, i=i: x[:, i],
                   output_shape=(numWordsPerSentence, ),
                   name='convert_shape_' + 'sentence' + str(i))(x_in)

    # Shared embedding followed by dropout on the word embeddings.
    emb = embLayer(x_pop)
    emb = Dropout(dropWordEmb)(emb)

    WORD_GRU_NUM = 30

    # Bidirectional GRU over the words; forward and backward outputs are
    # concatenated, and L2 regularization is applied to all weight groups.
    biRnn_word = Bidirectional(GRU(WORD_GRU_NUM,
                                   return_sequences=True,
                                   bias_regularizer=regularizers.l2(eta),
                                   kernel_regularizer=regularizers.l2(eta),
                                   recurrent_regularizer=regularizers.l2(eta),
                                   dropout=dr,
                                   recurrent_dropout=dropWordRnnOut,
                                   unroll=True),
                               merge_mode='concat')(emb)  #int(emb.shape[1])

    # Width of the bidirectional output, reused as the attention hidden size.
    CONTEXT_DIM = int(biRnn_word.shape[2])

    # Attention scores: tanh projection followed by a bias-free scalar score.
    eij_ = Dense(CONTEXT_DIM, use_bias=True, activation='tanh')(biRnn_word)
    eij = Dense(1, use_bias=False)(eij_)
    # NOTE(review): `eij` has a last axis of size 1, and this softmax is applied
    # per time step over that axis, which looks like it yields 1.0 everywhere
    # rather than a distribution over words -- confirm the intended axis.
    eij_normalized = TimeDistributed(Activation('softmax'))(eij)
    #eij_permuted =Permute((2,1))(eij_normalized)

    # Contract the word axis: score-weighted sum of the GRU states.
    sent_vec = Dot(axes=1)(
        [eij_normalized, biRnn_word]
    )  #merge([eij_permuted, biRnn_word], mode = 'mul', name='word_attention_'+str(i))
Example #11
0
'num_layers':num_layers, 'only_train_note_starts': only_train_note_starts, 
'velocity_threshold_such_that_it_is_a_played_note': velocity_threshold_such_that_it_is_a_played_note, 
'scale': scale_velocity_between_0_and_1, 'classes': class_string}
# Unique run name: a Unix timestamp followed by the hyper-parameters in `fd`.
t = str(int(round(time.time())))
model_name = t+'-num_layers_%(num_layers)s_maxlen_%(input_length)s_otns_%(only_train_note_starts)s_lstmsize_%(lstm_size)s_trainsize_%(trainsize)s_testsize_%(testsize)s_thresh_%(velocity_threshold_such_that_it_is_a_played_note)s_scale_%(scale)s_classes_%(classes)s' % fd

# Every run gets its own sub-directory below the base model path.
model_path = model_path + model_name + '/'
if not os.path.exists(model_path):
    os.makedirs(model_path)


# Define an input sequence and process it.
inputs = Input(shape=(None, input_dim))
lstm_outputs = inputs
# All recurrent layers except the last return the full sequence so that they
# can be stacked (the layers are GRUs despite the `lstm_` variable names).
for layer_no in range(num_layers-1):
    lstm_outputs = GRU(lstm_size, return_state=False, return_sequences=True)(lstm_outputs)
# last layer, that does not return sequences
lstm_outputs = GRU(lstm_size, return_state=False, return_sequences=False)(lstm_outputs)
dense = Dense(num_classes, activation='softmax')
outputs = dense(lstm_outputs)
model = Model(inputs, outputs)


# Compile the classifier.  `optimizer` arrives as a string; the two known
# names are swapped for configured optimizer objects, anything else is passed
# to Keras unchanged.
if optimizer == 'RMS':
    optimizer = RMSprop(lr=learning_rate)
elif optimizer == 'Adam':
    optimizer = Adam(lr=learning_rate)
# NOTE(review): binary_crossentropy is paired with a softmax output here --
# confirm this is intended rather than categorical_crossentropy.
loss = 'binary_crossentropy'
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
model.summary()
Example #12
0
# TextCNN
# model = Sequential()
# model.add(Embedding(2000, 64, input_length=50))
# model.add(Conv1D(filters=64, kernel_size=4, padding='same', activation='relu'))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Dropout(0.25))
# model.add(Flatten())
# model.add(Dense(64, activation='relu'))
# model.add(Dense(32, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))

# GRU classifier: embedding -> GRU -> two dense blocks -> one sigmoid unit.
model = Sequential([
    Embedding(2000, 64, input_length=50),
    GRU(64, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.25),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

def auc(y_true, y_pred):
    """Keras metric wrapper around ``tf.metrics.auc``.

    Returns the second element of the pair produced by ``tf.metrics.auc``
    after running ``tf.local_variables_initializer()`` in the backend session.
    """
    _, update_op = tf.metrics.auc(y_true, y_pred)
    session = K.get_session()
    session.run(tf.local_variables_initializer())
    return update_op

# Compile with binary cross-entropy and Adam, reporting the custom `auc` metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[auc])
#
# Train for 5 epochs with mini-batches of 128 samples.
model.fit(x_train, y_train, epochs=5, batch_size=128)
# model.save('gru_model.h5')
Example #13
0
# Tail of the convolutional stack: pad (padding=(0, 1)), pool with a
# (2, 1) stride, then a final 2x2 'valid' convolution with 512 filters.
m = ZeroPadding2D(padding=(0, 1))(m)
m = MaxPooling2D(pool_size=(2, 2),
                 strides=(2, 1),
                 padding='valid',
                 name='pool4')(m)
m = Conv2D(512,
           kernel_size=(2, 2),
           activation='relu',
           padding='valid',
           name='conv7')(m)
m = BatchNormalization(axis=3)(m)

# Swap the first two non-batch axes and flatten everything behind the new
# leading axis so that it can serve as the time axis of the recurrent layers.
m = Permute((2, 1, 3), name='permute')(m)
m = TimeDistributed(Flatten(), name='timedistrib')(m)

# Two bidirectional recurrent layers with a linear projection in between.
# The layers keep their historical 'blstm*' names although they are GRUs.
m = Bidirectional(GRU(rnnunit, return_sequences=True, implementation=2),
                  name='blstm1')(m)
# m = Bidirectional(LSTM(rnnunit,return_sequences=True),name='blstm1')(m)
m = Dense(
    rnnunit,
    name='blstm1_out',
    activation='linear',
)(m)
# m = Bidirectional(LSTM(rnnunit,return_sequences=True),name='blstm2')(m)
m = Bidirectional(GRU(rnnunit, return_sequences=True, implementation=2),
                  name='blstm2')(m)
# Per-time-step class probabilities over `nclass` outputs.
y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(m)

# Prediction-only model.  NOTE(review): `input` is defined outside this
# excerpt and shadows the Python builtin of the same name.
basemodel = Model(inputs=input, outputs=y_pred)
basemodel.summary()
# Label input, at most `maxlabellength` entries per sample.
labels = Input(name='the_labels', shape=[maxlabellength], dtype='float32')
Example #14
0
    def build(self):
        """Create the inner RNN and every shared parameter of this layer.

        The inner recurrent layer (GRU or LSTM, chosen by ``self.inner_rnn``)
        is constructed with an input width of ``input_dim + self.m_length``
        and built.  The initial memory/state/weighting variables and the
        parameters of the erase/add and read/write addressing are then
        created, and everything is collected in ``self.params``.

        Raises:
            ValueError: If ``self.inner_rnn`` is neither ``'gru'`` nor
                ``'lstm'``.
        """
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        # Reject unknown kinds first, then pick the recurrent class.
        if self.inner_rnn not in ('gru', 'lstm'):
            raise ValueError('this inner_rnn is not implemented yet.')
        rnn_class = GRU if self.inner_rnn == 'gru' else LSTM
        self.rnn = rnn_class(input_dim=input_dim + self.m_length,
                             input_length=input_leng,
                             output_dim=self.output_dim,
                             init=self.init,
                             inner_init=self.inner_init)
        self.rnn.build()

        # Short local aliases for the values used over and over below.
        init = self.rnn.init
        out_dim = self.output_dim
        mem_len = self.m_length

        # initial memory, state, read and write vectors
        self.M = theano.shared(0.001 * np.ones((1, )).astype(floatX))
        self.init_h = shared_zeros(out_dim)
        self.init_wr = init((self.n_slots, ))
        self.init_ww = init((self.n_slots, ))

        # write: erase and add projections
        self.W_e = init((out_dim, mem_len))
        self.b_e = shared_zeros(mem_len)
        self.W_a = init((out_dim, mem_len))
        self.b_a = shared_zeros(mem_len)

        # get_w parameters for the reading operation
        # (3 = beta, g, gamma; see eq. 5, 7, 9 in Graves et al. 2014)
        self.W_k_read = init((out_dim, mem_len))
        self.b_k_read = init((mem_len, ))
        self.W_c_read = init((out_dim, 3))
        self.b_c_read = shared_zeros(3)
        self.W_s_read = init((out_dim, self.shift_range))
        self.b_s_read = shared_zeros(self.shift_range)

        # get_w parameters for the writing operation
        # (3 = beta, g, gamma; see eq. 5, 7, 9)
        self.W_k_write = init((out_dim, mem_len))
        self.b_k_write = init((mem_len, ))
        self.W_c_write = init((out_dim, 3))
        self.b_c_write = shared_zeros(3)
        self.W_s_write = init((out_dim, self.shift_range))
        self.b_s_write = shared_zeros(self.shift_range)

        self.C = _circulant(self.n_slots, self.shift_range)

        # Parameters owned by this layer, appended after the inner RNN's own.
        own_params = [
            self.W_e, self.b_e, self.W_a, self.b_a,
            self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M, self.init_h, self.init_wr, self.init_ww,
        ]
        self.params = self.rnn.params + own_params

        # The LSTM variant carries one additional initial variable.
        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros(out_dim)
            self.params = self.params + [self.init_c]
Example #15
0
                      activation=act,
                      name='conv2')(inner)
# Second pooling stage of the convolutional front end.
inner = MaxPooling2D(pool_size=(pool_size_2, pool_size_2), name='max2')(inner)

# After both poolings: fold the height and filter axes into one feature axis,
# keep the (downsampled) width as the second axis, then swap the two so the
# width axis comes first for the recurrent layers.
conv_to_rnn_dims = ((img_h // (pool_size_1 * pool_size_2)) * conv_num_filters,
                    img_w // (pool_size_1 * pool_size_2))
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
inner = Permute(dims=(2, 1), name='permute')(inner)

# cuts down input size going into RNN:
inner = TimeDistributed(Dense(time_dense_size, activation=act,
                              name='dense1'))(inner)

# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
# Each level pairs a forward GRU with a go_backwards GRU on the same input.
gru_1 = GRU(rnn_size, return_sequences=True, name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             name='gru1_b')(inner)
# First level merged by summation (functional `merge` API with `mode=`).
gru1_merged = merge([gru_1, gru_1b], mode='sum')
gru_2 = GRU(rnn_size, return_sequences=True, name='gru2')(gru1_merged)
# Unlike its siblings, this backward GRU is not given an explicit name.
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True)(gru1_merged)

# transforms RNN output to character activations:
# The second level is concatenated before the per-time-step projection.
inner = TimeDistributed(Dense(img_gen.get_output_size(),
                              name='dense2'))(merge([gru_2, gru_2b],
                                                    mode='concat'))
y_pred = Activation('softmax', name='softmax')(inner)
# Print the layer table of the prediction-only model.
Model(input=[input_data], output=y_pred).summary()

labels = Input(name='the_labels',
               shape=[img_gen.absolute_max_string_len],
Example #16
0
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the conv + bidirectional-GRU CTC network and train it with Adam.

    Args:
        run_name: Run identifier; used as the sub-directory of ``OUTPUT_DIR``
            when looking up weight files and passed to ``VizCallback``.
        start_epoch: Epoch to start from.  When greater than 0 the weights
            saved for epoch ``start_epoch - 1`` are loaded first.
        stop_epoch: Passed to ``fit_generator`` as ``epochs``.
        img_w: Image width; the image height is fixed to 64.
    """
    # Input Parameters
    img_h = 64
    #todo by saiful
    # words_per_epoch you have to calculate divided by minibatch_size
    words_per_epoch = 9280
    val_split = 0.1
    # Number of words held out for validation in each epoch.
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    # The single-channel axis goes first or last depending on the backend setting.
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    imgs_data_dir = 'word'  # data directory
    # Note that `val_split` here receives a word count (number of training
    # words), not the 0.1 fraction defined above.
    img_gen = TextImageGenerator(image_data_path=imgs_data_dir,
                                 minibatch_size=minibatch_size,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size**2),
                                 val_split=words_per_epoch - val_words)

    act = 'relu'

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    # Convolutional front end: two conv + max-pool stages ...
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
    # inner = Dropout(0.3)(inner)
    # conv3 added
    # ... plus a third 2x2 convolution without pooling.  Its filter count is
    # the literal 16, which equals conv_filters; the reshape below relies on that.
    inner = Conv2D(16, (2, 2),
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv3')(inner)
    # inner = BatchNormalization()(inner)
    # inner = Dropout(0.2)(inner)

    # Two poolings shrink each spatial axis by pool_size**2; the height and
    # filter axes are folded together into one feature dimension.
    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    # First level: forward and backward outputs are added ...
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    # ... second level: forward and backward outputs are concatenated.
    inner = Dense(img_gen.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    # Print the layer table of the prediction-only model.
    Model(inputs=input_data, outputs=y_pred).summary()
    # exit()

    # Extra inputs consumed by the CTC loss layer.
    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # Learning rate is 0.001 / 30 (about 3.3e-5).
    # NOTE(review): an earlier remark here said "decrease 20 %", which does
    # not match this value -- confirm the intended rate.
    lr = 0.001 / 30.0
    optimizer = Adam(lr=lr)
    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=optimizer)
    # Resume: load the weights written for the epoch before `start_epoch`.
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    # Steps are counted in mini-batches; the generator itself is also
    # registered as a callback.
    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) //
                        minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch)
              d.index[i + size][1],  # week
              d[i:(i + size + 1)].values.flatten()]
        for i in range(len(d) - size)
    ]


# Build the sliding windows over the series (converted to float).
dw = window_with_index(ds.astype('float'), t)

# Everything but the last element of each window is the model input, reshaped
# to (number of windows, input_num, 1); the last element is the target.
data = np.array([i[0:-1] for i in dw]).reshape(len(dw), input_num, 1)
labels = np.array([i[-1] for i in dw]).reshape(len(dw), 1)

# Regression model: batch normalization -> one GRU layer -> one linear unit.
model = Sequential()

model.add(BatchNormalization(axis=1, input_shape=(input_num, 1)))

model.add(GRU(n_num, activation='relu'))

model.add(Dense(1))
model.add(Activation('linear'))

# Nadam with its default settings, minimizing the mean squared error.
opt = Nadam()

model.compile(loss='mean_squared_error', optimizer=opt)

# Objects for the training call that follows: a SampleStopper configured with
# patience=800 and min_epochs=3000, and a TensorBoard callback.
stopper = SampleStopper(dest_model, patience=800, min_epochs=3000)
tensorboard = TensorBoard()

model.fit(data,
          labels,
          epochs=epoch,
          batch_size=batch_size,
Example #18
0
    model = Word2Vec(tokenize, size=100, window=3, min_count=1, workers=4)
    model.save("models/word2vec_skipgram.model")
    start_time = time.time()

    # X_test = np.reshape(np.array(test_tokenize), np.array(test_tokenize).shape + (1,1))
    Y_train = np.array(df['label_id'])
    # Y_test = np.array(data4)
    X_train = np.array(get_train_input(tokenize))
    print(np.shape(X_train))
    # print(np.shape(X_test))
    print(np.shape(Y_train))
    # print(np.shape(Y_test))
    input_dim = (X_train.shape[1], X_train.shape[2])

    model = Sequential()

    model.add(GRU(64, return_sequences=True, input_shape=input_dim))
    model.add(Dropout(0.2))
    model.add(GRU(32))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(3, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    model.fit(X_train, Y_train, epochs=15, batch_size=64)
    scores = model.evaluate(X_train, Y_train)
    model.save("models/GRU_model1.model")
    print("Accuracy: %.2f%%" % (scores[1] * 100))

    print()
    print("Execution Time %s seconds: " % (time.time() - start_time))
Example #19
0
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the conv + bidirectional-GRU CTC network and train it with SGD.

    Word lists are downloaded with ``get_file`` and fed to a
    ``TextImageGenerator`` that supplies training and validation batches.

    Args:
        run_name: Run identifier; used as the sub-directory of ``OUTPUT_DIR``
            when looking up weight files and passed to ``VizCallback``.
        start_epoch: Epoch to start from.  When greater than 0 the weights
            saved for epoch ``start_epoch - 1`` are loaded first.
        stop_epoch: Passed to ``fit_generator`` as ``epochs``.
        img_w: Image width; the image height is fixed to 64.
    """
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    # Number of words held out for validation in each epoch.
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    # The single-channel axis goes first or last depending on the backend
    # setting; the image axes are ordered (height, width) here.
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_h, img_w)
    else:
        input_shape = (img_h, img_w, 1)

    # Download and unpack the word lists; `fdir` is the directory holding them.
    fdir = os.path.dirname(
        get_file('wordlists.tgz',
                 origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
                 untar=True))

    # Note that `val_split` here receives a word count (number of training
    # words), not the 0.2 fraction defined above.
    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=(pool_size**2),
        val_split=words_per_epoch - val_words)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    # Convolutional front end: two conv + max-pool stages.
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # NOTE(review): the input is laid out (height, width, channels), but this
    # target shape puts (height // 4) * filters first and width // 4 last, so
    # the Reshape regroups elements rather than just merging two adjacent
    # axes -- confirm that the resulting sequence axis is the intended one.
    conv_to_rnn_dims = ((img_h // (pool_size**2)) * conv_filters,
                        img_w // (pool_size**2))
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    # First level: forward and backward outputs are added ...
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    # ... second level: forward and backward outputs are concatenated.
    inner = Dense(img_gen.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    # Print the layer table of the prediction-only model.
    Model(inputs=input_data, outputs=y_pred).summary()

    # Extra inputs consumed by the CTC loss layer.
    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    # Resume: load the weights written for the epoch before `start_epoch`.
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    # Steps are counted in mini-batches; the generator itself is registered as
    # a callback, as is `missinglink_callback`, which is defined outside this
    # function.
    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) //
                        minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen, missinglink_callback],
                        initial_epoch=start_epoch)
Example #20
0
def CRNN2D(X_shape, nb_classes):
    '''
    Convolutional-recurrent classifier used for evaluation in the paper.
    Inspired by the K. Choi model in:
    https://github.com/keunwoochoi/music-auto_tagging-keras/blob/master/music_tagger_crnn.py

    X_shape is the shape of the full data array; entries 1..3 give the
    per-sample input shape (frequency, time, channels).  nb_classes is the
    size of the softmax output.  Returns the (uncompiled) Sequential model.
    '''

    nb_layers = 4  # number of convolutional layers
    nb_filters = [64, 128, 128, 128]  # filters per convolutional layer
    kernel_size = (3, 3)  # convolution kernel size
    activation = 'elu'  # activation function to use after each layer
    pool_size = [(2, 2), (4, 2), (4, 2), (4, 2),
                 (4, 2)]  # size of pooling area

    # shape of input data (frequency, time, channels)
    input_shape = (X_shape[1], X_shape[2], X_shape[3])
    frequency_axis, time_axis, channel_axis = 1, 2, 3

    # Create sequential model and normalize along frequency axis
    model = Sequential()
    model.add(BatchNormalization(axis=frequency_axis, input_shape=input_shape))

    # First convolution block; its Conv2D layer also specifies the input shape
    first_pool = pool_size[0]
    model.add(
        Conv2D(nb_filters[0],
               kernel_size=kernel_size,
               padding='same',
               data_format="channels_last",
               input_shape=input_shape))
    model.add(Activation(activation))
    model.add(BatchNormalization(axis=channel_axis))
    model.add(MaxPooling2D(pool_size=first_pool, strides=first_pool))
    model.add(Dropout(0.1))

    # Remaining convolution blocks: conv -> activation -> batch norm ->
    # max pooling -> dropout, one block per (filters, pool) pair
    remaining = zip(nb_filters[1:nb_layers], pool_size[1:nb_layers])
    for filters, pool in remaining:
        model.add(Conv2D(filters, kernel_size=kernel_size, padding='same'))
        model.add(Activation(activation))
        model.add(BatchNormalization(axis=channel_axis))
        model.add(MaxPooling2D(pool_size=pool, strides=pool))
        model.add(Dropout(0.1))

    # Reshaping input for recurrent layer
    # (frequency, time, channels) --> (time, frequency, channel)
    model.add(Permute((time_axis, frequency_axis, channel_axis)))
    # Merge the two trailing axes into one feature axis per time step
    permuted_shape = model.output_shape
    resize_shape = permuted_shape[2] * permuted_shape[3]
    model.add(Reshape((permuted_shape[1], resize_shape)))

    # recurrent layers: the second GRU returns only its last output
    model.add(GRU(32, return_sequences=True))
    model.add(GRU(32, return_sequences=False))
    model.add(Dropout(0.3))

    # Output layer
    model.add(Dense(nb_classes))
    model.add(Activation("softmax"))
    return model
Example #21
0
class LossHistory(Callback):
    """Keras callback that records the ``'loss'`` entry of every batch.

    After training, ``self.losses`` holds one value per processed batch, in
    order.  The ``logs`` parameters default to ``None`` instead of a shared
    mutable ``{}``, so no dictionary object is reused between calls.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh loss record when training begins."""
        # losses: per-batch values of logs['loss'], appended in batch order.
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append this batch's ``'loss'`` value (``None`` when it is absent)."""
        logs = logs or {}
        self.losses.append(logs.get('loss'))


# RMSE
def root_mean_squared_error(act, pred):
    """Return the root of the mean squared difference, reduced over axis 1.

    `act` holds the actual values and `pred` the predictions; both are
    backend tensors.
    """
    squared_error = K.square(pred - act)
    mean_squared_error = K.mean(squared_error, axis=1)
    return K.sqrt(mean_squared_error)


# Regression model: one GRU layer over windows of window_size - 1 single-value
# steps, followed by one linear output unit.
model = Sequential()

model.add(GRU(n_num, activation='relu', input_shape=(window_size - 1, 1)))

model.add(Dense(1))
model.add(Activation('linear'))

# Adam with the configured learning rate, minimizing the custom RMSE loss.
opt = Adam(lr=learning_rate)

model.compile(loss=root_mean_squared_error, optimizer=opt)

# Collects the loss of every batch during training.
history = LossHistory()

model.fit(data,
          labels,
          epochs=epoch,
          batch_size=batch_size,
          callbacks=[history])
Example #22
0
def CRNN2DLarger(X_shape, nb_classes):
    '''
    Build a larger and deeper convolutional-recurrent classifier.

    Five Conv2D -> activation -> BatchNormalization -> MaxPooling2D -> Dropout
    stages are followed by two GRU layers and a softmax output layer.

    Arguments:
        X_shape: shape of the input data; entries 1, 2 and 3 are used as
            (frequency, time, channels).
        nb_classes: number of units in the softmax output layer.

    Returns:
        The (uncompiled) Sequential model.
    '''
    # One entry per convolutional stage: filter count and pooling window.
    stage_filters = (64, 128, 256, 512, 512)
    stage_pools = ((2, 2), (2, 2), (2, 2), (4, 1), (4, 1))
    # pool sizes (4,2), (4,2), (4,1), (2,1) were noted as working well too
    kernel_size = (3, 3)
    activation = 'elu'  # applied after every convolution

    # Axis layout of one sample: (frequency, time, channels)
    frequency_axis, time_axis, channel_axis = 1, 2, 3
    input_shape = (X_shape[1], X_shape[2], X_shape[3])

    net = Sequential()
    net.add(BatchNormalization(axis=frequency_axis, input_shape=input_shape))

    for stage, (n_filters, pool) in enumerate(zip(stage_filters,
                                                  stage_pools)):
        if stage == 0:
            # Only the first convolution spells out data format / input shape.
            conv = Conv2D(n_filters,
                          kernel_size=kernel_size,
                          padding='same',
                          data_format="channels_last",
                          input_shape=input_shape)
        else:
            conv = Conv2D(n_filters, kernel_size=kernel_size, padding='same')
        net.add(conv)
        net.add(Activation(activation))
        net.add(BatchNormalization(axis=channel_axis))
        net.add(MaxPooling2D(pool_size=pool, strides=pool))
        net.add(Dropout(0.1))

    # Prepare the recurrent input:
    # (frequency, time, channels) --> (time, frequency, channels), then merge
    # the last two axes into one feature axis per time step.
    net.add(Permute((time_axis, frequency_axis, channel_axis)))
    _, n_steps, n_freq, n_channels = net.output_shape
    net.add(Reshape((n_steps, n_freq * n_channels)))

    # Recurrent part: the second GRU returns only its last output.
    net.add(GRU(32, return_sequences=True))
    net.add(GRU(32, return_sequences=False))
    net.add(Dropout(0.3))

    # Classification head
    net.add(Dense(nb_classes))
    net.add(Activation("softmax"))
    return net
Example #23
0
    def build(self):
        """Assemble the Conv3D + bidirectional-GRU network and its CTC model.

        Three Conv3D stages (zero padding, convolution, batch normalization,
        ReLU, spatial dropout, max pooling) feed two bidirectional GRU layers,
        a Dense layer with ``self.output_size`` units and a softmax.  Every
        intermediate tensor is stored as an attribute (``zero1`` ... ``maxp3``,
        ``resh1``, ``gru_1``, ``gru_2``, ``dense1``, ``y_pred``).  The final
        ``self.model`` takes the video, labels and both length inputs and
        outputs whatever ``CTC`` returns.
        """
        video_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)
        self.input_data = Input(name='the_input',
                                shape=video_shape,
                                dtype='float32')

        # Per stage: (filters, kernel, convolution strides, zero padding).
        stage_specs = (
            (32, (3, 5, 5), (1, 2, 2), (1, 2, 2)),
            (64, (3, 5, 5), (1, 1, 1), (1, 2, 2)),
            (96, (3, 3, 3), (1, 1, 1), (1, 1, 1)),
        )
        tensor = self.input_data
        for stage, (filters, kernel, strides, padding) in enumerate(
                stage_specs, start=1):
            tag = str(stage)
            padded = ZeroPadding3D(padding=padding, name='zero' + tag)(tensor)
            convolved = Conv3D(filters,
                               kernel,
                               strides=strides,
                               kernel_initializer='he_normal',
                               name='conv' + tag)(padded)
            normalized = BatchNormalization(name='batc' + tag)(convolved)
            activated = Activation('relu', name='actv' + tag)(normalized)
            dropped = SpatialDropout3D(0.5)(activated)
            tensor = MaxPooling3D(pool_size=(1, 2, 2),
                                  strides=(1, 2, 2),
                                  name='max' + tag)(dropped)

            # Expose each stage's tensors under the usual attribute names
            # (note: the pooling layer is named 'maxN', the attribute 'maxpN').
            for prefix, value in (('zero', padded), ('conv', convolved),
                                  ('batc', normalized), ('actv', activated),
                                  ('drop', dropped), ('maxp', tensor)):
                setattr(self, prefix + tag, value)

        # Flatten everything but the first (frame) axis of each sample.
        self.resh1 = TimeDistributed(Flatten())(self.maxp3)

        # Two stacked bidirectional GRUs, forward/backward outputs concatenated.
        sequence = self.resh1
        for level in (1, 2):
            sequence = Bidirectional(GRU(256,
                                         return_sequences=True,
                                         kernel_initializer='Orthogonal',
                                         name='gru%d' % level),
                                     merge_mode='concat')(sequence)
            setattr(self, 'gru_%d' % level, sequence)

        # Turn the RNN output into character activations.
        self.dense1 = Dense(self.output_size,
                            kernel_initializer='he_normal',
                            name='dense1')(self.gru_2)
        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        # Extra inputs consumed only by the CTC loss.
        self.labels = Input(name='the_labels',
                            shape=[self.absolute_max_string_len],
                            dtype='float32')
        self.input_length = Input(name='input_length',
                                  shape=[1],
                                  dtype='int64')
        self.label_length = Input(name='label_length',
                                  shape=[1],
                                  dtype='int64')

        ctc_inputs = [
            self.y_pred, self.labels, self.input_length, self.label_length
        ]
        self.loss_out = CTC('ctc', ctc_inputs)

        model_inputs = [
            self.input_data, self.labels, self.input_length, self.label_length
        ]
        self.model = Model(inputs=model_inputs, outputs=self.loss_out)
def MusicTaggerCRNN(weights='msd', input_tensor=None, include_top=True):
    '''Instantiate the MusicTaggerCRNN architecture,
    optionally loading weights pre-trained
    on Million Song Dataset.

    The dimension ordering convention used by the model is the one
    specified in your Keras config file (~/.keras/keras.json).
    The pre-trained weights loaded by this function come from
    `data/music_tagger_crnn_weights_theano.h5`, so `weights="msd"`
    requires `image_dim_ordering == "th"`.

    For preparing mel-spectrogram input, see
    `audio_conv_utils.py` in [applications](https://github.com/fchollet/keras/tree/master/keras/applications).
    You will need to install [Librosa](http://librosa.github.io/librosa/)
    to use it.

    # Arguments
        weights: one of `None` (random initialization)
            or "msd" (pre-training on Million Song Dataset).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as mel-spectrogram input for the model.
        include_top: whether to include the 1 fully-connected
            layer (output layer) at the top of the network.
            If False, the network outputs 32-dim features.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is neither `None` nor "msd".
        RuntimeError: if `weights="msd"` is requested while the Keras
            dimension ordering is "tf".
    '''
    if weights not in {'msd', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `msd` '
                         '(pre-training on Million Song Dataset).')

    # Query the backend once; every layout decision below depends on it.
    dim_ordering = K.image_dim_ordering()

    # Determine proper input shape and input axes
    if dim_ordering == 'th':
        input_shape = (1, 96, 1366)
        channel_axis = 1
        freq_axis = 2
        time_axis = 3
    else:
        input_shape = (96, 1366, 1)
        channel_axis = 3
        freq_axis = 1
        time_axis = 2

    if input_tensor is None:
        melgram_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        # Wrap a raw backend tensor so it can be used as a model input.
        melgram_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        melgram_input = input_tensor

    # Input block
    x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # NOTE(review): this normalizes over `time_axis` although the layer is
    # named 'bn_0_freq'. Left unchanged because the saved weights are matched
    # by layer name; confirm the intended axis before touching it.
    x = BatchNormalization(axis=time_axis, name='bn_0_freq')(x)

    # Conv block 1
    x = Convolution2D(64, 3, 3, border_mode='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.5, name='dropout1')(x)

    # Conv block 2
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.5, name='dropout2')(x)

    # Conv block 3
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
    x = Dropout(0.5, name='dropout3')(x)

    # Conv block 4
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv4')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
    x = Dropout(0.5, name='dropout4')(x)

    # reshaping: move the time axis first for 'th', then drop to (15, 128)
    if dim_ordering == 'th':
        x = Permute((3, 1, 2))(x)
    x = Reshape((15, 128))(x)

    # GRU block 1, 2, output
    x = GRU(32, return_sequences=True, name='gru1')(x)
    x = GRU(32, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)
    if include_top:
        x = Dense(50, activation='sigmoid', name='output')(x)

    # Create model
    model = Model(melgram_input, x)
    if weights is None:
        return model

    # Only Theano-ordered weights are available. The original compared the
    # function object `K.image_dim_ordering` (no call) to 'tf', which is
    # always False, so this guard could never trigger.
    if dim_ordering == 'tf':
        raise RuntimeError("Please set image_dim_ordering == 'th'. "
                           "You can set it at ~/.keras/keras.json")

    model.load_weights('data/music_tagger_crnn_weights_theano.h5',
                       by_name=True)
    return model
Example #25
0
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the conv + bidirectional-GRU OCR network with CTC loss and fit it.

    Arguments:
        run_name: name of the run; used as the sub-directory of OUTPUT_DIR
            that holds the weight files and passed on to VizCallback.
        start_epoch: first epoch to run; when greater than 0 the weights
            saved for epoch ``start_epoch - 1`` are loaded first.
        stop_epoch: passed to ``fit_generator`` as ``epochs``.
        img_w: width of the input images.
    """
    # Input parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * val_split)
    train_words = words_per_epoch - val_words

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32
    # Two pooling layers shrink each spatial axis by pool_size ** 2 overall.
    downsample = pool_size ** 2

    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, img_w, img_h) if channels_first else (img_w, img_h, 1)

    wordlist_archive = get_file(
        'wordlists.tgz',
        origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
        untar=True)
    fdir = os.path.dirname(wordlist_archive)

    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=downsample,
        val_split=train_words)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Two identical convolution + max-pooling stages (conv1/max1, conv2/max2).
    inner = input_data
    for stage in (1, 2):
        inner = Conv2D(conv_filters,
                       kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv%d' % stage)(inner)
        inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                             name='max%d' % stage)(inner)

    conv_to_rnn_dims = (img_w // downsample,
                        (img_h // downsample) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # Cut down the input size going into the RNN.
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs.
    # A single GRU layer seems to work as well, if not better than LSTM.
    gru_options = dict(return_sequences=True, kernel_initializer='he_normal')
    gru_1 = GRU(rnn_size, name='gru1', **gru_options)(inner)
    gru_1b = GRU(rnn_size, go_backwards=True, name='gru1_b',
                 **gru_options)(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, name='gru2', **gru_options)(gru1_merged)
    gru_2b = GRU(rnn_size, go_backwards=True, name='gru2_b',
                 **gru_options)(gru1_merged)

    # Transform the RNN output into character activations.
    char_logits = Dense(img_gen.get_output_size(),
                        kernel_initializer='he_normal',
                        name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(char_logits)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss functions with extra parameters,
    # so the CTC loss is implemented in a Lambda layer.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')(ctc_inputs)

    # NOTE(review): the comment that used to sit here credited clipnorm with
    # faster convergence, but no clipnorm is passed to SGD -- confirm whether
    # it was dropped on purpose.
    sgd = SGD(learning_rate=0.02, decay=1e-6, momentum=0.9, nesterov=True)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # The loss is computed inside the 'ctc' layer, so the compiled loss simply
    # passes that layer's output through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    if start_epoch > 0:
        # Resume from the weights written for the previous epoch.
        weight_file = os.path.join(OUTPUT_DIR, run_name,
                                   'weights%02d.h5' % (start_epoch - 1))
        model.load_weights(weight_file)

    # Capture the softmax output so it can be decoded during visualization.
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(
        generator=img_gen.next_train(),
        steps_per_epoch=train_words // minibatch_size,
        epochs=stop_epoch,
        validation_data=img_gen.next_val(),
        validation_steps=val_words // minibatch_size,
        callbacks=[viz_cb, img_gen],
        initial_epoch=start_epoch)
Example #26
0
def get_crnn_model():
    """Build and compile the 4-block CNN + 2-layer GRU classifier.

    The input is a single-channel 20 x 1250 array (laid out according to the
    Keras dimension ordering); the output is a 10-way softmax.  The model is
    compiled with Adam, categorical cross-entropy and an accuracy metric.

    Returns:
        The compiled Keras model.
    """
    theano_ordering = K.image_dim_ordering() == 'th'

    # Input shape and the axes used by the normalization layers.
    if theano_ordering:
        input_shape = (1, 20, 1250)
        channel_axis, mfcc_axis = 1, 2
    else:
        input_shape = (20, 1250, 1)
        channel_axis, mfcc_axis = 3, 1

    mfcc_input = Input(shape=input_shape)

    # Input block
    x = BatchNormalization(axis=mfcc_axis, name='bn0_mfcc')(mfcc_input)

    # Conv blocks 1-4 as (block number, filters, pooling window).  In
    # channels-last terms the pooled shapes are
    # (10, 625, 32) -> (5, 125, 64) -> (5, 25, 128) -> (1, 5, 64).
    conv_blocks = (
        (1, 32, (2, 2)),
        (2, 64, (2, 5)),
        (3, 128, (1, 5)),
        (4, 64, (5, 5)),
    )
    for number, filters, pool in conv_blocks:
        suffix = str(number)
        x = Conv2D(filters, (3, 3), name='conv' + suffix, padding='same')(x)
        x = BatchNormalization(axis=channel_axis, name='bn' + suffix)(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=pool, name='pool' + suffix)(x)
        x = Dropout(0.1, name='dropout' + suffix)(x)

    # Reshaping: bring the time axis first for 'th', then drop to (5, 64)
    if theano_ordering:
        x = Permute((3, 1, 2))(x)
    x = Reshape((5, 64))(x)

    # GRU block
    x = GRU(32, return_sequences=True, name='gru1')(x)
    x = GRU(32, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)

    # Output
    x = Dense(10, activation='softmax', name='output')(x)

    # Create and compile the model
    model = Model(mfcc_input, x)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model
                  trainable=False)(main_input)
# Regularize the word-embedding tensor built just above.
embed = Dropout(0.1, name='embed_dropout')(embed)
# English for the note below: "bidirectional LSTM to obtain the char
# embedding".  The code that follows actually uses two GRU layers.
"""
双向LSTM 获取 Char embedding
"""
# Character-level embedding lookup applied to `char_input`.
char_embed = Embedding(input_dim=charsize,
                       output_dim=char_embed_dim,
                       embeddings_initializer='lecun_uniform',
                       input_length=maxlen_char_word,
                       mask_zero=False,
                       name='char_embedding')(char_input)
# Keep the embedding output's shape; s[-2] and s[1] size the reshapes below.
s = char_embed.shape
# Collapse the leading axes so every row is one sequence of s[-2] steps with
# char_embed_dim features.
# presumably s[-2] is the characters-per-word axis -- TODO confirm
char_embed = Lambda(lambda x: K.reshape(x, shape=(-1, s[-2], char_embed_dim)))(
    char_embed)

# Read each sequence in both directions (second GRU uses go_backwards=True).
# With return_state=True the call returns several tensors and [-2] selects the
# second-to-last one -- presumably the layer output; TODO confirm.
fwd_state = GRU(150, return_state=True)(char_embed)[-2]
bwd_state = GRU(150, return_state=True, go_backwards=True)(char_embed)[-2]
# Join both directions on the last axis (150 + 150 features) ...
char_embed = Concatenate(axis=-1)([fwd_state, bwd_state])
# ... and restore an s[1]-long axis in front of the 2 * 150 features.
char_embed = Lambda(lambda x: K.reshape(x, shape=[-1, s[1], 2 * 150]))(
    char_embed)
char_embed = Dropout(0.1, name='char_embed_dropout')(char_embed)
# English for the note below: "use attention to combine the word embedding
# and the character embedding".
"""
使用attention将word embedding和character embedding结合起来
"""
# Project both embeddings to 300 units, sum them, then apply
# tanh -> Dense(300) -> sigmoid; the result is stored in `a`.
# NOTE(review): `merge(..., mode='sum')` looks like the Keras 1 functional
# merge API -- confirm the Keras version this code targets.
W_embed = Dense(300, name='Wembed')(embed)
W_char_embed = Dense(300, name='W_charembed')(char_embed)
merged1 = merge([W_embed, W_char_embed], name='merged1', mode='sum')
tanh = Activation('tanh', name='tanh')(merged1)
W_tanh = Dense(300, name='w_tanh')(tanh)
a = Activation('sigmoid', name='sigmoid')(W_tanh)
Example #28
0
def get_model(data_in, data_out, dropout_rate, nb_cnn2d_filt, pool_size,
              rnn_size, fnn_size, classification_mode, weights, loader,
              loader2):
    """Build (or load) and compile the CNN + bidirectional-GRU SED/DOA model.

    Arguments:
        data_in: input shape; its last three entries size the Input layer and
            entry -2 is the sequence length kept for the recurrent part.
        data_out: pair of output shapes; ``data_out[0][-1]`` sizes the SED
            head and ``data_out[1][-1]`` the DOA head.
        dropout_rate: rate used by every Dropout layer and by the GRUs.
        nb_cnn2d_filt: number of filters of each Conv2D layer.
        pool_size: one pooling factor per convolutional block.
        rnn_size: units of each bidirectional GRU layer.
        fnn_size: units of each time-distributed Dense layer of the heads.
        classification_mode: not used by this function.
        weights: passed to ``compile`` as ``loss_weights``.
        loader: when truthy, replace the freshly built model with the one
            loaded from the hard-coded .h5 path.
        loader2: when truthy (and ``loader`` is set), attach new SED/DOA
            output heads to the loaded model and copy the saved weights back.

    Returns:
        The compiled model with outputs ``[sed, doa]``.
    """

    def _fnn_stack(features):
        # Time-distributed Dense + Dropout for every entry of fnn_size.
        for nb_fnn_filt in fnn_size:
            features = TimeDistributed(Dense(nb_fnn_filt))(features)
            features = Dropout(dropout_rate)(features)
        return features

    def _output_head(features, nb_units, activation, name):
        # Final time-distributed projection followed by the named activation.
        projected = TimeDistributed(Dense(nb_units))(features)
        return Activation(activation, name=name)(projected)

    # model definition
    spec_start = Input(shape=(data_in[-3], data_in[-2], data_in[-1]))
    spec_cnn = spec_start
    for pool_factor in pool_size:
        spec_cnn = Conv2D(filters=nb_cnn2d_filt,
                          kernel_size=(3, 3),
                          padding='same')(spec_cnn)
        spec_cnn = BatchNormalization()(spec_cnn)
        spec_cnn = Activation('relu')(spec_cnn)
        # Pool along the last spatial axis only.
        spec_cnn = MaxPooling2D(pool_size=(1, pool_factor))(spec_cnn)
        spec_cnn = Dropout(dropout_rate)(spec_cnn)
    spec_cnn = Permute((2, 1, 3))(spec_cnn)

    # Fold everything except the data_in[-2] axis into one feature axis.
    spec_rnn = Reshape((data_in[-2], -1))(spec_cnn)
    for nb_rnn_filt in rnn_size:
        recurrent_cell = GRU(nb_rnn_filt,
                             activation='tanh',
                             dropout=dropout_rate,
                             recurrent_dropout=dropout_rate,
                             return_sequences=True)
        spec_rnn = Bidirectional(recurrent_cell, merge_mode='mul')(spec_rnn)

    # Heads are built DOA first, then SED (same creation order as before).
    doa = _output_head(_fnn_stack(spec_rnn), data_out[1][-1], 'tanh',
                       'doa_out')
    sed = _output_head(_fnn_stack(spec_rnn), data_out[0][-1], 'sigmoid',
                       'sed_out')

    model = Model(inputs=spec_start, outputs=[sed, doa])
    if loader:
        # The model built above is discarded in favour of the saved one.
        model = load_model(
            'C:/Users/shalea2/PycharmProjects/Drones/models/attention_3_ansim_ov1_split2_regr0_3d0_1_model.h5',
            custom_objects=SeqSelfAttention.get_custom_objects())
        if loader2:
            temp_weights = [layer.get_weights() for layer in model.layers]
            # Drop the last four layers before attaching new heads.
            for _ in range(4):
                model.layers.pop()
            doa = _output_head(model.layers[-1].output, data_out[1][-1],
                               'tanh', 'doa_out')
            sed = _output_head(model.layers[-2].output, data_out[0][-1],
                               'sigmoid', 'sed_out')
            model = Model(inputs=model.get_input_at(0), outputs=[sed, doa])
            # Restore the saved weights of every layer except the last four.
            for idx, saved in enumerate(temp_weights[:-4]):
                model.layers[idx].set_weights(saved)
    model.compile(optimizer=Adam(),
                  loss=['binary_crossentropy', 'mse'],
                  loss_weights=weights)

    model.summary()
    return model
Example #29
0
        values.append(round(labels[i]))

    return np.asarray(values, np.int32)


# Add a trailing feature axis of size 1 so the GRU receives 3-D input.
# NOTE(review): the target shapes are taken from X_*_transformed while the
# arrays being reshaped are X_train / X_test -- confirm this is intended.
X_train = np.reshape(
    X_train, (X_train_transformed.shape[0], X_train_transformed.shape[1], 1))
X_test = np.reshape(
    X_test, (X_test_transformed.shape[0], X_test_transformed.shape[1], 1))

# Small, heavily regularized GRU regressor with a single linear output.
model = Sequential()
model.add(
    GRU(4,
        input_shape=X_train.shape[1:],
        W_regularizer=l2(0.01),
        dropout_W=0.4,
        dropout_U=0.4,
        U_regularizer=l2(0.4),
        b_regularizer=l2(0.4)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=100, batch_size=1, verbose=2)

# make prediction
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)

# Convert the raw predictions to integer labels once and reuse them.
predicted_labels = label_tranform(testPredict)

# Single-argument print(...) calls produce the same output under Python 2
# and Python 3; the former print statements were Python 2 only.
print(testPredict)
print(y_test)
print(predicted_labels)
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuuracy GRU %s" % (accuracy,))
Example #30
0
# Module-level, named layer instances (presumably shared by the model
# builders defined further down -- verify against their bodies).

# 1-D convolutions: conv4-conv5 use 256 filters, conv6-conv8 use 1024.
conv_4, conv_5 = (
    Conv1D(256, 3, activation='relu', name='conv%d' % idx) for idx in (4, 5))
conv_6, conv_7, conv_8 = (
    Conv1D(1024, 3, activation='relu', name='conv%d' % idx)
    for idx in (6, 7, 8))

# Pooling: two average-pooling layers followed by two max-pooling layers.
pool_1, pool_2 = (
    AveragePooling1D(pool_length=3, name='pool%d' % idx) for idx in (1, 2))
pool_3, pool_4 = (
    MaxPooling1D(pool_length=3, name='pool%d' % idx) for idx in (3, 4))

# Recurrent layers; only the last one of each family drops the sequence axis.
lstm_1 = LSTM(256, return_sequences=True, name='lstm1')
lstm_2 = LSTM(128, return_sequences=True, name='lstm2')
lstm_3 = LSTM(64, name='lstm3')

gru_1, gru_2 = (
    GRU(256, return_sequences=True, name='gru%d' % idx) for idx in (1, 2))
gru_3 = GRU(256, name='gru3')

# Bidirectional wrappers around the three LSTM instances above.
bi_lstm_1, bi_lstm_2, bi_lstm_3 = (
    Bidirectional(wrapped, name='bilstm%d' % idx)
    for idx, wrapped in enumerate((lstm_1, lstm_2, lstm_3), start=1))

# Dense head: 256 ReLU units, then one sigmoid unit.
dense_1 = Dense(256, activation='relu', name='dense1')
dense_2 = Dense(1, activation='sigmoid', name='dense2')

drop_1, drop_2 = (Dropout(0.5, name='drop%d' % idx) for idx in (1, 2))


def get_rnn_model():