def LSTM(summary=False,
         nb_hidden=1024,
         nb_classes=10,
         input_dim=512,
         input_length=32,
         lr=1e-3,
         dropout=0.5):
    """
    Sepp Hochreiter & Jürgen Schmidhuber (1997). "Long short-term memory".
    Neural Computation. 9 (8): 1735–1780. doi:10.1162/neco.1997.9.8.1735.
    """
    model = Sequential()

    # five identical GRU blocks (the input_dim/input_length arguments only
    # matter for the first layer in the Sequential stack)
    for _ in range(5):
        model.add(
            recurrent.GRU(nb_hidden,
                          return_sequences=True,
                          input_dim=input_dim,
                          input_length=input_length))
        model.add(BatchNormalization())
        model.add(Dropout(dropout))

    model.add(TimeDistributed(Dense(nb_classes)))
    model.add(Activation('softmax'))

    if summary:
        print(model.summary())
    return model
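
A minimal usage sketch (not from the original source): the lr argument is never consumed inside the function, so the Adam import and the compilation settings below are assumptions.

from keras.optimizers import Adam

clf = LSTM(summary=True, nb_hidden=256, nb_classes=10, input_dim=512, input_length=32)
clf.compile(optimizer=Adam(lr=1e-3),
            loss='categorical_crossentropy',
            metrics=['accuracy'])
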
Example #2
def StackingConvRNN(input_shape, rnn_level=3, filters=64, rweight=0.1):
    """
    Using Conv network over RNN unit layer
    input_shape: [n_timestep, img_size, img_size, n_channels]
    """
    our_model = Sequential()
    our_model.add(
        TimeDistributed(Conv2D(filters, (3, 3),
                               padding='same',
                               activation='relu'),
                        input_shape=input_shape))
    our_model.add(TimeDistributed(BatchNormalization()))
    our_model.add(TimeDistributed(MaxPooling2D((2, 2))))
    our_model.add(
        TimeDistributed(
            Conv2D(filters, (3, 3), padding='same', activation='relu')))
    our_model.add(TimeDistributed(BatchNormalization()))
    our_model.add(
        TimeDistributed(
            Conv2D(filters, (3, 3), padding='same', activation='relu')))
    our_model.add(TimeDistributed(BatchNormalization()))
    our_model.add(TimeDistributed(MaxPooling2D((2, 2))))
    our_model.add(TimeDistributed(Flatten()))
    # our_model.add(TimeDistributed(Dropout(0.4)))
    for _ in range(rnn_level):
        our_model.add(recurrent_unit.GRU(filters, return_sequences=True))
        our_model.add(Dense(filters))
    our_model.add(Flatten())
    our_model.add(Dense(filters, kernel_regularizer=regularizers.l2(rweight)))
    our_model.add(Dropout(0.4))
    our_model.add(Dense(1, activation='relu'))
    our_model.compile(loss='mean_squared_error', optimizer='adadelta')

    return our_model
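
A hedged usage sketch for StackingConvRNN; the frame count, image size, and channel count below are illustrative, not from the source.

import numpy as np

conv_rnn = StackingConvRNN(input_shape=(8, 64, 64, 3), rnn_level=2, filters=32)
dummy_x = np.random.rand(4, 8, 64, 64, 3)   # 4 sequences of 8 frames, 64x64 RGB
dummy_y = np.random.rand(4, 1)               # one regression target per sequence
conv_rnn.fit(dummy_x, dummy_y, epochs=1, batch_size=2)
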
Example #3
    def test_explicit_device_with_go_backward_and_mask(self):
        batch_size = 8
        timestep = 7
        masksteps = 5
        units = 4

        inputs = np.random.randn(batch_size, timestep,
                                 units).astype(np.float32)
        mask = np.ones((batch_size, timestep)).astype(bool)
        mask[:, masksteps:] = 0

        # Test for V1 behavior.
        lstm_v1 = rnn_v1.GRU(units, return_sequences=True, go_backwards=True)
        with testing_utils.device(should_use_gpu=True):
            outputs_masked_v1 = lstm_v1(inputs, mask=tf.constant(mask))
            outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked_v1[:, -masksteps:],
                            outputs_trimmed_v1)

        # Test for V2 behavior.
        lstm = rnn.GRU(units, return_sequences=True, go_backwards=True)
        with testing_utils.device(should_use_gpu=True):
            outputs_masked = lstm(inputs, mask=tf.constant(mask))
            outputs_trimmed = lstm(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
Example #4
    def test_explicit_device_with_go_backward_and_mask(self):
        if tf.test.is_built_with_rocm():
            self.skipTest('Skipping the test as ROCm MIOpen does not '
                          'support padded input yet.')

        batch_size = 8
        timestep = 7
        masksteps = 5
        units = 4

        inputs = np.random.randn(batch_size, timestep,
                                 units).astype(np.float32)
        mask = np.ones((batch_size, timestep)).astype(bool)
        mask[:, masksteps:] = 0

        # Test for V1 behavior.
        lstm_v1 = rnn_v1.GRU(units, return_sequences=True, go_backwards=True)
        with testing_utils.device(should_use_gpu=True):
            outputs_masked_v1 = lstm_v1(inputs, mask=tf.constant(mask))
            outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked_v1[:, -masksteps:],
                            outputs_trimmed_v1)

        # Test for V2 behavior.
        lstm = rnn.GRU(units, return_sequences=True, go_backwards=True)
        with testing_utils.device(should_use_gpu=True):
            outputs_masked = lstm(inputs, mask=tf.constant(mask))
            outputs_trimmed = lstm(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
def build_embedding_lstm2_crf_model(VOCAB_SIZE, NUM_CLASS, TIME_STAMPS):
    """
    Bidirectional LSTM with embedding + CRF
    """
    model = Sequential()
    model.add(
        Embedding(VOCAB_SIZE, output_dim=EMBEDDING_OUT_DIM, mask_zero=True))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(recurrent.GRU(HIDDEN_UNITS // 2, return_sequences=True))

    # model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    crf_layer = CRF(NUM_CLASS, sparse_target=True)
    model.add(crf_layer)
    # model.summary()
    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=0.0,
                amsgrad=False)
    model.compile(adam,
                  loss=crf_layer.loss_function,
                  metrics=[crf_layer.accuracy])
    return model
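
A hedged call sketch; EMBEDDING_OUT_DIM, HIDDEN_UNITS and DROPOUT_RATE are module-level constants in the original, and the CRF layer is assumed to come from keras_contrib, so the shapes below are only illustrative.

import numpy as np

VOCAB_SIZE, NUM_CLASS, TIME_STAMPS = 5000, 7, 100
ner_model = build_embedding_lstm2_crf_model(VOCAB_SIZE, NUM_CLASS, TIME_STAMPS)
x = np.random.randint(1, VOCAB_SIZE, size=(32, TIME_STAMPS))    # index 0 is reserved for masking
y = np.random.randint(0, NUM_CLASS, size=(32, TIME_STAMPS, 1))  # sparse_target=True expects label ids
ner_model.fit(x, y, batch_size=8, epochs=1)
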
Example #6
    def create_emb_layer(self):
        iw3 = Input(shape=(self.max_ngram_one_class, ),
                    dtype='int32',
                    name="inputword3")
        iw4 = Input(shape=(self.max_ngram_one_class, ),
                    dtype='int32',
                    name="inputword4")
        iw5 = Input(shape=(self.max_ngram_one_class, ),
                    dtype='int32',
                    name="inputword5")
        iw6 = Input(shape=(self.max_ngram_one_class, ),
                    dtype='int32',
                    name="inputword6")
        emb_in = embeddings.Embedding(output_dim=self.vector_size,
                                      input_dim=self.ngram_size,
                                      init="uniform",
                                      mask_zero=True,
                                      name="input_layer")

        vv_iw3 = emb_in(iw3)
        vv_iw4 = emb_in(iw4)
        vv_iw5 = emb_in(iw5)
        vv_iw6 = emb_in(iw6)

        zm = ZeroMaskedEntries()
        zm.build((None, self.max_ngram_num, self.vector_size))

        zero_masked_emd3 = zm(vv_iw3)
        zero_masked_emd4 = zm(vv_iw4)
        zero_masked_emd5 = zm(vv_iw5)
        zero_masked_emd6 = zm(vv_iw6)

        lstm_l3 = recurrent.GRU(self.vector_size, return_sequences=False)
        lstm_l4 = recurrent.GRU(self.vector_size, return_sequences=False)
        lstm_l5 = recurrent.GRU(self.vector_size, return_sequences=False)
        lstm_l6 = recurrent.GRU(self.vector_size, return_sequences=False)

        lstm3 = lstm_l3(zero_masked_emd3)
        lstm4 = lstm_l4(zero_masked_emd4)
        lstm5 = lstm_l5(zero_masked_emd5)
        lstm6 = lstm_l6(zero_masked_emd6)

        merge_conv = Merge(mode='ave', concat_axis=1)
        merged = merge_conv([lstm3, lstm4, lstm5, lstm6])

        reshaped = Reshape((1, self.vector_size))(merged)
        return ([iw3, iw4, iw5, iw6], emb_in, reshaped)
def rnn_att_model(embed,
                  MAX_LEN,
                  SENT_HIDDEN_SIZE,
                  ACTIVATION,
                  DP,
                  L2,
                  LABEL_NUM,
                  OPTIMIZER,
                  MLP_LAYER,
                  LAYERS,
                  RNN_Cell='LSTM'):
    print('Build model...')

    RNN = recurrent.LSTM
    if RNN_Cell == 'BiLSTM':
        RNN = lambda *args, **kwargs: Bidirectional(
            recurrent.LSTM(*args, **kwargs))
    elif RNN_Cell == 'GRU':
        RNN = recurrent.GRU
    elif RNN_Cell == 'BiGRU':
        RNN = lambda *args, **kwargs: Bidirectional(
            recurrent.GRU(*args, **kwargs))

    rnn_kwargs = dict(units=SENT_HIDDEN_SIZE, dropout=DP, recurrent_dropout=DP)

    translate = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

    premise = Input(shape=(MAX_LEN, ), dtype='int32')

    prem = embed(premise)

    # prem = translate(prem)

    if LAYERS > 1:
        for l in range(LAYERS - 1):
            rnn = RNN(return_sequences=True, **rnn_kwargs)
            prem = BatchNormalization()(rnn(prem))
    rnn = RNN(return_sequences=True, **rnn_kwargs)
    prem = rnn(prem)

    prem = Attention(MAX_LEN)(prem)

    joint = Dropout(DP)(prem)
    for i in range(MLP_LAYER):
        joint = Dense(2 * SENT_HIDDEN_SIZE,
                      activation=ACTIVATION,
                      kernel_regularizer=l2(L2) if L2 else None)(joint)
        joint = Dropout(DP)(joint)
        # joint = BatchNormalization()(joint)

    pred = Dense(LABEL_NUM, activation='softmax')(joint)

    model = Model(inputs=premise, outputs=pred)
    model.compile(optimizer=OPTIMIZER,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()
    return model
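
A hedged call sketch; embed is assumed to be a pre-built keras.layers.Embedding instance and Attention is the custom layer used above, both defined elsewhere in the source.

embed = Embedding(input_dim=50000, output_dim=300, input_length=100, trainable=False)
clf = rnn_att_model(embed, MAX_LEN=100, SENT_HIDDEN_SIZE=300, ACTIVATION='relu',
                    DP=0.2, L2=1e-5, LABEL_NUM=5, OPTIMIZER='adam',
                    MLP_LAYER=2, LAYERS=2, RNN_Cell='BiGRU')
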
Example #8
 def create_emb_layer(self):
     iw = Input(shape=(self.max_ngram_num, ),
                dtype='int32',
                name="inputword")
     emb_in = embeddings.Embedding(output_dim=self.vector_size,
                                   input_dim=self.ngram_size,
                                   init="uniform",
                                   name="input_layer")
     vv_iw = emb_in(iw)
     lstm_l = recurrent.GRU(self.vector_size, return_sequences=False)
     lstm = lstm_l(vv_iw)
     return ([iw], emb_in, lstm)
Example #9
def get_lstm(params, weights, summary=False):
    """
    Sepp Hochreiter & Jürgen Schmidhuber (1997). "Long short-term memory".
    Neural Computation. 9 (8): 1735–1780. doi:10.1162/neco.1997.9.8.1735.

    Yue-Hei Ng, Joe, et al.
    "Beyond short snippets: Deep networks for video classification."
    Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.
    https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Ng_Beyond_Short_Snippets_2015_CVPR_paper.pdf
    """

    model = Sequential()

    # model.add(recurrent.GRU(params['nb_hidden'], return_sequences=True,
    #                         input_dim=params['input_dim'],
    #                         input_length=params['input_size']))
    # model.add(BatchNormalization())

    # model.add(recurrent.GRU(params['nb_hidden'], return_sequences=True,
    #                         input_dim=params['input_dim'],
    #                         input_length=params['input_size']))
    # model.add(BatchNormalization())

    # model.add(recurrent.GRU(params['nb_hidden'], return_sequences=True,
    #                          input_dim=params['input_dim'],
    #                          input_length=params['input_size']))
    # model.add(BatchNormalization())

    # model.add(recurrent.GRU(params['nb_hidden'], return_sequences=True,
    #                          input_dim=params['input_dim'],
    #                          input_length=params['input_size']))
    # model.add(BatchNormalization())

    model.add(
        recurrent.GRU(params['nb_hidden'],
                      return_sequences=True,
                      input_dim=params['input_dim'],
                      input_length=params['input_size']))
    model.add(BatchNormalization())
    model.add(Dropout(params['dropout']))

    model.add(TimeDistributed(Dense(params['nb_classes'])))
    model.add(Activation('softmax'))

    if summary:
        print(model.summary())

    if weights:
        model.load_weights(weights)

    return model
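
A hedged call sketch showing the expected params dictionary; the values are illustrative.

params = {'nb_hidden': 512, 'input_dim': 2048, 'input_size': 32,
          'nb_classes': 10, 'dropout': 0.5}
model = get_lstm(params, weights=None, summary=True)
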
Example #10
    def test_gru_v2_feature_parity_with_canonical_gru(self):
        if tf.test.is_built_with_rocm():
            self.skipTest('Skipping the test as ROCm MIOpen does not '
                          'support padded input yet.')

        input_shape = 10
        rnn_state_size = 8
        timestep = 4
        batch = 20

        (x_train,
         y_train), _ = testing_utils.get_test_data(train_samples=batch,
                                                   test_samples=0,
                                                   input_shape=(timestep,
                                                                input_shape),
                                                   num_classes=rnn_state_size,
                                                   random_seed=87654321)
        y_train = np_utils.to_categorical(y_train, rnn_state_size)
        # For the last batch item of the test data, we filter out the last
        # timestep to simulate the variable length sequence and masking test.
        x_train[-2:, -1, :] = 0.0
        y_train[-2:] = 0

        inputs = keras.layers.Input(shape=[timestep, input_shape],
                                    dtype=tf.float32)
        masked_input = keras.layers.Masking()(inputs)
        gru_layer = rnn_v1.GRU(rnn_state_size,
                               recurrent_activation='sigmoid',
                               reset_after=True)
        output = gru_layer(masked_input)
        gru_model = keras.models.Model(inputs, output)
        weights = gru_model.get_weights()
        y_1 = gru_model.predict(x_train)
        gru_model.compile('rmsprop', 'mse')
        gru_model.fit(x_train, y_train)
        y_2 = gru_model.predict(x_train)

        with testing_utils.device(should_use_gpu=True):
            cudnn_layer = rnn.GRU(rnn_state_size,
                                  recurrent_activation='sigmoid',
                                  reset_after=True)
            cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
        cudnn_model.set_weights(weights)
        y_3 = cudnn_model.predict(x_train)
        cudnn_model.compile('rmsprop', 'mse')
        cudnn_model.fit(x_train, y_train)
        y_4 = cudnn_model.predict(x_train)

        self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5)
        self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5)
Example #11
    def test_gru_v2_output_on_multiple_kernel(self):
        input_shape = 10
        rnn_state_size = 8
        timestep = 4
        batch = 100

        x_train = np.random.random((batch, timestep, input_shape))

        inputs = keras.layers.Input(shape=[timestep, input_shape],
                                    dtype=tf.float32)
        with testing_utils.device(should_use_gpu=False):
            layer = rnn.GRU(rnn_state_size)
            output = layer(inputs)
            cpu_model = keras.models.Model(inputs, output)
            weights = cpu_model.get_weights()
            y_1 = cpu_model.predict(x_train)

        with testing_utils.device(should_use_gpu=True):
            layer = rnn.GRU(rnn_state_size)
            output = layer(inputs)
            gpu_model = keras.models.Model(inputs, output)
            gpu_model.set_weights(weights)
            y_2 = gpu_model.predict(x_train)

        # Note that CuDNN uses 'sigmoid' as activation, so the GRU V2 uses
        # 'sigmoid' as default. Construct the canonical GRU with sigmoid to achieve
        # the same output.
        with testing_utils.device(should_use_gpu=True):
            layer = rnn_v1.GRU(rnn_state_size,
                               recurrent_activation='sigmoid',
                               reset_after=True)
            output = layer(inputs)
            canonical_model = keras.models.Model(inputs, output)
            canonical_model.set_weights(weights)
            y_3 = canonical_model.predict(x_train)

        self.assertAllClose(y_1, y_2, rtol=1e-5, atol=1e-5)
        self.assertAllClose(y_2, y_3, rtol=1e-5, atol=1e-5)
Example #12
    def create_emb_layer(self):
        iw = Input(shape=(self.max_ngram_num, ),
                   dtype='int32',
                   name="inputword3")
        emb_in = embeddings.Embedding(output_dim=self.vector_size,
                                      input_dim=self.ngram_size,
                                      init="uniform",
                                      mask_zero=True,
                                      name="input_layer")

        vv_iw = emb_in(iw)

        zm = ZeroMaskedEntries()
        zm.build((None, self.max_ngram_num, self.vector_size))

        zero_masked_emd = zm(vv_iw)

        gru_l = recurrent.GRU(self.vector_size, return_sequences=False)

        gru = gru_l(zero_masked_emd)

        reshaped = Reshape((1, self.vector_size))(gru)
        return ([iw], emb_in, reshaped)
Example #13
    def train(self, words_filepath, tmp_dir, nb_samples=10000000):
        '''
        Train the model on the words in the given file words_filepath.

        :param words_filepath: path to a plain text utf8 file with the word list (one word per line)
        :param tmp_dir: path to a directory where various summaries of the training process
        are saved for visualization and other monitoring
        '''

        # build the lists of words for training and validation
        known_words = self.load_words(words_filepath)
        logging.info('There are {} known words'.format(len(known_words)))

        max_word_len = max(map(len, known_words))
        seq_len = max_word_len + 2  # 2 characters are added to each word to mark the beginning and end of the sequence
        logging.info('max_word_len={}'.format(max_word_len))

        # limit the number of words used for training and validation
        if len(known_words) > nb_samples:
            known_words = set(list(known_words)[:nb_samples])

        val_share = 0.3
        random.seed(self.seed)
        train_words = set(filter(lambda z: random.random() > val_share, known_words))
        val_words = set(filter(lambda z: z not in train_words, known_words))

        # Add a special "empty" word to the training set; it is needed
        # as a filler when padding word sequences of different lengths.
        train_words.add(u'')

        train_words = raw_wordset(train_words, max_word_len)
        val_words = raw_wordset(val_words, max_word_len)

        logging.info('train set contains {} words'.format(len(train_words)))
        logging.info('val set contains {} words'.format(len(val_words)))

        all_chars = {FILLER_CHAR, BEG_CHAR, END_CHAR}
        for word in known_words:
            all_chars.update(word)

        char2index = {FILLER_CHAR: 0}
        for i, c in enumerate(all_chars):
            if c != FILLER_CHAR:
                char2index[c] = len(char2index)

        index2char = dict([(i, c) for c, i in six.iteritems(char2index)])

        nb_chars = len(all_chars)
        logging.info('nb_chars={}'.format(nb_chars))

        mask_zero = self.arch_type == 'rnn'

        if self.char_dims > 0:
            # Characters are represented by vectors of the given length,
            # and these vectors are adjusted during training to
            # reduce the overall loss.
            embedding = Embedding(output_dim=self.char_dims,
                                  input_dim=nb_chars,
                                  input_length=seq_len,
                                  mask_zero=mask_zero,
                                  trainable=True)
        else:
            # 1-hot encoding of characters.
            # The vector length was not specified by the user, so set it so that
            # the 1-hot representation fits.
            self.char_dims = nb_chars

            char_matrix = np.zeros((nb_chars, self.char_dims))
            for i in range(nb_chars):
                char_matrix[i, i] = 1.0

            embedding = Embedding(output_dim=self.char_dims,
                                  input_dim=nb_chars,
                                  input_length=seq_len,
                                  weights=[char_matrix],
                                  mask_zero=mask_zero,
                                  trainable=self.tunable_char_embeddings)

        input_chars = Input(shape=(seq_len,), dtype='int32', name='input')
        encoder = embedding(input_chars)

        logging.info('Building "{}" neural network'.format(self.arch_type))
        if self.arch_type == 'cnn':
            conv_list = []
            merged_size = 0

            nb_filters = 32

            for kernel_size in range(1, 4):
                conv_layer = Conv1D(filters=nb_filters,
                                    kernel_size=kernel_size,
                                    padding='valid',
                                    activation='relu',
                                    strides=1)(encoder)
                # conv_layer = GlobalMaxPooling1D()(conv_layer)
                conv_layer = GlobalAveragePooling1D()(conv_layer)
                conv_list.append(conv_layer)
                merged_size += nb_filters

            encoder = keras.layers.concatenate(inputs=conv_list)
            encoder = Dense(units=self.vec_size, activation='sigmoid')(encoder)

        elif self.arch_type == 'rnn':
            encoder = recurrent.LSTM(units=self.vec_size, return_sequences=False)(encoder)

        elif self.arch_type == 'bidir_lstm':
            encoder = Bidirectional(recurrent.LSTM(units=self.vec_size // 2, return_sequences=False))(encoder)

        elif self.arch_type == 'lstm(lstm)':
            encoder = Bidirectional(recurrent.LSTM(units=self.vec_size // 2, return_sequences=True))(encoder)
            encoder = Bidirectional(recurrent.LSTM(units=self.vec_size // 2, return_sequences=False))(encoder)

        elif self.arch_type == 'lstm+cnn':
            conv_list = []
            merged_size = 0

            rnn_size = self.vec_size
            conv_list.append(recurrent.LSTM(units=rnn_size, return_sequences=False)(encoder))
            merged_size += rnn_size

            nb_filters = 32
            for kernel_size in range(1, 4):
                conv_layer = Conv1D(filters=nb_filters,
                                    kernel_size=kernel_size,
                                    padding='valid',
                                    activation='relu',
                                    strides=1)(encoder)
                # conv_layer = GlobalMaxPooling1D()(conv_layer)
                conv_layer = GlobalAveragePooling1D()(conv_layer)
                conv_list.append(conv_layer)
                merged_size += nb_filters

            encoder = keras.layers.concatenate(inputs=conv_list)
            encoder = Dense(units=self.vec_size, activation='sigmoid')(encoder)

        elif self.arch_type == 'lstm(cnn)':
            conv_list = []
            merged_size = 0

            nb_filters = 32
            rnn_size = nb_filters

            for kernel_size in range(1, 4):
                conv_layer = Conv1D(filters=nb_filters,
                                    kernel_size=kernel_size,
                                    padding='valid',
                                    activation='relu',
                                    strides=1,
                                    name='shared_conv_{}'.format(kernel_size))(encoder)

                # conv_layer = keras.layers.MaxPooling1D(pool_size=kernel_size, strides=None, padding='valid')(conv_layer)
                conv_layer = keras.layers.AveragePooling1D(pool_size=kernel_size, strides=None, padding='valid')(conv_layer)
                conv_layer = recurrent.LSTM(rnn_size, return_sequences=False)(conv_layer)

                conv_list.append(conv_layer)
                merged_size += rnn_size

            encoder = keras.layers.concatenate(inputs=conv_list)
            encoder = Dense(units=self.vec_size, activation='sigmoid')(encoder)

        elif self.arch_type == 'gru(cnn)':
            conv_list = []
            merged_size = 0

            for kernel_size, nb_filters in [(1, 16), (2, 32), (3, 64), (4, 128)]:
                conv_layer = Conv1D(filters=nb_filters,
                                    kernel_size=kernel_size,
                                    padding='valid',
                                    activation='relu',
                                    strides=1,
                                    name='shared_conv_{}'.format(kernel_size))(encoder)

                conv_layer = keras.layers.AveragePooling1D(pool_size=kernel_size, strides=None, padding='valid')(conv_layer)
                conv_layer = recurrent.GRU(nb_filters, return_sequences=False)(conv_layer)

                conv_list.append(conv_layer)
                merged_size += nb_filters

            encoder = keras.layers.concatenate(inputs=conv_list)
            encoder = Dense(units=self.vec_size, activation='sigmoid')(encoder)

        else:
            raise RuntimeError('Unknown architecture of neural net: {}'.format(self.arch_type))

        decoder = RepeatVector(seq_len)(encoder)
        decoder = recurrent.LSTM(self.vec_size, return_sequences=True)(decoder)
        decoder = TimeDistributed(Dense(nb_chars, activation='softmax'), name='output')(decoder)

        model = Model(inputs=input_chars, outputs=decoder)
        model.compile(loss='categorical_crossentropy', optimizer='nadam')

        try:
            keras.utils.plot_model(model,
                                   to_file=os.path.join(self.model_dir, 'wordchar2vector.arch.png'),
                                   show_shapes=False,
                                   show_layer_names=True)
        except:
            print('Could not render network graph, something wrong with pydot or Graphviz')

        weigths_path = os.path.join(self.model_dir, 'wordchar2vector.model')
        arch_filepath = os.path.join(self.model_dir, 'wordchar2vector.arch')

        model_config = {
            'max_word_len': max_word_len,
            'seq_len': seq_len,
            'char2index': char2index,
            'FILLER_CHAR': FILLER_CHAR,
            'BEG_CHAR': BEG_CHAR,
            'END_CHAR': END_CHAR,
            'arch_filepath': arch_filepath,
            'weights_path': weigths_path,
            'vec_size': self.vec_size,
            'arch_type': self.arch_type
        }

        with open(os.path.join(self.model_dir, self.config_filename), 'w') as f:
            json.dump(model_config, f)

        X_viz, y_viz = build_test(list(val_words)[0:1000], max_word_len, char2index)

        learning_curve_filename = os.path.join(tmp_dir,
                                               'learning_curve__{}_vecsize={}_tunable_char_embeddings={}_chardims={}_batchsize={}_seed={}.csv'.format(
                                                   self.arch_type, self.vec_size, self.tunable_char_embeddings,
                                                   self.char_dims, self.batch_size, self.seed))
        visualizer = VisualizeCallback(X_viz, y_viz, model, index2char, weigths_path,
                                       learning_curve_filename)

        # csv_logger = CSVLogger(learning_curve_filename, append=True, separator='\t')

        remaining_epochs = 1000
        workout_count = 0
        batch_size = self.batch_size
        while batch_size >= self.min_batch_size and remaining_epochs > 0:
            logging.info('Workout #{}: start training with batch_size={}, remaining epochs={}'.format(workout_count, batch_size, remaining_epochs))
            if workout_count > 0:
                model.load_weights(weigths_path)
            workout_count += 1
            visualizer.new_epochs()
            hist = model.fit_generator(generator=generate_rows(train_words, self.batch_size, char2index, seq_len, 1),
                                       steps_per_epoch=len(train_words) // self.batch_size,
                                       epochs=remaining_epochs,
                                       verbose=1,
                                       callbacks=[visualizer],  # csv_logger, model_checkpoint, early_stopping],
                                       validation_data=generate_rows(val_words, self.batch_size, char2index, seq_len, 1),
                                       validation_steps=len(val_words) // self.batch_size,
                                       )
            remaining_epochs -= len(hist.epoch)
            batch_size = batch_size // 2

        logging.info('Training complete, best_accuracy={}'.format(visualizer.get_best_accuracy()))

        # Load the best model weights
        model.load_weights(weigths_path)

        # Create a model whose graph is trimmed down to the encoder part.
        model = Model(inputs=input_chars, outputs=encoder)

        # Save this model
        with open(arch_filepath, 'w') as f:
            f.write(model.to_json())

        # Re-save the weights of the truncated model
        model.save_weights(weigths_path)
Example #14
# Fibonacci sequence
X = np.array([[1, 1, 2], [1, 2, 3], [2, 3, 5], [3, 5, 8], [5, 8, 13],
              [8, 13, 21], [13, 21, 34], [21, 34, 55], [34, 55, 89],
              [55, 89, 144], [89, 144, 233], [144, 233, 377]])
y = np.array([3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610])
vocab = set(list(X.flatten()) + list(y.flatten()))
vocab_size = len(vocab)
batch_size = len(X) // 2

# Reshape so that each value is its own array.
X = np.reshape(X, (len(X), -1, 1))
y = kr.utils.np_utils.to_categorical(y)

# Build model, using Keras' Sequential model
model = kr.models.Sequential()
model.add(recurrent.GRU(12, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.05))
model.add(Dense(y.shape[1], activation='softmax'))

# Compile model
model.compile(loss='categorical_crossentropy',
              optimizer=kr.optimizers.Adam(lr=0.005),
              metrics=['accuracy'])

# Fit model to our data
model.fit(x=X, y=y, nb_epoch=n_epochs, batch_size=batch_size)

example_x = X[5]
example_out = model.predict(np.reshape(example_x, (1, len(example_x), 1)))
example_argmax = np.argmax(example_out.flatten())
example_correct = np.argmax(y[5])
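
As a hedged follow-up (not in the source): because to_categorical maps each value to the class index equal to that value, the argmax of the prediction can be compared directly with the expected next Fibonacci number.

print('predicted next value: {}'.format(example_argmax))
print('expected next value:  {}'.format(example_correct))
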
Example #15
X = hindi_encoded[first_half_indices]
y = eng_encoded[first_half_indices]

# Explicitly set apart 10% for validation data that we never train over
split_at = len(X) - len(X) // 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])

print(X_train.shape)
print(y_train.shape)
print('Build model...')
model = Sequential()

model.add(
    recurrent.GRU(HIDDEN_SIZE,
                  input_shape=(None, VOCAB),
                  return_sequences=False))  # encoder

model.add(RepeatVector(MAXLEN))

for _ in range(LAYERS):
    model.add(recurrent.GRU(HIDDEN_SIZE, return_sequences=True))  # decoder

# For each of step of the output sequence, decide which character should be chosen
model.add(TimeDistributedDense(VOCAB))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(X_train,
          y_train,
          batch_size=BATCH_SIZE,
Example #16
model.add(
    Attention(
        recurrent.LSTM(output_dim,
                       input_dim=embedding_dim,
                       consume_less='gpu',
                       return_sequences=True)))
model.add(
    Attention(
        recurrent.LSTM(embedding_dim,
                       input_dim=embedding_dim,
                       consume_less='mem',
                       return_sequences=True)))
# test each other RNN type
model.add(
    Attention(
        recurrent.GRU(embedding_dim,
                      input_dim=embedding_dim,
                      consume_less='mem',
                      return_sequences=True)))
model.add(
    Attention(
        recurrent.SimpleRNN(embedding_dim,
                            input_dim=embedding_dim,
                            consume_less='mem',
                            return_sequences=True)))
model.add(core.Activation('relu'))
model.compile(optimizer='rmsprop', loss='mse')
model.fit(x, y, nb_epoch=1, batch_size=nb_samples)

# test with return_sequence = False
model = Sequential()
model.add(InputLayer(batch_input_shape=(nb_samples, timesteps, embedding_dim)))
model.add(
# this array has 1, 2, 3 to distinguish three types of positive peptides
list_val0 = np.array(list0)
mask_non_i = list_val0 >= 2
# non_i includes non_sub
len_non_i = sum(mask_non_i)
mask_non_sub = list_val0 == 3
len_non_sub = sum(mask_non_sub)
'''

##########################Parameters for the model and dataset
#TRAINING_SIZE = len(inputs)
# Try replacing JZS1 with LSTM, GRU, or SimpleRNN
RNN = recurrent.GRU(HIDDEN_SIZE,
                    input_shape=(None, len(chars)),
                    return_sequences=False,
                    W_regularizer=l2(l2_c),
                    b_regularizer=l2(l2_c),
                    dropout_W=drop_out_c,
                    dropout_U=drop_out_c)

##########################start a model
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
#model.add(Masking())
#print(str(LAYERS))
#keras.layers.core.ActivityRegularization(l2=0.0, l2=0.0)

if LAYERS > 1:
    #print('1')
    model.add(RNN(return_sequences=True))
class LayerCorrectnessTest(keras_parameterized.TestCase):
    def setUp(self):
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = tf.config.list_physical_devices('CPU')
        tf.config.set_logical_device_configuration(cpus[0], [
            tf.config.LogicalDeviceConfiguration(),
            tf.config.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    @parameterized.named_parameters(
        ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
        ('PReLU', advanced_activations.PReLU, (2, 2)),
        ('ELU', advanced_activations.ELU, (2, 2)),
        ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
        ('Softmax', advanced_activations.Softmax, (2, 2)),
        ('ReLU', advanced_activations.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)),
        ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
        ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D',
         lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)),
        ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5), (2, 2, 2, 2)),
        ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
        ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
        ('Attention', dense_attention.Attention, [(2, 2, 3), (2, 3, 3),
                                                  (2, 3, 3)]),
        ('AdditiveAttention', dense_attention.AdditiveAttention, [(2, 2, 3),
                                                                  (2, 3, 3),
                                                                  (2, 3, 3)]),
        ('Embedding', lambda: embeddings.Embedding(4, 4),
         (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2),
         (2, 2, 1)),
        ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
         (2, 2, 2, 1)),
        ('Add', merge.Add, [(2, 2), (2, 2)]),
        ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
        ('Average', merge.Average, [(2, 2), (2, 2)]),
        ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
        ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
        ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
        ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
        ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
        ('BatchNormalization', normalization_v2.BatchNormalization,
         (2, 2), 1e-2, 1e-2),
        ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
        ('LayerNormalizationUnfused',
         lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
        ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
        ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)),
        ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4),
         (4, 4, 4), 1e-2, 1e-2),
        ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
        ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
        ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
        ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
        ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
         (2, 2, 2)),
        ('Bidirectional',
         lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal',
         lambda: dense_attention.Attention(causal=True), [(2, 2, 3), (2, 3, 3),
                                                          (2, 3, 3)]),
        ('AdditiveAttentionLayerCausal',
         lambda: dense_attention.AdditiveAttention(causal=True), [(2, 3, 4),
                                                                  (2, 3, 4),
                                                                  (2, 3, 4)]),
    )
    def test_layer(self,
                   f32_layer_fn,
                   input_shape,
                   rtol=2e-3,
                   atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

    A float32 layer, a mixed precision layer, and a distributed mixed precision
    layer are run. The three layers are identical other than their dtypes and
    distribution strategies. The outputs after predict() and weights after fit()
    are asserted to be close.

    Args:
      f32_layer_fn: A function returning a float32 layer. The other two layers
        will automatically be created from this
      input_shape: The shape of the input to the layer, including the batch
        dimension. Or a list of shapes if the layer takes multiple inputs.
      rtol: The relative tolerance to be asserted.
      atol: The absolute tolerance to be asserted.
      input_data: A Numpy array with the data of the input. If None, input data
        will be randomly generated
    """

        if f32_layer_fn == convolutional.ZeroPadding2D and \
           tf.test.is_built_with_rocm():
            return
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0, (
            'The number of replicas, %d, does not divide the global batch size of '
            '%d' % (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (global_batch_size //
                                  strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size, ) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast inputs to float16 to avoid measuring error from having f16 layers
            # cast to float16.
            input_data = [
                np.random.normal(size=s).astype('float16')
                for s in input_shapes
            ]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.predict(input_data),
                            f32_output,
                            rtol=rtol,
                            atol=atol)

        # Run fit() on models
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
        self.assertAllClose(distributed_mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
def rnn_cnn_model(embed,
                  MAX_LEN,
                  SENT_HIDDEN_SIZE,
                  ACTIVATION,
                  DP,
                  L2,
                  LABEL_NUM,
                  OPTIMIZER,
                  MLP_LAYER,
                  LAYERS,
                  NGRAM_FILTERS,
                  NUM_FILTER,
                  RNN_Cell='LSTM'):
    print('Build model...')

    RNN = recurrent.LSTM
    if RNN_Cell == 'BiLSTM':
        RNN = lambda *args, **kwargs: Bidirectional(
            recurrent.LSTM(*args, **kwargs))
    elif RNN_Cell == 'GRU':
        RNN = recurrent.GRU
    elif RNN_Cell == 'BiGRU':
        RNN = lambda *args, **kwargs: Bidirectional(
            recurrent.GRU(*args, **kwargs))

    rnn_kwargs = dict(units=SENT_HIDDEN_SIZE, dropout=DP, recurrent_dropout=DP)

    translate = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

    premise = Input(shape=(MAX_LEN, ), dtype='int32')

    prem = embed(premise)

    prem = translate(prem)

    if LAYERS > 1:
        for l in range(LAYERS - 1):
            rnn = RNN(return_sequences=True, **rnn_kwargs)
            prem = BatchNormalization()(rnn(prem))
    rnn = RNN(return_sequences=True, **rnn_kwargs)
    prem = rnn(prem)
    prem = BatchNormalization()(prem)

    # cnn model
    convolutions = []
    i = 0
    for n_gram in NGRAM_FILTERS:
        i += 1
        cur_conv = Conv1D(name="conv_" + str(n_gram) + '_' + str(i),
                          filters=NUM_FILTER,
                          kernel_size=n_gram,
                          padding='valid',
                          activation='relu',
                          strides=1)(prem)
        # pool
        one_max = MaxPooling1D(pool_size=MAX_LEN - n_gram + 1)(cur_conv)
        flattened = Flatten()(one_max)
        convolutions.append(flattened)

    sentence_vector = concatenate(
        convolutions, name="sentence_vector")  # hang on to this layer!

    for i in range(MLP_LAYER):
        sentence_vector = Dense(
            2 * SENT_HIDDEN_SIZE,
            activation=ACTIVATION,
            kernel_regularizer=l2(L2) if L2 else None)(sentence_vector)
        sentence_vector = Dropout(DP)(sentence_vector)
        sentence_vector = BatchNormalization()(sentence_vector)

    sentence_vector = Dropout(DP)(sentence_vector)
    pred = Dense(LABEL_NUM, activation="softmax",
                 name="sentence_prediction")(sentence_vector)

    model = Model(inputs=premise, outputs=pred)
    model.compile(optimizer=OPTIMIZER,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()
    return model
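
A hedged call sketch; embed is again an externally built keras.layers.Embedding instance, and the n-gram filter sizes below are illustrative.

embed = Embedding(input_dim=50000, output_dim=300, input_length=120)
clf = rnn_cnn_model(embed, MAX_LEN=120, SENT_HIDDEN_SIZE=300, ACTIVATION='relu',
                    DP=0.2, L2=1e-5, LABEL_NUM=10, OPTIMIZER='adam',
                    MLP_LAYER=1, LAYERS=1, NGRAM_FILTERS=[2, 3, 4], NUM_FILTER=64,
                    RNN_Cell='GRU')
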
Example #20
def rnn_cnn_model(embed, MAX_LEN_TITLE, MAX_LEN_DES, SENT_HIDDEN_SIZE, ACTIVATION, DP, L2,
                  LABEL_NUM, OPTIMIZER, MLP_LAYER, LAYERS, NGRAM_FILTERS, NUM_FILTER, RNN_Cell='LSTM'):
    print('Build model...')

    RNN = recurrent.LSTM
    if RNN_Cell == 'BiLSTM':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.LSTM(*args, **kwargs))
    elif RNN_Cell == 'GRU':
        RNN = recurrent.GRU
    elif RNN_Cell == 'BiGRU':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.GRU(*args, **kwargs))

    rnn_kwargs = dict(output_dim=SENT_HIDDEN_SIZE, dropout_W=DP, dropout_U=DP)

    translate_title = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
    translate_des = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

    premise_title = Input(shape=(MAX_LEN_TITLE,), dtype='int32')
    prem_title = embed(premise_title)
    prem_title = translate_title(prem_title)

    premise_des = Input(shape=(MAX_LEN_DES,), dtype='int32')
    prem_des = embed(premise_des)
    prem_des = translate_des(prem_des)

    if LAYERS > 1:
        for l in range(LAYERS - 1):
            rnn_title = RNN(return_sequences=True, **rnn_kwargs)
            prem_title = BatchNormalization()(rnn_title(prem_title))

            rnn_des = RNN(return_sequences=True, **rnn_kwargs)
            prem_des = BatchNormalization()(rnn_des(prem_des))

    rnn_title = RNN(return_sequences=True, **rnn_kwargs)
    prem_title = rnn_title(prem_title)
    prem_title = BatchNormalization()(prem_title)

    rnn_des = RNN(return_sequences=True, **rnn_kwargs)
    prem_des = rnn_des(prem_des)
    prem_des = BatchNormalization()(prem_des)

    # prem =  merge([prem_title,prem_des], name="sentence_vector",mode = 'concat')

    # cnn model
    convolutions = []
    i = 0
    for n_gram in NGRAM_FILTERS:
        i += 1
        cur_conv_title = Conv1D(
            name="conv_title" + str(n_gram) + '_' + str(i),
            nb_filter=NUM_FILTER,
            filter_length=n_gram,
            border_mode='valid',
            activation='relu',
            subsample_length=1)(prem_title)
        # pool
        one_max = GlobalMaxPooling1D()(cur_conv_title)
        # flattened = Flatten()(one_max)
        convolutions.append(one_max)

    i = 0
    for n_gram in NGRAM_FILTERS:
        i += 1
        cur_conv_des = Conv1D(
            name="conv_des" + str(n_gram) + '_' + str(i),
            nb_filter=NUM_FILTER,
            filter_length=n_gram,
            border_mode='valid',
            activation='relu',
            subsample_length=1
        )(prem_des)
        # pool
        one_max = GlobalMaxPooling1D()(cur_conv_des)
        # flattened = Flatten()(one_max)
        convolutions.append(one_max)

    sentence_vector = concatenate(convolutions, name="sentence_vector")  # hang on to this layer!

    for i in range(MLP_LAYER):
        sentence_vector = Dense(2 * SENT_HIDDEN_SIZE, activation=ACTIVATION, W_regularizer=l2(L2) if L2 else None)(
            sentence_vector)
        sentence_vector = Dropout(DP)(sentence_vector)
        sentence_vector = BatchNormalization()(sentence_vector)

    sentence_vector = Dropout(DP)(sentence_vector)
    pred = Dense(LABEL_NUM, activation="sigmoid", name="sentence_prediction")(sentence_vector)

    model = Model(input=[premise_title, premise_des], output=pred)
    model.compile(optimizer=OPTIMIZER, loss='binary_crossentropy', metrics=[metrics.top_k_categorical_accuracy])

    model.summary()
    return model
Example #21
def generateModel(X_train,Y_train,X_test,Y_test, testDataFiles="",
                  resultFile=""):
    # LSTM model
    # model = Sequential()
    # model.add(recurrent.LSTM(32, input_dim=1, input_length=99,
    #                         activation='sigmoid',
    # inner_activation='hard_sigmoid'))
    # model.add(Dropout(0.5))
    # model.add(recurrent.LSTM(10))

    # model 1
    # model = Sequential()
    # model.add(recurrent.GRU(64, input_dim=2,
    #                                       input_length=100,
    #                          activation='sigmoid',
    #                          inner_activation='hard_sigmoid'))
    # # model.add(Dropout(0.5))
    # # model.add(recurrent.GRU(32))
    #
    # model.add(Dense(10, activation='softmax'))

    #model2 BDLSTM
    model = Sequential()
    model.add(Bidirectional(recurrent.GRU(32,
                             activation='sigmoid',
                             inner_activation='hard_sigmoid'),
                            input_shape=(100, 2)))
    # model.add(Dropout(0.5))
    # model.add(recurrent.GRU(32))

    model.add(Dense(10, activation='softmax'))

    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam(decay=1e-6)
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    X_train = np.array(X_train).reshape(-1,100,2)
    # X_test = np.array(X_test).reshape(-1,100,2)

    Y_train = to_categorical(Y_train, 10)
    # Y_test = to_categorical(Y_test, 10)

    # Fit the model
    model.fit(X_train, Y_train, nb_epoch=150, batch_size=4)

    model.save("BGRU1layerdxdy.h5")

    # model.predict(X_test, batch_size=4, verbose=0)
    # model.predict_on_batch(self, x)

    # model.save('my_model.h5')  # creates a HDF5 file 'my_model.h5'
    # del model  # deletes the existing model

    # returns a compiled model
    # identical to the previous one
    # model = load_model('my_model.h5')

    # scores = model.evaluate(X_test, Y_test, batch_size=4)

    # generating output result file for fusion
    # prediction = model.predict(X_test, batch_size=4)
    #
    # for i, pred in enumerate(prediction):
    #     rank = sorted(range(len(pred)),key=pred.__getitem__,
    #                   reverse=True)
    #
    #     resultString = str(np.argmax(Y_test[i])) + " " + testDataFiles[i]
    #
    #     for r in rank:
    #         resultString += " " + str(r)
    #     resultString += "\n"
    #
    #     file = open("results/" + resultFile, 'a')
    #     file.write(resultString)
    #     file.flush()
    #     os.fsync(file.fileno())
    #     file.close()

    # write accuracy results in a file
    # model.save(resultFile + ".h5")
    # scores = model.evaluate(X_test, Y_test, batch_size=4)
    # file = open("expresultFile.txt", 'a')
    # file.write("\n" + resultFile +  ":\n")
    # print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    # file.write("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    # file.flush()
    # os.fsync(file.fileno())
    # file.close()


    return model
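
A hedged call sketch; the toy arrays below only illustrate the expected input shape (sequences of 100 two-dimensional points with class labels 0-9), and numpy is assumed to be imported as np.

X_toy = np.random.rand(20, 100, 2)
Y_toy = np.random.randint(0, 10, size=20)
trained_model = generateModel(X_toy, Y_toy, X_test=None, Y_test=None)
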
Example #22
def rnn_att_model(embed, MAX_LEN_TITLE, MAX_LEN_DES, SENT_HIDDEN_SIZE, ACTIVATION, DP, L2,
                  LABEL_NUM, OPTIMIZER, MLP_LAYER, LAYERS, RNN_Cell='LSTM'):
    print('Build model...')

    RNN = recurrent.LSTM
    if RNN_Cell == 'BiLSTM':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.LSTM(*args, **kwargs))
    elif RNN_Cell == 'GRU':
        RNN = recurrent.GRU
    elif RNN_Cell == 'BiGRU':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.GRU(*args, **kwargs))

    rnn_kwargs = dict(units=SENT_HIDDEN_SIZE, recurrent_dropout=DP, dropout=DP)

    translate_title = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
    translate_des = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

    premise_title = Input(shape=(MAX_LEN_TITLE,), dtype='int32')

    prem_title = embed(premise_title)

    prem_title = translate_title(prem_title)

    premise_des = Input(shape=(MAX_LEN_DES,), dtype='int32')

    prem_des = embed(premise_des)

    prem_des = translate_des(prem_des)

    if LAYERS > 1:
        for l in range(LAYERS - 1):
            rnn_title = RNN(return_sequences=True, **rnn_kwargs)
            prem_title = BatchNormalization()(rnn_title(prem_title))
            rnn_des = RNN(return_sequences=True, **rnn_kwargs)
            prem_des = BatchNormalization()(rnn_des(prem_des))

    rnn_title = RNN(return_sequences=True, **rnn_kwargs)
    prem_title = rnn_title(prem_title)

    rnn_des = RNN(return_sequences=True, **rnn_kwargs)
    prem_des = rnn_des(prem_des)

    prem_title = Attention(MAX_LEN_TITLE)(prem_title)
    prem_des = Attention(MAX_LEN_DES)(prem_des)

    joint_title = Dropout(DP)(prem_title)
    joint_des = Dropout(DP)(prem_des)

    joint = concatenate([joint_title, joint_des], name="sentence_vector")
    for i in range(MLP_LAYER):
        joint = Dense(2 * SENT_HIDDEN_SIZE, activation=ACTIVATION, kernel_regularizer=l2(L2) if L2 else None)(joint)
        joint = Dropout(DP)(joint)
        # joint = BatchNormalization()(joint)

    pred = Dense(LABEL_NUM, activation='sigmoid')(joint)

    model = Model(inputs=[premise_title, premise_des], outputs=pred)
    model.compile(optimizer=OPTIMIZER, loss='binary_crossentropy', metrics=[metrics.top_k_categorical_accuracy])

    model.summary()
    return model
print('max_phrase_len={} words'.format(max_phrase_len))
print('samples.count={}'.format(len(samples)))

word_dims = w2v_dims

# each input sentence is represented as a sequence of tokens
input_curr_phrase = Input(batch_shape=(batch_size, max_phrase_len, w2v_dims),
                          dtype='float32',
                          name='input_curr_phrase')
encoder_dim = -1

encoder_curr = input_curr_phrase

if arch == 'gru':
    rnn_size = 64
    rnn_layer = recurrent.GRU(rnn_size, return_sequences=False)
    encoder_curr = rnn_layer(encoder_curr)
    encoder_dim = rnn_size
elif arch == 'bilstm':
    rnn_size = 512
    rnn_layer = Bidirectional(recurrent.LSTM(rnn_size, return_sequences=False))
    encoder_curr = rnn_layer(encoder_curr)
    encoder_dim = rnn_size
elif arch == 'lstm(cnn)':
    # recurrent layers on top of convolutional layers
    convs = []
    for kernel_size, nb_filters in [(1, 64), (2, 128), (3, 256)]:
        conv = Conv1D(filters=nb_filters,
                      kernel_size=kernel_size,
                      padding='valid',
                      activation='relu',
vocab_size = np.amax(X_text) + 1
print("vocab size is", vocab_size)

#Y_cat = np_utils.to_categorical(Y, 2)
#testY_cat = np_utils.to_categorical(Y,2)

#print ("Y_cat shape is",Y_cat.shape)

newsnetwork = Sequential()
newsnetwork.add(Embedding(vocab_size, 128, mask_zero=True))
newsnetwork.add(rnn.JZS3(100, return_sequences=False))
newsnetwork.add(Dropout(0.5))

marketnetwork = Sequential()
marketnetwork.add(GaussianNoise(.01, input_shape=(5, 70)))
marketnetwork.add(rnn.GRU(100, input_dim=70))
#marketnetwork.add(BatchNormalization())

#sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sentnetwork = Sequential()
sentnetwork.add(Dense(100, input_dim=10, activation="softmax"))
#sentnetwork.add(BatchNormalization())
sentnetwork.add(PReLU())
#sentnetwork.add(Dropout(0.75))

model = Sequential()
model.add(Merge([newsnetwork, marketnetwork, sentnetwork], mode='concat'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              class_mode='categorical')