Example #1
import numpy as np
from keras.callbacks import Callback
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import precision_recall_fscore_support


# The listing publishes these two methods without their surrounding class; the
# Callback subclass below (its name is illustrative) is only there so the
# fragment parses and runs standalone.
class F1Metrics(Callback):

    @staticmethod
    def __preprocess():
        # Load the IMDB reviews, keeping the 10,000 most frequent words.
        (train_data,
         train_labels), (test_data,
                         test_labels) = imdb.load_data(num_words=10000)
        word_index = imdb.get_word_index()

        # Pad/truncate every review to 500 tokens and cast the labels to float.
        max_len = 500
        x_train = pad_sequences(train_data, maxlen=max_len)
        x_test = pad_sequences(test_data, maxlen=max_len)
        y_train = np.asarray(train_labels).astype('float32')
        y_test = np.asarray(test_labels).astype('float32')

        return (x_train, y_train), (x_test, y_test), word_index, max_len
    def on_epoch_end(self, batch, logs=None):
        # In older Keras versions self.validation_data holds the model inputs
        # followed by the targets and sample weights; split them apart here.
        val_targ = self.validation_data[-3]
        val_value = [x for x in self.validation_data[0:-3]]
        y_pred = np.asarray(self.model.predict(val_value))

        # Threshold the sigmoid outputs at 0.5 and report micro-averaged scores.
        precision, recall, f_score, _ = precision_recall_fscore_support(
            val_targ, (y_pred > 0.5).astype(int), average='micro')
        print("— val_f1: %f — val_precision: %f — val_recall: %f" %
              (f_score, precision, recall))
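
A minimal wiring sketch, assuming an older Keras where Callback.validation_data is populated during fit(); the F1Metrics wrapper name comes from the standalone version above, and the tiny placeholder model is not part of the original listing:

from keras.layers import Dense, Dropout, Embedding, GlobalAveragePooling1D
from keras.models import Sequential

# The double-underscore helper is name-mangled inside the class, hence the access below.
(x_train, y_train), (x_test, y_test), word_index, max_len = (
    F1Metrics._F1Metrics__preprocess())

# With a learning-phase layer such as Dropout, older Keras appends the phase flag
# to validation_data, which matches the [-3] indexing used by the callback.
model = Sequential([
    Embedding(input_dim=10000, output_dim=32, input_length=max_len),
    Dropout(0.2),
    GlobalAveragePooling1D(),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Passing validation_data here is what fills self.validation_data for the callback.
model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          epochs=2, batch_size=128,
          callbacks=[F1Metrics()])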
Example #3
import numpy as np
from keras.callbacks import Callback, CSVLogger, ModelCheckpoint
from keras.layers import (Activation, BatchNormalization, Conv1D, Dense,
                          Dropout, Embedding, GlobalMaxPooling1D, MaxPooling1D)
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing import sequence
from sklearn.metrics import f1_score


# The listing ships on_epoch_end without its surrounding class or the
# on_train_begin that creates self.val_f1s; the wrapper below (its name is
# illustrative) adds the minimum needed for the method to run.
class F1History(Callback):

    def on_train_begin(self, logs=None):
        self.val_f1s = []

    def on_epoch_end(self, epoch, logs=None):
        # Multi-class setup: take the argmax over the predicted class scores.
        # val_predict = (np.asarray(self.model.predict(self.validation_data[0]))).round()
        val_predict = np.argmax(np.asarray(
            self.model.predict(self.validation_data[0])), axis=1)
        # val_targ = self.validation_data[1]
        val_targ = np.argmax(self.validation_data[1], axis=1)
        _val_f1 = f1_score(val_targ, val_predict, average='macro')
        # _val_recall = recall_score(val_targ, val_predict)
        # _val_precision = precision_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        # self.val_recalls.append(_val_recall)
        # self.val_precisions.append(_val_precision)
        # print('— val_f1: %f — val_precision: %f — val_recall %f' % (_val_f1, _val_precision, _val_recall))
        print(' — val_f1:', _val_f1)
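
# Usage note: main() below registers only ModelCheckpoint and CSVLogger; to
# track the macro F1 computed above, an F1History() instance (the illustrative
# wrapper name) would also be appended to the callbacks list handed to model.fit.
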
def main(args):
    # set parameters:
    max_features = 5000
    maxlen = 400
    batch_size = 32
    embedding_dims = 128
    filters = 250
    kernel_size = 3
    hidden_dims = 250
    epochs = 10

    print('Loading data...')
    # load_split_data is a project-specific helper (defined elsewhere); it returns
    # the raw sequences, labels and fitted tokenizers for both splits.
    X_train, y_train, X_test, y_test, tokenizer_train, tokenizer_test = load_split_data(
        args)

    y_train = np.asarray(y_train).astype('float32')
    y_test = np.asarray(y_test).astype('float32')
    vocab_size_train = len(tokenizer_train.word_index) + 1
    vocab_size_test = len(tokenizer_test.word_index) + 1
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print('Pad sequences (samples x time)')
    X_train = sequence.pad_sequences(X_train, padding='post')
    X_test = sequence.pad_sequences(X_test,
                                    maxlen=len(X_train[0]),
                                    padding='post')
    print('x_train shape:', X_train.shape)
    print('x_test shape:', X_test.shape)
    print('vocab_size_train', vocab_size_train)

    seqX_len = len(X_train[0])
    print('seqX_len', seqX_len)
    seqY_len = len(y_train[0])  # width of the one-hot label vectors
    print('Build model...')

    model = Sequential()

    # we start off with an efficient embedding layer which maps
    # our vocab indices into embedding_dims dimensions
    model.add(
        Embedding(input_dim=vocab_size_train,
                  output_dim=embedding_dims,
                  input_length=seqX_len))
    model.add(Dropout(0.2))

    # we add a Convolution1D, which will learn filters
    # word group filters of size filter_length:
    model.add(Conv1D(filters, kernel_size, padding='same', strides=1))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # we use max pooling:
    model.add(MaxPooling1D(strides=1))

    model.add(Conv1D(filters, kernel_size, padding='same', strides=1))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # we use max pooling:
    model.add(GlobalMaxPooling1D())

    # We add a vanilla hidden layer:
    model.add(Dense(hidden_dims))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # We project onto a 5-unit output layer and squash it with a sigmoid
    # (softmax is the more common pairing with categorical_crossentropy):
    model.add(Dense(units=5))
    model.add(Activation('sigmoid'))

    optimizer = Adam(lr=0.000001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # Checkpoint the weights whenever validation accuracy improves.
    checkpointer = ModelCheckpoint(filepath='./drive/text_cnn' +
                                   '.{epoch:02d}-{val_loss:.2f}.hdf5',
                                   verbose=1,
                                   save_best_only=True,
                                   monitor='val_acc',
                                   mode='max')
    csv_logger = CSVLogger('./drive/text_cnn.log')

    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(X_test, y_test),
              callbacks=[checkpointer, csv_logger])

    # Serialize the model architecture to JSON (the weights are written by ModelCheckpoint).
    with open('./drive/text_cnn_imdb_model.json', 'w') as f:
        f.write(model.to_json())
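
Since main() writes the architecture to JSON and ModelCheckpoint stores the best weights, a short sketch of how the trained model might be restored afterwards; the checkpoint filename below is only a placeholder, the real one depends on the epoch and val_loss reached during training:

from keras.models import model_from_json

# Rebuild the architecture from the JSON dump written at the end of main().
with open('./drive/text_cnn_imdb_model.json') as f:
    model = model_from_json(f.read())

# Load weights from one of the files written by ModelCheckpoint (placeholder name).
model.load_weights('./drive/text_cnn.03-0.52.hdf5')

# to_json() does not store the loss or optimizer, so compile again before evaluating.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])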