def build_cnn_model():
    # Instantiate a Keras tensor for the padded input sequences
    sequences = layers.Input(shape=(max_length,))
    # Turn positive integers (token indexes) into dense vectors of fixed size
    embedded = layers.Embedding(12000, 64)(sequences)
    # The convolution kernel is convolved with the layer input to produce a tensor of outputs
    # (filters, kernel_size, activation)
    x = layers.Conv1D(64, 3, activation='relu')(embedded)
    # Normalize and scale the activations
    x = layers.BatchNormalization()(x)
    # Downsample the representation by taking the maximum value over each window
    x = layers.MaxPool1D(3)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(5)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    # Downsample the representation by taking the maximum value over the time dimension
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Flatten()(x)
    # First argument is the dimensionality of the output space
    x = layers.Dense(100, activation='relu')(x)

    # Sigmoid activation: inputs below about -5 map to values close to 0; inputs above about 5 map to values close to 1
    predictions = layers.Dense(1, activation='sigmoid')(x)

    model = models.Model(inputs=sequences, outputs=predictions)

    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['binary_accuracy']
    )

    return model
def build_model():
    sequences = layers.Input(shape=(MAX_LENGTH, ))
    embedded = layers.Embedding(MAX_FEATURES, 64)(sequences)
    x = layers.Conv1D(64, 3, activation='relu')(embedded)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(3)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(5)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(100, activation='relu')(x)
    predictions = layers.Dense(1, activation='sigmoid')(x)
    model = models.Model(inputs=sequences, outputs=predictions)
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    return model
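Neither MAX_LENGTH nor MAX_FEATURES is defined in the snippet above. A minimal usage sketch, with assumed placeholder values and toy data (not part of the original example), might look like this:

# Usage sketch (assumed constants and toy data, not from the original):
import numpy as np
from tensorflow.keras import layers, models

MAX_FEATURES = 12000  # assumed vocabulary size for the Embedding layer
MAX_LENGTH = 200      # assumed padded sequence length

model = build_model()
model.summary()

# Toy integer-encoded, padded sequences with binary labels.
x_dummy = np.random.randint(1, MAX_FEATURES, size=(64, MAX_LENGTH))
y_dummy = np.random.randint(0, 2, size=(64, 1))
model.fit(x_dummy, y_dummy, epochs=1, batch_size=16)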
Example #3
                        help='Number of trainings',
                        default=10000,
                        type=int)
    args = parser.parse_args(args=[])
    dwin = args.dwin
    folders = ffzl(args.train)
    tests = ffzl(args.test)

    x = tf.compat.v1.placeholder(tf.float32, shape=(None, dwin))
    y = tf.compat.v1.placeholder(tf.int32, shape=(None, 1))
    ans = tf.compat.v1.one_hot(y, len(folders))
    model = tf.reshape(x, [-1, dwin, 1])
    model = Convs(16)(model)
    model = Convs(32)(model)
    model = Convs(64)(model)
    model = KL.GlobalMaxPool1D()(model)
    model = KL.Dropout(0.25)(model)
    model = KL.Dense(len(folders), activation='softmax')(model)
    model = Ter(model, ans)

    mg_loss = tf.compat.v1.summary.scalar('loss', model.Loss)
    mg_test = tf.compat.v1.summary.scalar('test', model.Loss)

    with tf.compat.v1.Session() as sess:
        with tf.compat.v1.summary.FileWriter('./logs', sess.graph) as writer:
            sess.run(tf.compat.v1.global_variables_initializer())
            #try to load last state
            SLR = slr()
            SLR.load(sess)

            #training
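This example is cut off before its training loop. For reference only (not the original code), here is a minimal, self-contained sketch of how tf.compat.v1 placeholders, feed_dict and summary scalars are typically driven in a session; all names and shapes below are illustrative assumptions.

# Standalone sketch of a tf.compat.v1 training loop with summary logging (assumed, not the original).
import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x = tf.compat.v1.placeholder(tf.float32, shape=(None, 4))
y = tf.compat.v1.placeholder(tf.float32, shape=(None, 1))
pred = tf.compat.v1.layers.dense(x, 1)
loss = tf.reduce_mean(tf.square(pred - y))
train_op = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(loss)
loss_summary = tf.compat.v1.summary.scalar('loss', loss)

with tf.compat.v1.Session() as sess:
    writer = tf.compat.v1.summary.FileWriter('./logs', sess.graph)
    sess.run(tf.compat.v1.global_variables_initializer())
    for step in range(100):
        # Feed a random toy batch through the placeholders.
        xb = np.random.rand(32, 4).astype('float32')
        yb = xb.sum(axis=1, keepdims=True)
        _, summ = sess.run([train_op, loss_summary], feed_dict={x: xb, y: yb})
        writer.add_summary(summ, step)
    writer.close()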
Example #4
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

# Model Building

sequential_cnn = Sequential()
vocab_size = len(tokenizer.word_index) + 1
output_dim = 50
print(vocab_size)
print(X_train.shape[1])
input_length = X_train.shape[1]
sequential_cnn.add(
    layers.Embedding(input_dim=vocab_size,
                     output_dim=output_dim,
                     input_length=input_length))
sequential_cnn.add(layers.Conv1D(100, 5, activation='relu'))
sequential_cnn.add(layers.GlobalMaxPool1D())
sequential_cnn.add(layers.Dense(10, activation='relu'))
sequential_cnn.add(layers.Dense(6, activation='softmax'))
print(sequential_cnn.summary())
sequential_cnn.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
history_cnn = sequential_cnn.fit(X_train,
                                 Y_train,
                                 epochs=10,
                                 batch_size=32,
                                 validation_data=(X_test, Y_test))

plt_performance(history_cnn)
# training accuracy ~ 1.0
# validation accuracy ~ 0.75
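The plt_performance helper called above is not defined in this example. A plausible sketch of such a helper, assuming it plots the accuracy and loss curves from the History object (tf.keras 2.x metric names assumed), is:

import matplotlib.pyplot as plt

def plt_performance(history):
    # Plot training vs. validation accuracy and loss over the epochs.
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))
    ax_acc.plot(history.history['accuracy'], label='train')
    ax_acc.plot(history.history['val_accuracy'], label='validation')
    ax_acc.set_title('accuracy')
    ax_acc.legend()
    ax_loss.plot(history.history['loss'], label='train')
    ax_loss.plot(history.history['val_loss'], label='validation')
    ax_loss.set_title('loss')
    ax_loss.legend()
    plt.show()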
Example #5
def supervised_learning(load):
    # Loading dataset
    data = []
    g = gzip.open("Software.json.gz", 'r')
    print("Loading dataset ...")
    for l in g:
        data.append(json.loads(l))
    N = 100000
    print("The dataset used has ", len(data), "entries! Of this dataset", N,
          "entries are used to train the model.")

    reviews = []
    ratings = []
    print("Text preprocessing ...")
    for d in data[:N]:
        if d.get('reviewText') is None:
            continue
        # remove all unwanted chars
        review = re.compile(r'[^a-z0-9\s]').sub(
            r'',
            re.compile(r'[\W]').sub(r' ',
                                    d.get('reviewText').lower()))
        reviews.append(review)
        rating = float(d.get('overall'))
        ratings.append(rating)

    # vectorize the input texts
    max_features = 200000
    tokenizer = Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(reviews)
    reviews = tokenizer.texts_to_sequences(reviews)

    # calculate the maximal review length and pad all inputs to the same length for the neural network
    max_length = max(len(train_r) for train_r in reviews)
    reviews = tf.keras.preprocessing.sequence.pad_sequences(reviews,
                                                            maxlen=max_length)

    # split the data into training set, test set and validation set
    print("Splitting dataset ...")
    train_reviews, test_reviews, train_ratings, test_ratings = train_test_split(
        np.array(reviews), np.array(ratings), test_size=0.1)
    train_reviews, validation_reviews, train_ratings, validation_ratings = train_test_split(
        train_reviews, train_ratings, test_size=0.2)

    # Create the neural network. Input size was calculated above
    input = layers.Input(shape=(max_length, ))
    x = layers.Embedding(max_features, 64)(input)
    # stacked blocks of convolution, batch normalization and max pooling
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(3)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(5)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Flatten()(x)
    # two fully connected (dense) layers to condense the output to a single number
    x = layers.Dense(64, activation='relu')(x)
    output = layers.Dense(1, activation=mapping_to_target_range)(x)
    model = models.Model(inputs=input, outputs=output)

    # Adam (a stochastic gradient descent variant) as the optimizer
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)

    # compile the model. MSE is used as the loss since the output and target are floats
    model.compile(optimizer=opt, loss='mean_squared_error')

    # loading model weights if wanted
    if load:
        print("\nLoading previous model weights:\n")
        model.load_weights('weights/supervisedLearning')

    # training the model
    print("Training Model:\n")
    model.fit(train_reviews,
              train_ratings,
              batch_size=64,
              epochs=3,
              validation_data=(validation_reviews, validation_ratings))

    # calculating the predictions on the test set
    test_pred = model.predict(test_reviews)

    # printing the classification report
    print(classification_report(test_ratings, np.round(test_pred)))

    # testing the model with 3 examples (positive, negative, neutral review)
    print("\nTesting model: ")
    x = "I really like this book. It is one of the best I have read."
    x = re.compile(r'[^a-z\s]').sub(r'',
                                    re.compile(r'[\W]').sub(r' ', x.lower()))
    x = tokenizer.texts_to_sequences(np.array([x]))
    x = tf.keras.preprocessing.sequence.pad_sequences(x, maxlen=max_length)
    result = model.predict(x)
    print(
        "'I really like this book. It is one of the best I have read.' got the rating: ",
        round(result[0][0]))

    x = "I really hate this book. It is one of the worst I have read."
    x = re.compile(r'[^a-z\s]').sub(r'',
                                    re.compile(r'[\W]').sub(r' ', x.lower()))
    x = tokenizer.texts_to_sequences(np.array([x]))
    x = tf.keras.preprocessing.sequence.pad_sequences(x, maxlen=max_length)
    result = model.predict(x)
    print(
        "'I really hate this book. It is one of the worst I have read.' got the rating: ",
        round(result[0][0]))

    x = "This book is ok. It is very average."
    x = re.compile(r'[^a-z\s]').sub(r'',
                                    re.compile(r'[\W]').sub(r' ', x.lower()))
    x = tokenizer.texts_to_sequences(np.array([x]))
    x = tf.keras.preprocessing.sequence.pad_sequences(x, maxlen=max_length)
    result = model.predict(x)
    print("'This book is ok. It is very average.' got the rating: ",
          round(result[0][0]))

    # saving the model weights
    print("\n\nSaving model weights ...")
    model.save_weights('weights/supervisedLearning')
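The custom output activation mapping_to_target_range used above is not defined in this example. A plausible sketch, assuming it rescales a sigmoid output to the 1-5 star rating range, is:

import tensorflow as tf

def mapping_to_target_range(x, target_min=1.0, target_max=5.0):
    # Assumed behaviour: squash the raw output to (0, 1) with a sigmoid,
    # then rescale it to the rating range [target_min, target_max].
    x01 = tf.keras.backend.sigmoid(x)
    return x01 * (target_max - target_min) + target_min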
Example #6
import os
import tensorboard
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
max_features = 2000
max_len = 500
data_path = r"F:\5-model data\imdb.npz"
(x_train, y_train), (x_test, y_test) = imdb.load_data(path=data_path,
                                                      num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)
model = keras.models.Sequential()
model.add(
    layers.Embedding(max_features, 128, input_length=max_len, name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPool1D())
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
log_dir = r"E:\1- data\log\1.log"
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        embeddings_freq=1,
    )
]
history = model.fit(x_train,
                    y_train,
                    batch_size=128,
                    validation_split=0.2,
                    callbacks=callbacks)
from tensorflow.keras.utils import plot_model
import pydot, graphviz
plot_model(model, to_file='model.png')
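Once training has written event files to log_dir, the recorded metrics, histograms and embeddings can be inspected by starting TensorBoard with the command tensorboard --logdir followed by the log directory, then opening the local URL it prints.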