Example #1
0
def train():
    """Train (or resume training of) the ensemble BGRU sentiment model.

    Loads pre-computed embeddings, sentiment labels and raw texts from
    ``.npy`` caches, fits a tokenizer on the full cached corpus, trains the
    model with checkpointing + TensorBoard logging, and prints argmax
    predictions for a handful of hand-written sample sentences.
    Side effects: writes ``text_train.npy`` / ``sentiment_train.npy``,
    TensorBoard logs, and the ``ensemble_bgru.h5`` checkpoint.
    """
    embeddings = np.load('text_embedding.npy', allow_pickle=True)
    sentiments = np.load('sentiments.npy', allow_pickle=True)
    texts = np.load('texts.npy', allow_pickle=True)
    all_texts = np.load('text_cache.npy', allow_pickle=True)

    # Labels appear to be integer classes 0..4 -> one-hot length-5 vectors.
    categorical_sentiments = to_categorical(sentiments, num_classes=5)

    # Fit on the FULL text cache (not just the training split) so word
    # indices stay consistent with the pre-computed embedding matrix.
    tokenizer = Tokenizer(num_words=300000, oov_token=None)
    tokenizer.fit_on_texts(all_texts)

    X_train, X_test, Y_train, Y_test = train_test_split(texts,
                                                        categorical_sentiments,
                                                        test_size=0.2)
    # Persist the split so later runs / evaluation can reuse it.
    np.save("text_train.npy", X_train)
    np.save("sentiment_train.npy", Y_train)

    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)

    # BUG FIX: the original rebound `models` to an empty list after creating
    # the Models() instance (AttributeError on buil_pre_model), and stored
    # the checkpoint path in an unused `bgruModel` variable while the code
    # below referenced an undefined `filepath` (NameError).
    filepath = "ensemble_bgru.h5"
    models = Models()
    models.buil_pre_model(embeddings)  # (sic) method name is the Models API
    model = models.model
    # Resume from a previous checkpoint when one exists.
    if os.path.isfile(filepath):
        model = load_model(filepath)

    checkpoint = ModelCheckpoint(filepath,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint, tensorboard_callback]

    # Sequences are truncated/padded to 75 tokens; caps at 500k train /
    # 5k validation samples to bound memory use.
    model.fit(pad_sequences(tokenizer.texts_to_sequences(X_train[:500000]),
                            maxlen=75),
              Y_train[:500000],
              batch_size=512,
              epochs=50,
              validation_data=(pad_sequences(tokenizer.texts_to_sequences(
                  X_test[:5000]),
                                             maxlen=75), Y_test[:5000]),
              callbacks=callbacks_list,
              shuffle=True)

    # Quick qualitative sanity check on a few hand-written sentences.
    result = model.predict_on_batch(
        pad_sequences(tokenizer.texts_to_sequences([
            " What happened 2 ur vegan food options?! At least say on ur site so i know I won't be able 2 eat anything for next 6 hrs #fail",
            " I sleep hungry and It gets harder everyday",
            "everything is great, i have lost some weight",
            "awesome, really cool", "should I play cards",
            "I am full and inshape", "is it okay to be that hungry at night?"
        ]),
                      maxlen=75))
    print("result: ", np.argmax(result, axis=-1), "\n")
Example #2
0
                count += 1
                vocab = np.append(vocab, [words[0]])
    texts = np.append(texts, vocab)
    sentiments = np.append(sentiments, [0] * len(vocab))
    categorical_sentiments = to_categorical(sentiments, num_classes=5)
    tokenizer = Tokenizer(num_words=300000)
    tokenizer.fit_on_texts(texts)
    X_train, X_test, Y_train, Y_test = train_test_split(texts,
                                                        categorical_sentiments,
                                                        test_size=0.2)

    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)
    filepath = "return.h5"
    models = Models()
    models.buil_pre_model(embeddings)
    model = models.model
    if os.path.isfile(filepath):
        model = load_model(filepath)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint, tensorboard_callback]

    model.fit(pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=150),