def train():
    """Train the sentiment classifier and print predictions for sample texts.

    Steps, in order:

    1. Load the embedding matrix, labels, texts and the tokenizer corpus
       from ``.npy`` files in the current working directory.
    2. One-hot encode the labels into 5 classes and fit a ``Tokenizer``
       on the corpus.
    3. Make an 80/20 train/test split and save the training part to
       ``text_train.npy`` / ``sentiment_train.npy``.
    4. Build the model via ``Models.buil_pre_model`` (or load the saved
       checkpoint when the checkpoint file already exists) and fit it with
       checkpointing and TensorBoard logging.
    5. Print the arg-max class index predicted for a few fixed sentences.

    Returns:
        None. Results are written to disk and printed to stdout.
    """
    max_len = 75  # every tokenised text is padded/truncated to this length

    # NOTE(review): the checkpoint path used to be read from a name this
    # function never defined, while "ensemble_bgru.h5" was assigned but never
    # used. Presumably this file is the intended checkpoint -- confirm.
    checkpoint_path = "ensemble_bgru.h5"

    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
    # objects; only safe while these files come from a trusted source.
    embeddings = np.load('text_embedding.npy', allow_pickle=True)
    sentiments = np.load('sentiments.npy', allow_pickle=True)
    texts = np.load('texts.npy', allow_pickle=True)
    all_texts = np.load('text_cache.npy', allow_pickle=True)

    categorical_sentiments = to_categorical(sentiments, num_classes=5)

    tokenizer = Tokenizer(num_words=300000, oov_token=None)
    tokenizer.fit_on_texts(all_texts)

    def encode(batch):
        """Turn raw texts into fixed-length padded id sequences."""
        return pad_sequences(tokenizer.texts_to_sequences(batch),
                             maxlen=max_len)

    X_train, X_test, Y_train, Y_test = train_test_split(
        texts, categorical_sentiments, test_size=0.2)
    # Persist the training split so it can be inspected or reused later.
    np.save("text_train.npy", X_train)
    np.save("sentiment_train.npy", Y_train)

    # Keep the Models instance bound: rebinding this name to a list would
    # make the buil_pre_model() call below fail.
    models = Models()

    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)

    models.buil_pre_model(embeddings)
    model = models.model
    # Resume from an earlier run when a checkpoint file is present.
    if os.path.isfile(checkpoint_path):
        model = load_model(checkpoint_path)

    # Only overwrite the checkpoint when the training loss improves.
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint, tensorboard_callback]

    model.fit(
        encode(X_train[:500000]),
        Y_train[:500000],
        batch_size=512,
        epochs=50,
        validation_data=(encode(X_test[:5000]), Y_test[:5000]),
        callbacks=callbacks_list,
        shuffle=True,
    )

    # Quick sanity check on a handful of hand-written sentences.
    sample_texts = [
        " What happened 2 ur vegan food options?! At least say on ur site so i know I won't be able 2 eat anything for next 6 hrs #fail",
        " I sleep hungry and It gets harder everyday",
        "everything is great, i have lost some weight",
        "awesome, really cool",
        "should I play cards",
        "I am full and inshape",
        "is it okay to be that hungry at night?",
    ]
    result = model.predict_on_batch(encode(sample_texts))
    print("result: ", np.argmax(result, axis=-1), "\n")
count += 1 vocab = np.append(vocab, [words[0]]) texts = np.append(texts, vocab) sentiments = np.append(sentiments, [0] * len(vocab)) categorical_sentiments = to_categorical(sentiments, num_classes=5) tokenizer = Tokenizer(num_words=300000) tokenizer.fit_on_texts(texts) X_train, X_test, Y_train, Y_test = train_test_split(texts, categorical_sentiments, test_size=0.2) logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = TensorBoard(log_dir=logdir) filepath = "return.h5" models = Models() models.buil_pre_model(embeddings) model = models.model if os.path.isfile(filepath): model = load_model(filepath) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc']) checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min') callbacks_list = [checkpoint, tensorboard_callback] model.fit(pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=150),