def load_review_data():
    """Load the food-review CSV, clean and tokenize the text, and split it.

    Reads ``data/Reviews.csv``, cleans each review with ``clean_text``,
    builds a vocabulary of words occurring at least ``N`` times, pickles the
    word->id mapping to ``data/vocab2int.pickle``, encodes every review as a
    sequence of ids, and returns a train/test split.

    Returns:
        X_train, X_test, y_train, y_test, vocab — id-encoded reviews, their
        ``Score`` labels, and the filtered ``{word: count}`` vocabulary.
    """
    df = pd.read_csv("data/Reviews.csv")
    # preview
    print(df.head())
    print(df.tail())
    vocab = []
    # column 0: cleaned/encoded text, column 1: score label
    X = np.zeros((len(df), 2), dtype=object)
    for i in tqdm.tqdm(range(len(df)), "Cleaning X"):
        target = df['Text'].loc[i]
        X[i, 0] = clean_text(target)
        X[i, 1] = df['Score'].loc[i]
        vocab.extend(X[i, 0].split())
    vocab = Counter(vocab)
    # drop words that occur fewer than N times (rare-word pruning)
    vocab = {k: v for k, v in vocab.items() if v >= N}
    # word -> integer encoder dict; ids start at 1 (0 is free for padding)
    vocab2int = {word: i for i, word in enumerate(vocab, start=1)}
    # pickle vocab2int so the inference scripts can reuse the same encoding
    print("Pickling vocab2int...")
    # context manager closes the file; pickle.dump(..., open(...)) leaked it
    with open("data/vocab2int.pickle", "wb") as handle:
        pickle.dump(vocab2int, handle)
    # encode reviews as id sequences
    for i in tqdm.tqdm(range(X.shape[0]), "Tokenizing words"):
        X[i, 0] = tokenize_words(X[i, 0], vocab2int)
    lengths = [len(row) for row in X[:, 0]]
    print("min_length:", min(lengths))
    print("max_length:", max(lengths))
    X_train, X_test, y_train, y_test = train_test_split(
        X[:, 0], X[:, 1], test_size=test_size, shuffle=True, random_state=19)
    return X_train, X_test, y_train, y_test, vocab
def load_emotion_data():
    """Load the labelled-emotion CSV plus supplemental text files and split.

    Reads ``data/text_emotion.csv`` and extra ``(text, emotion)`` pairs from
    ``data/anger.txt``, cleans the text, maps emotion names through
    ``categories_reversed``, builds a vocabulary of words occurring at least
    ``N`` times, pickles the word->id mapping, encodes every sample, and
    returns a train/test split.

    Returns:
        X_train, X_test, y_train, y_test, vocab — id-encoded texts, their
        emotion-category labels, and the filtered ``{word: count}`` vocab.
    """
    df = pd.read_csv("data/text_emotion.csv")
    additional_anger = read_text_file("data/anger.txt")
    # additional_happy = read_text_file("data/happy.txt")
    # additional_sadness = read_text_file("data/sadness.txt")
    # additional_files = [additional_anger, additional_happy, additional_sadness]
    additional_files = [additional_anger]
    print(df.head())
    print(df.tail())
    # number of extra samples contributed by the supplemental files
    n_samples = sum(len(file) for file in additional_files)
    print("samples:", n_samples)
    vocab = []
    # column 0: cleaned/encoded text, column 1: emotion-category label
    X = np.zeros((len(df) + n_samples, 2), dtype=object)
    for i in tqdm.tqdm(range(len(df)), "Cleaning data"):
        target = df['content'].loc[i]
        try:
            emotion = categories_reversed[df['sentiment'].loc[i]]
        except KeyError:
            # NOTE(review): skipped rows stay as the zero placeholder (0, 0);
            # tokenize_words(str(0), ...) below still encodes them, so they
            # leak into the split as junk samples — confirm this is intended
            # (the dead "remove zero lines" code below suggests it was known).
            continue
        X[i, 0] = clean_text(target)
        X[i, 1] = emotion
        vocab.extend(X[i, 0].split())
    # first free row after the CSV rows
    k = i + 1
    for file in tqdm.tqdm(additional_files, "Cleaning additional data"):
        for i, (text, emotion) in enumerate(file, start=k):
            X[i, 0] = clean_text(text)
            # the supplemental files label this category "joy"; the project
            # taxonomy calls it "happiness"
            if emotion == "joy":
                emotion = "happiness"
            X[i, 1] = categories_reversed[emotion]
            vocab.extend(X[i, 0].split())
        # advance past the last row written; the original `k = i` would make
        # a second file overwrite the previous file's final sample (harmless
        # today with a single file, but a latent off-by-one)
        k = i + 1
    # remove zero lines
    # X = X[np.any(X != 0, axis=1)]
    vocab = Counter(vocab)
    # drop words that occur fewer than N times (rare-word pruning)
    vocab = {k: v for k, v in vocab.items() if v >= N}
    # word -> integer encoder dict; ids start at 1 (0 is free for padding)
    vocab2int = {word: i for i, word in enumerate(vocab, start=1)}
    print("Pickling vocab2int...")
    # context manager closes the file; pickle.dump(..., open(...)) leaked it
    with open("data/vocab2int.pickle", "wb") as handle:
        pickle.dump(vocab2int, handle)
    # encode samples as id sequences; str() coerces the 0 placeholders
    for i in tqdm.tqdm(range(X.shape[0]), "Tokenizing words"):
        X[i, 0] = tokenize_words(str(X[i, 0]), vocab2int)
    lengths = [len(row) for row in X[:, 0]]
    print("min_length:", min(lengths))
    print("max_length:", max(lengths))
    X_train, X_test, y_train, y_test = train_test_split(
        X[:, 0], X[:, 1], test_size=test_size, shuffle=True, random_state=7)
    return X_train, X_test, y_train, y_test, vocab
# Speech-driven emotion classifier: records audio, transcribes it with
# Google speech recognition, and classifies the transcript's emotion with
# the pretrained Keras model.
audio = r.listen(source)
print("Loading vocab2int")
# context manager closes the file; pickle.load(open(...)) leaked the handle.
# NOTE(review): the ":" in this path is unusual — confirm the directory is
# really named "Mood:Emotion Code" on this filesystem.
with open("Mood:Emotion Code/data/vocab2int.pickle", "rb") as handle:
    vocab2int = pickle.load(handle)
model = get_model_emotions(len(vocab2int),
                           sequence_length=sequence_length,
                           embedding_size=embedding_size)
model.load_weights("results/model_v1_0.59_0.76.h5")

if __name__ == "__main__":
    # transcribe once and reuse the result — the original called
    # r.recognize_google(audio) twice, repeating the slow networked
    # speech-to-text request
    spoken_text = r.recognize_google(audio)
    text = tokenize_words(clean_text(spoken_text), vocab2int)
    # pad/truncate to the model's fixed input length
    x = pad_sequences([text], maxlen=sequence_length)
    prediction = model.predict_classes(x)[0]
    probs = model.predict(x)[0]
    print("Question asked: ", Textlist[index])
    print("You said: " + spoken_text)
    print("Probs:")
    for i, category in categories.items():
        print(f"{category.capitalize()}: {probs[i]*100:.2f}%")
    print("The most dominant emotion:", categories[prediction])
# Command-line emotion classifier: takes a text argument, encodes it with
# the pickled vocabulary, and prints per-category probabilities plus the
# dominant emotion predicted by the pretrained Keras model.
# audio = r.listen(source)
print("Loading vocab2int")
# context manager closes the file; pickle.load(open(...)) leaked the handle
with open("data/vocab2int.pickle", "rb") as handle:
    vocab2int = pickle.load(handle)
model = get_model_emotions(len(vocab2int),
                           sequence_length=sequence_length,
                           embedding_size=embedding_size)
model.load_weights("results/model_v1_0.59_0.76.h5")

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Emotion classifier using text")
    parser.add_argument("text", type=str, help="The text you want to analyze")
    args = parser.parse_args()
    text = tokenize_words(clean_text(args.text), vocab2int)
    # pad/truncate to the model's fixed input length
    x = pad_sequences([text], maxlen=sequence_length)
    prediction = model.predict_classes(x)[0]
    probs = model.predict(x)[0]
    print("Probs:")
    for i, category in categories.items():
        print(f"{category.capitalize()}: {probs[i]*100:.2f}%")
    print("The most dominant emotion:", categories[prediction])
'CPU': 1, 'GPU': 0 }) from model import get_model_5stars from utils import clean_text, tokenize_words from config import embedding_size, sequence_length from keras.preprocessing.sequence import pad_sequences import pickle vocab2int = pickle.load(open("data/vocab2int.pickle", "rb")) model = get_model_5stars(len(vocab2int), sequence_length=sequence_length, embedding_size=embedding_size) model.load_weights("results/model_V20_0.38_0.80.h5") if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Food Review evaluator") parser.add_argument("review", type=str, help="The review of the product in text") args = parser.parse_args() review = tokenize_words(clean_text(args.review), vocab2int) x = pad_sequences([review], maxlen=sequence_length) print(f"{model.predict(x)[0][0]:.2f}/5")