def test_torchmoji_return_attention():
    """Check how many outputs the model produces with and without attention."""
    seq_tensor = np.array([[1]])

    # (extra constructor kwargs, expected number of outputs):
    # first the default model, then the one returning attention weights.
    cases = (
        ({}, 1),
        ({'return_attention': True}, 2),
    )
    for extra_kwargs, expected_outputs in cases:
        model = torchmoji_emojis(weight_path=PRETRAINED_PATH, **extra_kwargs)
        assert len(model(seq_tensor)) == expected_outputs
async def predict_sentence_emojis(sentence: str, num_to_predict: int = 5) -> dict:
    """
    Predict the most likely emojis for a sentence.

    The vocabulary and the model are loaded on every call.

    :param sentence: sentence used in prediction
    :param num_to_predict: number of top emojis to return
    :return: Dictionary where key is predicted emoji and value is its probability
    """
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)
    tokenizer = SentenceTokenizer(vocab, MAXLEN)
    emoji_model = torchmoji_emojis(PRETRAINED_PATH)

    print('Running predictions.')
    token_ids, _, _ = tokenizer.tokenize_sentences([sentence])
    # Only one sentence was submitted, so keep the first row of the output.
    probabilities = emoji_model(token_ids)[0]
    best = top_elements(probabilities, num_to_predict)

    # Emoji aliases first, then their unicode characters.
    aliases = [EMOJIS[index] for index in best]
    glyphs = [unicode_codes.EMOJI_ALIAS_UNICODE[alias] for alias in aliases]

    return dict(zip(glyphs, probabilities[best]))
def init_tokenizer_emotions(max_len):
    """Build the sentence tokenizer and the emoji model.

    :param max_len: second argument handed to ``SentenceTokenizer``
    :return: ``(tokenizer, model)`` tuple
    """
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)

    tokenizer = SentenceTokenizer(vocab, max_len)
    emoji_model = torchmoji_emojis(PRETRAINED_PATH)
    return tokenizer, emoji_model
def get_emotion_features_from_text(text, audio_filename):
    """Predict the top-5 emoji ids for *text* and pickle them next to the audio file.

    Based on
    https://github.com/huggingface/torchMoji/blob/master/examples/score_texts_emojis.py

    Two files are written into the directory of ``audio_filename``:
    ``onehot_emotion_<stem>.pkl`` and ``emoji_ids_<stem>.pkl``, where ``<stem>``
    is the file name up to its first ``.wav``.

    :param text: sentence to score; an empty string skips the model entirely
        and yields empty results
    :param audio_filename: path of the audio file the text belongs to; may be
        absolute or relative
    :return: ``(emoji_ids, one_hot_encodings)`` -- the five highest scoring
        emoji ids (best first) and one 64-wide one-hot list per id
    """
    # Local import so the block does not rely on a file-level ``import os``.
    import os

    top_k = 5
    # Emoji ids (0-63) correspond to the mapping in emoji_overview.png
    # at the root of the torchMoji repo.
    n_emojis = 64

    if text == '':
        emoji_ids = []
    else:
        def top_elements(array, k):
            ind = np.argpartition(array, -k)[-k:]
            return ind[np.argsort(array[ind])][::-1]

        maxlen = 30
        with open(VOCAB_PATH, 'r') as f:
            vocabulary = json.load(f)
        st = SentenceTokenizer(vocabulary, maxlen)
        model = torchmoji_emojis(PRETRAINED_PATH)

        tokenized, _, _ = st.tokenize_sentences([text])
        prob = model(tokenized)
        # A single sentence was submitted, so only row 0 is relevant.
        emoji_ids = list(top_elements(prob[0], top_k))

    one_hot_encodings = [
        [1 if i == emoji_idx else 0 for i in range(n_emojis)]
        for emoji_idx in emoji_ids
    ]

    # Derive the output directory with os.path instead of re-joining the
    # '/'-split components behind a forced leading '/', which redirected
    # relative paths to the filesystem root.
    directory, basename = os.path.split(audio_filename)
    stem = basename.split('.wav')[0]

    filename = os.path.join(directory, 'onehot_emotion_' + stem + '.pkl')
    with open(filename, 'wb') as f:
        pickle.dump(one_hot_encodings, f)

    filename = os.path.join(directory, 'emoji_ids_' + stem + '.pkl')
    with open(filename, 'wb') as f:
        pickle.dump(emoji_ids, f)

    return emoji_ids, one_hot_encodings
def __init__(self):
    """Load the vocabulary, tokenizer and emoji model, and store the Dango URL."""
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)

    # Tokenizer built from the vocabulary, with 30 as its second argument.
    self.st = SentenceTokenizer(vocab, 30)

    # Emoji model loaded from PRETRAINED_PATH.
    self.model = torchmoji_emojis(PRETRAINED_PATH)

    # URL prefix stored for later use; the query text is expected after "q=".
    self.dangoURL = "https://emoji.getdango.com/api/emoji?q="
def __init__(self, *args, **kwargs):
    """Initialise the HTTP server, then attach a tokenizer and an emoji model.

    All positional and keyword arguments are passed to ``HTTPServer.__init__``.
    """
    HTTPServer.__init__(self, *args, **kwargs)

    max_sentence_length = 100
    with open(vocab_file_path, 'r') as vocab_file:
        vocab = json.load(vocab_file)

    self.st = SentenceTokenizer(vocab, max_sentence_length)
    self.model = torchmoji_emojis(model_weights_path)
def __init__(self, use_cuda=True):
    """Create the emoji model, the tokenizer and the feature-encoding model.

    :param use_cuda: when true, both models are moved with ``.cuda()``
    """
    super(MojiModel, self).__init__()
    self.use_cuda = use_cuda
    self.EMOJIS = EMOJIS

    self.emoji_model = torchmoji_emojis(PRETRAINED_PATH)

    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)
    self.tokenizer = SentenceTokenizer(vocab, 100)

    # Print the emoji model to stdout.
    print(self.emoji_model)

    self.feat_model = torchmoji_feature_encoding(PRETRAINED_PATH)

    if not use_cuda:
        return
    self.emoji_model = self.emoji_model.cuda()
    self.feat_model = self.feat_model.cuda()
def __init__(self, vocab: Vocabulary) -> None:
    """Set up the metric, the two linear layers and the torchMoji components.

    :param vocab: vocabulary passed to the parent class and to
        ``MicroMetrics``; its "labels" namespace sets the output layer width
    """
    super().__init__(vocab)
    self.accuracy = MicroMetrics(vocab)
    self.label_index_to_label = self.vocab.get_index_to_token_vocabulary('labels')

    # Input width is 64 * 3; the hidden width is 64.
    final_concatenated_dimension = 64 * 3
    hidden_size = 64
    label_count = vocab.get_vocab_size("labels")

    self.input_layer = torch.nn.Linear(in_features=final_concatenated_dimension,
                                       out_features=hidden_size)
    self.output_layer = torch.nn.Linear(in_features=hidden_size,
                                        out_features=label_count)
    self.sigmoid = nn.Sigmoid()

    with open(VOCAB_PATH, 'r') as vocab_file:
        self.vocabulary = json.load(vocab_file)
    self.st = SentenceTokenizer(self.vocabulary, 20)
    self.model = torchmoji_emojis(PRETRAINED_PATH)
def init():
    """Populate the module-level tokenizer, model and emoji lookup tables.

    Sets the globals ``sentence_tokenizer``, ``model``, ``emoji_desc`` and
    ``emoji_unicode``.
    """
    global sentence_tokenizer, model, emoji_desc, emoji_unicode

    max_token = 30
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)
    sentence_tokenizer = SentenceTokenizer(vocab, max_token)
    model = torchmoji_emojis(PRETRAINED_PATH)

    with open('data/emoji_codes.json') as codes_file:
        emoji_desc = json.load(codes_file)

    # Every CSV row is kept as a list of strings.
    with open('data/wanted_emojis.csv') as wanted_file:
        emoji_unicode = [row for row in csv.reader(wanted_file)]
def test_score_emoji():
    """ Emoji predictions make sense.
    """
    def top_elements(array, k):
        # Indices of the k largest values, ordered from largest to smallest.
        candidates = np.argpartition(array, -k)[-k:]
        ascending = np.argsort(array[candidates])
        return candidates[ascending][::-1]

    test_sentences = [
        'I love mom\'s cooking',
        'I love how you never reply back..',
        'I love cruising with my homies',
        'I love messing with yo mind!!',
        'I love you and now you\'re just gone..',
        'This is shit',
        'This is the shit',
    ]

    # Expected top-5 emoji ids, one row per sentence above.
    expected = [
        np.array(row) for row in (
            [36, 4, 8, 16, 47],
            [1, 19, 55, 25, 46],
            [31, 6, 30, 15, 13],
            [54, 44, 9, 50, 49],
            [46, 5, 27, 35, 34],
            [55, 32, 27, 1, 37],
            [48, 11, 6, 31, 9],
        )
    ]

    # Load the dictionary and tokenize the texts.
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)
    tokenizer = SentenceTokenizer(vocab, 30)
    tokens, _, _ = tokenizer.tokenize_sentences(test_sentences)

    # Load the model and score the tokens.
    model = torchmoji_emojis(weight_path=PRETRAINED_PATH)
    prob = model(tokens)

    # Compare the top emojis of every sentence with the expectation.
    for row_index, sentence_prob in enumerate(list(prob)):
        predicted = top_elements(sentence_prob, 5)
        assert np.array_equal(predicted, expected[row_index])
def __init__(self, counter, name, max_concurrent_queries):
    """Initialise the parent class and load the torchMoji tokenizer and models.

    The three arguments are passed unchanged to the parent constructor.
    """
    super().__init__(counter, name, max_concurrent_queries)

    # Extend sys.path before the torchmoji imports below -- presumably the
    # package is shipped inside this directory; confirm against the data layout.
    bundled_dir = os.path.join(self.data_dir, "tacotron2-PPP-1.3.0")
    sys.path.append(bundled_dir)

    from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
    from torchmoji.model_def import torchmoji_emojis, torchmoji_feature_encoding
    from torchmoji.sentence_tokenizer import SentenceTokenizer

    self.log.debug("Loading model")

    with open(VOCAB_PATH, "r") as vocab_file:
        vocab = json.load(vocab_file)

    # Everything is constructed with gradient tracking switched off.
    with torch.no_grad():
        self.tm_sentence_tokenizer = SentenceTokenizer(
            vocab, MAX_LEN, ignore_sentences_with_only_custom=True)
        self.tm_torchmoji = torchmoji_feature_encoding(PRETRAINED_PATH)
        self.tm_model = torchmoji_emojis(PRETRAINED_PATH)

    self.log.debug("Model loaded")
def test():
    """Emojize the ``--text`` command-line argument when run as a script.

    Nothing happens unless ``__name__`` is ``"__main__"``.
    """
    def top_elements(array, k):
        # Indices of the k largest values, ordered from largest to smallest.
        candidates = np.argpartition(array, -k)[-k:]
        ascending = np.argsort(array[candidates])
        return candidates[ascending][::-1]

    if __name__ == "__main__":
        parser = argparse.ArgumentParser()
        parser.add_argument('--text',
                            type=str,
                            required=True,
                            help="Input text to emojize")
        parser.add_argument('--maxlen',
                            type=int,
                            default=30,
                            help="Max length of input text")
        options = parser.parse_args()

        # Build the tokenizer from the vocabulary file.
        with open(VOCAB_PATH, 'r') as vocab_file:
            vocab = json.load(vocab_file)
        tokenizer = SentenceTokenizer(vocab, options.maxlen)

        # Load the model and score the single input sentence.
        model = torchmoji_emojis(PRETRAINED_PATH)
        token_ids, _, _ = tokenizer.tokenize_sentences([options.text])
        prob = model(token_ids)[0]

        # Five best emoji ids, mapped to their aliases.
        emoji_ids = top_elements(prob, 5)
        aliases = [EMOJIS[emoji_id] for emoji_id in emoji_ids]

        decorated = "{} {}".format(options.text, ' '.join(aliases))
        print(emoji.emojize(decorated, use_aliases=True))
def text_to_emoji(text, maxlen):
    """Print *text* followed by its five best-scoring emojis.

    :param text: sentence to emojize
    :param maxlen: second argument handed to ``SentenceTokenizer``
    """
    # Build the tokenizer from the vocabulary file.
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)
    tokenizer = SentenceTokenizer(vocab, maxlen)

    # Load the model and score the single input sentence.
    model = torchmoji_emojis(PRETRAINED_PATH)
    token_ids, _, _ = tokenizer.tokenize_sentences([text])
    prob = model(token_ids)[0]

    # Five best emoji ids, mapped to their aliases.
    emoji_ids = top_elements(prob, 5)
    aliases = [EMOJIS[emoji_id] for emoji_id in emoji_ids]

    decorated = "{} {}".format(text, ' '.join(aliases))
    print(emoji.emojize(decorated, use_aliases=True))
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \ :wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \ :angry: :no_good: :muscle: :facepunch: :purple_heart: \ :sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ') # Specify the paths to the vocabulary and model weights files. vocab_file_path = '/model/vocabulary.json' model_weights_path = '/model/pytorch_model.bin' with open(vocab_file_path, 'r') as f: vocabulary = json.load(f) max_sentence_length = 100 st = SentenceTokenizer(vocabulary, max_sentence_length) model = torchmoji_emojis(model_weights_path) def predict(text): if not isinstance(text, list): text = [text] tokenized, _, _ = st.tokenize_sentences(text) prob = model(tokenized)[0] # Only keep the emoji with the highest confidence. emoji_ids = top_elements(prob, 1) emojis = list(map(lambda x: EMOJIS[x].strip(':'), emoji_ids)) return emojis[0] def top_elements(array, k): ind = np.argpartition(array, -k)[-k:]
import json import numpy as np import emoji def top_elements(array, k): ind = np.argpartition(array, -k)[-k:] return ind[np.argsort(array[ind])][::-1] with open(VOCAB_PATH, 'r') as f: vocabulary = json.load(f) st = SentenceTokenizer(vocabulary, 300) model = torchmoji_emojis(PRETRAINED_PATH) def emojify_sentences(l): tokenized, _, _ = st.tokenize_sentences(l) prob = model(tokenized) result = [] for prob in [prob]: for i in range(len(l)): t_prob = prob[i] ind_top = top_elements(t_prob, 5) result.append( list([ emoji.emojize(EMOJIS[i], use_aliases=True), float(t_prob[i])
df_sample.sentiment = pd.to_numeric(df_sample.sentiment) #checks that the sample mean is reasonable np.mean(df_sample.sentiment) #import tweets and replace text with full text if 'tweet' is a retweet df = pd.read_json('immigrationTweets.json') df.text[~df.retweeted_status.isnull()] = df[~df.retweeted_status.isnull()].retweeted_status.apply(lambda x: x.get('text')) df = df[['id','user','text','lang','reply_count','retweet_count','retweeted_status','term']] #import and parse emoji codes #import vocab and model, define sentence tokenizer, set chunk_size with open('/Users/ikennedy/Documents/GitHub/torchMoji/model/vocabulary.json') as f: vocab = json.load(f) model = torchmoji_emojis('pytorch_model.bin') st = SentenceTokenizer(vocab, 30) #specifiy colums for full df for: #twitter pull df_full = pd.DataFrame(columns=['id','user','text','lang','reply_count','retweet_count','retweeted_status','term']+list(emoji_codes)) #Twitter sample df_full = pd.DataFrame(columns=['sentiment', 'text']+list(emoji_codes)) #runn in a loops of 5000 to avoid overusing computational resources chunk_size = 5000 i = 1000 chunk_size = 1000 for i in range(chunk_size,len(df)+chunk_size,chunk_size): if(i>len(df)): i = len(df) chunk_size = len(df) % chunk_size
'disapprove_estimate', 'disapprove_hi', 'disapprove_lo', 'formatted_date', 'status_id', 'sum(numScore)', 'text', 'created_at', 'name' ]] #import and parse emoji codes emoji_codes = pd.read_json( '/Users/ikennedy/Work/UW/Code/GIT/cl_lda/twitter/emojicodes.json', orient='values', typ='series').str.extract(':(\w+):', expand=False).sort_index() #import vocab and model, define sentence tokenizer, set chunk_size os.getcwd() with open('/Users/ikennedy/Documents/GitHub/torchMoji/model/vocabulary.json' ) as f: vocab = json.load(f) model = torchmoji_emojis('twitter/pytorch_model.bin') st = SentenceTokenizer(vocab, 30) #specifiy colums for full df for: #twitter pull df_full = pd.DataFrame( columns=['sentiment', 'id', 'date', 'query', 'screen_name', 'text'] + list(emoji_codes)) #Twitter sample #df_full = pd.DataFrame(columns=['sentiment', 'text']+list(emoji_codes)) #runn in a loops of 5000 to avoid overusing computational resources chunk_size = 5000 i = 1000 chunk_size = 1000 for i in range(chunk_size, len(df) + chunk_size, chunk_size): if (i > len(df)):
def get_model():
    """Return an emoji model built from the path given by ``download_pretrained()``."""
    return torchmoji_emojis(download_pretrained())
def __init__(self,
             classifier_dims,
             num_classes,
             embedding_dims,
             gaussian_noise,
             dropout,
             internal_dims,
             n_layers,
             featurizer,
             final_layer_builder,
             n_tokens_in=64,
             n_tokens_out=16,
             capabilities2dims=dict(),
             use_as_super=False,
             **kwargs):
    """Build the sub-networks for every language-feature capability requested.

    The positional arguments up to ``n_tokens_out`` are passed to the parent
    constructor (always with ``use_as_super=True``) and, when
    ``use_as_super`` is false, to the selected featurizer and to
    ``final_layer_builder``.

    :param featurizer: one of "cnn", "gru", "basic" or "transformer"; any
        other value raises ``NotImplementedError`` (only checked when
        ``use_as_super`` is false)
    :param capabilities2dims: overrides for the per-capability output widths
        in ``cap_to_dim_map``; it is only read here, never modified
    :param use_as_super: when true, no featurizer is built
    :param kwargs: must contain "capabilities". Optional keys read here:
        "rake_dims", "yake_dims" (both default to 32 and are written back
        into ``kwargs``), "use_layer_norm", "fasttext_crawl_file",
        "fasttext_file", "attention_drop_proba", "n_encoders", "n_decoders"
        (the last two are popped) and "stored_model".
    :raises AssertionError: if "capabilities" is missing, or if
        "key_phrases" is requested without "spacy"
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm the indentation of the shared
    # embeddings and of the trailing statements against the original file.
    super(LangFeaturesModel,
          self).__init__(classifier_dims,
                         num_classes,
                         embedding_dims,
                         gaussian_noise,
                         dropout,
                         internal_dims,
                         n_layers,
                         featurizer,
                         final_layer_builder,
                         n_tokens_in,
                         n_tokens_out,
                         use_as_super=True,
                         **kwargs)
    assert "capabilities" in kwargs
    capabilities = kwargs["capabilities"]
    # Make the defaults explicit in kwargs so later consumers of **kwargs see them.
    kwargs["rake_dims"] = kwargs["rake_dims"] if "rake_dims" in kwargs else 32
    kwargs["yake_dims"] = kwargs["yake_dims"] if "yake_dims" in kwargs else 32
    # "key_phrases" is only allowed together with "spacy".
    assert "key_phrases" not in capabilities or (
        "key_phrases" in capabilities and "spacy" in capabilities)
    use_layer_norm = kwargs[
        "use_layer_norm"] if "use_layer_norm" in kwargs else False
    self.capabilities = capabilities
    # Width of every small nn.Embedding created in this constructor.
    embedding_dim = 8
    # Output width of each capability's ExpandContract network.
    cap_to_dim_map = {
        "spacy": 128,
        "snlp": 32,
        "key_phrases": 64,
        "nltk": 192,
        "full_view": 64,
        "tmoji": 32,
        "ibm_max": 16,
        "gensim": 256,
        "fasttext_crawl": 256
    }
    cap_to_dim_map.update(capabilities2dims)
    # Sum of the widths of the requested capabilities; input size of contract_nn.
    all_dims = sum([cap_to_dim_map[c] for c in capabilities])
    self.cap_to_dim_map = cap_to_dim_map
    self.all_dims = all_dims
    if "spacy" in capabilities:
        # spaCy pipeline with a pytextrank component appended last.
        tr = pytextrank.TextRank(token_lookback=7)
        self.nlp = spacy.load("en_core_web_lg", disable=[])
        self.nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)
        spacy_in_dims = (96 * 2) + (11 * embedding_dim) + 2
        self.spacy_nn = ExpandContract(spacy_in_dims,
                                       cap_to_dim_map["spacy"],
                                       dropout,
                                       use_layer_norm=use_layer_norm,
                                       groups=(2, 4))

    if "fasttext_crawl" in capabilities:
        self.bpe = BPEmb(dim=200)
        self.cngram = CharNGram()
        fasttext_crawl_file = kwargs[
            "fasttext_crawl_file"] if "fasttext_crawl_file" in kwargs else "crawl-300d-2M-subword.bin"
        self.crawl = fasttext.load_model(fasttext_crawl_file)
        # Input width 200 + 300 + 100 -- presumably BPEmb + fastText +
        # CharNGram vector sizes; TODO confirm.
        self.crawl_nn = ExpandContract(200 + 300 + 100,
                                       cap_to_dim_map["fasttext_crawl"],
                                       dropout,
                                       use_layer_norm=use_layer_norm,
                                       groups=(4, 4))

    if "gensim" in capabilities:
        # Four pretrained embedding sets loaded through `api.load`.
        gensim = [
            api.load("glove-twitter-50"),
            api.load("glove-wiki-gigaword-50"),
            api.load("word2vec-google-news-300"),
            api.load("conceptnet-numberbatch-17-06-300")
        ]
        self.gensim = gensim
        self.gensim_nn = ExpandContract(400,
                                        cap_to_dim_map["gensim"],
                                        dropout,
                                        use_layer_norm=use_layer_norm,
                                        groups=(4, 4))

    if "full_view" in capabilities:
        full_sent_in_dims = 300
        self.full_sent_nn = ExpandContract(full_sent_in_dims,
                                           cap_to_dim_map["full_view"],
                                           dropout,
                                           use_layer_norm=use_layer_norm,
                                           groups=(4, 4))

    if "snlp" in capabilities:
        # stanza is imported only when this capability is requested.
        import stanza
        self.snlp = stanza.Pipeline(
            'en',
            processors='tokenize,pos,lemma,depparse,ner',
            use_gpu=False,
            pos_batch_size=2048)
        self.snlp_nn = ExpandContract(embedding_dim * 5,
                                      cap_to_dim_map["snlp"],
                                      dropout,
                                      use_layer_norm=use_layer_norm)
    if "key_phrases" in capabilities:
        # yake is imported only when this capability is requested.
        import yake
        self.kw_extractor = yake.KeywordExtractor(lan="en",
                                                  n=3,
                                                  dedupLim=0.9,
                                                  dedupFunc='seqm',
                                                  windowsSize=3,
                                                  top=10,
                                                  features=None)

        self.key_occ_cnt_pytextrank = nn.Embedding(8, embedding_dim)
        nn.init.normal_(self.key_occ_cnt_pytextrank.weight,
                        std=1 / embedding_dim)
        self.key_wc_pytextrank = nn.Embedding(4, embedding_dim)
        nn.init.normal_(self.key_wc_pytextrank.weight,
                        std=1 / embedding_dim)

        yake_dims = kwargs["yake_dims"] if "yake_dims" in kwargs else 32
        self.yake_dims = yake_dims
        self.yake_nn = ExpandContract(300,
                                      yake_dims,
                                      dropout,
                                      use_layer_norm=use_layer_norm,
                                      groups=(2, 2))

        # Rake support is best-effort: ANY exception raised while importing
        # or constructing it (bare except) disables Rake and narrows
        # keyphrases_dim accordingly.
        try:
            from multi_rake import Rake
            rake_dims = kwargs["rake_dims"] if "rake_dims" in kwargs else 32
            self.rake_dims = rake_dims
            self.rake_nn = ExpandContract(300,
                                          rake_dims,
                                          dropout,
                                          use_layer_norm=use_layer_norm,
                                          groups=(2, 2))
            self.rake = Rake(language_code="en")
            keyphrases_dim = 2 * embedding_dim + rake_dims + yake_dims
        except:
            self.rake = None
            keyphrases_dim = 2 * embedding_dim + yake_dims
        self.keyphrase_nn = ExpandContract(keyphrases_dim,
                                           cap_to_dim_map["key_phrases"],
                                           dropout,
                                           use_layer_norm=use_layer_norm,
                                           groups=(4, 4))

    fasttext_file = kwargs[
        "fasttext_file"] if "fasttext_file" in kwargs else "wiki-news-300d-1M-subword.bin"
    # The fastText model is loaded only if at least one of these three
    # capabilities is requested.
    if not set(capabilities).isdisjoint(
        {"key_phrases", "full_view", "nltk"}):
        self.text_model = fasttext.load_model(fasttext_file)

    # NOTE(review): the embeddings below are assumed to be created
    # unconditionally -- confirm they are not nested under the `if` above.
    self.pdict = get_all_tags()
    # One row per tag in pdict plus one extra row.
    self.tag_em = nn.Embedding(len(self.pdict) + 1, embedding_dim)
    nn.init.normal_(self.tag_em.weight, std=1 / embedding_dim)

    self.sw_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.sw_em.weight, std=1 / embedding_dim)

    self.sent_start_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.sent_start_em.weight, std=1 / embedding_dim)

    self.is_oov_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.is_oov_em.weight, std=1 / embedding_dim)

    self.has_digit_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.has_digit_em.weight, std=1 / embedding_dim)

    self.is_mask_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.is_mask_em.weight, std=1 / embedding_dim)

    self.w_len = nn.Embedding(16, embedding_dim)
    nn.init.normal_(self.w_len.weight, std=1 / embedding_dim)

    self.wc_emb = nn.Embedding(16, embedding_dim)
    nn.init.normal_(self.wc_emb.weight, std=1 / embedding_dim)

    if "nltk" in capabilities:
        import rake_nltk
        from textblob import TextBlob
        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as VaderSentimentIntensityAnalyzer
        # `stopwords` and the un-aliased `SentimentIntensityAnalyzer` are not
        # imported in this block; they must come from the enclosing module.
        self.stop_words = set(stopwords.words('english'))
        self.rake_nltk = rake_nltk.Rake()
        self.key_wc_rake_nltk = nn.Embedding(4, embedding_dim)
        nn.init.normal_(self.key_wc_rake_nltk.weight, std=1 / embedding_dim)
        self.nltk_sid = SentimentIntensityAnalyzer()
        self.vader_sid = VaderSentimentIntensityAnalyzer()
        in_dims = 310 + 5 * embedding_dim
        self.nltk_nn = ExpandContract(in_dims,
                                      cap_to_dim_map["nltk"],
                                      dropout,
                                      use_layer_norm=use_layer_norm,
                                      groups=(2, 4))

    if "ibm_max" in capabilities:
        from ..external import ModelWrapper
        self.ibm_max = ModelWrapper()
        # Exclude the wrapped model's parameters from gradient computation.
        for p in self.ibm_max.model.parameters():
            p.requires_grad = False
        self.ibm_nn = ExpandContract(6,
                                     cap_to_dim_map["ibm_max"],
                                     dropout,
                                     use_layer_norm=use_layer_norm,
                                     groups=(1, 1))

    if "tmoji" in capabilities:
        from torchmoji.sentence_tokenizer import SentenceTokenizer
        from torchmoji.model_def import torchmoji_emojis
        from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
        with open(VOCAB_PATH, 'r') as f:
            # self.n_tokens_in is not assigned in this block; presumably set
            # by the parent constructor -- TODO confirm.
            maxlen = self.n_tokens_in
            self.vocabulary = json.load(f)
        self.st = SentenceTokenizer(self.vocabulary, maxlen)
        self.tmoji = torchmoji_emojis(PRETRAINED_PATH)
        # Exclude the torchMoji parameters from gradient computation.
        for p in self.tmoji.parameters():
            p.requires_grad = False
        self.tm_nn = ExpandContract(64,
                                    cap_to_dim_map["tmoji"],
                                    dropout,
                                    use_layer_norm=use_layer_norm,
                                    groups=(1, 1))

    # Maps the summed capability width (all_dims) to embedding_dims.
    self.contract_nn = ExpandContract(self.all_dims,
                                      embedding_dims,
                                      dropout,
                                      use_layer_norm=True,
                                      unit_norm=False,
                                      groups=(4, 4))
    if not use_as_super:
        if featurizer == "cnn":
            self.featurizer = CNN1DFeaturizer(n_tokens_in, embedding_dims,
                                              n_tokens_out, classifier_dims,
                                              internal_dims, n_layers,
                                              gaussian_noise, dropout)
        elif featurizer == "gru":
            self.featurizer = GRUFeaturizer(n_tokens_in, embedding_dims,
                                            n_tokens_out, classifier_dims,
                                            internal_dims, n_layers,
                                            gaussian_noise, dropout)
        elif featurizer == "basic":
            self.featurizer = BasicFeaturizer(n_tokens_in, embedding_dims,
                                              n_tokens_out, classifier_dims,
                                              internal_dims, n_layers,
                                              gaussian_noise, dropout)
        elif featurizer == "transformer":
            self.attention_drop_proba = kwargs[
                "attention_drop_proba"] if "attention_drop_proba" in kwargs else 0.0
            # pop() removes these keys, so they are absent from the **kwargs
            # given to final_layer_builder below.
            n_encoders = kwargs.pop("n_encoders", n_layers)
            n_decoders = kwargs.pop("n_decoders", n_layers)
            self.featurizer = TransformerFeaturizer(
                n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                internal_dims, n_encoders, n_decoders, gaussian_noise,
                dropout, self.attention_drop_proba)
        else:
            raise NotImplementedError()

        # NOTE(review): final_layer is assumed to sit inside
        # `if not use_as_super`, and the two statements after it outside --
        # confirm against the original indentation.
        self.final_layer = final_layer_builder(classifier_dims, n_tokens_out,
                                               num_classes, dropout, **kwargs)
    if "stored_model" in kwargs:
        load_stored_params(self, kwargs["stored_model"])
    self.reg_layers = get_regularization_layers(self)
def __init__(self):
    """Create a default sentence tokenizer and load the emoji model."""
    self.maxlen = 30
    self.sent_tokenizer = SentenceTokenizer()

    # Model weights are read from PRETRAINED_PATH.
    self.model = torchmoji_emojis(PRETRAINED_PATH)
def text_to_emoji(input_text, max_length):
    """Score *input_text* with the emoji model and summarise the result as JSON.

    :param input_text: text to analyse
    :param max_length: second argument handed to ``SentenceTokenizer``
    :return: JSON string with the keys "user_emotion", "positive",
        "negative" and "main_vibe"
    """
    def _read_word_list(file_name, **open_kwargs):
        # Lower-cased lines of a word list stored next to this module;
        # lines consisting only of a newline are dropped.
        path = os.path.join(os.path.dirname(__file__), file_name)
        with open(path, 'r', **open_kwargs) as handle:
            lines = list(handle)
        return [line.rstrip('\n').lower() for line in lines if line != '\n']

    # Load dictionary for tokenizing.
    with open(VOCAB_PATH, 'r') as vocab_file:
        vocab = json.load(vocab_file)

    # The negative list is read as UTF-8 with undecodable bytes ignored;
    # the positive list is read with the platform defaults.
    negative_words = _read_word_list('./negative_words_parsed.txt',
                                     encoding='utf-8',
                                     errors='ignore')
    positive_words = _read_word_list('./positive_words_parsed.txt')

    tokenizer = SentenceTokenizer(vocab, max_length)
    model = torchmoji_emojis(PRETRAINED_PATH)

    # Score the single input sentence and keep its five best ids.
    token_ids, _, _ = tokenizer.tokenize_sentences([input_text])
    prob = model(token_ids)[0]
    emotion_ids = top_elements(prob, 5)

    # Map the ids to emotions and classify the overall feeling.
    emotions = [EMOTIONS[emotion_id] for emotion_id in emotion_ids]
    user_feelings = positive_or_negative(emotions)

    # Collect the words contributing to the feeling; a word present in both
    # lists counts as positive only.
    user_positive_words = []
    user_negative_words = []
    for word in input_text.split(' '):
        if word in positive_words:
            user_positive_words.append(word)
            continue
        if word in negative_words:
            user_negative_words.append(word)

    # The emoji of the best-scoring id is the main vibe.
    emojis = [EMOJIS[emotion_id] for emotion_id in emotion_ids]
    main_vibe = emojis[0]

    json_to_bot = {
        "user_emotion": user_feelings,
        "positive": user_positive_words,
        "negative": user_negative_words,
        "main_vibe": main_vibe
    }
    return json.dumps(json_to_bot)