def text_processing(post):
    """Return a copy of ``post`` enriched with sentiment and classification.

    Prints a start message containing ``post['tag']``. The post's
    ``cleanedText`` is always analysed; ``cleanedImgText`` is analysed as
    well when the post carries a positive ``imgTextLength``.
    """
    print('Start ' + post['tag'])

    enriched = {
        **post,
        'textSentiment': sentiment(post['cleanedText']),
        'textClassification': classify(post['cleanedText']),
    }

    # Image text is only analysed when some was actually extracted.
    has_img_text = 'imgTextLength' in post and post['imgTextLength'] > 0
    if has_img_text:
        enriched['imgSentiment'] = sentiment(post['cleanedImgText'])
        enriched['imgClassification'] = classify(post['cleanedImgText'])

    return enriched
def get_sentiments(df):
    """Fill a ``sentiment`` column on ``df`` in place and return ``df``.

    Each row's ``message`` is passed to ``sentiment``; rows whose message
    is null receive the literal string ``'None'`` instead.
    """
    for idx, record in df.iterrows():
        text = record['message']
        df.loc[idx, 'sentiment'] = 'None' if pd.isnull(text) else sentiment(text)
    return df
def sentiment(request):
    """Run bank-domain sentiment tagging on the ``text`` field of a JSON body.

    The request body is decoded as UTF-8 JSON and its ``"text"`` entry is
    passed to ``uts.sentiment(..., domain="bank")``.

    Returns:
        ``JsonResponse({"output": tags})`` on success, or
        ``JsonResponse({"error": "Bad request!"})`` when decoding, parsing,
        the key lookup, or the tagging call raises.
    """
    try:
        data = json.loads(request.body.decode("utf-8"))
        result = {"output": uts.sentiment(data["text"], domain="bank")}
    except Exception:
        # Deliberately broad (best-effort endpoint), but unlike a bare
        # ``except:`` this lets SystemExit / KeyboardInterrupt propagate.
        result = {"error": "Bad request!"}
    return JsonResponse(result)
def underthesea_annotate(self, text, mode):
    """Apply the annotator selected by ``mode`` to ``text``.

    Supported modes: ``sent_tokenize``, ``word_tokenize``, ``pos_tag``,
    ``chunk``, ``ner``, ``classify`` and ``sentiment``.

    Raises:
        Exception: if ``mode`` is not one of the supported modes.
    """
    # Lambdas keep each annotator lookup lazy: only the requested one is
    # resolved and called.
    handlers = {
        'sent_tokenize': lambda: sent_tokenize(text),
        'word_tokenize': lambda: word_tokenize(text),
        'pos_tag': lambda: pos_tag(text),
        'chunk': lambda: chunk(text),
        'ner': lambda: ner(text),
        'classify': lambda: classify(text),
        'sentiment': lambda: sentiment(text),
    }
    try:
        handler = handlers[mode]
    except (KeyError, TypeError):
        # TypeError covers unhashable modes, which are equally "wrong".
        raise Exception("Wrong request, please check your request") from None
    return handler()
def predict(sent):
    """Predict the sentiment of ``sent`` from an ensemble of models.

    Class indices are 0: NEG, 1: NEU, 2: POS.

    The outputs of ``bert_predict``, ``xgboost_model`` and ``svm_model`` are
    summed and weighted by 0.5, then a one-hot vote derived from
    ``sentiment(sent)`` is added. A model that raises contributes a zero
    vector, as does a ``sentiment`` result that is neither ``'negative'``
    nor ``'positive'``.

    Returns:
        ``"negative"``, ``"neutral"`` or ``"positive"``.
    """
    labels = ("negative", "neutral", "positive")

    try:
        bert_out = bert_predict(sent)[0]
    except Exception:
        bert_out = np.zeros((3,))

    try:
        xgb_out = xgboost_model.predict_proba([sent])[0]
    except Exception:
        xgb_out = np.zeros((3,))

    try:
        svm_out = svm_model.predict_proba([sent])[0]
    except Exception:
        svm_out = np.zeros((3,))

    try:
        verdict = sentiment(sent)
    except Exception:
        verdict = None

    if verdict == 'negative':
        underthesea_out = np.array([1, 0, 0])
    elif verdict == 'positive':
        underthesea_out = np.array([0, 0, 1])
    else:
        # Any other result (e.g. None) casts no vote; adding the raw value
        # to the score vector would raise.
        underthesea_out = np.zeros((3,))

    final = (bert_out + xgb_out + svm_out) * 0.5 + underthesea_out
    label = int(np.argmax(final))
    assert label in range(3), "ensemble score vector must have exactly 3 classes"
    return labels[label]
def _collect_entities(ner_rows):
    """Group NER rows into ``{entity_type: [entity_text, ...]}``.

    Each row is indexed as ``row[0]`` (token text) and ``row[3]`` (tag).
    A tag is either ``'O'`` or ``'<prefix>-<type>'``. An ``I`` tag with the
    same type as the entity being built extends it; any other non-``O`` tag
    starts a new entity.
    """
    entities = {}
    pending_tokens = []
    pending_type = ''

    for row in ner_rows:
        label = str(row[3]).split('-')
        is_entity = label[0] != 'O'
        continues = (
            is_entity
            and label[0] == 'I'
            and bool(pending_tokens)
            and label[1] == pending_type
        )

        # Close the entity being built as soon as the current row does not
        # extend it, so adjacent entities are not merged or lost.
        if pending_tokens and not continues:
            entities.setdefault(pending_type, []).append(' '.join(pending_tokens))
            pending_tokens = []

        if is_entity:
            pending_tokens.append(str(row[0]))
            pending_type = label[1]

    # An entity that runs up to the last token still has to be recorded.
    if pending_tokens:
        entities.setdefault(pending_type, []).append(' '.join(pending_tokens))

    return entities


def underthesea_prc(text):
    """Run POS tagging, NER, classification and sentiment on ``text``.

    Named entities are grouped by type and rendered as one
    ``"<type>: <entity>, <entity>"`` string per type. The first
    classification label has underscores replaced by spaces and is passed
    through ``ViUtils.add_accents``.

    Returns:
        Whatever ``underthesea_text_result`` builds from the POS tags, the
        entity strings, the classification text and the sentiment result.
    """
    pos_tags = pos_tag(text)
    entities = _collect_entities(ner(text))

    ner_text = [
        key + ": " + ", ".join(values) for key, values in entities.items()
    ]

    classify_result = ViUtils.add_accents(
        (classify(text)[0]).replace('_', ' '))
    sentiment_result = sentiment(text)
    return underthesea_text_result(pos_tags, ner_text, classify_result,
                                   sentiment_result)
def post(self):
    """Detect the language of the ``content`` argument and label its sentiment.

    Content detected as ``"vi"`` is labelled by ``sentiment``. Any other
    language is scored with ``sid.polarity_scores`` and labelled
    ``'positive'`` when the ``pos`` or ``neu`` score exceeds 0.5, otherwise
    ``'negative'``.

    Returns:
        A dict with ``status`` 200, the parsed ``params`` and a ``result``
        holding the ``label`` and detected ``language``.
    """
    # NOTE(review): the argument is registered on the shared parser on every
    # call - confirm the parser tolerates repeated registration.
    parser.add_argument('content', type=str)
    args = parser.parse_args()

    language = detect(args.content)
    if language == "vi":
        label = sentiment(args.content)
    else:
        scores = sid.polarity_scores(args.content)
        not_negative = scores["pos"] > 0.5 or scores["neu"] > 0.5
        label = 'positive' if not_negative else 'negative'

    return {
        'status': 200,
        'params': args,
        'result': {
            'label': label,
            'language': language,
        },
    }
from underthesea import sentiment

# Run the sentiment analyser on one sample Vietnamese sentence and print
# whatever it returns.
comment = sentiment(
    'hàng kém chất lg,chăn đắp lên dính lông lá khắp người. thất vọng'
)
print(comment)
def process(self, message, **kwargs):
    """Attach the sentiment of ``message.text`` to ``message`` as an entity.

    The sentiment result is converted with ``self.convert_to_rasa`` using a
    fixed confidence of 0.5 and stored as the sole item of the message's
    ``"entities"`` with ``add_to_output=True``.
    """
    fixed_confidence = 0.5
    label = sentiment(message.text)
    message.set(
        "entities",
        [self.convert_to_rasa(label, fixed_confidence)],
        add_to_output=True,
    )
def sentimentAnalysis():
    """Return the sentiment of the ``text`` query parameter of the request.

    The parameter value is converted with ``str`` before analysis.
    """
    # NOTE(review): when ``text`` is absent, ``str(None)`` means the literal
    # string "None" is analysed - confirm this is acceptable to callers.
    return sentiment(str(request.args.get('text')))
import os

import pandas as pd
from tqdm import tqdm
from underthesea import sentiment

# Input and output folders live under the parent of the current working
# directory.
main_dir = os.path.dirname(os.getcwd())
data_dir = os.path.join(main_dir, 'data')
submiss_dir = os.path.join(main_dir, 'submission')

test_data = pd.read_csv(os.path.join(data_dir, 'test.csv'), index_col=0)
submiss = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

# Label every test text with the underthesea sentiment analyser. Bracket
# indexing assigns the column even if the sample file lacks a ``label``
# column.
submiss['label'] = [
    sentiment(text)
    for text in tqdm(test_data['text'], position=0, leave=False)
]

# Encode the labels numerically; any result outside this mapping becomes NaN.
map_sentiment = {'positive': 0, 'negative': 1}
submiss['label'] = submiss['label'].map(map_sentiment)

# Create the output folder first so that to_csv cannot fail on a missing
# directory.
os.makedirs(submiss_dir, exist_ok=True)
submiss.to_csv(os.path.join(submiss_dir, 'underthesea.csv'), index=False)
# 0.88966  (NOTE(review): presumably the score this submission achieved - confirm)