def sent_anly_prediction():
    if request.method == 'POST':
        text = request.form['text']
        sentiment = ''
        max_review_length = 500
        print(text + "\n")
        start = time.time()
        Util = util()
        end2 = time.time()  # total time taken
        print(f"Runtime of util is {end2 - start}")
        t = [Util.text_util_final(text)]
        end3 = time.time()  # total time taken
        print(f"Runtime of text_util_final is {end3 - start}")
        dict_tfidf = Dict_Tfidf(PATH)
        end4 = time.time()  # total time taken
        print(f"Runtime of Dict_Tfidf is {end4 - start}")
        vectorizer = dict_tfidf.create_dict_tfidf()
        end5 = time.time()  # total time taken
        print(f"Runtime of create_dict_tfidf is {end5 - start}")
        vector_tfidf = vectorizer.transform(t)
        end6 = time.time()  # total time taken
        print(f"Runtime of transform is {end6 - start}")
        label = model.predict(vector_tfidf)
        class1 = str(label[0])
        end7 = time.time()  # total time taken
        print(f"Runtime of predict is {end7 - start}")
        end = time.time()  # total time taken
        print(f"Runtime of the program is {end - start}")
        print(class1 + "\n")
        # If the predicted label is "1" the review is negative (sad emoji);
        # otherwise it is positive (smiling emoji).
        if class1 == "1":
            sentiment = 'Negative'
            img_filename = os.path.join(app.config['UPLOAD_FOLDER'], 'Sad_Emoji.png')
        else:
            sentiment = 'Positive'
            img_filename = os.path.join(app.config['UPLOAD_FOLDER'], 'Smiling_Emoji.png')
        print(sentiment)
        # Append the predicted class and the input text to a CSV file.
        with open("output/output_sentiment_analysis.csv", 'a', newline='',
                  encoding="utf8", errors="ignore") as out:
            fieldnames = ['class', 'content']
            writer = csv.DictWriter(out, fieldnames)
            writer.writeheader()
            writer.writerow({'class': sentiment, 'content': text.strip("\n")})
        return render_template('home.html', text=text, sentiment=sentiment, image=img_filename)
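# A minimal sketch of the Flask wiring that sent_anly_prediction() depends on.
# The static folder, dictionary path, and URL rule are assumptions for
# illustration; only './models/best_model.pkl' is taken from classify() below.
import os
import csv
import time
import joblib
from flask import Flask, request, render_template
# from preprocessing import util, Dict_Tfidf   # assumed project-local modules

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = os.path.join('static', 'images')  # assumed emoji folder
PATH = './data/dictionary.csv'                                  # assumed dictionary file
model = joblib.load('./models/best_model.pkl')

# Expose the handler for POST requests; the original app presumably serves
# home.html on a separate GET route.
app.add_url_rule('/predict', 'sent_anly_prediction', sent_anly_prediction, methods=['POST'])

if __name__ == '__main__':
    app.run(debug=True)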
def classify():
    ap = argparse.ArgumentParser()
    ap.add_argument("-t", "--text", help="enter a text passage")
    args = vars(ap.parse_args())
    text = args["text"]
    Util = util()
    text = Util.text_util_final(text)
    # Reviews containing "nhưng"/"nhung" ("but") are handed to the exception module.
    if (text.find("nhưng", 0, len(text)) > -1
            or text.find("nhung", 0, len(text)) > -1):
        exception.exist_nhung(text)
        exit(0)
    # Reviews containing "được mỗi" / "được cái" ("the only good thing is ...")
    # are treated as negative.
    if (text.find("được mỗi", 0, len(text)) > -1
            or text.find("được cái", 0, len(text)) > -1):
        print("Day la binh luan tieu cuc!")  # "This is a negative comment!"
        exit(0)
    text = [Util.text_util_final(text)]
    dict_tfidf = Dict_Tfidf(PATH)
    vectorizer = dict_tfidf.create_dict_tfidf()
    vector_tfidf = vectorizer.transform(text)
    model = joblib.load('./models/best_model.pkl')
    label = model.predict(vector_tfidf)
    if label[0] == 1:
        print("Day la binh luan tieu cuc!")  # "This is a negative comment!"
    else:
        print("Day la binh luan tich cuc!")  # "This is a positive comment!"
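# Example command-line invocation of classify(), assuming the function lives in
# a script named classify.py (the file name is an assumption for illustration):
#   python classify.py --text "<a Vietnamese review to classify>"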
def create_dict_tfidf(self):
    start8 = time.time()
    ds = DataSource()
    dict_data = pd.DataFrame(ds.load_data(self.path)).review
    dict_data = dict_data.tolist()
    Util = util()
    A = []
    for i in range(len(dict_data)):
        text = dict_data[i]
        text = Util.text_util_final(text)
        A.append(text)
    end8 = time.time()  # total time taken
    print(f"Runtime of preprocess is {end8 - start8}")
    vectorizer = TfidfVectorizer(max_features=100000, ngram_range=(1, 3))
    end9 = time.time()  # total time taken
    print(f"Runtime of tfidfvectorizer is {end9 - start8}")
    vectorizer.fit(A)
    end10 = time.time()  # total time taken
    print(f"Runtime of fit is {end10 - start8}")
    return vectorizer
def application(environ, start_response):
    # Set up initial parameters and utility instances
    request = webob.Request(environ)
    params = json.loads(request.body) if request.body else {
        'username': '******'
    }  # Default to USDI
    utl = utils.util(environ, start_response)
    try:
        # Authorize the Twitter API `auth` object using OAuth tokens
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        # Set the `auth` object for tweepy's API
        twitter_api = tweepy.API(auth)
        # Perform a search
        ## tweets = twitter_api.search(q='from%3A{usernm}%20OR%20%23{usernm}%20OR%20%40{usernm}'
        ##                             .format(usernm=params['username']))  # from:nasa OR #nasa OR @nasa
        tweets = twitter_api.search(
            q='from%3A{usernm}'.format(usernm=params['username']))  # from:interior
        # Iterate over the first 6 tweets and add information to the response payload
        payload = []
        for tweet in tweets[:6]:
            tweet_props = {
                "text": tweet.text,  # Tweet contents
                "time": tweet.created_at.strftime(
                    "%-m/%-d/%y at %I:%M %p"),  # Datetime formatted
                "num_words": len(tweet.text.split())  # Number of words, split on whitespace
            }
            payload.append(tweet_props)
        # Log a successful result
        res = True
    except Exception as e:
        res = False
        err_texts = []
        excptn, err_msg, tb = sys.exc_info()
        for file_name, line_no, func_name, text in traceback.extract_tb(tb):
            err_text = '{} (LINE {}) in {}() -> `{}`'.format(
                os.path.basename(file_name), line_no, func_name, text)
            err_texts.append(err_text)
        err_texts.insert(0, '{} {}'.format(e, type(e)))  # str(e) instead of e.message for Python 3
        tb_error = '\n'.join(err_texts)
    finally:
        data = collections.OrderedDict()
        data['result'] = str(res)
        data['traceback'] = tb_error if not res else None  # Comment out in production
        if res:
            data['payload'] = payload
        resp = json.dumps(data)
        return utl.send_data(resp)
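# A quick way to exercise the WSGI application above locally; a minimal sketch,
# assuming the OAuth credentials (consumer_key, consumer_secret, access_token,
# access_secret) and the utils module are already defined at module level.
# wsgiref is in the standard library, so no extra dependency is needed.
from wsgiref.simple_server import make_server

if __name__ == '__main__':
    with make_server('', 8000, application) as httpd:
        print('Serving on http://localhost:8000 ...')
        httpd.serve_forever()

# Example request (the JSON body mirrors what application() reads):
#   curl -X POST -d '{"username": "nasa"}' http://localhost:8000/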
def create_dict_tfidf(self):
    ds = DataSource()
    dict_data = pd.DataFrame(ds.load_data(self.path)).review
    dict_data = dict_data.tolist()
    Util = util()
    A = []
    for i in range(len(dict_data)):
        text = dict_data[i]
        text = Util.text_util_final(text)
        A.append(text)
    vectorizer = TfidfVectorizer(max_features=100000, ngram_range=(1, 3))
    vectorizer.fit(A)
    return vectorizer
def create_tfidf_vector(path):
    dict_tfidf = Dict_Tfidf(PATH)
    vectorizer = dict_tfidf.create_dict_tfidf()
    # Load the data
    ds = DataSource()
    train_data = pd.DataFrame(ds.load_data(path))
    x_train = train_data.review
    y_train = train_data.label
    # Normalize the data
    x_train = x_train.tolist()
    Util = util()
    A = []
    for i in range(len(x_train)):
        text = x_train[i]
        text = Util.text_util_final(text)
        A.append(text)
    # Convert to tf-idf vectors
    x_train_tfidf = vectorizer.transform(A)
    return x_train_tfidf, y_train
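# A minimal sketch of how create_tfidf_vector() could feed model training. The
# choice of LogisticRegression, the train/test paths, and the accuracy check are
# assumptions for illustration; only the './models/best_model.pkl' filename is
# taken from classify() above.
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_and_save(train_path, test_path, model_path='./models/best_model.pkl'):
    # Vectorize the training and test sets with the shared tf-idf dictionary.
    x_train_tfidf, y_train = create_tfidf_vector(train_path)
    x_test_tfidf, y_test = create_tfidf_vector(test_path)
    # Fit a simple linear classifier on the tf-idf features.
    clf = LogisticRegression(max_iter=1000)
    clf.fit(x_train_tfidf, y_train)
    # Report held-out accuracy and persist the model for classify() to load.
    print('Test accuracy:', accuracy_score(y_test, clf.predict(x_test_tfidf)))
    joblib.dump(clf, model_path)
    return clf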
def __init__(self, CV_fold=5, produceResult=False, forceRegen=False):
    self.forceRegen = forceRegen
    if self.forceRegen:
        print('Dataset force regeneration is active, regenerating all datasets')
    else:
        print('Dataset force regeneration deactivated, using existing datasets')
    # Create working directories
    self.dirCreate()
    # Instantiate utility class
    self.utils = util()
    self.readData()
    self.raw_standarization()
    self.class_split()
    if produceResult or self.forceRegen:
        self.correlationMx()
        self.CV_feature_plot()
        self.feature_selection()
        self.CV_datasetGen(fold=CV_fold, pca_type='Vani', test_ratio=0.2)
        self.CV_datasetGen(fold=CV_fold, pca_type='Ker', test_ratio=0.2)
def __init__(self, bot):
    self.bot = bot
    self.db = ub.DBUtils()
    self.u = ii.util(self.bot)
    self.client = api.Client(None)
    self.utils = ii.util(self.bot)