def __init__(self, with_translation_support=False):
    """Create the sentiment back-ends used by this object.

    Instantiates a ``SentimentClassifier``, an ``Afinn`` scorer configured
    with ``language='es'`` and a ``SentimentIntensityAnalyzer``, then calls
    ``self._download_polyglot_languages()``.

    Args:
        with_translation_support: When truthy, a ``translate.Client()`` is
            also created and stored in ``self.translator``. When falsy this
            method does not assign ``self.translator`` at all.
    """
    self.sp_classifier = SentimentClassifier()
    self.af_classifier = Afinn(language='es')
    self.vader_classifier = SentimentIntensityAnalyzer()
    self._download_polyglot_languages()
    # The translation client is optional and only built on request.
    if with_translation_support:
        self.translator = translate.Client()
def evalArray(array, process_id, ret):
    """Score every tweet in ``array`` and publish the list under ``process_id``.

    Args:
        array: Iterable of tweet mappings. The text is read from
            ``tweet['extended_tweet']['full_text']`` when both keys exist,
            otherwise from ``tweet['text']``.
        process_id: Key under which the results are stored in ``ret``; it is
            also included in the progress messages.
        ret: Mapping that receives the results (``ret[process_id] = [...]``).

    Each result is a dict ``{'value': <prediction>, 'text': <text>}``.
    A progress line is printed on every 50th tweet.
    """
    classifier = SentimentClassifier()
    scored = []
    for count, tweet in enumerate(array, start=1):
        has_full_text = ('extended_tweet' in tweet
                         and 'full_text' in tweet['extended_tweet'])
        text = (tweet['extended_tweet']['full_text'] if has_full_text
                else tweet['text'])
        if count % 50 == 0:
            print('Process ' + str(process_id) + ' Has predicted '
                  + str(count) + ' Tweets')
        scored.append({'value': classifier.predict(text), 'text': text})
    ret[process_id] = scored
def get_position(self, text):
    """Map ``text`` to a stance of 1, 0 or -1.

    The text is lower-cased first. When the module-level ``TOPICO`` equals
    ``"politica"``, lines starting with an http(s) URL are stripped and the
    score comes from a fresh ``SentimentClassifier``; otherwise the score is
    TextBlob's polarity.

    Returns:
        1 when the score is above 0.55, -1 when it is below 0.35, else 0.
    """
    lowered = text.lower()
    if TOPICO == "politica":
        cleaned = re.sub(r'^https?:\/\/.*[\r\n]*', '', lowered,
                         flags=re.MULTILINE)
        score = SentimentClassifier().predict(cleaned)
    else:
        # NOTE(review): the same 0.35/0.55 cut-offs are applied to TextBlob's
        # polarity; confirm they are intended for that scale as well.
        score = TextBlob(lowered).sentiment.polarity

    if score > .55:
        return 1
    if score < .35:
        return -1
    return 0
def extractTweets():
    """Search Twitter for "corrupcion" and store geo-tagged hits in MongoDB.

    Up to 2000 non-retweet results for the query ``corrupcion`` within
    500 km of (-0.225219, -78.5248) are fetched in extended mode. Every
    tweet's text is printed; tweets whose ``place`` is set are additionally
    dumped as JSON, scored with ``SentimentClassifier.predict`` and inserted
    into the ``collectionTweets`` collection of the ``Tweets`` database.

    Credentials are read from the environment instead of being embedded in
    the source (secrets committed to a repository must be considered leaked
    and should be rotated):

        MONGODB_URI, TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
        TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET

    Raises:
        RuntimeError: If any of the variables above is unset or empty.
    """
    import os  # local import so the block does not depend on file-level imports

    print('Preparando para extraer Tweets...')

    required = (
        'MONGODB_URI',
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )
    # Fail fast, before any network connection is attempted.
    missing = [name for name in required if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            'Missing required environment variables: ' + ', '.join(missing))

    my_client = pymongo.MongoClient(os.environ['MONGODB_URI'])
    my_collection = my_client.Tweets.collectionTweets

    clf = SentimentClassifier()

    auth = tweepy.OAuthHandler(os.environ['TWITTER_CONSUMER_KEY'],
                               os.environ['TWITTER_CONSUMER_SECRET'])
    auth.set_access_token(os.environ['TWITTER_ACCESS_TOKEN'],
                          os.environ['TWITTER_ACCESS_TOKEN_SECRET'])
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    cursor = tweepy.Cursor(api.search,
                           q="corrupcion -filter:retweets",
                           geocode="-0.225219,-78.5248,500km",
                           tweet_mode="extended")
    for tweet in cursor.items(2000):
        data = tweet._json
        full_text = data["full_text"]
        print(full_text, "\n")

        place = data["place"]
        if place is None:
            # Only tweets carrying place information are stored.
            continue

        print("**************Con coordenadas**********\n")
        print(json.dumps(data, indent=2), "\n\n")

        coordenadas = place["bounding_box"]["coordinates"]
        ciudad = str(place["name"])
        sentimiento = clf.predict(full_text)
        my_collection.insert_one({
            "text": full_text,
            "location": {
                "ciudad": ciudad,
                # One of the first four bounding-box points, chosen at random.
                "coordinates": coordenadas[0][random.randint(0, 3)]
            },
            "created_at": data["created_at"],
            "sentimiento": sentimiento
        })
def classify(msgList, results):
    """Classify ``msgList`` in parallel, one worker process per CPU core.

    One ``SentimentClassifier`` is created per core. The message list is cut
    into ``cores`` contiguous index ranges and each range is handed to
    ``compute`` in its own process together with ``results``, a shared lock
    and that worker's classifier. The function returns only after every
    worker has finished.

    Args:
        msgList: Sequence of messages to classify.
        results: Container passed through to ``compute`` for the output.

    Fixes over the previous version:
        * The worker processes are now joined. Previously an unrelated,
          unused ``multiprocessing.Pool`` was created and joined instead, so
          the function could return while workers were still running.
        * The last worker's range extends to the end of ``msgList``, so the
          ``len(msgList) % cores`` trailing messages are no longer dropped.
    """
    cores = multiprocessing.cpu_count()

    classifiers = []
    for c in range(cores):
        classifiers.append(SentimentClassifier())
        print(str(c) + " clf initialized")

    mutex = Lock()

    # Divide the list; worker c uses classifiers[c].
    sep = len(msgList) // cores
    workers = []
    for c in range(cores):
        first = c * sep
        if c == cores - 1:
            # Assumes ``compute`` treats ``last`` as an inclusive index, as
            # implied by the ``first + sep - 1`` bound used for the others.
            last = len(msgList) - 1
        else:
            last = first + sep - 1
        print("Calling " + str(c))
        p = multiprocessing.Process(
            target=compute,
            args=(c, msgList, results, mutex, first, last, classifiers[c]))
        p.start()
        workers.append(p)

    # Wait for every worker before returning to the caller.
    for p in workers:
        p.join()
class Feel():
    """Label text as 'Positivo', 'Negativo' or 'Neutro'.

    English text is scored with VADER and TextBlob (``get_En``); Spanish text
    is scored with ``SentimentClassifier`` (``get_Es``).
    """

    # One VADER analyzer shared by all instances, built when the class is defined.
    sid = SentimentIntensityAnalyzer()

    def __init__(self):
        self.clf = SentimentClassifier()

    def getFeel(self, resultado):
        """Return the label for a score using the +/-0.4 cut-offs.

        Scores above 0.4 are 'Positivo', scores below -0.4 are 'Negativo',
        everything else is 'Neutro'.
        """
        if resultado > 0.4:
            return 'Positivo'
        if resultado < -0.4:
            return 'Negativo'
        return 'Neutro'

    def get_En(self, texto):
        """Label English text.

        The score is the mean of VADER's ``compound`` value and TextBlob's
        polarity for ``texto``; per the original author's note the score lies
        in [-1, 1], where -1 is negative, 0 neutral and 1 positive. The mean
        is then labelled with :meth:`getFeel`.

        The previous implementation could never return: it called
        ``TextBlob()`` without the text and compared a list with a float,
        both of which raise ``TypeError``.
        """
        vader_score = self.sid.polarity_scores(texto)['compound']
        blob_score = TextBlob(texto).sentiment.polarity
        return self.getFeel((vader_score + blob_score) / 2)

    def get_Es(self, texto):
        """Label Spanish text.

        Per the original author's note the classifier returns a value in
        [0, 1], where 0 is negative, 0.5 neutral and 1 positive. Values above
        0.6 are 'Positivo', values below 0.4 are 'Negativo', everything else
        is 'Neutro'.
        """
        resultado = self.clf.predict(texto)
        if resultado > 0.6:
            return 'Positivo'
        if resultado < 0.4:
            return 'Negativo'
        return 'Neutro'
import pandas as pd import numpy as np # sentiment library + related # language detection.. could be korean for al we know from language import cleaning # this should create a csv of the casified text from classifier import SentimentClassifier clf = SentimentClassifier() def sentiment(df): df['sentiment'] = df.query("lang_langdetect == 'es'")['tweets'].apply( clf.predict) # df['sentiment_classification'] = df['sentiment'].apply(sentiment_split) return df # twitter api module. import tweepy from important import access_toekn, access_token_secret, consumer_key, consumer_secret def increment_(frame): temp_list = [] for i in frame['sentiment']: if i >= .6: temp_list.append(i) elif i <= .6 and i >= .31: temp_list.append(i) else:
class SentimentAnalyzer:
    """Combine several sentiment libraries into one averaged score.

    Polyglot is applied to every supported language. VADER is added for
    English; the Spanish ``SentimentClassifier`` and Afinn are added for
    Spanish. Each analyzer's score is squashed with ``tanh`` (VADER's is used
    as-is) and the final score is the mean over the analyzers that ran.
    """

    supported_languages = ['es', 'ca', 'eu', 'an', 'ast', 'gl', 'pt', 'en']
    # Back-ends default to None at class level; __init__ fills them in.
    sp_classifier = af_classifier = translator = vader_classifier = None

    def __init__(self, with_translation_support=False):
        """Build the analyzers and make sure the polyglot resources exist.

        Args:
            with_translation_support: When truthy, also create a
                ``translate.Client()``; otherwise ``translator`` stays None.
        """
        self.sp_classifier = SentimentClassifier()
        self.af_classifier = Afinn(language='es')
        self.vader_classifier = SentimentIntensityAnalyzer()
        self._download_polyglot_languages()
        if with_translation_support:
            self.translator = translate.Client()

    def _download_polyglot_languages(self):
        """Download the polyglot ``sentiment2`` resource of each supported language.

        Only resources that are not installed yet are downloaded.
        """
        for lang in self.supported_languages:
            lang_resource = 'sentiment2.{}'.format(lang)
            if not downloader.is_installed(lang_resource):
                # Fetch the resource that was actually found missing; the
                # previous code always downloaded 'sentiment2.es' here.
                downloader.download(lang_resource)

    def normalize_score(self, score):
        """Squash ``score`` with the hyperbolic tangent.

        Returns:
            A float between -1 and 1.
        """
        return math.tanh(score)

    def analyze_sentiment(self, text, language):
        """Score ``text`` with every analyzer available for ``language``.

        Args:
            text: The text to analyze.
            language: Language code; must be in ``supported_languages``.

        Returns:
            None when the language is unsupported. Otherwise a dict holding
            the raw score of each analyzer that ran, under the keys
            ``sentiment_score_vader`` (English only),
            ``sentiment_score_polyglot``, ``sentiment_score_sentipy`` and
            ``sentiment_score_affin`` (Spanish only), plus
            ``sentiment_score``: the mean of the normalized scores, or None
            if no analyzer ran.
        """
        if language not in self.supported_languages:
            logging.info('Language {} not supported! Currently supported '
                         'languages are: {}'.format(language,
                                                    self.supported_languages))
            return None

        sentiment_dict = {}
        num_applied_analyzers = 0
        total_scores = 0.0

        # Apply Vader analyzer (its compound score is used without tanh).
        if language == 'en':
            va_sentiment_score = self.analyze_sentiment_vader(text)
            total_scores += va_sentiment_score
            num_applied_analyzers += 1
            sentiment_dict['sentiment_score_vader'] = va_sentiment_score

        # Apply Polyglot analyzer: mean word polarity, best effort.
        pg_text = Text(text, hint_language_code=language)
        try:
            word_scores = [w.polarity for w in pg_text.words]
            pg_sentiment_score = sum(word_scores) / float(len(word_scores))
            n_pg_sentiment_score = self.normalize_score(pg_sentiment_score)
            total_scores += n_pg_sentiment_score
            num_applied_analyzers += 1
            sentiment_dict['sentiment_score_polyglot'] = pg_sentiment_score
        except Exception:
            # Polyglot stays optional (e.g. no words -> division by zero),
            # but the failure is recorded instead of vanishing silently, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.debug('Polyglot sentiment analysis skipped for '
                          'language %s', language, exc_info=True)

        # For spanish language
        if language == 'es':
            # Apply Sentipy analyzer
            # NOTE(review): confirm the classifier's output range is meant to
            # be averaged with the other scores after tanh.
            sp_sentiment_score = self.sp_classifier.predict(text)
            sentiment_dict['sentiment_score_sentipy'] = sp_sentiment_score
            n_sp_sentiment_score = self.normalize_score(sp_sentiment_score)
            total_scores += n_sp_sentiment_score
            num_applied_analyzers += 1

            # Apply Affin analyzer; the score is divided by len(text),
            # i.e. the number of characters.
            af_sentiment_score = self.af_classifier.score(text)
            if len(text) > 0:
                af_sentiment_score = af_sentiment_score / len(text)
            else:
                af_sentiment_score = 0
            sentiment_dict['sentiment_score_affin'] = af_sentiment_score
            n_af_sentiment_score = self.normalize_score(af_sentiment_score)
            total_scores += n_af_sentiment_score
            num_applied_analyzers += 1

        # Compute final score
        if num_applied_analyzers > 0:
            sentiment_dict['sentiment_score'] = \
                total_scores / num_applied_analyzers
        else:
            sentiment_dict['sentiment_score'] = None
        return sentiment_dict

    def translate_text(self, text, source_lang='es', target_lang='en'):
        """Translate ``text`` with ``self.translator`` and return the result.

        Requires the instance to have been built with
        ``with_translation_support=True``; otherwise ``translator`` is None.
        """
        translation_obj = self.translator.translate(
            text, source_language=source_lang, target_language=target_lang)
        return translation_obj['translatedText']

    def analyze_sentiment_vader(self, text, language=None,
                                need_translation=False):
        """Return VADER's ``compound`` score for ``text``.

        When ``need_translation`` is truthy and ``language`` is given, the
        text is first passed through :meth:`translate_text` with ``language``
        as the source language.
        """
        if need_translation and language:
            text = self.translate_text(text, language)
        vader_score = self.vader_classifier.polarity_scores(text)
        return vader_score['compound']
regrex_pattern = re.compile(pattern = "[" u"\U0001F600-\U0001F64F" # emoticons u"\U0001F300-\U0001F5FF" # symbols & pictographs u"\U0001F680-\U0001F6FF" # transport & map symbols u"\U0001F1E0-\U0001F1FF" # flags (iOS) "]+", flags = re.UNICODE) txt = regrex_pattern.sub(r'',txt) txt = re.sub("@[\w]*","",txt) txt = re.sub("https?://[A-Za-z0-9./]*","",txt) txt = re.sub("\n","",txt) f.write(txt + '\n') f.close() from classifier import SentimentClassifier sc = SentimentClassifier() ca = [] polarity = [] for i in range(0,17): lugar = bbox['CA'][i] ruta = my_path+lugar+'.txt' f = open(ruta, "r") p = f.read() for j in p.split('\n'): #vamos leyendo cada tweet y vamos analizando sentimientos polarity.append(sc.predict(j)) ca.append(lugar) sentiment = pd.DataFrame([ca,polarity]) sentiment.head()
######################################################################## # COMPLETAR AQUI: Crear conexion a redis y asignarla a la variable "db". ######################################################################## db = redis.Redis(host='redis', port=6379, db=0) # Host: Los servicios están orquestados por docker compose. Y corren en # la misma red docker. Entonces lo apuntamos con el nombre del servicio # definido antes en docker, Otra opción para este ejemplo, localhost # Cambiar según corresponda, para exponer para afuera. ######################################################################## ######################################################################## # COMPLETAR AQUI: Instanciar modelo de análisis de sentimientos. # Use classifier.SentimentClassifier de la libreria # spanish_sentiment_analysis ya instalada ######################################################################## model = SentimentClassifier() ######################################################################## def sentiment_from_score(score): """ Esta función recibe como entrada el score de positividad de nuestra sentencia y dependiendo su valor devuelve uno de las siguientes clases: - "Positivo": Cuando el score es mayor a 0.55. - "Neutral": Cuando el score se encuentra entre 0.45 y 0.55. - "Negativo": Cuando el score es menor a 0.45. Attributes ---------- score : float
# Minimal demo: score one Spanish sentence with SentimentClassifier and
# print the result.
from classifier import SentimentClassifier

clf = SentimentClassifier()

# Three sample sentences; only one of them is scored below.
x = "Esta muy buena esa pelicula"
y = "Que horrible comida!!!"
# NOTE(review): "netural" looks like a typo for "neutral"; confirm before
# changing it, since the string is the classifier's input.
z = "Tuve una experiencia netural"

# The calls for x and y are left disabled; swap the comment markers to try them.
#sentimiento = clf.predict(x)
#sentimiento = clf.predict(y)
sentimiento = clf.predict(z)
print(sentimiento)
def main():
    """Prepare the neutralized data if needed, then train or test a NegativeInducer.

    Steps, in order:
      1. Load the vectorized corpora with ``get_data``.
      2. If ``negative-inducer/NN.csv`` does not exist, train a
         ``SentimentClassifier`` on positive vs. negative examples, run
         ``neutralize`` on the negative examples and save the result there.
      3. Build a ``NegativeInducer`` with a checkpoint manager, optionally
         restoring the latest checkpoint.
      4. Split (neutralized negatives -> original negatives) 80/20 and, under
         the device named by ``args.device``, train and/or test according to
         ``args.mode``.

    Reads the module-level ``args`` (``out_dir``, ``restore_checkpoint``,
    ``mode``, ``device``, ``num_epochs``).

    NOTE(review): this block was re-indented from whitespace-collapsed source;
    in particular, confirm that the evaluation in 'train' mode belongs inside
    the epoch loop rather than after it.
    """
    neg_fp = 'data/sentiment/N.txt'
    neut_neg_fp = 'negative-inducer/NN.csv'
    pos_fp = 'data/sentiment/P.txt'
    sarc_fp = 'data/sentiment/S.txt'
    # ``vocab`` is unpacked here but not used below.
    pos_vec, neg_vec, _, vocab, padding_index = get_data(
        pos_fp, neg_fp, sarc_fp)
    vocab_size = 20001
    if not os.path.exists(neut_neg_fp):
        # Train the classifier and produce the neutralized negatives;
        # this section mirrors the classifier's own preprocessing.
        print("Preprocessing data...")
        # Label positives 1 and negatives 0.
        pos_labels = np.ones(pos_vec.shape[0])
        neg_labels = np.zeros(neg_vec.shape[0])
        inputs = np.concatenate((pos_vec, neg_vec))
        labels = np.concatenate((pos_labels, neg_labels))
        # Save as CSV files, then read them straight back.
        inputs = np.asarray(inputs)
        labels = np.asarray(labels)
        np.savetxt('inputs.csv', inputs, delimiter=',')
        np.savetxt('labels.csv', labels, delimiter=',')
        inputs = np.loadtxt('inputs.csv', delimiter=',')
        labels = np.loadtxt('labels.csv', delimiter=',')
        train_x, test_x, train_y, test_y = sk.train_test_split(inputs, labels, test_size=0.2, random_state=42)
        print("Preprocessing complete.\n")
        neg_vec = np.asarray(neg_vec)
        SCModel = SentimentClassifier(vocab_size)
        print('Training sentiment classifier...')
        classifier_train(SCModel, train_x, train_y)
        print("Training complete.\n")
        # Neutralize the negative examples and cache them on disk so this
        # whole branch is skipped on the next run.
        neutralized = neutralize(SCModel, neg_vec)
        np.savetxt(neut_neg_fp, neutralized, delimiter=',')
    model = NegativeInducer(vocab_size)
    # For saving/loading models
    checkpoint_dir = 'negative-inducer/checkpoints'
    # ``checkpoint_prefix`` is computed but not used later in this function.
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=3)
    # Ensure the output directory exists
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    if args.restore_checkpoint or args.mode == 'test':
        # Restore the latest checkpoint known to the manager.
        checkpoint.restore(manager.latest_checkpoint)
    # Init data for the actual train/test: inputs are the neutralized
    # negatives, labels are the original negatives.
    neut_neg_vec = np.loadtxt(neut_neg_fp, delimiter=',')
    inputs = neut_neg_vec
    labels = neg_vec
    train_x, test_x, train_y, test_y = sk.train_test_split(inputs, labels, test_size=0.2, random_state=42)
    train_x = tf.convert_to_tensor(train_x, dtype=tf.int64)
    test_x = tf.convert_to_tensor(test_x, dtype=tf.int64)
    train_y = tf.convert_to_tensor(train_y, dtype=tf.int64)
    test_y = tf.convert_to_tensor(test_y, dtype=tf.int64)
    try:
        # Run under the device named by args.device; a RuntimeError raised
        # inside this block is printed below instead of propagating.
        with tf.device('/device:' + args.device):
            if args.mode == 'train':
                for epoch in range(0, args.num_epochs):
                    train(model, train_x, train_y, manager, padding_index)
                    print("**** SAVING CHECKPOINT AT END OF EPOCH ****")
                    manager.save()
                    perplexity, per_symbol_accuracy = test(model, test_x, test_y, padding_index)
                    print('**** test perplexity: %g, per_symbol_accuracy**** %g' % (perplexity, per_symbol_accuracy))
            if args.mode == 'test':
                perplexity, per_symbol_accuracy = test(model, test_x, test_y, padding_index)
                print('**** test perplexity: %g, per_symbol_accuracy**** %g' % (perplexity, per_symbol_accuracy))
    except RuntimeError as e:
        print(e)
# Small Flask app: one page with a form whose submitted text is passed to
# SentimentClassifier.get_prediction and rendered back via 'hello.html'.
from classifier import SentimentClassifier
from flask import Flask, request, render_template
from time import time
# NOTE(review): ``open`` is not used in the visible code, and this import
# shadows the built-in ``open`` for the whole module.
from codecs import open

app = Flask(__name__)

# The classifier is built once at import time; the elapsed time is printed.
print('Preparing the classifier...')
stime = time()
classifier = SentimentClassifier()
print('The classifier is ready')
print('That got', round(time() - stime, 2), 'seconds')


@app.route('/', methods=['GET', 'POST'])
def index_page(text='', prediction_message=''):
    """Render the page; on POST, classify the submitted ``text`` field first.

    On GET the template receives the default empty ``text`` and
    ``prediction_message``. On POST both are replaced by the form's ``text``
    value and the classifier's prediction for it.
    """
    if request.method == 'POST':
        text = request.form['text']
        prediction_message = classifier.get_prediction(text)
    return render_template('hello.html', text=text, prediction_message=prediction_message)


# Start Flask's built-in server only when executed as a script.
if __name__ == '__main__':
    app.run()
def __init__(self):
    """Create the instance's ``SentimentClassifier`` and store it as ``self.clf``."""
    self.clf = SentimentClassifier()
def predict_sentiment(tweet: dict):
    """Predict the sentiment of ``tweet['tweet']``.

    A ``SentimentClassifier`` is created from 'sentiment-classifier-v0.pkl'
    on every call and asked to predict a one-element list holding the tweet
    text converted with ``str``.

    Returns:
        ``{'sentiment': <whatever predict returned>}``.
    """
    model = SentimentClassifier('sentiment-classifier-v0.pkl')
    batch = [str(tweet['tweet'])]
    return {'sentiment': model.predict(batch)}
# Minimal demo: print the classifier's prediction for a single word.
from classifier import SentimentClassifier

clf = SentimentClassifier()
# Sample input is a Russian word (roughly "fine" / "okay").
text = 'Нормально'
print(clf.get_prediction(text))