def __init__(self, with_translation_support=False):
    self.sp_classifier = SentimentClassifier()
    self.af_classifier = Afinn(language='es')
    self.vader_classifier = SentimentIntensityAnalyzer()
    self._download_polyglot_languages()
    if with_translation_support:
        self.translator = translate.Client()
Example #2
def evalArray(array, process_id, ret):
    clf = SentimentClassifier()
    tweets = []
    i = 0
    for tweet in array:
        i += 1
        if ('extended_tweet' in tweet
                and 'full_text' in tweet['extended_tweet']):
            text = tweet['extended_tweet']['full_text']
        else:
            text = tweet['text']
        save_tweet = {}
        if i % 50 == 0:
            print('Process ' + str(process_id) + ' has predicted ' + str(i) +
                  ' tweets')
        save_tweet['value'] = clf.predict(text)
        save_tweet['text'] = text
        tweets.append(save_tweet)
    ret[process_id] = tweets
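
# A minimal usage sketch (assumptions: `ret` is a multiprocessing.Manager
# dict shared across workers and `tweets` is a list of tweet JSON objects):
#   manager = multiprocessing.Manager()
#   ret = manager.dict()
#   p = multiprocessing.Process(target=evalArray, args=(tweets, 0, ret))
#   p.start(); p.join()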
Example #3
    def get_position(self, text):
        # Avoid sentiment returning 0
        # return 0
        text = text.lower()

        if TOPICO == "politica":
            text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
            clf = SentimentClassifier()
            value = clf.predict(text)
        else:
            analysis = TextBlob(text)
            value = analysis.sentiment.polarity

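        # Note: clf.predict returns scores in [0, 1], while TextBlob's
        # polarity is in [-1, 1], so the shared thresholds below are
        # effectively much stricter for the TextBlob branch.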
        if value > 0.55:
            return 1
        elif value < 0.35:
            return -1
        else:
            return 0
Example #4
def extractTweets():
    print('Preparing to extract tweets...')
    my_client = pymongo.MongoClient(
        'mongodb+srv://grupo03DB:[email protected]/Tweets?retryWrites=true&w=majority')
    my_database = my_client.Tweets
    my_collection = my_database.collectionTweets

    ckey = "I65HkSxcTY22zgzydGmE14pUi"
    csecret = "4gBAWqmerng8tSWzC6YFqxCBCNv03CatcK1EhZh3YY1jzzxHfr"
    atoken = "1268310592496025600-QOhJRE7KRAJY9rYTb5ldUTeWt8Sgt5"
    asecret = "xW8zFoy2RlrdvmtKabDj2kDDPmBHUXvaR3WEnxnHHq0Nf"
    clf = SentimentClassifier()
    auth = tweepy.OAuthHandler(ckey, csecret)
    auth.set_access_token(atoken, asecret)

    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    for tweet in tweepy.Cursor(api.search, q="corrupcion -filter:retweets",
                               geocode="-0.225219,-78.5248,500km",
                               tweet_mode="extended").items(2000):
        print(tweet._json["full_text"], "\n")

        if tweet._json["place"] is not None:
            print("**************Con coordenadas**********\n")
            print(json.dumps(tweet._json, indent=2), "\n\n")
            # The tweet has coordinates; prepare it for saving.
            decoded = json.loads(json.dumps(tweet._json))
            coordenadas = decoded["place"]["bounding_box"]["coordinates"]
            ciudad = str(decoded["place"]["name"])
            sentimiento = clf.predict(tweet._json["full_text"])
            my_collection.insert_one({
                "text": tweet._json["full_text"],
                "location": {
                    "ciudad": ciudad,
                    "coordinates": coordenadas[0][random.randint(0, 3)]
                },
                "created_at": tweet._json["created_at"],
                "sentimiento": sentimiento
            })
Example #5
def classify(msgList, results):
    cores = multiprocessing.cpu_count()
    classifiers = []
    for c in range(cores):
        classifiers.append(SentimentClassifier())
        print(str(c) + " clf initialized")
    mutex = Lock()

    # Divide the list into one chunk per core and classify each chunk in
    # its own process, each with its own classifier instance.
    sep = len(msgList) // cores
    processes = []
    for c in range(cores):
        first = c * sep
        # The last chunk absorbs the remainder so no messages are dropped.
        last = len(msgList) - 1 if c == cores - 1 else first + sep - 1
        print("Calling " + str(c))
        p = multiprocessing.Process(target=compute,
                                    args=(c, msgList, results, mutex, first,
                                          last, classifiers[c]))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
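
# A minimal sketch of the worker that classify() assumes. `compute` is not
# shown in this example, so the body below is an assumption matching the
# argument tuple passed above:
def compute(proc_id, msgList, results, mutex, first, last, clf):
    # Classify the slice [first, last] and publish it under this process id.
    preds = [clf.predict(msg) for msg in msgList[first:last + 1]]
    with mutex:
        results[proc_id] = preds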
Example #6
class Feel:
    sid = SentimentIntensityAnalyzer()

    def __init__(self):
        self.clf = SentimentClassifier()

    def getFeel(self, resultado):
        return 'Positivo' if resultado > 0.4 else (
            'Negativo' if resultado < -0.4 else 'Neutro')

    def get_En(self, texto):
        """
        Can return values in [-1, 1], closer to:
        -1: negative comments
         0: neutral comments
         1: positive comments
        """
        # Average the VADER compound score and the TextBlob polarity
        # (both in [-1, 1]) and map the result to a label.
        compound = self.sid.polarity_scores(texto)['compound']
        polarity = TextBlob(texto).sentiment.polarity
        return self.getFeel((compound + polarity) / 2)

    def get_Es(self, texto):
        """
        Can return values in [0, 1], closer to:
        0  : negative comments
        0.5: neutral comments
        1  : positive comments
        """
        resultado = self.clf.predict(texto)
        return 'Positivo' if resultado > 0.6 else (
            'Negativo' if resultado < 0.4 else 'Neutro')
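
# A minimal usage sketch (the example sentences are assumptions):
#   feel = Feel()
#   feel.get_Es('Esta muy buena esa pelicula')  # 'Positivo' when score > 0.6
#   feel.get_En('What a terrible movie')        # 'Negativo' when score < -0.4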
Example #7
import pandas as pd
import numpy as np

# sentiment library + related
# language detection... could be Korean for all we know
from language import cleaning  # this should create a csv of the cleaned text
from classifier import SentimentClassifier

clf = SentimentClassifier()


def sentiment(df):
    df['sentiment'] = df.query("lang_langdetect == 'es'")['tweets'].apply(
        clf.predict)
    # df['sentiment_classification'] = df['sentiment'].apply(sentiment_split)
    return df
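
# A minimal usage sketch (assumes df has 'tweets' and 'lang_langdetect'
# columns, as used above):
#   df = sentiment(df)
#   # df['sentiment'] is filled only for rows detected as Spanish.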


# twitter api module.
import tweepy
from important import access_toekn, access_token_secret, consumer_key, consumer_secret


def increment_(frame):
    temp_list = []
    for i in frame['sentiment']:
        if i >= 0.6:
            temp_list.append(i)
        elif 0.31 <= i < 0.6:
            temp_list.append(i)
        else:
            # Assumed completion: drop scores below 0.31.
            continue
    return temp_list
Example #8
class SentimentAnalyzer:

    supported_languages = ['es', 'ca', 'eu', 'an', 'ast', 'gl', 'pt', 'en']
    sp_classifier = af_classifier = translator = vader_classifier = None

    def __init__(self, with_translation_support=False):
        self.sp_classifier = SentimentClassifier()
        self.af_classifier = Afinn(language='es')        
        self.vader_classifier = SentimentIntensityAnalyzer()
        self._download_polyglot_languages()
        if with_translation_support:
            self.translator = translate.Client()

    def _download_polyglot_languages(self):
        for lang in self.supported_languages:
            lang_resource = 'sentiment2.{}'.format(lang)
            if not downloader.is_installed(lang_resource):
                downloader.download(lang_resource)

    def normalize_score(self, score):
        # The hyperbolic tangent squashes the raw score into the open
        # interval (-1, 1).
        return math.tanh(score)
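    # Illustrative values for the normalization above:
    #   math.tanh(0.0)  ->  0.0
    #   math.tanh(1.0)  ->  0.7616
    #   math.tanh(-3.0) -> -0.9951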


    def analyze_sentiment(self, text, language):
        """
        Apply the sentiment analyzers and normalize the results
        so that all scores fall in the standard -1 to 1 range.

        In general, Polyglot is used to compute the sentiment
        score of the text.

        For Spanish, two additional analyzers are applied, and
        the average of the three scores is returned.

        For English, Vader is applied together with Polyglot.
        """

        if language not in self.supported_languages:
            logging.info('Language {} not supported! Currently supported ' \
                         'languages are: {}'.format(language, self.supported_languages))
            return None
        
        sentiment_dict = {}
        num_applied_analyzers = 0
        total_scores = 0.0

        # Apply Vader analyzer
        if language == 'en':
            va_sentiment_score = self.analyze_sentiment_vader(text)
            total_scores += va_sentiment_score
            num_applied_analyzers += 1
            sentiment_dict['sentiment_score_vader'] = va_sentiment_score

        # Apply Polyglot analyzer
        pg_sentiment_score = None
        pg_text = Text(text, hint_language_code=language)
        try:
            word_scores = [w.polarity for w in pg_text.words]
            pg_sentiment_score = sum(word_scores)/float(len(word_scores))
            n_pg_sentiment_score = self.normalize_score(pg_sentiment_score)
            total_scores += n_pg_sentiment_score
            num_applied_analyzers += 1
            sentiment_dict['sentiment_score_polyglot'] = pg_sentiment_score
        except Exception:
            # Polyglot can fail on empty text or missing language resources.
            pass

        # For spanish language 
        if language == 'es':
            # Apply Sentipy analyzer
            sp_sentiment_score = self.sp_classifier.predict(text)
            sentiment_dict['sentiment_score_sentipy'] = sp_sentiment_score
            n_sp_sentiment_score = self.normalize_score(sp_sentiment_score)
            total_scores += n_sp_sentiment_score
            num_applied_analyzers += 1
            # Apply Afinn analyzer, normalizing by text length so that
            # long texts do not accumulate unbounded scores.
            af_sentiment_score = self.af_classifier.score(text)
            if len(text) > 0:
                af_sentiment_score = af_sentiment_score/len(text)
            else:
                af_sentiment_score = 0
            sentiment_dict['sentiment_score_affin'] = af_sentiment_score
            n_af_sentiment_score = self.normalize_score(af_sentiment_score)
            total_scores += n_af_sentiment_score
            num_applied_analyzers += 1
        # Compute final score
        if num_applied_analyzers > 0:
            sentiment_dict['sentiment_score'] = total_scores/num_applied_analyzers
        else:
            sentiment_dict['sentiment_score'] = None
        
        return sentiment_dict

    def translate_text(self, text, source_lang='es', target_lang='en'):
        translation_obj = self.translator.translate(text, 
            source_language=source_lang, 
            target_language=target_lang)
        return translation_obj['translatedText']

    def analyze_sentiment_vader(self, text, language=None, need_translation=False):
        if need_translation and language:
            text = self.translate_text(text, language)
        vader_score = self.vader_classifier.polarity_scores(text)
        return vader_score['compound']
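
# A minimal usage sketch (the example text is an assumption; exact scores
# depend on the installed models):
#   analyzer = SentimentAnalyzer()
#   result = analyzer.analyze_sentiment('Me encanta este lugar', 'es')
#   result['sentiment_score']  # average of the normalized analyzer scores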
      regrex_pattern = re.compile(pattern = "["
          u"\U0001F600-\U0001F64F"  # emoticons
          u"\U0001F300-\U0001F5FF"  # symbols & pictographs
          u"\U0001F680-\U0001F6FF"  # transport & map symbols
          u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                              "]+", flags = re.UNICODE)
      txt = regrex_pattern.sub(r'',txt)
      txt = re.sub("@[\w]*","",txt)
      txt = re.sub("https?://[A-Za-z0-9./]*","",txt)
      txt = re.sub("\n","",txt)    
      f.write(txt + '\n')
      f.close()

from classifier import SentimentClassifier

sc = SentimentClassifier()
ca = []
polarity = []
for i in range(0, 17):
  lugar = bbox['CA'][i]
  ruta = my_path + lugar + '.txt'
  f = open(ruta, "r")
  p = f.read()
  for j in p.split('\n'):
    # Read each tweet and run sentiment analysis on it.
    polarity.append(sc.predict(j))
    ca.append(lugar)

sentiment = pd.DataFrame({'CA': ca, 'polarity': polarity})
sentiment.head()
Example #10
########################################################################
# COMPLETE HERE: Create the redis connection and assign it to "db".
########################################################################
db = redis.Redis(host='redis', port=6379, db=0)
# Host: the services are orchestrated by docker compose and run on the
# same docker network, so we address redis by the service name defined
# earlier in docker. Another option for this example is localhost;
# change it as needed to expose the service externally.
########################################################################

########################################################################
# COMPLETE HERE: Instantiate the sentiment analysis model.
# Use classifier.SentimentClassifier from the already installed
# spanish_sentiment_analysis library.
########################################################################
model = SentimentClassifier()
########################################################################


def sentiment_from_score(score):
    """
    This function takes the positivity score of our sentence
    and, depending on its value, returns one of the following
    classes:
        - "Positivo": when the score is greater than 0.55.
        - "Neutral": when the score is between 0.45 and 0.55.
        - "Negativo": when the score is less than 0.45.

    Attributes
    ----------
    score : float
        Positivity score of the sentence.
    """
    # Body reconstructed from the thresholds in the docstring above.
    if score > 0.55:
        return "Positivo"
    elif score < 0.45:
        return "Negativo"
    return "Neutral"
Example #11
from classifier import SentimentClassifier

clf = SentimentClassifier()

x = "Esta muy buena esa pelicula"

y = "Que horrible comida!!!"

z = "Tuve una experiencia netural"

#sentimiento = clf.predict(x)
#sentimiento = clf.predict(y)
sentimiento = clf.predict(z)

print(sentimiento)
Example #12
def main():
    neg_fp = 'data/sentiment/N.txt'
    neut_neg_fp = 'negative-inducer/NN.csv'
    pos_fp = 'data/sentiment/P.txt'
    sarc_fp = 'data/sentiment/S.txt'

    pos_vec, neg_vec, _, vocab, padding_index = get_data(
        pos_fp, neg_fp, sarc_fp)

    vocab_size = 20001
    if not os.path.exists(neut_neg_fp):
        # train the classifier and output the needed neutral bits
        # literally just copied from classifier

        print("Preprocessing data...")

        pos_labels = np.ones(pos_vec.shape[0])
        neg_labels = np.zeros(neg_vec.shape[0])

        inputs = np.concatenate((pos_vec, neg_vec))
        labels = np.concatenate((pos_labels, neg_labels))

        # save as csv file
        inputs = np.asarray(inputs)
        labels = np.asarray(labels)
        np.savetxt('inputs.csv', inputs, delimiter=',')
        np.savetxt('labels.csv', labels, delimiter=',')

        inputs = np.loadtxt('inputs.csv', delimiter=',')
        labels = np.loadtxt('labels.csv', delimiter=',')

        train_x, test_x, train_y, test_y = sk.train_test_split(inputs,
                                                               labels,
                                                               test_size=0.2,
                                                               random_state=42)

        print("Preprocessing complete.\n")

        neg_vec = np.asarray(neg_vec)

        SCModel = SentimentClassifier(vocab_size)

        print('Training sentiment classifier...')
        classifier_train(SCModel, train_x, train_y)
        print("Training complete.\n")

        neutralized = neutralize(SCModel, neg_vec)
        np.savetxt(neut_neg_fp, neutralized, delimiter=',')
    model = NegativeInducer(vocab_size)

    # For saving/loading models
    checkpoint_dir = 'negative-inducer/checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint,
                                         checkpoint_dir,
                                         max_to_keep=3)
    # Ensure the output directory exists
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    if args.restore_checkpoint or args.mode == 'test':
        # restore the latest checkpoint from the manager
        checkpoint.restore(manager.latest_checkpoint)

    # init data for actual train/test!
    neut_neg_vec = np.loadtxt(neut_neg_fp, delimiter=',')
    inputs = neut_neg_vec
    labels = neg_vec
    train_x, test_x, train_y, test_y = sk.train_test_split(inputs,
                                                           labels,
                                                           test_size=0.2,
                                                           random_state=42)

    train_x = tf.convert_to_tensor(train_x, dtype=tf.int64)
    test_x = tf.convert_to_tensor(test_x, dtype=tf.int64)
    train_y = tf.convert_to_tensor(train_y, dtype=tf.int64)
    test_y = tf.convert_to_tensor(test_y, dtype=tf.int64)

    try:
        # Run on the requested device; an invalid device name raises a
        # RuntimeError, which is caught below.
        with tf.device('/device:' + args.device):
            if args.mode == 'train':
                for epoch in range(0, args.num_epochs):
                    train(model, train_x, train_y, manager, padding_index)
                    print("**** SAVING CHECKPOINT AT END OF EPOCH ****")
                    manager.save()
                perplexity, per_symbol_accuracy = test(model, test_x, test_y,
                                                       padding_index)
                print('**** test perplexity: %g, per_symbol_accuracy**** %g' %
                      (perplexity, per_symbol_accuracy))
            if args.mode == 'test':
                perplexity, per_symbol_accuracy = test(model, test_x, test_y,
                                                       padding_index)
                print('**** test perplexity: %g, per_symbol_accuracy**** %g' %
                      (perplexity, per_symbol_accuracy))
    except RuntimeError as e:
        print(e)
Example #13
from classifier import SentimentClassifier
from flask import Flask, request, render_template
from time import time
app = Flask(__name__)

print('Preparing the classifier...')
stime = time()
classifier = SentimentClassifier()
print('The classifier is ready')
print('That took', round(time() - stime, 2), 'seconds')


@app.route('/', methods=['GET', 'POST'])
def index_page(text='', prediction_message=''):
    if request.method == 'POST':
        text = request.form['text']
        prediction_message = classifier.get_prediction(text)

    return render_template('hello.html',
                           text=text,
                           prediction_message=prediction_message)


if __name__ == '__main__':
    app.run()
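
# A minimal usage sketch (assuming Flask's default port 5000):
#   curl -X POST -d 'text=Great service!' http://127.0.0.1:5000/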
Example #14
    def __init__(self):
        self.clf = SentimentClassifier()
Example #15
def predict_sentiment(tweet: dict):
    # Load the pickled classifier and score the incoming tweet text.
    sentiment_clf = SentimentClassifier('sentiment-classifier-v0.pkl')
    sentiment = sentiment_clf.predict([str(tweet['tweet'])])

    return {'sentiment': sentiment}
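
# A minimal usage sketch (payload shape inferred from the function body):
#   predict_sentiment({'tweet': 'Que buen dia!'})
#   # -> {'sentiment': <prediction of sentiment-classifier-v0.pkl>}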
Example #16
from classifier import SentimentClassifier

clf = SentimentClassifier()

text = 'Нормально'  # Russian for "fine / okay"

print(clf.get_prediction(text))