Example #1
def _load_model():
    global sentiment_analysis
    global conversation

    with graph.as_default():
        sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
        conversation = Conversation(CONVERSATION_MODEL)
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--bert-model',
                        default=None,
                        type=str,
                        required=True,
                        help='path to BERT model directory')
    parser.add_argument('--fine-tuned-model',
                        default=None,
                        type=str,
                        required=True,
                        help='path to fine-tuned PosNeg classifier model file')
    parser.add_argument('--jumanpp-command',
                        type=str,
                        action='store',
                        default="/mnt/violet/share/tool/juman++v2/bin/jumanpp")
    parser.add_argument("--server",
                        default=None,
                        type=str,
                        required=True,
                        help="server IP address.")
    parser.add_argument("--port",
                        default=None,
                        type=int,
                        required=True,
                        help="server port.")
    args = parser.parse_args()
    server = xmlrpc_server.SimpleXMLRPCServer((args.server, args.port),
                                              allow_none=True)

    sa_model = SentimentAnalysis(args.bert_model, args.fine_tuned_model,
                                 args.jumanpp_command)
    server.register_function(sa_model.get_prediction, 'get_prediction')
    print("loading done.", file=sys.stderr)
    server.serve_forever()
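
As a rough sketch of how a client might call the service started above: the host, port, and the exact argument that get_prediction expects (assumed here to be a single raw sentence) are not shown in the snippet, so treat them as placeholders.

# Hypothetical client for the XML-RPC server above; host, port, and the
# argument shape passed to get_prediction are assumptions.
import xmlrpc.client

proxy = xmlrpc.client.ServerProxy("http://localhost:8000/", allow_none=True)
print(proxy.get_prediction("これはテスト用の文です。"))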
Example #3
    def __init__(
        self,
        train_vocab,
        n_movies,
        params,
        autorec_path=os.path.join(config.AUTOREC_MODEL, "model_best"),
        sentiment_analysis_path=os.path.join(config.SENTIMENT_ANALYSIS_MODEL,
                                             "model_best"),
        cuda=None,
        gensen=True,
    ):
        super(RecommendFromDialogue, self).__init__()
        self.n_movies = n_movies
        if cuda is None:
            self.cuda_available = torch.cuda.is_available()
        else:
            self.cuda_available = cuda

        self.sentiment_analysis = SentimentAnalysis(
            params=params['sentiment_analysis_params'],
            train_vocab=train_vocab,
            gensen=gensen,
            resume=sentiment_analysis_path)
        self.autorec = AutoRec(params=params['autorec_params'],
                               n_movies=self.n_movies,
                               resume=autorec_path)

        # freeze sentiment analysis
        for param in self.sentiment_analysis.parameters():
            param.requires_grad = False
Example #4
def sentiment(nounchunk):
    sa = SentimentAnalysis()
    positive_sentences, negative_sentences, neutral_sentences = sa.get_sentence_orientation(
        nounchunk)
    print(positive_sentences)
    print(neutral_sentences)
    positive_neutral_sen = positive_sentences + neutral_sentences
    return positive_neutral_sen
Example #5
def sentiment():
    senti = SentimentAnalysis()
    input_msg = request.form.get('sentiment_text')
    prediction = senti.sentiment(input_msg)
    if prediction > 0.80:
        message = 'It is a positive message, having a score of {}'.format(
            prediction)
    else:
        message = 'It is a negative message, having a score of {}'.format(
            prediction)
    return render_template('result.html', prediction=message, input=input_msg)
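
For context, a minimal sketch of how this view could be wired into a Flask app; the route path, app setup, and the form/template names are assumptions, and request/render_template are expected to come from flask in the original module.

# Illustrative wiring only; the original route path and app setup are not shown above.
from flask import Flask, request, render_template

app = Flask(__name__)
app.add_url_rule('/sentiment', view_func=sentiment, methods=['POST'])

if __name__ == '__main__':
    app.run(debug=True)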
Example #6
    def __init__(self, username=None, password=None, session_user=None):
        self.L = instaloader.Instaloader(
            dirname_pattern="posts/{profile}/{date}")
        self.analizer = SentimentAnalysis()
        if session_user is None and username is not None and password is not None:
            print('logging in ... ')
            try:
                self.L.login(username, password)
                print('logged in -> ', username)
                self.Logged = 'logged'
            except Exception as ex:
                print(ex)
                self.Logged = ex
        elif session_user is not None:
            self.L.load_session_from_file(session_user)
        else:
            pass
Example #7
def frp_multi(fr):

    zz = fr['reviews']
    multi_list = []

    for yy in zz:
        xx = yy['summary']
        multi_list.append(xx)

    abc = SentimentAnalysis()
    list_tds = []
    list_qc = []
    list_c = []
    list_dict = []

    for ss in multi_list:
        ss_a = list(jieba.cut(ss, cut_all=False))
        ss_b = " ".join(ss_a)
        #print (ss_b)
        result = abc.analyze(ss_b)
        #print (result)
        for item in result:
            t = item[0]
            d = item[1]
            s = item[4]
            tds = [t, d, s]
            list_tds.append(tds)

    for tri in list_tds:
        if tri not in list_qc:
            list_qc.append(tri)
            tri_count = list_tds.count(tri)
            list_c.append(tri_count)

    for ww in list_qc:
        vc_index = list_qc.index(ww)
        vc = list_c[vc_index]
        vt = ww[0]
        vd = ww[1]
        vs = ww[2]
        dict_a = {"对象": vt, "评价极性": vs, "描述词": vd, "评论数": vc}
        list_dict.append(dict_a)

    df = pd.DataFrame(list_dict, columns=["对象", "评价极性", "描述词", "评论数"])
    df.to_csv("./ndetails.csv", index=False)
Example #8
def frp_single(sentence):

    abc = SentimentAnalysis()
    result = abc.analyze(sentence)
    str_a = []
    jsonlist_a = []

    for item in result:
        aspect = item[0]
        opinion = item[1]
        relation = item[4]
        #t = ***
        a = {'target': aspect, 'description': opinion, 'sentiment': relation}
        str_a.append(a)

    for i in str_a:
        json_info = json.dumps(i, default=set_default, ensure_ascii=False)
        jsonlist_a.append(json_info)

    return jsonlist_a
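
Note that frp_multi segments each review with jieba before calling analyze, while frp_single passes the sentence through unchanged, so it presumably expects pre-segmented, whitespace-delimited input like the sample sentences in Example #17. A hypothetical call:

# Hypothetical usage; the input is assumed to already be whitespace-segmented.
for entry in frp_single("外观 漂亮"):
    print(entry)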
Example #9
def run_process(access_key, secret_access_key, opinions, output_path):
    pos = neg = neutral = mixed = 0

    # run sentiment analysis
    for i in opinions:
        obj = SentimentAnalysis(access_key, secret_access_key, i)
        data = obj.run_single_sentiment_analysis()
        pos += data['SentimentScore']['Positive']
        neg += data['SentimentScore']['Negative']
        neutral += data['SentimentScore']['Neutral']
        mixed += data['SentimentScore']['Mixed']

    cnt = len(opinions)
    dict_result = {
        'positive': pos / cnt,
        'negative': neg / cnt,
        'neutral': neutral / cnt,
        'mixed': mixed / cnt
    }

    create_pieplot_percent(dict_result, output_path)

    return json.dumps(dict_result)
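
The SentimentScore keys read here (Positive, Negative, Neutral, Mixed) match the response shape of AWS Comprehend's detect_sentiment, so run_single_sentiment_analysis presumably wraps a call like the sketch below; the client construction, region, and language code are assumptions.

# Assumed shape of the underlying call; region_name and LanguageCode are guesses.
import boto3

def run_single_sentiment_analysis_sketch(access_key, secret_access_key, text):
    comprehend = boto3.client('comprehend',
                              aws_access_key_id=access_key,
                              aws_secret_access_key=secret_access_key,
                              region_name='us-east-1')
    return comprehend.detect_sentiment(Text=text, LanguageCode='en')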
Example #10
    def update(self, text):
        sa = SentimentAnalysis()
        cleaned_text = sa.clean_text_tweet_from_mails_and_rubbish(text)
        return sa.get_tweet_sentiment(cleaned_text)
Example #11
def _load_model():
    global sentiment_analysis
    global conversation

    sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
    conversation = Conversation(CONV_TOKENIZER, CONV_MODEL)
Example #12
    else:
        break

ls = [str(code)]
numberInsta = numberInsta + 9

#call on mongo class with the geotag code (which is used to name Mongo collection)
m = mongo(ls[0])

#clear previous data in MongoDB the collection
m.clearDB()

#run instagram scraping
s = InstaScraper("/Users/Ed/eclipse/chromedriver", ls, numberInsta)
s.runScraper()
print('Scraping Complete')

#connect to database with webscraped data
db = m.getdatabase()

#run sentiment analysis
c = SentimentAnalysis(db, ls[0])
captions, captionText = c.getCaptions()
c.sentimentIntensity(captions)

#run frequency distribution analysis
freq = WordFrequency(captionText)
freq.getFreqDist()

print('Analysis Complete')
Example #13
    def analysis_status(self, statusText):
        s = SentimentAnalysis(statusText)
        return s.sentiments
Example #14
from sentiment_analysis import SentimentAnalysis

sa = SentimentAnalysis()
sentiment = sa.analyze_sentiment("This is best film i seen")
print(sa.model.layers[1].output)
print(sentiment)

intermediate_out = sa.debug_hidden_layer_out("This is best film i seen")

sentiment = sa.analyze_sentiment("Wonderful but suck")
print(sentiment)

sentiment = sa.analyze_sentiment("Ha ha i laught")
print(sentiment)
Example #15
def generate_training_data(database, collection, query):
    """Genera un conjunto de datos de entrenamiento.

    Esta generación se realiza con el propósito de encontrar los mejores
    parámetros para la clasificación de sentimientos. En específico,
    se intenta encontrar la mejor combinación de la negación y el
    léxicon de polaridad.

    paráms:
        database: str
            Base de datos de MongoDB que se utilizará.
        collection:
            Colección donde se encuentran los tweets etiquetados para
            entrenamiento.
        query:
            Filtro que se utilizará para recuperar los tweets de entrenamiento.
            Nótese que los tweets deben tener el campo "polarity".

    Example:

        >>> generate_training_data(database='tass_2017',
                                   collection='intertass',
                                   query={"$or": [{"dataset": "train"},
                                                  {"dataset": "development"}]})
    """
    four_label_homologation = {u'N+': 0, u'N': 0,
                               u'NEU': 1,
                               u'P': 2, u'P+': 2,
                               u'NONE': 3}

    client = pymongo.MongoClient()
    coll = client[database][collection]

    tweets = coll.find(filter=query,
                       projection=['tweet_id', 'content', 'polarity'],
                       sort=[('polarity', pymongo.ASCENDING),])

    tweets_ = [[_to_str(tweet['tweet_id']), _to_unicode(tweet['content']),
                _to_unicode(tweet['polarity']).upper()]
               for tweet in tweets if (tweet['content'] and len(tweet['content']) > 0)]

    client.close()

    tweets = None
    tweets = tweets_
    tweets_ = None

    output_path = DATA_PATH + '/train/' + collection
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    for negation_id in NEGATION_SETTINGS.keys():

        lexicons = np.random.choice(np.arange(1, 7),
                                    size=3, replace=False).tolist() +\
                   np.random.choice(np.arange(7, 16),
                                    size=3, replace=False).tolist() +\
                   np.random.choice(np.arange(16, 26),
                                    size=4, replace=False).tolist() +\
                   np.random.choice(np.arange(26, 31),
                                    size=2, replace=False).tolist()

        lexicons = np.random.choice(lexicons, size=6, replace=False).tolist()

        if np.random.choice(range(2), p=[.9, .1]) == 1:
            lexicons.append(31)

        negation_path = output_path + '/%s' % negation_id
        if not os.path.isdir(negation_path):
            os.mkdir(negation_path)

        for lexicon_id in lexicons:

            output_fname = negation_path +\
                           '/metafeatures-lexicon-%s.tsv' % lexicon_id
            if os.path.isfile(output_fname):
                continue

            clf = SentimentAnalysis(negation_id=negation_id,
                                    lexicon='lexicon-%i' % lexicon_id)

            documents = []
            four_label_polarities = []
            metafeatures_list = []

            for j, (tweet_id, content, polarity) in enumerate(tweets):
                try:
                    text, metafeatures = clf.preprocess_tweet(content)
                except Exception:
                    _write_in_file(fname=negation_path + '/errors-1.log',
                                   content=tweet_id + '\n', mode='a')
                    continue

                metafeatures = metafeatures.reshape(1, metafeatures.shape[0])

                if j == 0:
                    metafeatures_list = metafeatures
                else:
                    if metafeatures_list.shape[1] == metafeatures.shape[1]:
                        metafeatures_list = np.vstack((metafeatures_list,
                                                       metafeatures))
                    else:
                        _write_in_file(fname=negation_path + '/errors-2.log',
                                       content=tweet_id + '\n', mode='a')
                        continue

                documents.append(_to_str(text))
                four_label_polarities.append(four_label_homologation[polarity])

            if not os.path.isfile(negation_path + '/tweets.txt'):
                np.savetxt(negation_path + '/tweets.txt',
                           np.array(documents, dtype=str), fmt='%s')

            if not os.path.isfile(negation_path + '/target-labels.dat'):
                np.savetxt(negation_path + '/target-labels.dat',
                           np.array(four_label_polarities, dtype=int), fmt='%i')

            np.savetxt(output_fname, metafeatures_list, fmt='%i', delimiter='\t')

            clf = None
Example #16
    print(df)
    # Summarization
    text = ''
    for sent in df['original_sents'].values:
        text += '.' + sent

    summarize = Summarization(text, None, senti)
    final_summary1, final_summary2, final_summary3, neg_final_summary1, neg_final_summary2, neg_final_summary3, counts, eigen_explo = summarize.get_summaries(
    )
    print(final_summary1, final_summary2, final_summary3, neg_final_summary1,
          neg_final_summary2, neg_final_summary3, counts, eigen_explo)


if __name__ == "__main__":
    d = spell.request_dict("en_US")
    nlp = spacy.load('en')

    senti = SentimentAnalysis()

    start_time = time.time()
    filenames = [
        f for f in listdir(input_entity_files)
        if isfile(join(input_entity_files, f))
    ]
    #Parellel on CPU cores
    #Parallel(n_jobs=cpu_count() - 1, verbose=10, backend="multiprocessing", batch_size="auto")(delayed(processFiles)(fileName,input_entity_files) for fileName in filenames)
    #for (dirpath, dirnames, filenames) in walk(input_entity_files):
    for file in filenames:
        processFiles(file, input_entity_files)

    print("Time taken --- %s seconds ---" % (time.time() - start_time))
Example #17
import argparse

from preprocess import WordSet, WordEmbedding, KnowledgeBase
from sentiment_analysis import SentimentAnalysis

#sentence = '外观 漂亮'
#sentence = '外观 不 太 漂亮'
#sentence = '高 规格 的 用料 和 精致 的 做工'
#sentence = '炫酷 的 造型 、 充沛 的 动力 再 加上 本田 家族 运动 基因 的 传承'

parser = argparse.ArgumentParser()
parser.add_argument('-s', required=True)
args = parser.parse_args()

sentence = args.s

abc = SentimentAnalysis()
result = abc.analyze(sentence)

print('--------------------')
print('%s\n' % (sentence))
for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
    print('%s\t%s\t%s' % (aspect, opinion, relation))
print('--------------------')

for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
Example #18
class ChatBot:
    sentiment = SentimentAnalysis()
    #sentiment.makemodel()
    knowledge = {}
    historybuffer = list()  # contains the 10 most recent messages
    # The Kernel object is the public interface to
    # the AIML interpreter.
    aimlBot = aiml.Kernel()
    # Use the 'learn' method to load the contents
    # of an AIML file into the Kernel.
    aimlBot.learn(os.path.dirname(os.path.abspath(__file__)) + '/botdata/*/*.aiml')
    status = False

    def read_input(self, input, latitude, longitude):
        input = input.lower()
        self.historybuffer.insert(0, input)
        if len(self.historybuffer) == 100:
            self.historybuffer.pop()
        self.knowledge = self.sentiment.analyse_text(self.knowledge, input)
        if "@bot" in input or "@Bot" in input:
            input_list = input.split(' ')
            keyword = 'on' if len(input_list) == 1 else input_list[1]

            if keyword == 'off':
                self.status = False
            elif keyword == 'topic':
                return self.reply(input, 'topic', latitude, longitude)
            else:
                self.status = True
                return self.reply(input, "location", latitude, longitude)
        if self.status is True:
            return self.reply(input, "aiml", latitude, longitude)
        return None

    def reply(self, message, case, latitude, longitude):
        if case == "aiml":
            # group = self.classify(self.historytext())
            return {
                'message': self.aimlresponse(message),
            }
        elif case == "location":
            if len(self.historytext().split()) < 20:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }

            group = self.classify(self.historytext())

            if len(group) == 0:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }

            best_match = group[0]
            place_type = None
            query = ''
            if 'Restaurants' in best_match.name:
                place_type = PlaceType.restaurant
                category_split = best_match.name.strip('/').split('/')
                if len(category_split) > 2:
                    query = category_split[2]  # Fast Food or Pizzeria

            results = search_places(latitude, longitude, place_type=place_type, keyword=query)
            results = results[:4]

            return {
                'message': 'How about one of these?',
                'options': [{
                    'label': place['name'],
                    'link': f"https://www.google.com/maps/dir/?api=1&destination={place['geometry']['location']['lat']},{place['geometry']['location']['lng']}",
                } for place in results],
            }
        elif case == 'topic':
            interests = self.returnInterests()
            return {
                'message': 'How about one of these topics?',
                'options': [interest.name for interest in interests],
            }
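
A hypothetical interaction with this class, assuming the AIML files under botdata/ and the search_places/PlaceType helpers are importable; the coordinates and messages are placeholders.

# Hypothetical usage; requires the AIML data and location helpers at import time.
bot = ChatBot()
# '@bot on' enables the bot; with little chat history it replies that it is
# not yet sure where you should go.
print(bot.read_input("@bot on", 51.5074, -0.1278))
# Subsequent messages are answered by the AIML kernel.
print(bot.read_input("i feel like pizza tonight, somewhere cheap", 51.5074, -0.1278))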