Ejemplo n.º 1
0
def sentiment_analysis(model_tag, model_sa, FLAGS, source_count, source_word2idx, review, fr_nlp, wiki_model):
    """Extract aspect-level opinions from a review and summarise them by category.

    The review is split into sentences on ``.``, ``!`` and ``?``. Each sentence
    is tokenised with ``fr_nlp`` and tagged with ``model_tag``; for sentences
    that contain aspects, ``model_sa`` predicts one sentiment per aspect.
    Per-aspect results and the per-category summary are printed as they are
    computed.

    Args:
        model_tag: Tagger whose ``predict(words)`` returns ``(preds, aspects)``.
            Each aspect is read here as ``(word, category, index)``.
        model_sa: Sentiment model whose ``predict(test_data, source_word2idx)``
            yields one prediction per aspect.
        FLAGS: Configuration object. ``nbwords`` and ``pad_idx`` are read and
            ``pre_trained_context_wt`` is overwritten.
        source_count: Passed through to ``read_sample``.
        source_word2idx: Passed to ``read_sample``, ``init_word_embeddings``
            and ``model_sa.predict``.
        review: Raw review text.
        fr_nlp: Callable tokeniser; iterating its result yields objects with a
            ``text`` attribute.
        wiki_model: Passed through to ``init_word_embeddings``.

    Returns:
        A tuple ``(opinions, summury)``. Each opinion is
        ``[aspect, category, str(index), str(int(index) + len(aspect)),
        sentiment_label, sample]``; each summary entry is
        ``[category, sentiment_label]``.
    """
    samples = {}
    opinions = []
    summury = []
    all_aspect_categories, all_predictions = [], []

    for sentence in re.split(r'\.+|\!|\?', review):
        words_raw = [token.text for token in fr_nlp(sentence)]
        _, aspects = model_tag.predict(words_raw)
        if len(aspects) == 0:
            continue

        # Convert once; each column is one field of the aspect tuples.
        aspect_table = np.array(aspects)
        aspect_words = aspect_table[:, 0]
        aspect_categories = aspect_table[:, 1]
        aspect_idx = aspect_table[:, 2]
        all_aspect_categories.extend(aspect_categories)

        test_data = read_sample(fr_nlp, sentence, aspect_words, aspect_idx, source_count, source_word2idx)

        # NOTE(review): the embedding matrix is rebuilt for every sentence that
        # has aspects, presumably because read_sample can add entries to
        # source_word2idx -- confirm before hoisting this out of the loop.
        FLAGS.pre_trained_context_wt = init_word_embeddings(wiki_model, source_word2idx, FLAGS.nbwords)
        FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

        predictions = model_sa.predict(test_data, source_word2idx)

        clauses = re.split(r'[\.\?!,;:]', sentence)
        for asp, cat, idx, pred in zip(aspect_words, aspect_categories, aspect_idx, predictions):
            all_predictions.append(pred)
            label = mapping_sentiments(pred)
            print(asp, " : ", str(cat), " =>", label, end=" ; exemple : ")

            # First clause containing the aspect (whitespace-insensitive).
            # Fall back to the whole sentence instead of raising IndexError
            # when no single clause contains it.
            needle = asp.lower().replace(' ', '')
            sample = next(
                (clause.strip() for clause in clauses
                 if needle in clause.lower().replace(' ', '')),
                sentence.strip(),
            )
            print(sample)

            # Keyed by category and prediction so the summary can show an
            # example whose sentiment matches the category average.
            samples[str(cat) + '_' + str(pred)] = sample
            opinions.append([asp, str(cat), str(idx), str(int(idx) + len(asp)), label, sample])

    if not all_aspect_categories:
        print("PAS D'ASPECTS !")
        return opinions, summury

    # summary review
    print("\n------SUMMARY REVIEW-------")
    categories = ['SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX', 'GENERAL', 'LOCALISATION']
    for categ in categories:
        category_predictions = [
            pred for cat, pred in zip(all_aspect_categories, all_predictions)
            if str(cat) == categ
        ]
        if not category_predictions:
            continue

        average = round(sum(category_predictions) / len(category_predictions))
        label = mapping_sentiments(average)
        summury.append([categ, label])
        print(categ, " ", label, "; exemple : ", end=" ")
        # The rounded average may match no individual prediction of this
        # category, in which case there is no example to show.
        print(samples.get(categ + '_' + str(int(average)), "conflict sentiments"))

    return opinions, summury
Ejemplo n.º 2
0
def main(_):
    """Run the sentiment model on pre-extracted aspects and print the results.

    Builds the vocabulary from ``FLAGS.train_data``, restores the ``MemN2N``
    checkpoint found under ``FLAGS.pathModel``, then pairs each line of
    ``FLAGS.test_samples`` with the matching entry of the pickled
    ``FLAGS.test_aspects``. For every review that has aspects it prints the
    sentiment of each aspect and a per-category summary.

    Args:
        _: Unused positional argument.

    Raises:
        FileNotFoundError: If no checkpoint state is available under
            ``FLAGS.pathModel``.
    """
    configure.pp.pprint(FLAGS.__flags)
    source_count = []
    source_word2idx = {}

    read_vocabulary(fr_nlp, FLAGS.train_data, source_count, source_word2idx)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(
        wiki_model, source_word2idx, FLAGS.nbwords)
    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

    model = MemN2N(FLAGS)
    model.build_model()
    saver = tf.train.Saver(tf.trainable_variables())

    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(FLAGS.pathModel)
    if ckpt is None:
        # get_checkpoint_state returns None when nothing usable is found;
        # fail with a clear message instead of an AttributeError below.
        raise FileNotFoundError(
            "no checkpoint found in {!r}".format(FLAGS.pathModel))
    saver.restore(model.sess, ckpt.model_checkpoint_path)
    print("Model loaded")

    with open(FLAGS.test_samples, "r") as f:
        # Lines keep their trailing newline; the print below relies on it.
        reviews = f.readlines()

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use it on aspect files produced by a trusted pipeline.
    with open(FLAGS.test_aspects, 'rb') as fp:
        aspects = pickle.load(fp)

    categories = [
        'SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX', 'GENERAL', 'LOCALISATION'
    ]

    for review, aspects_ in zip(reviews, aspects):
        print("\n", review, end='')
        if len(aspects_) == 0:
            print("PAS D'ASPECTS !")
            continue

        # Convert once; each column is one field of the aspect tuples.
        aspect_table = np.array(aspects_)
        aspect_words = aspect_table[:, 0]
        aspect_categories = aspect_table[:, 1]
        aspect_idx = aspect_table[:, 2]
        print(aspect_words, aspect_categories, aspect_idx)

        test_data = read_sample(fr_nlp, review, aspect_words, aspect_idx,
                                source_count, source_word2idx)
        # NOTE(review): rebuilt for every review, presumably because
        # read_sample can add entries to source_word2idx -- confirm before
        # hoisting this out of the loop.
        FLAGS.pre_trained_context_wt = init_word_embeddings(
            wiki_model, source_word2idx, FLAGS.nbwords)
        FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

        predictions = model.predict(test_data, source_word2idx)

        samples = {}
        clauses = re.split(r'[\.\?!,;:]', review)
        for asp, cat, pred in zip(aspect_words, aspect_categories,
                                  predictions):
            print(asp, " : ", str(cat), " =>", mapping_sentiments(pred),
                  end=" ; ")
            # First clause containing the aspect (whitespace-insensitive).
            # Fall back to the whole review instead of raising IndexError
            # when no single clause contains it.
            needle = asp.lower().replace(' ', '')
            sample = next(
                (clause.strip() for clause in clauses
                 if needle in clause.lower().replace(' ', '')),
                review.strip(),
            )
            print(sample)
            samples[str(cat) + '_' + str(pred)] = sample

        # summary review
        print("\n------SUMMARY REVIEW-------")
        for categ in categories:
            category_predictions = [
                pred for cat, pred in zip(aspect_categories, predictions)
                if str(cat) == categ
            ]
            if not category_predictions:
                continue

            average = round(
                sum(category_predictions) / len(category_predictions))
            # The rounded average may match no individual prediction of this
            # category; report a conflict instead of raising KeyError.
            example = samples.get(categ + '_' + str(int(average)),
                                  "conflict sentiments")
            print(categ, " ", mapping_sentiments(average), "exemple : ",
                  example)
Ejemplo n.º 3
0
def main():
    """Write a per-profile CSV of aspect sentiments to ``configuration.filename_csv``.

    Each profile returned by ``generate_review_data()`` becomes one CSV row:
    ``prof[0][0]`` and ``prof[0][1]`` first, then for every review of the
    profile its comment (``prof[i][2]``), a note, and a
    (sentiment, targets, sample) triple per category, and finally one
    summary sentiment per category averaged over the profile's reviews.
    Fields are separated by ``;``; double quotes inside quoted text fields
    are replaced by ``&quot``.
    """
    # sequence tagging
    model_tag = load_tagging_model()
    model_sa, FLAGS, source_count, source_word2idx = load_sentiment_model(
        fr_nlp, wiki_model)

    profs = generate_review_data()

    categories = [
        'SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX', 'GENERAL', 'LOCALISATION'
    ]

    def quoted(text):
        """Return *text* as a quoted CSV field with inner quotes escaped."""
        return '"' + text.replace('"', '&quot') + '"'

    # The context manager guarantees the CSV is flushed and closed, even if
    # a model call fails midway.
    with open(configuration.filename_csv, 'w') as csv_file:
        csv_file.write(
            "code_etab;id_contrib;comment_1;note_globale_1;"
            "sentiment_SERVICE_1;target_SERVICE_1;sample_SERVICE_1;"
            "sentiment_AMBIANCE_1;target_AMBIANCE_1;sample_AMBIANCE_1;"
            "sentiment_QUALITY_1;target_QUALITY_1;sample_QUALITY_1;"
            "sentiment_PRICE_1;target_PRICE_1;sample_PRICE_1;"
            "sentiment_GENERAL_1;target_GENERAL_1;sample_GENERAL_1;"
            "sentiment_LOCATION_1;target_LOCATION_1;sample_LOCATION_1;comment_2;note_globale_2;"
            "sentiment_SERVICE_2;target_SERVICE_2;sample_SERVICE_2;"
            "sentiment_AMBIANCE_2;target_AMBIANCE_2;sample_AMBIANCE_2;"
            "sentiment_QUALITY_2;target_QUALITY_2;sample_QUALITY_2;"
            "sentiment_PRICE_2;target_PRICE_2;sample_PRICE_2;"
            "sentiment_GENERAL_2;target_GENERAL_2;sample_GENERAL_2;"
            "sentiment_LOCATION_2;target_LOCATION_2;sample_LOCATION_2;comment_3;note_globale_3;"
            "sentiment_SERVICE_3;target_SERVICE_3;sample_SERVICE_3;"
            "sentiment_AMBIANCE_3;target_AMBIANCE_3;sample_AMBIANCE_3;"
            "sentiment_QUALITY_3;target_QUALITY_3;sample_QUALITY_3;"
            "sentiment_PRICE_3;target_PRICE_3;sample_PRICE_3;"
            "sentiment_GENERAL_3;target_GENERAL_3;sample_GENERAL_3;"
            "sentiment_LOCATION_3;target_LOCATION_3;sample_LOCATION_3;"
            "summary_SERVICE;summary_AMBIANCE;"
            "summary_QUALITY;summary_PRICE;"
            "summary_GENERAL;summary_LOCATION\n")

        for prof in profs:
            csv_file.write(str(prof[0][0]) + ";" + str(prof[0][1]))
            # Per-category running totals across this profile's reviews.
            tab_sent = [0] * len(categories)
            tab_sum = [0] * len(categories)

            # Indexed access is kept because the concrete type of ``prof``
            # is not visible from this function.
            for i in range(len(prof)):
                all_aspect_words, all_aspect_categories, all_predictions = [], [], []
                samples = {}

                # NOTE(review): the note is always taken from the profile's
                # first entry (prof[0][3]) although the header has one
                # note_globale column per comment -- confirm whether
                # prof[i][3] was intended.
                csv_file.write(";" + quoted(prof[i][2]) + ";" +
                               str(prof[0][3]))

                review = prof[i][2].strip()
                clauses = re.split(r'[\.\?!,;:]', review)

                for sentence in re.split(r'\.+|\!|\?', review):
                    words_raw = [token.text for token in fr_nlp(sentence)]
                    _, aspects = model_tag.predict(words_raw)
                    if len(aspects) == 0:
                        continue

                    aspect_table = np.array(aspects)
                    aspect_words = aspect_table[:, 0]
                    aspect_categories = aspect_table[:, 1]
                    aspect_idx = aspect_table[:, 2]
                    all_aspect_words.extend(aspect_words)
                    all_aspect_categories.extend(aspect_categories)

                    # NOTE(review): the aspects were tagged on ``sentence``
                    # but the whole ``review`` is passed here -- confirm that
                    # read_sample expects review-relative indices.
                    test_data = read_sample(fr_nlp, review, aspect_words,
                                            aspect_idx, source_count,
                                            source_word2idx)
                    FLAGS.pre_trained_context_wt = init_word_embeddings(
                        wiki_model, source_word2idx, FLAGS.nbwords)
                    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

                    predictions = model_sa.predict(test_data, source_word2idx)
                    for asp, cat, pred in zip(aspect_words, aspect_categories,
                                              predictions):
                        all_predictions.append(pred)
                        # First clause of the review containing the aspect
                        # (whitespace-insensitive); fall back to the sentence
                        # instead of raising IndexError when none matches.
                        needle = asp.lower().replace(' ', '')
                        sample = next(
                            (clause.strip() for clause in clauses
                             if needle in clause.lower().replace(' ', '')),
                            sentence.strip(),
                        )
                        samples[str(cat) + '_' + str(pred)] = sample

                # summary review: one (sentiment, targets, sample) triple per
                # category; a review without aspects yields only placeholders.
                for j, categ in enumerate(categories):
                    matches = [
                        (asp, pred)
                        for asp, cat, pred in zip(all_aspect_words,
                                                  all_aspect_categories,
                                                  all_predictions)
                        if str(cat) == categ
                    ]
                    if not matches:
                        csv_file.write(";\"-\";\"-\";\"-\"")
                        continue

                    average = round(
                        sum(pred for _, pred in matches) / len(matches))
                    tab_sent[j] += average
                    tab_sum[j] += 1

                    key = categ + '_' + str(int(average))
                    if key in samples:
                        sample = samples[key]
                    else:
                        # The rounded average matches no individual
                        # prediction of this category.
                        sample = "conflict"
                        print("conflict")

                    targets = ", ".join(asp for asp, _ in matches)
                    csv_file.write(";\"" + mapping_sentiments(average) +
                                   "\";" + quoted(targets) + ";" +
                                   quoted(sample))

            for sent_total, count in zip(tab_sent, tab_sum):
                if count == 0:
                    csv_file.write(";\"-\"")
                else:
                    csv_file.write(
                        ";\"" +
                        mapping_sentiments(round(sent_total / count)) + "\"")
            csv_file.write("\n")
Ejemplo n.º 4
0
def main():
    """Write a per-review CSV of aspect sentiments to ``configuration.filename_procsv``.

    Every line of ``configuration.filename_proreviews`` is parsed by fixed
    character positions: ``line[0:7]`` is written as the first column,
    ``line[8:9]`` as the note, and ``line[11:-2]`` is the review text. Each
    review becomes one CSV row holding a (sentiment, targets, sample) triple
    per category. Fields are separated by ``;``; double quotes inside quoted
    text fields are replaced by ``&quot``.
    """
    # sequence tagging
    model_tag = load_tagging_model()
    model_sa, FLAGS, source_count, source_word2idx = load_sentiment_model(
        fr_nlp, wiki_model)

    categories = [
        'SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX', 'GENERAL', 'LOCALISATION'
    ]

    def quoted(text):
        """Return *text* as a quoted CSV field with inner quotes escaped."""
        return '"' + text.replace('"', '&quot') + '"'

    # The context manager guarantees the CSV is flushed and closed, even if
    # a model call fails midway.
    with open(configuration.filename_procsv, 'w') as csv_file:
        csv_file.write(
            "code_etab;comment;note_globale;"
            "sentiment_SERVICE;target_SERVICE;sample_SERVICE;sentiment_AMBIANCE;target_AMBIANCE;sample_AMBIANCE;sentiment_QUALITY;target_QUALITY;sample_QUALITY;"
            "sentiment_PRICE;target_PRICE;sample_PRICE;sentiment_GENERAL;target_GENERAL;sample_GENERAL;sentiment_LOCATION;target_LOCATION;sample_LOCATION\n"
        )

        with open(configuration.filename_proreviews, "r") as f:
            for line in f:
                # Fixed-position record layout, as sliced below.
                pro = line[0:7]
                note = line[8:9]
                review = line[11:-2].strip()
                csv_file.write(str(pro) + ";" + quoted(review) + ";" +
                               str(note))

                all_aspect_words, all_aspect_categories, all_predictions = [], [], []
                samples = {}
                clauses = re.split(r'[\.\?!,;:]', review)

                for sentence in re.split(r'\.+|\!|\?', review):
                    words_raw = [token.text for token in fr_nlp(sentence)]
                    _, aspects = model_tag.predict(words_raw)
                    if len(aspects) == 0:
                        continue

                    aspect_table = np.array(aspects)
                    aspect_words = aspect_table[:, 0]
                    aspect_categories = aspect_table[:, 1]
                    aspect_idx = aspect_table[:, 2]
                    all_aspect_words.extend(aspect_words)
                    all_aspect_categories.extend(aspect_categories)

                    # NOTE(review): the aspects were tagged on ``sentence``
                    # but the whole ``review`` is passed here -- confirm that
                    # read_sample expects review-relative indices.
                    test_data = read_sample(fr_nlp, review, aspect_words,
                                            aspect_idx, source_count,
                                            source_word2idx)
                    FLAGS.pre_trained_context_wt = init_word_embeddings(
                        wiki_model, source_word2idx, FLAGS.nbwords)
                    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

                    predictions = model_sa.predict(test_data, source_word2idx)

                    for asp, cat, pred in zip(aspect_words, aspect_categories,
                                              predictions):
                        all_predictions.append(pred)
                        # First clause of the review containing the aspect
                        # (whitespace-insensitive); fall back to the sentence
                        # instead of raising IndexError when none matches.
                        needle = asp.lower().replace(' ', '')
                        sample = next(
                            (clause.strip() for clause in clauses
                             if needle in clause.lower().replace(' ', '')),
                            sentence.strip(),
                        )
                        samples[str(cat) + '_' + str(pred)] = sample

                # summary review: one (sentiment, targets, sample) triple per
                # category, or three placeholders when the category is absent.
                for categ in categories:
                    matches = [
                        (asp, pred)
                        for asp, cat, pred in zip(all_aspect_words,
                                                  all_aspect_categories,
                                                  all_predictions)
                        if str(cat) == categ
                    ]
                    if not matches:
                        csv_file.write(";\"-\";\"-\";\"-\"")
                        continue

                    average = round(
                        sum(pred for _, pred in matches) / len(matches))
                    key = categ + '_' + str(int(average))
                    if key in samples:
                        sample = samples[key]
                    else:
                        # The rounded average matches no individual
                        # prediction of this category.
                        sample = "conflict"
                        print("conflict")

                    targets = ", ".join(asp for asp, _ in matches)
                    csv_file.write(";\"" + mapping_sentiments(average) +
                                   "\";" + quoted(targets) + ";" +
                                   quoted(sample))

                csv_file.write("\n")