def sentiment_analysis(model_tag, model_sa, FLAGS, source_count, source_word2idx,
                       review, fr_nlp, wiki_model):
    """Extract aspects from a review and predict a sentiment for each one."""
    samples = {}
    opinions = []
    summary = []
    all_aspect_words, all_aspect_categories, all_predictions = [], [], []
    # Split the review into sentences on '.', '!' and '?'.
    sentences = re.split(r'\.+|!|\?', review)
    for sentence in sentences:
        sentence_nlp = fr_nlp(sentence)
        words_raw = [sp.text for sp in sentence_nlp]
        # Sequence tagging: aspects are (word, category, char offset) triples.
        _, aspects = model_tag.predict(words_raw)
        if len(aspects) > 0:
            aspect_words = np.array(aspects)[:, 0]
            aspect_categories = np.array(aspects)[:, 1]
            aspect_idx = np.array(aspects)[:, 2]
            all_aspect_words.extend(aspect_words)
            all_aspect_categories.extend(aspect_categories)
            test_data = read_sample(fr_nlp, sentence, aspect_words, aspect_idx,
                                    source_count, source_word2idx)
            # Rebuild the embedding matrix for any newly indexed words and
            # zero out the padding row.
            FLAGS.pre_trained_context_wt = init_word_embeddings(
                wiki_model, source_word2idx, FLAGS.nbwords)
            FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0
            predictions = model_sa.predict(test_data, source_word2idx)
            for asp, cat, idx, pred in zip(aspect_words, aspect_categories,
                                           aspect_idx, predictions):
                all_predictions.append(pred)
                print(asp, " : ", str(cat), " =>", mapping_sentiments(pred),
                      end=" ; example: ")
                # First clause of the sentence that contains the aspect,
                # compared space-insensitively; assumes a match exists.
                sample = [s.strip() for s in re.split(r'[.?!,;:]', sentence)
                          if asp.lower().replace(' ', '')
                          in s.lower().replace(' ', '')][0]
                print(sample)
                samples[str(cat) + '_' + str(pred)] = sample
                opinion = [asp, str(cat), str(idx), str(int(idx) + len(asp)),
                           mapping_sentiments(pred), sample]
                opinions.append(opinion)
    if len(all_aspect_words) > 0:
        # Review summary: average the predictions per aspect category.
        print("\n------SUMMARY REVIEW-------")
        categories = ['SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX',
                      'GENERAL', 'LOCALISATION']
        for categ in categories:
            exists = False
            total = 0
            val = 0
            for asp, cat, pred in zip(all_aspect_words, all_aspect_categories,
                                      all_predictions):
                if str(cat) == categ:
                    exists = True
                    total += 1
                    val += pred
            if exists:
                avg = round(val / total)
                print(categ, " ", mapping_sentiments(avg),
                      "; example: ", end=" ")
                summary.append([categ, mapping_sentiments(avg)])
                try:
                    print(samples[categ + '_' + str(int(avg))])
                except KeyError:
                    # No single aspect carried the averaged polarity.
                    print("conflicting sentiments")
    else:
        print("NO ASPECTS!")
    return opinions, summary
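# --- Usage sketch (not in the original source) --------------------------------
# A minimal, hypothetical driver for sentiment_analysis(), reusing the repo's
# load_tagging_model()/load_sentiment_model() helpers shown in the entry points
# below. The spaCy model name "fr_core_news_sm" and the KeyedVectors path
# "wiki.fr.kv" are illustrative assumptions, not taken from this project.
import spacy
from gensim.models import KeyedVectors

def run_one_review(review):
    fr_nlp = spacy.load("fr_core_news_sm")        # assumed French pipeline
    wiki_model = KeyedVectors.load("wiki.fr.kv")  # placeholder embedding file
    model_tag = load_tagging_model()
    model_sa, FLAGS, source_count, source_word2idx = load_sentiment_model(
        fr_nlp, wiki_model)
    # Returns per-aspect opinions and a per-category summary of the review.
    return sentiment_analysis(model_tag, model_sa, FLAGS, source_count,
                              source_word2idx, review, fr_nlp, wiki_model)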
def main(_):
    configure.pp.pprint(FLAGS.__flags)
    source_count = []
    source_word2idx = {}
    # Build the vocabulary from the training data.
    read_vocabulary(fr_nlp, FLAGS.train_data, source_count, source_word2idx)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(
        wiki_model, source_word2idx, FLAGS.nbwords)
    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0

    # Build the memory network and restore the latest checkpoint.
    model = MemN2N(FLAGS)
    model.build_model()
    saver = tf.train.Saver(tf.trainable_variables())
    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(FLAGS.pathModel)
    saver.restore(model.sess, ckpt.model_checkpoint_path)
    print("Model loaded")

    with open(FLAGS.test_samples, "r") as f:
        reviews = f.readlines()
    with open(FLAGS.test_aspects, 'rb') as fp:
        aspects = pickle.load(fp)

    for review, aspects_ in zip(reviews, aspects):
        print("\n", review, end='')
        if len(aspects_) > 0:
            aspect_words = np.array(aspects_)[:, 0]
            aspect_categories = np.array(aspects_)[:, 1]
            aspect_idx = np.array(aspects_)[:, 2]
            print(aspect_words, aspect_categories, aspect_idx)
            test_data = read_sample(fr_nlp, review, aspect_words, aspect_idx,
                                    source_count, source_word2idx)
            FLAGS.pre_trained_context_wt = init_word_embeddings(
                wiki_model, source_word2idx, FLAGS.nbwords)
            FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0
            predictions = model.predict(test_data, source_word2idx)
            samples = {}
            for asp, cat, idx, pred in zip(aspect_words, aspect_categories,
                                           aspect_idx, predictions):
                print(asp, " : ", str(cat), " =>", mapping_sentiments(pred),
                      end=" ; ")
                sample = [s.strip() for s in re.split(r'[.?!,;:]', review)
                          if asp.lower().replace(' ', '')
                          in s.lower().replace(' ', '')][0]
                print(sample)
                samples[str(cat) + '_' + str(pred)] = sample

            # Review summary: average the predictions per aspect category.
            print("\n------SUMMARY REVIEW-------")
            categories = ['SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX',
                          'GENERAL', 'LOCALISATION']
            for categ in categories:
                exists = False
                total = 0
                val = 0
                for asp, cat, pred in zip(aspect_words, aspect_categories,
                                          predictions):
                    if str(cat) == categ:
                        exists = True
                        total += 1
                        val += pred
                if exists:
                    avg = round(val / total)
                    print(categ, " ", mapping_sentiments(avg), "example: ",
                          samples[categ + '_' + str(int(avg))])
        else:
            print("NO ASPECTS!")
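# --- Helper sketch (not in the original source) --------------------------------
# Every summary block in this file repeats the same per-category averaging of
# integer sentiment predictions. A small pure-function sketch of that logic,
# with a hypothetical name; it mirrors the round(val / total) scheme above:
def summarize_by_category(categories, aspect_categories, predictions):
    """Average the integer predictions per category; None when absent."""
    summary = {}
    for categ in categories:
        vals = [pred for cat, pred in zip(aspect_categories, predictions)
                if str(cat) == categ]
        summary[categ] = round(sum(vals) / len(vals)) if vals else None
    return summary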
def main():
    # Load the sequence-tagging and sentiment models.
    model_tag = load_tagging_model()
    model_sa, FLAGS, source_count, source_word2idx = load_sentiment_model(
        fr_nlp, wiki_model)
    profs = generate_review_data()

    csv_file = open(configuration.filename_csv, 'w')
    csv_file.write(
        "code_etab;id_contrib;comment_1;note_globale_1;"
        "sentiment_SERVICE_1;target_SERVICE_1;sample_SERVICE_1;"
        "sentiment_AMBIANCE_1;target_AMBIANCE_1;sample_AMBIANCE_1;"
        "sentiment_QUALITY_1;target_QUALITY_1;sample_QUALITY_1;"
        "sentiment_PRICE_1;target_PRICE_1;sample_PRICE_1;"
        "sentiment_GENERAL_1;target_GENERAL_1;sample_GENERAL_1;"
        "sentiment_LOCATION_1;target_LOCATION_1;sample_LOCATION_1;comment_2;note_globale_2;"
        "sentiment_SERVICE_2;target_SERVICE_2;sample_SERVICE_2;"
        "sentiment_AMBIANCE_2;target_AMBIANCE_2;sample_AMBIANCE_2;"
        "sentiment_QUALITY_2;target_QUALITY_2;sample_QUALITY_2;"
        "sentiment_PRICE_2;target_PRICE_2;sample_PRICE_2;"
        "sentiment_GENERAL_2;target_GENERAL_2;sample_GENERAL_2;"
        "sentiment_LOCATION_2;target_LOCATION_2;sample_LOCATION_2;comment_3;note_globale_3;"
        "sentiment_SERVICE_3;target_SERVICE_3;sample_SERVICE_3;"
        "sentiment_AMBIANCE_3;target_AMBIANCE_3;sample_AMBIANCE_3;"
        "sentiment_QUALITY_3;target_QUALITY_3;sample_QUALITY_3;"
        "sentiment_PRICE_3;target_PRICE_3;sample_PRICE_3;"
        "sentiment_GENERAL_3;target_GENERAL_3;sample_GENERAL_3;"
        "sentiment_LOCATION_3;target_LOCATION_3;sample_LOCATION_3;"
        "summary_SERVICE;summary_AMBIANCE;"
        "summary_QUALITY;summary_PRICE;"
        "summary_GENERAL;summary_LOCATION\n")

    for prof in profs:
        # Establishment code and contributor id come from the first review.
        csv_file.write(str(prof[0][0]) + ";" + str(prof[0][1]))
        # Running per-category sums across this establishment's reviews.
        tab_sent = [0, 0, 0, 0, 0, 0]
        tab_sum = [0, 0, 0, 0, 0, 0]
        for i in range(len(prof)):
            all_aspect_words, all_aspect_categories, all_predictions = [], [], []
            samples = {}
            # Quote the comment, escaping embedded double quotes CSV-style,
            # then append the per-review global rating.
            csv_file.write(';"' + re.sub('"', '""', prof[i][2]) + '";'
                           + str(prof[i][3]))
            review = prof[i][2].strip()
            sentences = re.split(r'\.+|!|\?', review)
            for sentence in sentences:
                sentence_nlp = fr_nlp(sentence)
                words_raw = [sp.text for sp in sentence_nlp]
                _, aspects = model_tag.predict(words_raw)
                if len(aspects) > 0:
                    aspect_words = np.array(aspects)[:, 0]
                    all_aspect_words.extend(aspect_words)
                    aspect_categories = np.array(aspects)[:, 1]
                    all_aspect_categories.extend(aspect_categories)
                    aspect_idx = np.array(aspects)[:, 2]
                    test_data = read_sample(fr_nlp, review, aspect_words,
                                            aspect_idx, source_count,
                                            source_word2idx)
                    FLAGS.pre_trained_context_wt = init_word_embeddings(
                        wiki_model, source_word2idx, FLAGS.nbwords)
                    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0
                    predictions = model_sa.predict(test_data, source_word2idx)
                    for asp, cat, pred in zip(aspect_words, aspect_categories,
                                              predictions):
                        all_predictions.append(pred)
                        sample = [s.strip()
                                  for s in re.split(r'[.?!,;:]', review)
                                  if asp.lower().replace(' ', '')
                                  in s.lower().replace(' ', '')][0]
                        samples[str(cat) + '_' + str(pred)] = sample

            if len(all_aspect_words) > 0:
                # Per-review summary: average the predictions per category.
                categories = ['SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX',
                              'GENERAL', 'LOCALISATION']
                j = 0
                for categ in categories:
                    asp_cat = ""
                    exists = False
                    total = 0
                    val = 0
                    for asp, cat, pred in zip(all_aspect_words,
                                              all_aspect_categories,
                                              all_predictions):
                        if str(cat) == categ:
                            exists = True
                            total += 1
                            val += pred
                            asp_cat += asp + ", "
                    if exists:
                        avg = round(val / total)
                        tab_sent[j] += avg
                        tab_sum[j] += 1
                        csv_file.write(';"' + mapping_sentiments(avg) + '";"'
                                       + asp_cat[:-2] + '";"')
                        try:
                            csv_file.write(samples[categ + '_' + str(int(avg))]
                                           + '"')
                        except KeyError:
                            # Close the opened quote with a conflict marker.
                            csv_file.write('conflict"')
                            print("conflict")
                    else:
                        csv_file.write(';"-";"-";"-"')
                    j += 1
            else:
                # No aspects in this review: fill the 6 categories x 3 columns.
                csv_file.write(';"-"' * 18)

        # Overall per-category summary across the establishment's reviews.
        for l in range(len(tab_sent)):
            if tab_sum[l] == 0:
                csv_file.write(';"-"')
            else:
                csv_file.write(';"' + mapping_sentiments(
                    round(tab_sent[l] / tab_sum[l])) + '"')
        csv_file.write("\n")
    csv_file.close()
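# --- Design note (not in the original source) ----------------------------------
# The hand-built ';'-separated, manually quoted writes above are fragile when a
# field contains ';' or '"'. An equivalent row emitter using Python's csv
# module, which handles quoting and escaping automatically (the function name
# and the QUOTE_ALL policy are illustrative choices, not the original code):
import csv

def write_csv_row(path, fields):
    """Append one semicolon-delimited, fully quoted row to a CSV file."""
    with open(path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f, delimiter=';', quoting=csv.QUOTE_ALL).writerow(fields)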
def main():
    # Load the sequence-tagging and sentiment models.
    model_tag = load_tagging_model()
    model_sa, FLAGS, source_count, source_word2idx = load_sentiment_model(
        fr_nlp, wiki_model)

    csv_file = open(configuration.filename_procsv, 'w')
    csv_file.write(
        "code_etab;comment;note_globale;"
        "sentiment_SERVICE;target_SERVICE;sample_SERVICE;"
        "sentiment_AMBIANCE;target_AMBIANCE;sample_AMBIANCE;"
        "sentiment_QUALITY;target_QUALITY;sample_QUALITY;"
        "sentiment_PRICE;target_PRICE;sample_PRICE;"
        "sentiment_GENERAL;target_GENERAL;sample_GENERAL;"
        "sentiment_LOCATION;target_LOCATION;sample_LOCATION\n")

    with open(configuration.filename_proreviews, "r") as f:
        for line in f:
            # Fixed-width input: establishment code, rating, then the review.
            pro = line[0:7]
            note = line[8:9]
            line = line[11:-2]
            # Quote the comment, escaping embedded double quotes CSV-style.
            csv_file.write(str(pro) + ';"' + re.sub('"', '""', line.strip())
                           + '";' + str(note))
            review = line.strip()
            sentences = re.split(r'\.+|!|\?', review)
            all_aspect_words, all_aspect_categories, all_predictions = [], [], []
            samples = {}
            for sentence in sentences:
                sentence_nlp = fr_nlp(sentence)
                words_raw = [sp.text for sp in sentence_nlp]
                _, aspects = model_tag.predict(words_raw)
                if len(aspects) > 0:
                    aspect_words = np.array(aspects)[:, 0]
                    all_aspect_words.extend(aspect_words)
                    aspect_categories = np.array(aspects)[:, 1]
                    all_aspect_categories.extend(aspect_categories)
                    aspect_idx = np.array(aspects)[:, 2]
                    test_data = read_sample(fr_nlp, review, aspect_words,
                                            aspect_idx, source_count,
                                            source_word2idx)
                    FLAGS.pre_trained_context_wt = init_word_embeddings(
                        wiki_model, source_word2idx, FLAGS.nbwords)
                    FLAGS.pre_trained_context_wt[FLAGS.pad_idx, :] = 0
                    predictions = model_sa.predict(test_data, source_word2idx)
                    for asp, cat, pred in zip(aspect_words, aspect_categories,
                                              predictions):
                        all_predictions.append(pred)
                        sample = [s.strip()
                                  for s in re.split(r'[.?!,;:]', review)
                                  if asp.lower().replace(' ', '')
                                  in s.lower().replace(' ', '')][0]
                        samples[str(cat) + '_' + str(pred)] = sample

            # Per-review summary: average the predictions per category.
            categories = ['SERVICE', 'AMBIANCE', 'QUALITE', 'PRIX',
                          'GENERAL', 'LOCALISATION']
            for categ in categories:
                asp_cat = ""
                exists = False
                total = 0
                val = 0
                for asp, cat, pred in zip(all_aspect_words,
                                          all_aspect_categories,
                                          all_predictions):
                    if str(cat) == categ:
                        exists = True
                        total += 1
                        val += pred
                        asp_cat += asp + ", "
                if exists:
                    avg = round(val / total)
                    csv_file.write(';"' + mapping_sentiments(avg) + '";"'
                                   + asp_cat[:-2] + '";"')
                    try:
                        csv_file.write(samples[categ + '_' + str(int(avg))]
                                       + '"')
                    except KeyError:
                        # Close the opened quote with a conflict marker.
                        csv_file.write('conflict"')
                        print("conflict")
                else:
                    csv_file.write(';"-";"-";"-"')
            csv_file.write("\n")
    csv_file.close()
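# --- Parsing sketch (not in the original source) --------------------------------
# The slicing in the loop above implies a fixed-width input line. The offsets
# come from the code; the field meanings are inferred from the CSV header:
#   line[0:7]   -> establishment code ("code_etab")
#   line[8:9]   -> one-character global rating ("note_globale")
#   line[11:-2] -> the review text, with trailing delimiter/newline dropped
def parse_pro_line(line):
    """Split one fixed-width review line exactly as main() does."""
    return line[0:7], line[8:9], line[11:-2].strip()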