Example #1
0
def adj_patterns_table():
    """
    Build an adjective pattern table for each selected database file.

    Two groups of files are processed in turn: files whose path matches
    ``.*/uci/.+`` (English), with the default arguments of
    ``adj_pattern_table``, then files whose path matches ``.*/corrected/.+``
    (Spanish), with ``lang='es'``. Each table is passed to
    ``file_writer.write`` together with the directory
    *RESULT_PATH/context_tables/adj/<file directory name>* and the name
    returned by ``file.get_filename()``.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    # (path regex, extra keyword arguments) : English files, then Spanish files
    corpora = (
        (r'.*/uci/.+', {}),
        (r'.*/corrected/.+', {'lang': 'es'}),
    )

    for path_regex, extra_kwargs in corpora:
        id_files = file_load.get_id_files_by_file_paths([path_regex])
        for loaded in file_load.load_database(id_files=id_files):
            table = pattern_recognition.adj_pattern_table(
                loaded.sentence_list(), **extra_kwargs)
            target_dir = os.path.join(
                RESULT_PATH, 'context_tables', 'adj',
                loaded.get_directory_name())
            file_writer.write(table, target_dir, loaded.get_filename())
Example #2
0
def write():
    """
    Write a word / synset listing for every file of the database.

    For each loaded file, the words of all its sentences are gathered,
    sorted on their ``word`` attribute and rendered as tab separated lines
    ``ID_Word``, ``ID_Synset``, ``Synset`` followed by ``# <lemma>``. Words
    without synset get ``None`` in both synset columns.

    The text is handed to ``file_writer.write`` with a directory built from
    ``../../data/disambiguated/`` and the part of ``file.file_name`` captured
    after ``../../data/raw/``, and the ``.txt`` name captured from
    ``file.file_name``.

    :raises IndexError: if ``file.file_name`` matches neither regex.
    """
    # Imported locally, like the other dependencies of this function, so the
    # function does not rely on module-level ``os`` / ``re`` imports.
    import os
    import re
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database(load_deptrees=False)

    for file in files:
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)
        words = sorted(
            (word
             for review in file.reviews
             for sentence in review.sentences
             for word in sentence.words),
            key=lambda w: w.word)

        # Lines are collected and joined once instead of growing a string
        # with repeated `+=`.
        lines = ['ID_Word\tID_Synset\tSynset\n']
        for word in words:
            if word.synset is not None:
                id_synset = str(word.synset.id_synset)
                synset_name = word.synset.synset_name
            else:
                id_synset = str(None)
                synset_name = str(None)
            lines.append(str(word.id_word) + '\t' + id_synset + '\t' +
                         synset_name + '\t# ' + word.lemma + '\n')

        file_writer.write(
            ''.join(lines),
            os.path.join('../../data/disambiguated/', directory[0]),
            filename[0])
Example #3
0
def test_count():
    """
    Print a table of vocabulary sizes for the uci and corrected files.

    For every loaded file, ``voc.word_count``, ``voc.lemma_count`` and
    ``voc.synset_count`` are computed (all word counts first, then all lemma
    counts, then all synset counts) and displayed in a PrettyTable with one
    row per file.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.vocabularies as voc
    from prettytable import PrettyTable

    path_patterns = [r'.*/uci/.+', r'.*/corrected/.+']
    ids = file_load.get_id_files_by_file_paths(path_patterns)
    files = file_load.load_database(id_files=ids, load_deptrees=False)

    # One mapping ID_File -> count per kind of vocabulary
    words_by_id = {f.id_file: voc.word_count(f) for f in files}
    lemmas_by_id = {f.id_file: voc.lemma_count(f) for f in files}
    synsets_by_id = {f.id_file: voc.synset_count(f) for f in files}

    print("Sizes of vocabularies")
    table = PrettyTable(['File', 'Word Count', 'Lemma Count', 'Synset Count'])
    for f in files:
        key = f.id_file
        row = [f.get_filename(),
               words_by_id[key],
               lemmas_by_id[key],
               synsets_by_id[key]]
        table.add_row(row)

    print(table)
Example #4
0
def test_multiprocessed_load():
    """
    Load file 1 with ``workers=4`` and print the dependency tree string
    (``colored_polarity=True``) of each of its sentences.
    """
    import loacore.load.file_load as file_load

    loaded_files = file_load.load_database(id_files=[1], workers=4)
    all_sentences = (sentence
                     for loaded in loaded_files
                     for review in loaded.reviews
                     for sentence in review.sentences)
    for sentence in all_sentences:
        print(sentence.dep_tree.dep_tree_str(colored_polarity=True))
Example #5
0
def test_tokens():
    """
    Print the value returned by ``word2vec.get_tokens_list`` for the uci
    files, loaded without dependency trees.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids, load_deptrees=False)

    tokens = we.get_tokens_list(uci_files)
    print(tokens)
Example #6
0
def test_check_polarities():
    """
    Run ``polarity_check.check_polarity`` on the uci files, loaded with
    ``load_sentences=False``.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.polarity_check as polarity_check

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_sentences=False)
    polarity_check.check_polarity(uci_files)
Example #7
0
def test_full_process():
    """
    Run ``svm.full_process`` on the files matching
    ``.*/uci/yelp_labelled.txt``, loaded without dependency trees.
    """
    import loacore.learning.svm as svm
    import loacore.load.file_load as file_load

    yelp_pattern = r'.*/uci/yelp_labelled.txt'
    yelp_ids = file_load.get_id_files_by_file_paths([yelp_pattern])
    yelp_files = file_load.load_database(id_files=yelp_ids,
                                         load_deptrees=False)
    svm.full_process(yelp_files)
Example #8
0
def test_colored_reviews():
    """
    Print each review of the uci files using ``review_str`` with
    ``colored_polarity=True`` and ``analysis=['label']``.
    """
    import loacore.load.file_load as file_load

    # NOTE(review): this calls the singular ``get_id_files_by_file_path``
    # with a single regex, where similar code uses
    # ``get_id_files_by_file_paths`` with a list -- confirm the singular
    # function exists in the targeted loacore version.
    uci_ids = file_load.get_id_files_by_file_path(r'.*/uci/.+')
    uci_files = file_load.load_database(id_files=uci_ids, load_deptrees=False)

    all_reviews = (review for loaded in uci_files for review in loaded.reviews)
    for review in all_reviews:
        print(review.review_str(colored_polarity=True, analysis=['label']))
Example #9
0
def test_vectors():
    """
    Compute word vectors from the uci files with ``word2vec.word_2_vec`` and
    print the entry stored under ``'movie'``, then its length.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids, load_deptrees=False)

    movie_vector = we.word_2_vec(uci_files)['movie']
    print(movie_vector)
    print(len(movie_vector))
Example #10
0
def test_print_colored_deptree():
    """
    Print the dependency tree string (``colored_polarity=True``) of every
    sentence of the file whose ID is 1.
    """
    import loacore.load.file_load as file_load
    import itertools

    # ids = file_load.get_id_files_by_file_paths([r'.*/uci/yelp_labelled.txt'])
    selected_ids = [1]
    loaded_files = file_load.load_database(id_files=selected_ids)

    # Flatten files -> reviews -> sentences, in loading order
    all_reviews = itertools.chain.from_iterable(
        loaded.reviews for loaded in loaded_files)
    all_sentences = itertools.chain.from_iterable(
        review.sentences for review in all_reviews)
    for sentence in all_sentences:
        print(sentence.dep_tree.dep_tree_str(colored_polarity=True))
Example #11
0
def test_load_polarities():
    """
    Print, for each review of the uci files, its text followed by the
    ``pos_score``, ``neg_score`` and ``obj_score`` of its ``"label"``
    polarity.
    """
    import loacore.load.file_load as file_load
    import itertools

    # NOTE(review): singular ``get_id_files_by_file_path`` with one regex,
    # where similar code uses ``get_id_files_by_file_paths`` with a list --
    # confirm it exists in the targeted loacore version.
    uci_ids = file_load.get_id_files_by_file_path(r'.*/uci/.+')
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_sentences=False)

    all_reviews = itertools.chain.from_iterable(
        loaded.reviews for loaded in uci_files)
    for review in all_reviews:
        label = review.polarities["label"]
        print(review.review, " : ", label.pos_score, ", ",
              label.neg_score, ", ",
              label.obj_score)
Example #12
0
def test_pos_tag_pattern():
    """
    Print the patterns found by ``pos_tag_patterns_recognition`` for the
    pattern ``['RN']`` in the sentences of files 31 and 33.
    """
    import loacore.analysis.pattern_recognition as pattern_recognition
    # ``file_load`` is imported locally like the other dependency: nothing
    # else in this function brings it into scope.
    import loacore.load.file_load as file_load

    files = file_load.load_database(id_files=[31, 33])
    sentences = [sentence
                 for file in files
                 for review in file.reviews
                 for sentence in review.sentences]

    patterns = pattern_recognition.pos_tag_patterns_recognition(
        sentences, ['RN'])
    pattern_recognition.print_patterns(patterns)
Example #13
0
def test_polarity_pie_charts():
    """
    Compute the simple polarity of the selected files and save the
    corresponding pie charts.

    The result of ``compute_simple_files_polarity`` is passed to
    ``plot_polarities.save_polarity_pie_charts`` with the directory
    *RESULT_PATH/sentiment_analysis/simple/uci* and the file name
    ``uci_polarity_pie_charts.pdf``.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.sentiment_analysis as sentiment_analysis
    import loacore.utils.plot_polarities as plot_polarities
    import os
    from loacore.conf import RESULT_PATH

    # NOTE(review): this regex starts with a single ``.`` where similar code
    # selecting uci files uses ``.*/uci/.+`` -- confirm this is intended.
    selected_ids = file_load.get_id_files_by_file_paths([r'./uci/.+'])
    loaded_files = file_load.load_database(id_files=selected_ids)
    polarities = sentiment_analysis.compute_simple_files_polarity(loaded_files)

    output_dir = os.path.join(RESULT_PATH, 'sentiment_analysis', 'simple', 'uci')
    plot_polarities.save_polarity_pie_charts(
        polarities,
        file_path=output_dir,
        file_name='uci_polarity_pie_charts.pdf')
Example #14
0
def test_write_polarity_check():
    """
    Call ``polarity_check.write_polarity_check`` on the uci files.

    The analyses checked are ``'simple'`` and ``'pattern_adj_cc'``, with
    ``select='false_negative'`` and colored polarity, and the directory
    *RESULT_PATH/sentiment_analysis/check/uci* is given as output path.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.polarity_check as polarity_check
    from loacore.conf import RESULT_PATH
    import os

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids, load_deptrees=False)

    output_dir = os.path.join(RESULT_PATH, 'sentiment_analysis', 'check', 'uci')
    polarity_check.write_polarity_check(
        uci_files,
        analysis_to_check=['simple', 'pattern_adj_cc'],
        colored_polarity=True,
        select='false_negative',
        directory_path=output_dir)
Example #15
0
def test_review_vectors():
    """
    Print the text and the vector of the first reviews of the uci files.

    Word vectors are computed with ``word2vec.word_2_vec``, review vectors
    with ``word2vec.reviews_2_vec``. At most three reviews are displayed;
    fewer are displayed when fewer than three reviews were loaded.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we
    ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    files = file_load.load_database(id_files=ids, load_deptrees=False)

    word_vectors = we.word_2_vec(files)

    reviews = [review for file in files for review in file.reviews]

    review_vectors = we.reviews_2_vec(reviews, word_vectors)
    # Bounded by the number of loaded reviews, so that a small selection
    # does not raise an IndexError.
    for i in range(min(3, len(reviews))):
        print(reviews[i].review)
        print(review_vectors[i])
Example #16
0
def test_label_pattern():
    """
    Print the nodes of the patterns found by ``label_patterns_recognition``
    for the pattern ``['neg']`` in the sentences of files 31 and 33.

    Each node is displayed as ``<word> : <label>``.
    """
    import loacore.analysis.pattern_recognition as pattern_recognition
    # ``file_load`` is imported locally like the other dependency: nothing
    # else in this function brings it into scope.
    import loacore.load.file_load as file_load

    files = file_load.load_database(id_files=[31, 33])
    sentences = [sentence
                 for file in files
                 for review in file.reviews
                 for sentence in review.sentences]

    patterns = pattern_recognition.label_patterns_recognition(
        sentences, ['neg'])
    for pattern in patterns:
        for node in pattern:
            print(node.word.word + " : " + node.label)
Example #17
0
def verb_patterns_tables():
    """
    Build a verb context table for each uci and corrected file.

    Each table returned by ``verb_context_table`` is passed to
    ``file_writer.write`` together with the directory
    *RESULT_PATH/context_tables/verbs/<file directory name>* and the name
    returned by ``file.get_filename()``.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    path_patterns = [r'.*/uci/.+', r'.*/corrected/.+']
    selected_ids = file_load.get_id_files_by_file_paths(path_patterns)
    loaded_files = file_load.load_database(id_files=selected_ids,
                                           load_deptrees=False)
    for loaded in loaded_files:
        table = pattern_recognition.verb_context_table(loaded.sentence_list())
        target_dir = os.path.join(
            RESULT_PATH, 'context_tables', 'verbs',
            loaded.get_directory_name())
        file_writer.write(table, target_dir, loaded.get_filename())
Example #18
0
def test_load_db():
    """
    Load the whole database and print its content.

    For each review, its ID and text are displayed, then, for each word of
    each of its sentences, the word, its lemma and the name of its synset
    (``None`` when the word has no synset).
    """
    # ``file_load`` is imported locally: nothing else in this function
    # brings it into scope.
    import loacore.load.file_load as file_load

    files = file_load.load_database()

    for file in files:
        for review in file.reviews:
            print("Review : ")
            print(str(review.id_review) + " : " + review.review)
            for sentence in review.sentences:
                print("    Sentence : ")
                for word in sentence.words:
                    print("        Word : " + word.word + " : " + word.lemma)
                    print("            Lemma : " + word.lemma)
                    if word.synset is not None:
                        print("            Synset : " +
                              word.synset.synset_name)
                    else:
                        print("            Synset : None")
Example #19
0
def write():
    """
    Write the reviews of every file of the database, one review per line.

    Each line is made of the ``file_index`` of the review, a tabulation and
    the review text. The text is handed to ``file_writer.write`` with a
    directory built from ``../../data/normalized/`` and the part of
    ``file.file_name`` captured after ``../../data/raw/``, and the ``.txt``
    name captured from ``file.file_name``.

    :raises IndexError: if ``file.file_name`` matches neither regex.
    """
    # Imported locally, like the other dependencies of this function, so the
    # function does not rely on module-level ``os`` / ``re`` imports.
    import os
    import re
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database(load_deptrees=False)

    for file in files:
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)
        # Joined once instead of growing a string with repeated `+=`.
        text = ''.join(
            str(review.file_index) + '\t' + review.review + "\n"
            for review in file.reviews)
        file_writer.write(text,
                          os.path.join('../../data/normalized/', directory[0]),
                          filename[0])
Example #20
0
def test_pos_tag_frequencies():
    """
    Compute the PoS tag frequencies of the corrected files, then write them
    and save them as a bar chart.

    Frequencies are passed to ``write_frequencies`` with the directory
    *RESULT_PATH/frequencies/simple_pos_tag_frequencies/table/corrected*,
    then to ``frequencies_bar_chart`` (``plot=False``, ``save=True``,
    ``val_number=60``) with the directory
    *RESULT_PATH/frequencies/simple_pos_tag_frequencies/charts/corrected*
    and the file name ``corrected_simple_pos_tag_frequencies.pdf``.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.frequencies as frequencies
    import loacore.utils.plot_frequencies as plot_frequencies
    import os
    from loacore.conf import RESULT_PATH

    corrected_ids = file_load.get_id_files_by_file_paths([r'.*/corrected/.+'])
    corrected_files = file_load.load_database(id_files=corrected_ids,
                                              load_reviews=False)
    # Only the frequencies are used, the returned labels are ignored
    _labels, freq = frequencies.pos_tag_frequencies(corrected_files)

    base_dir = os.path.join(RESULT_PATH, 'frequencies', 'simple_pos_tag_frequencies')
    plot_frequencies.write_frequencies(
        freq,
        file_path=os.path.join(base_dir, 'table', 'corrected'))
    plot_frequencies.frequencies_bar_chart(
        freq,
        plot=False,
        save=True,
        file_path=os.path.join(base_dir, 'charts', 'corrected'),
        file_name="corrected_simple_pos_tag_frequencies.pdf",
        val_number=60)
Example #21
0
def print_polarity_table(file_score_dict):
    """
    Print a table with columns File, Pos_Score, Neg_Score and Obj_Score.

    Displayed file names are the ``.txt`` names extracted from the
    ``file_name`` of the files loaded from the keys of *file_score_dict*.
    Notice that displayed scores are rounded to three decimals.

    :param file_score_dict:
        A :obj:`dict` that maps file IDs to a score tuple, whose first three
        items are displayed as positive, negative and objective scores.
    :type file_score_dict: :obj:`dict` of :obj:`int` : :obj:`tuple`
    """

    import re
    from prettytable import PrettyTable
    import loacore.load.file_load as file_load

    # Only the files themselves are needed, not their content
    loaded_files = file_load.load_database(
        id_files=file_score_dict.keys(), load_reviews=False, load_sentences=False,
        load_words=False, load_deptrees=False)
    names_by_id = {
        loaded.id_file: re.findall(r'.+/(.+\.txt)', loaded.file_name)[0]
        for loaded in loaded_files
    }

    table = PrettyTable(['File', 'Pos_Score', 'Neg_Score', 'Obj_Score'])
    for id_file, scores in file_score_dict.items():
        row = [names_by_id[id_file]]
        for position in (0, 1, 2):
            row.append("%.3f" % scores[position])
        table.add_row(row)
    print(table)
Example #22
0
def test_general_pattern():
    """
    Print the patterns found by ``general_pattern_recognition`` in the
    sentences of files 31 and 33.

    The recognition is called with the pattern
    ``[['V'], ['cc', 'cd', 'ci']]`` and the types ``['pos_tag', 'label']``.
    Each pattern is displayed on one line, between parenthesis, each node as
    ``<word> : <PoS tag> : <label>``.
    """
    import loacore.analysis.pattern_recognition as pattern_recognition
    # ``file_load`` is imported locally like the other dependency: nothing
    # else in this function brings it into scope.
    import loacore.load.file_load as file_load

    files = file_load.load_database(id_files=[31, 33])
    sentences = [sentence
                 for file in files
                 for review in file.reviews
                 for sentence in review.sentences]

    patterns = pattern_recognition.general_pattern_recognition(
        sentences, [['V'], ['cc', 'cd', 'ci']], ['pos_tag', 'label'])
    for pattern in patterns:
        print("( ", end='')
        # All the nodes but the last one are followed by a comma
        for node in pattern[:-1]:
            print(node.word.word,
                  " : ",
                  node.word.PoS_tag,
                  " : ",
                  node.label,
                  end=", ")
        last_node = pattern[-1]
        print(last_node.word.word, " : ", last_node.word.PoS_tag, " : ",
              last_node.label, " )")
Example #23
0
def write():
    """
    Write a token listing for every file of the database.

    For each loaded file, the words of all its sentences are gathered,
    sorted on their ``word`` attribute and rendered as tab separated lines
    ``ID_Word``, ``token``, after a header line.

    The text is handed to ``file_writer.write`` with a directory built from
    ``../../data/tokenized/`` and the part of ``file.file_name`` captured
    after ``../../data/raw/``, and the ``.txt`` name captured from
    ``file.file_name``.

    :raises IndexError: if ``file.file_name`` matches neither regex.
    """
    # Imported locally, like the other dependencies of this function, so the
    # function does not rely on module-level ``os`` / ``re`` imports.
    import os
    import re
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database(load_deptrees=False)

    for file in files:
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)
        words = sorted(
            (word
             for review in file.reviews
             for sentence in review.sentences
             for word in sentence.words),
            key=lambda w: w.word)

        # The header ends with a newline, so that the first word is written
        # on its own line instead of being appended to the header.
        lines = ['ID_Word\ttoken\n']
        lines.extend(str(word.id_word) + '\t' + word.word + '\n'
                     for word in words)

        file_writer.write(''.join(lines),
                          os.path.join('../../data/tokenized/', directory[0]),
                          filename[0])
Example #24
0
def write():
    """
    Write the dependency trees of every file of the database.

    For each sentence, a framed header shows the ``file_index`` of its
    review and its words joined by spaces, followed by the string returned
    by ``dep_tree_str(print_dep_tree=False)`` and two newlines.

    The text is handed to ``file_writer.write`` with a directory built from
    ``../../data/dep_trees/`` and the part of ``file.file_name`` captured
    after ``../../data/raw/``, and the ``.txt`` name captured from
    ``file.file_name``.

    :raises IndexError: if ``file.file_name`` matches neither regex.
    """
    # Imported locally, like the other dependencies of this function, so the
    # function does not rely on module-level ``os`` / ``re`` imports.
    import os
    import re
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database()

    for file in files:
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)
        # Chunks are collected and joined once instead of growing a string
        # with repeated `+=`.
        chunks = []
        for review in file.reviews:
            for sentence in review.sentences:
                chunks.append("+-------------------------------------------------------------------------------------------\n")
                chunks.append("| File Index : " + str(review.file_index) + "\n")
                chunks.append("| Sentence : " + " ".join(
                    [w.word for w in sentence.words]) + "\n")
                chunks.append("+-------------------------------------------------------------------------------------------\n")
                chunks.append(sentence.dep_tree.dep_tree_str(print_dep_tree=False))
                chunks.append("\n\n")
        file_writer.write(''.join(chunks),
                          os.path.join('../../data/dep_trees/', directory[0]),
                          filename[0])
Example #25
0
def test_polarity_pos_tags(polarity):
    """
    Compute the PoS tag frequencies of the uci files for the given polarity,
    then write them and save them as a bar chart.

    Frequencies are passed to ``write_frequencies``, then to
    ``frequencies_bar_chart`` (``plot=False``, ``save=True``,
    ``val_number=60``). Output directories are located under
    *RESULT_PATH/frequencies/polarity/<polarity>/<polarity>_pos_tag_frequencies*,
    respectively in *table/uci* and *charts/uci*.

    :param polarity:
        Value passed to ``polarity_word_pos_tag_frequencies``, also used to
        build output paths and the chart file name.
    :type polarity: string
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.frequencies as frequencies
    import loacore.utils.plot_frequencies as plot_frequencies
    import os
    from loacore.conf import RESULT_PATH

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids, load_reviews=False)
    # Only the frequencies are used, the returned labels are ignored
    _labels, freq = frequencies.polarity_word_pos_tag_frequencies(uci_files, polarity)

    base_dir = os.path.join(
        RESULT_PATH, 'frequencies', 'polarity', polarity, polarity + '_pos_tag_frequencies')
    plot_frequencies.write_frequencies(
        freq,
        file_path=os.path.join(base_dir, 'table', 'uci'))
    plot_frequencies.frequencies_bar_chart(
        freq,
        plot=False,
        save=True,
        file_path=os.path.join(base_dir, 'charts', 'uci'),
        file_name="uci_" + polarity + "_pos_tag_frequencies.pdf",
        val_number=60)
Example #26
0
def save_polarity_pie_charts(file_score_dict, gui=False,
                             file_path=os.path.join(RESULT_PATH, 'sentiment_analysis'),
                             file_name='polarity_pie_charts.pdf'):
    """
    Plot polarity pie charts using Matplotlib, and save them into a .pdf file.

    One pie chart is drawn per file loaded from the keys of
    *file_score_dict*, titled with the ``.txt`` name of the file. Charts are
    laid out on rows of at most 4 charts.

    :param file_score_dict:
        Data to plot and save. The :obj:`dict` maps ID_Files to a polarity tuple (pos_score, neg_score, obj_score).
    :type file_score_dict: :obj:`dict` of :obj:`int` : :obj:`tuple`
    :param gui: Specify if a gui should be used to save file.
    :type gui: boolean
    :param file_path:
        If gui is not called : path of the directory in which plots will be saved.
        If directory doesn't exist, will be created.
        Default is set to *RESULT_PATH/sentiment_analysis/*
    :type file_path: |path-like-object|
    :param file_name: Name of the saved file.
    :type file_name: string
    """
    import loacore.load.file_load as file_load
    files = file_load.load_database(id_files=file_score_dict.keys(), load_reviews=False, load_sentences=False,
                                    load_words=False, load_deptrees=False)
    # Titles and scores are both derived from the loaded files, so that each
    # chart shows the scores of the file it is titled with, whatever the
    # order in which files are returned.
    pies_titles = [re.findall(r'.+/(.+\.txt)', f.file_name)[0] for f in files]
    pies_data = [file_score_dict[f.id_file] for f in files]

    colors = ['green', 'red', 'skyblue']
    max_column = 4
    # Ceiling division : a number of charts that is a multiple of 4 does not
    # produce an additional empty row.
    num_row = max(1, (len(pies_titles) + max_column - 1) // max_column)
    if num_row <= 1:
        num_column = len(pies_titles)
    else:
        num_column = max_column
    fig, axes = plt.subplots(num_row, num_column, constrained_layout=False)
    # With a single chart, plt.subplots returns one Axes, not an array
    if num_column <= 1:
        axes.axis("off")
    else:
        for x in axes.flatten():
            x.axis("off")
    fig.set_figheight(5 * num_row)
    fig.set_figwidth(5 * num_column)
    for index, (title, scores) in enumerate(zip(pies_titles, pies_data)):
        axe = fig.add_subplot(num_row, num_column, index + 1)
        axe.pie(scores, colors=colors, autopct='%1.1f%%')
        axe.set_title(title, size='x-small')
    pos_patch = mpatches.Patch(color='green', label='Positive')
    neg_patch = mpatches.Patch(color='red', label='Negative')
    obj_patch = mpatches.Patch(color='skyblue', label='Objective')
    plt.figlegend(handles=[pos_patch, neg_patch, obj_patch])
    plt.suptitle("Polarity computation results", size='xx-large')

    # Save PDF
    from matplotlib.backends.backend_pdf import PdfPages
    if gui:
        from tkinter import filedialog
        from tkinter import Tk

        root = Tk()
        root.withdraw()
        root.filename = filedialog.asksaveasfilename(initialdir="/", title="Select file",
                                                     filetypes=[("PDF", "*.pdf")])
        pdf_target = root.filename
    else:
        # exist_ok avoids the race between an existence check and the creation
        os.makedirs(file_path, exist_ok=True)
        pdf_target = os.path.join(file_path, file_name)
    # The context manager closes the PDF even if saving the figure fails
    with PdfPages(pdf_target) as pp:
        pp.savefig(fig)
Example #27
0
def load_uci():
    """
    Load the database file whose ID is 2.

    :return: The value returned by ``file_load.load_database(id_files=[2])``.
    """
    import loacore.load.file_load as file_load

    uci_id_files = [2]
    loaded = file_load.load_database(id_files=uci_id_files)
    return loaded
Example #28
0
def pattern_test():
    """
    Load the files whose IDs are 1, 2 and 3, and run
    ``sentiment_analysis.compute_pattern_files_polarity`` on them.
    """
    import loacore.load.file_load as file_load
    selected_ids = [1, 2, 3]
    loaded_files = file_load.load_database(id_files=selected_ids)

    # Imported after the files are loaded, as the analysis is only needed now
    import loacore.analysis.sentiment_analysis as sentiment_analysis
    sentiment_analysis.compute_pattern_files_polarity(loaded_files)