def write():
    """
    Export the disambiguation table of every file of the database.

    Files are loaded with ``load_deptrees=False``. For each file, all the
    words of all the sentences of all the reviews are gathered, sorted by
    their ``word`` attribute, and written as a tab-separated table with the
    columns ``ID_Word``, ``ID_Synset`` and ``Synset``. The lemma of the word
    is appended at the end of each row after a ``#``. Words without synset
    get ``None`` in both synset columns.

    The output is written under ``../../data/disambiguated/``, in the
    sub-directory and under the file name extracted from ``file.file_name``.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for file in file_load.load_database(load_deptrees=False):
        # Sub-directory (relative to the raw data folder) and .txt file name.
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        tokens = [word
                  for review in file.reviews
                  for sentence in review.sentences
                  for word in sentence.words]
        tokens.sort(key=lambda w: w.word)

        rows = ['ID_Word\tID_Synset\tSynset\n']
        for token in tokens:
            synset = token.synset
            if synset is None:
                id_synset, synset_name = str(None), str(None)
            else:
                id_synset, synset_name = str(synset.id_synset), synset.synset_name
            rows.append(str(token.id_word) + '\t' + id_synset + '\t'
                        + synset_name + '\t# ' + token.lemma + '\n')

        file_writer.write(
            ''.join(rows),
            os.path.join('../../data/disambiguated/', directory[0]),
            filename[0])
def adj_patterns_table():
    """
    Write the adjective pattern tables of the English and Spanish files.

    Files whose path matches ``.*/uci/.+`` are processed first, with the
    default language of ``pattern_recognition.adj_pattern_table``; files
    whose path matches ``.*/corrected/.+`` are processed next with
    ``lang='es'``. Each table is written in
    ``RESULT_PATH/context_tables/adj/<file directory name>/`` under the name
    returned by ``file.get_filename()``.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    # (file path pattern, extra keyword arguments for adj_pattern_table)
    corpora = (
        (r'.*/uci/.+', {}),                     # English files
        (r'.*/corrected/.+', {'lang': 'es'}),   # Spanish files
    )

    for path_pattern, options in corpora:
        id_files = file_load.get_id_files_by_file_paths([path_pattern])
        for file in file_load.load_database(id_files=id_files):
            table = pattern_recognition.adj_pattern_table(
                file.sentence_list(), **options)
            target_directory = os.path.join(
                RESULT_PATH, 'context_tables', 'adj', file.get_directory_name())
            file_writer.write(table, target_directory, file.get_filename())
def verb_patterns_tables():
    """
    Write the verb context tables of the ``uci`` and ``corrected`` files.

    Files whose path matches ``.*/uci/.+`` or ``.*/corrected/.+`` are loaded
    with ``load_deptrees=False``. For each of them, the table computed by
    ``pattern_recognition.verb_context_table`` from the file sentences is
    written in ``RESULT_PATH/context_tables/verbs/<file directory name>/``
    under the name returned by ``file.get_filename()``.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    path_patterns = [r'.*/uci/.+', r'.*/corrected/.+']
    id_files = file_load.get_id_files_by_file_paths(path_patterns)

    for file in file_load.load_database(id_files=id_files, load_deptrees=False):
        context_table = pattern_recognition.verb_context_table(
            file.sentence_list())
        target_directory = os.path.join(
            RESULT_PATH, 'context_tables', 'verbs', file.get_directory_name())
        file_writer.write(context_table, target_directory, file.get_filename())
def write():
    """
    Export the normalized reviews of every file of the database.

    Files are loaded with ``load_deptrees=False``. For each file, one line
    per review is produced: the review ``file_index``, a tabulation, then
    the review text. The result is written under ``../../data/normalized/``,
    in the sub-directory and under the file name extracted from
    ``file.file_name``.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for file in file_load.load_database(load_deptrees=False):
        # Sub-directory (relative to the raw data folder) and .txt file name.
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        lines = [str(review.file_index) + '\t' + review.review + "\n"
                 for review in file.reviews]

        file_writer.write(
            ''.join(lines),
            os.path.join('../../data/normalized/', directory[0]),
            filename[0])
def write_frequencies(files_frequencies, file_path=os.path.join(RESULT_PATH, 'frequencies', 'tables')):
    """
    Write one frequency table per file in the *file_path* folder.

    Each table is a text file named after the corresponding key of
    *files_frequencies*, with one ``label<TAB>frequency`` line per label.

    :param files_frequencies: Dictionary that maps file names to frequencies.
    :type files_frequencies:
        :obj:`dict` of :obj:`str` : :obj:`dict` of label : :obj:`float` .
    :param file_path: Directory path
    :type file_path: |path-like-object|
    """
    import loacore.utils.file_writer as file_writer

    for file_name, frequencies in files_frequencies.items():
        rows = [str(label) + '\t' + str(frequency) + '\n'
                for label, frequency in frequencies.items()]
        file_writer.write(''.join(rows), file_path, file_name)
def write():
    """
    Export the dependency trees of every file of the database.

    For each sentence of each review, a framed header showing the review
    ``file_index`` and the sentence words (joined with spaces) is emitted,
    followed by the string returned by
    ``sentence.dep_tree.dep_tree_str(print_dep_tree=False)`` and two line
    breaks. The result is written under ``../../data/dep_trees/``, in the
    sub-directory and under the file name extracted from ``file.file_name``.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    # Horizontal rule drawn above and below each sentence header.
    rule = "+-------------------------------------------------------------------------------------------\n"

    for file in file_load.load_database():
        # Sub-directory (relative to the raw data folder) and .txt file name.
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        chunks = []
        for review in file.reviews:
            for sentence in review.sentences:
                sentence_words = " ".join([w.word for w in sentence.words])
                chunks.append(rule)
                chunks.append("| File Index : " + str(review.file_index) + "\n")
                chunks.append("| Sentence : " + sentence_words + "\n")
                chunks.append(rule)
                chunks.append(
                    sentence.dep_tree.dep_tree_str(print_dep_tree=False))
                chunks.append("\n\n")

        file_writer.write(
            "".join(chunks),
            os.path.join('../../data/dep_trees/', directory[0]),
            filename[0])
def write():
    """
    Export the tokens of every file of the database.

    Files are loaded with ``load_deptrees=False``. For each file, all the
    words of all the sentences of all the reviews are gathered, sorted by
    their ``word`` attribute, and written as a tab-separated table: a
    ``ID_Word<TAB>token`` header line, then one ``id_word<TAB>word`` line
    per word.

    The output is written under ``../../data/tokenized/``, in the
    sub-directory and under the file name extracted from ``file.file_name``.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for file in file_load.load_database(load_deptrees=False):
        # Sub-directory (relative to the raw data folder) and .txt file name.
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        words = sorted(
            (word
             for review in file.reviews
             for sentence in review.sentences
             for word in sentence.words),
            key=lambda w: w.word)

        # The header must end with a line break, otherwise the first data
        # row ends up on the same line as the column names.
        rows = ['ID_Word\ttoken\n']
        rows.extend(str(word.id_word) + '\t' + word.word + '\n'
                    for word in words)

        file_writer.write(
            ''.join(rows),
            os.path.join('../../data/tokenized/', directory[0]),
            filename[0])
def write_polarity_check(files,
                         analysis_to_check=("simple", "optimistic", "pessimistic",
                                            "pattern_adj_cc", "pattern_adj", "pattern_cc"),
                         ref="label", select="all", terminal_print=True, colored_polarity=True,
                         directory_path=os.path.join(RESULT_PATH, 'sentiment_analysis', 'check')):
    """
    Write polarity in .txt files.

    If *select* is set to *all*, each review is written with polarities
    corresponding to *ref* and *analysis_to_check*.

    If *select* is set to *false_positive* or *false_negative*, only the
    reviews with a false positive or false negative for at least one analysis
    of *analysis_to_check* are written, with the corresponding polarities.

    One file named ``check_<select>_<original file name>`` is written per
    processed file, in the ``colored`` or ``uncolored`` sub-directory of
    *directory_path* depending on *colored_polarity*. The path of each
    written file is printed.

    :param files: Files to process.
    :type files: :obj:`list` of |File|
    :param analysis_to_check: Analysis to compare with *ref*
    :type analysis_to_check: :obj:`sequence` of :obj:`str`
    :param ref: Reference analysis
    :type ref: string
    :param select: Select option
    :type select: :obj:`str` : {'all', 'false_positive', 'false_negative'}
    :param terminal_print: If True, print results in the terminal.
    :type terminal_print: boolean
    :param colored_polarity:
        If True, write words with colored polarity. Notice that colors are
        not displayed in most of the .txt editors. For example, in Linux, use

        .. code-block:: console

            cat file_name.txt

        to show the colored file in terminal.
    :type colored_polarity: boolean
    :param directory_path:
        Path of the directory in which to write files.
        Default value computed using
        os.path.join(RESULT_PATH, 'sentiment_analysis', 'check')
    :type directory_path: |path-like-object|
    """
    import loacore.utils.file_writer as file_writer

    # Colored and uncolored outputs are kept in separate sub-directories.
    output_directory = os.path.join(
        directory_path, "colored" if colored_polarity else "uncolored")

    for file in files:
        if select == "all":
            # *ref* is a string and *analysis_to_check* a sequence: build an
            # explicit list (same shape as in the filtered branch below)
            # instead of concatenating them directly, which raises TypeError.
            check_str = '\n'.join(
                r.review_str(colored_polarity=colored_polarity,
                             analysis=[ref] + list(analysis_to_check))
                for r in file.reviews)
        else:
            false_str = []
            for review in file.reviews:
                # The reference analysis is always shown next to the
                # analyses that disagree with it.
                selected_analysis = [ref]
                for analysis in analysis_to_check:
                    if (select == "false_positive"
                            and is_false_positive(review, ref, analysis)) \
                            or (select == "false_negative"
                                and is_false_negative(review, ref, analysis)):
                        selected_analysis.append(analysis)
                # Keep the review only if at least one analysis was selected.
                if len(selected_analysis) > 1:
                    false_str.append(
                        review.review_str(colored_polarity=colored_polarity,
                                          analysis=selected_analysis))
            check_str = '\n'.join(false_str)

        if terminal_print:
            print(check_str)

        output_name = "check_" + select + "_" + file.get_filename()
        file_writer.write(check_str, output_directory, output_name)
        print("File write : " + str(os.path.join(output_directory, output_name)))