Ejemplo n.º 1
0
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Both directories are converted to ROUGE format inside a temporary
    directory, a settings file is written next to them, and the Perl
    script is run against that settings file.

    Args:
        dec_pattern: filename pattern of the decoded (system) summaries,
            forwarded to Rouge155.write_config_static.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern of the reference summaries.
        ref_dir: directory holding the reference summaries.
        cmd: whitespace-separated options forwarded to ROUGE-1.5.5.pl.
        system_id: system id written into the settings file.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    # NOTE(review): installation path is hard-coded for one machine; the
    # previous try/except KeyError around this assignment could never fire.
    _ROUGE_PATH = '/home/yhj/ROUGE/RELEASE-1.5.5'

    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        dec_out = os.path.join(tmp_dir, 'dec')
        ref_out = os.path.join(tmp_dir, 'ref')
        settings = os.path.join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        Rouge155.write_config_static(
            dec_out, dec_pattern,
            ref_out, ref_pattern,
            settings, system_id
        )
        # Build the argument vector directly so that paths containing
        # spaces and repeated blanks in `cmd` cannot corrupt the arguments.
        argv = ([os.path.join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', os.path.join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        output = sp.check_output(argv, universal_newlines=True)
    return output
Ejemplo n.º 2
0
def eval_rouge(dec_dir, ref_dir):
    """Evaluate `<n>.dec` summaries against `<n>.ref` references with ROUGE.

    Runs the original Perl ROUGE-1.5.5 script on ROUGE-formatted copies of
    both directories that are created in a temporary directory.

    Args:
        dec_dir: directory holding the decoded summaries.
        ref_dir: directory holding the reference summaries.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    assert _ROUGE_PATH is not None
    assert os.path.exists(_ROUGE_PATH)
    print(_ROUGE_PATH)
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.ref'
    cmd = '-c 95 -r 1000 -n 2 -m'
    with tempfile.TemporaryDirectory() as tmp_dir:
        dec_out = join(tmp_dir, 'dec')
        ref_out = join(tmp_dir, 'ref')
        settings = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        Rouge155.write_config_static(
            dec_out, dec_pattern,
            ref_out, ref_pattern,
            settings, system_id=1
        )
        # Pass an argument list so paths with spaces stay intact.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        print("cmd:{}".format(' '.join(argv)))
        output = sp.check_output(argv, universal_newlines=True)
    return output
Ejemplo n.º 3
0
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Each preparation step is best-effort: a failure is reported with its
    step label and traceback, and evaluation continues to the next step.

    Args:
        dec_pattern: filename pattern of the decoded (system) summaries.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern of the reference summaries.
        ref_dir: directory holding the reference summaries.
        cmd: whitespace-separated options forwarded to ROUGE-1.5.5.pl.
        system_id: system id written into the settings file.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    import traceback

    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        dec_out = join(tmp_dir, 'dec')
        ref_out = join(tmp_dir, 'ref')
        settings = join(tmp_dir, 'settings.xml')
        # `except Exception` (not a bare except) so Ctrl-C and SystemExit
        # still propagate; the traceback makes the root cause visible.
        try:
            Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        except Exception:
            print("part1:")
            traceback.print_exc()
        try:
            Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        except Exception:
            print("part2:")
            traceback.print_exc()
        try:
            Rouge155.write_config_static(
                dec_out, dec_pattern,
                ref_out, ref_pattern,
                settings, system_id
            )
        except Exception:
            print("part3:")
            traceback.print_exc()
        # Pass an argument list so paths with spaces stay intact.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        output = sp.check_output(argv, universal_newlines=True)
    return output
Ejemplo n.º 4
0
def get_rouge(path, dec):
    """Write `dec` as decode/0.dec under `path` and score it with ROUGE.

    Args:
        path: directory containing a `decode` and a `reference`
            sub-directory.
        dec: iterable of sentences; each is written on its own line to
            `decode/0.dec`.

    Returns:
        The mean of three floats read from fixed rows (3, 7 and 11) of the
        ROUGE output.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.ref'
    dec_dir = join(path, 'decode')
    ref_dir = join(path, 'reference')

    with open(join(dec_dir, '0.dec'), 'w') as f:
        for sentence in dec:
            print(sentence, file=f)

    cmd = '-c 95 -r 1000 -n 2 -m'
    with tempfile.TemporaryDirectory() as tmp_dir:
        dec_out = join(tmp_dir, 'dec')
        ref_out = join(tmp_dir, 'ref')
        settings = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        Rouge155.write_config_static(dec_out,
                                     dec_pattern,
                                     ref_out,
                                     ref_pattern,
                                     settings,
                                     system_id=1)
        # Pass an argument list so paths with spaces stay intact.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        output = sp.check_output(argv, universal_newlines=True)

    # NOTE(review): rows 3/7/11 are presumably the ROUGE-1/2/L Average_F
    # lines of the report; confirm against the script's output layout.
    rows = output.split('\n')
    rouge1 = float(rows[3].split(' ')[3])
    rouge2 = float(rows[7].split(' ')[3])
    rougel = float(rows[11].split(' ')[3])
    return (rouge1 + rouge2 + rougel) / 3
def eval_rouge(dec_pattern,
               dec_dir,
               ref_pattern,
               ref_dir,
               dir_name,
               cmd='-c 95 -r 1000 -n 2 -m',
               system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Unlike a temporary-directory based evaluation, the ROUGE-formatted
    files are written to a fixed per-run `temp` directory, which is wiped
    and recreated on every call.

    Args:
        dec_pattern: filename pattern of the decoded (system) summaries.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern of the reference summaries.
        ref_dir: directory holding the reference summaries.
        dir_name: name of the output sub-directory the `temp` directory
            is created in.
        cmd: whitespace-separated options forwarded to ROUGE-1.5.5.pl.
        system_id: system id written into the settings file.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # NOTE(review): output root is hard-coded for one machine.
    tmp_dir = '/mnt/e/Work/Ahmed/Summarization/SummRuNNer/output/{}/temp/'.format(
        dir_name)

    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    # makedirs also creates the missing `output/<dir_name>` parent.
    os.makedirs(tmp_dir)

    dec_out = join(tmp_dir, 'dec')
    ref_out = join(tmp_dir, 'ref')
    settings = join(tmp_dir, 'settings.xml')
    Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
    Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
    Rouge155.write_config_static(dec_out, dec_pattern,
                                 ref_out, ref_pattern,
                                 settings, system_id)
    # Argument list without a shell: `dir_name` and the paths can no longer
    # be interpreted as shell syntax.
    # NOTE(review): running the scorer through sudo is kept as-is; confirm
    # it is really required.
    argv = (['sudo', 'perl', _ROUGE_PATH + '/ROUGE-1.5.5.pl',
             '-e', join(_ROUGE_PATH, 'data')]
            + cmd.split()
            + ['-a', settings])
    output = sp.check_output(argv, universal_newlines=True)
    return output
Ejemplo n.º 6
0
def eval_rouge(dec_pattern,
               dec_dir,
               ref_pattern,
               ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m',
               system_id=1,
               force=False):
    """Run the Perl ROUGE-1.5.5 script, reusing converted files on disk.

    The ROUGE-formatted copies and the settings file are stored next to
    `dec_dir` / `ref_dir` and are only regenerated when they do not exist
    yet or when `force` is true.

    Returns the text the ROUGE script wrote to stdout.
    """
    assert _ROUGE_PATH is not None
    # keep pyrouge quiet
    log.get_global_console_logger().setLevel(logging.WARNING)

    def needs_build(target):
        # Missing on disk, or the caller asked for a rebuild.
        return not os.path.exists(target) or force

    rouge_dec = join(dec_dir, '../rouge_dec')
    if needs_build(rouge_dec):
        Rouge155.convert_summaries_to_rouge_format(dec_dir, rouge_dec)

    ref_name = basename(normpath(ref_dir))
    rouge_ref = join(ref_dir, '../rouge_{}_ref'.format(ref_name))
    if needs_build(rouge_ref):
        Rouge155.convert_summaries_to_rouge_format(ref_dir, rouge_ref)

    rouge_settings = join(dec_dir, '../rouge_settings.xml')
    if needs_build(rouge_settings):
        Rouge155.write_config_static(rouge_dec, dec_pattern, rouge_ref,
                                     ref_pattern, rouge_settings, system_id)

    command_line = '{} -e {} {} -a {}'.format(
        join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
        join(_ROUGE_PATH, 'data'),
        cmd,
        rouge_settings)
    return sp.check_output(command_line.split(' '), universal_newlines=True)
Ejemplo n.º 7
0
 def eval_rouge(dec_dir, ref_dir):
     """Score `<n>.dec` summaries against `<n>.ref` references with ROUGE.

     Args:
         dec_dir: directory holding the decoded summaries.
         ref_dir: directory holding the reference summaries.

     Returns:
         A tuple (R_1, R_2, R_L) of floats read from fixed rows (3, 7 and
         11) of the ROUGE output. The full output is also printed.

     Raises:
         subprocess.CalledProcessError: if the script exits non-zero.
     """
     assert _ROUGE_PATH is not None
     log.get_global_console_logger().setLevel(logging.WARNING)
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     dec_pattern = r'(\d+).dec'
     ref_pattern = '#ID#.ref'
     cmd = '-c 95 -r 1000 -n 2 -m'
     with tempfile.TemporaryDirectory() as tmp_dir:
         dec_out = join(tmp_dir, 'dec')
         ref_out = join(tmp_dir, 'ref')
         settings = join(tmp_dir, 'settings.xml')
         Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
         Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
         Rouge155.write_config_static(dec_out,
                                      dec_pattern,
                                      ref_out,
                                      ref_pattern,
                                      settings,
                                      system_id=1)
         # Pass an argument list so paths with spaces stay intact.
         argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                  '-e', join(_ROUGE_PATH, 'data')]
                 + cmd.split()
                 + ['-a', settings])
         output = sp.check_output(argv, universal_newlines=True)
         # Split the report once instead of once per metric.
         # NOTE(review): rows 3/7/11 are presumably the Average_F lines.
         rows = output.split('\n')
         R_1 = float(rows[3].split(' ')[3])
         R_2 = float(rows[7].split(' ')[3])
         R_L = float(rows[11].split(' ')[3])
         print(output)
     return R_1, R_2, R_L
Ejemplo n.º 8
0
def setup_and_eval(system_summaries, model_summaries, with_meteor=False):
    '''
    Temporarily set up the ROUGE directory structure and evaluate the
    given summaries.

    Each summary is split on "." and fragments that contain no space are
    dropped. A pair is written to disk (one sentence per line) and counted
    only when both sides still have at least one sentence.

    :param system_summaries: list of abstract-summaries in text ["John Stein went ...", ...]
    :param model_summaries: list of corresponding model-summaries ["John walked to ...", ...]
    :param with_meteor: when true, also compute meteor_score on the same directories
    :return: count of evaluated pairs, rouge result, meteor result (None unless with_meteor)
    '''
    system_dir_name = "system_summaries"
    model_dir_name = "model_summaries"

    def _kept_sentences(summary):
        # Keep only "."-separated fragments made of more than one token.
        return [sent for sent in summary.split(".")
                if len(sent.split(" ")) > 1]

    def _write_sentences(file_path, sentences):
        # One left-stripped sentence per line, no trailing newline.
        with open(file_path, "w") as handle:
            handle.write("\n".join(sent.lstrip() for sent in sentences))

    log.get_global_console_logger().disabled = True
    try:
        with tempfile.TemporaryDirectory() as tmp_dir:
            system_dir = os.path.join(tmp_dir, system_dir_name)
            model_dir = os.path.join(tmp_dir, model_dir_name)
            os.mkdir(system_dir)
            os.mkdir(model_dir)
            count = 0

            for system_text, model_text in zip(system_summaries,
                                               model_summaries):
                system_sents = _kept_sentences(system_text)
                model_sents = _kept_sentences(model_text)
                if not system_sents or not model_sents:
                    continue

                _write_sentences(
                    os.path.join(system_dir, f"system.{count}.txt"),
                    system_sents)
                _write_sentences(
                    os.path.join(model_dir, f"model.{count}.txt"),
                    model_sents)
                count += 1

            output = rouge_scores(system_dir, model_dir)
            meteor = None
            if with_meteor:
                meteor = meteor_score(system_dir, model_dir)
    finally:
        # Re-enable the logger even when evaluation raised.
        log.get_global_console_logger().disabled = False

    return count, output, meteor
Ejemplo n.º 9
0
 def __init__(self, rouge_dir=None, rouge_args=None, log_level=None):
     """
     Set up a new Rouge155 instance.
         rouge_dir:  Directory containing Rouge-1.5.5.pl
         rouge_args: Arguments handed to ROUGE in place of the
                     default pyrouge arguments.
         log_level:  Level passed to the global console logger;
                     when None the logger is requested without one.
     """
     # Only forward a level when the caller actually supplied one.
     logger_args = () if log_level is None else (log_level,)
     self.log = log.get_global_console_logger(*logger_args)
     self.__set_dir_properties()
     self._config_file = None
     self._settings_file = self.__get_config_path()
     self.__set_rouge_dir(rouge_dir)
     self.args = self.__clean_rouge_args(rouge_args)
     # Filename patterns start out unset.
     self._system_filename_pattern = self._model_filename_pattern = None
Ejemplo n.º 10
0
    def __init__(self, rouge_dir=None, rouge_args=None, log_level=None):
        """
        Build a Rouge155 object.

            rouge_dir:  Directory containing Rouge-1.5.5.pl
            rouge_args: Arguments handed to ROUGE in place of the
                        default pyrouge arguments.
            log_level:  Optional level for the global console logger.

        """
        self.log = (log.get_global_console_logger()
                    if log_level is None
                    else log.get_global_console_logger(log_level))
        self.__set_dir_properties()
        self._config_file = None
        self._settings_file = self.__get_config_path()
        self.__set_rouge_dir(rouge_dir)
        self.args = self.__clean_rouge_args(rouge_args)
        # No filename patterns are known yet.
        self._system_filename_pattern = None
        self._model_filename_pattern = None
Ejemplo n.º 11
0
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m -d', system_id=1):
    """Evaluate with the original Perl ROUGE script and collect Eval rows.

    Args:
        dec_pattern: filename pattern of the decoded (system) summaries.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern of the reference summaries.
        ref_dir: directory holding the reference summaries.
        cmd: whitespace-separated options forwarded to ROUGE-1.5.5.pl.
        system_id: system id written into the settings file.

    Returns:
        A tuple (rouge_1, rouge_2, rouge_l) of newline-joined strings. Each
        entry is the last whitespace-separated token, minus its first two
        characters, of an output line containing 'ROUGE-1 Eval',
        'ROUGE-2 Eval' or 'ROUGE-L Eval' respectively.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    print('evaluate')
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        dec_out = join(tmp_dir, 'dec')
        ref_out = join(tmp_dir, 'ref')
        settings = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        Rouge155.write_config_static(
            dec_out, dec_pattern,
            ref_out, ref_pattern,
            settings, system_id
        )
        # Pass an argument list so paths with spaces stay intact.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        output = sp.check_output(argv, universal_newlines=True)

    rouge_1 = []
    rouge_2 = []
    rouge_l = []
    buckets = (('ROUGE-1 Eval', rouge_1),
               ('ROUGE-2 Eval', rouge_2),
               ('ROUGE-L Eval', rouge_l))

    for line in output.split('\n'):
        for marker, scores in buckets:
            if marker in line:
                # NOTE(review): [2:] presumably drops an "F:" prefix.
                scores.append(line.split()[-1][2:])

    return '\n'.join(rouge_1), '\n'.join(rouge_2), '\n'.join(rouge_l)
Ejemplo n.º 12
0
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Run the original Perl ROUGE-1.5.5 script and return its stdout.

    The decoded and reference directories are converted to ROUGE format
    in a temporary directory, together with the settings file the script
    is pointed at.
    """
    assert _ROUGE_PATH is not None
    # keep pyrouge quiet
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as workdir:
        dec_out = join(workdir, 'dec')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, dec_out)
        ref_out = join(workdir, 'ref')
        Rouge155.convert_summaries_to_rouge_format(ref_dir, ref_out)
        Rouge155.write_config_static(dec_out, dec_pattern,
                                     ref_out, ref_pattern,
                                     join(workdir, 'settings.xml'),
                                     system_id)
        command_line = '{} -e {} {} -a {}'.format(
            join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
            join(_ROUGE_PATH, 'data'),
            cmd,
            join(workdir, 'settings.xml'))
        return sp.check_output(command_line.split(' '),
                               universal_newlines=True)
Ejemplo n.º 13
0
def rouge_eval(ref_dir,
               dec_dir,
               dec_pattern=r'(\d+)_decoded.txt',
               ref_pattern='#ID#_reference.txt',
               cmd="-c 95 -r 1000 -n 2 -m",
               system_id=1):
    """Evaluate decoded summaries against references with ROUGE-1.5.5.

    Args:
        ref_dir: directory holding the reference summaries.
        dec_dir: directory holding the decoded summaries.
        dec_pattern: filename pattern of the decoded summaries.
        ref_pattern: filename pattern of the reference summaries.
        cmd: whitespace-separated options forwarded to ROUGE-1.5.5.pl.
        system_id: system id written into the settings file.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    assert _ROUGE_PATH is not None
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec_dir = os.path.join(tmp_dir, 'dec')
        tmp_ref_dir = os.path.join(tmp_dir, 'ref')
        settings = os.path.join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec_dir)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref_dir)
        Rouge155.write_config_static(tmp_dec_dir, dec_pattern, tmp_ref_dir,
                                     ref_pattern, settings, system_id)
        # Pass an argument list so paths with spaces stay intact.
        argv = ([os.path.join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', os.path.join(_ROUGE_PATH, 'data')]
                + cmd.split()
                + ['-a', settings])
        output = sp.check_output(argv, universal_newlines=True)
    return output
Ejemplo n.º 14
0
    def evaluate_static(home_dir, config_file_path, rouge_args=None):
        """
        This is the static version of the evaluate method.
        Run ROUGE to evaluate the configuration file using the arguments
        provided.

            home_dir:         Directory containing ROUGE-1.5.5.pl
            config_file_path: Configuration file handed to ROUGE as the
                              last argument.
            rouge_args:       List of arguments placed before '-m'; None
                              (the default) means no extra arguments.

        Returns: Rouge output as string.
        """
        # The default of None used to fail with "NoneType + list".
        options = (rouge_args or []) + ['-m', config_file_path]
        command = [os.path.join(home_dir, 'ROUGE-1.5.5.pl')] + options

        logger = log.get_global_console_logger()
        logger.info(
            "Running ROUGE with command {}".format(" ".join(command)))
        return check_output(command).decode("UTF-8")
 def __init__(self, language="en", punkt_data_path=None):
     """
     Load the NLTK punkt sentence detector into self.sent_detector.

         language:        Key into the built-in language -> data path
                          table; used when punkt_data_path is not given.
         punkt_data_path: Explicit path handed to nltk.data.load.

     Failures are logged as errors instead of being raised; in that
     case self.sent_detector is not set.
     """
     self.lang2datapath = {"en": "tokenizers/punkt/english.pickle"}
     self.log = log.get_global_console_logger()
     try:
         import nltk.data
     except ImportError:
         self.log.error(
             "Cannot import NLTK data for the sentence splitter. Please "
             "check if the 'punkt' NLTK-package is installed correctly.")
         # Without nltk the load below could only fail with a second,
         # misleading error.
         return
     try:
         if not punkt_data_path:
             punkt_data_path = self.lang2datapath[language]
         self.sent_detector = nltk.data.load(punkt_data_path)
     except KeyError:
         self.log.error(
             "No sentence splitter data for language {}.".format(language))
     except Exception:
         # Report the path that was actually tried; indexing the table by
         # language here could itself raise for an unknown language.
         self.log.error("Could not load sentence splitter data: {}".format(
             punkt_data_path))
Ejemplo n.º 16
0
    def __init__(self, rouge_dir=None, rouge_args=None):
        """
        Build a Rouge155 object.

            rouge_dir:  Directory containing Rouge-1.5.5.pl
            rouge_args: Arguments handed to ROUGE in place of the
                        default pyrouge arguments.

        """
        self.log = log.get_global_console_logger()
        self.__set_dir_properties()
        self._config_file = None
        # settings.ini is looked up next to this module.
        module_dir = os.path.dirname(__file__)
        self._settings_file = os.path.join(module_dir, 'settings.ini')
        self.__set_rouge_dir(rouge_dir)
        self.args = self.__clean_rouge_args(rouge_args)
        self._system_filename_pattern = self._model_filename_pattern = None
Ejemplo n.º 17
0
 def process(input_dir, output_dir, function):
     """
     Run function over the text of every entry listed in input_dir and
     write the lower-cased, cleaned result to a file of the same name
     in output_dir (created when missing).
     """
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     logger = log.get_global_console_logger()
     logger.info("Processing files in {}.".format(input_dir))
     for file_name in os.listdir(input_dir):
         source_path = os.path.join(input_dir, file_name)
         with codecs.open(source_path, "r", encoding="UTF-8") as reader:
             original_text = reader.read()
         transformed = function(original_text)
         target_path = os.path.join(output_dir, file_name)
         with codecs.open(target_path, "w", encoding="UTF-8") as writer:
             writer.write(clean(transformed.lower()))
     logger.info("Saved processed files to {}.".format(output_dir))
Ejemplo n.º 18
0
    def __init__(self, rouge_dir=None, rouge_args=None):
        """
        Initialise a Rouge155 object.

            rouge_dir:  Directory containing Rouge-1.5.5.pl
            rouge_args: Arguments forwarded to ROUGE when the default
                        pyrouge arguments are not wanted.

        """
        self.log = log.get_global_console_logger()
        self.__set_dir_properties()
        self._config_file = None
        # The settings file lives in the same directory as this module.
        here = os.path.dirname(__file__)
        self._settings_file = os.path.join(here, 'settings.ini')
        self.__set_rouge_dir(rouge_dir)
        self.args = self.__clean_rouge_args(rouge_args)
        # Filename patterns are not known yet.
        self._system_filename_pattern = None
        self._model_filename_pattern = None
Ejemplo n.º 19
0
 def __init__(self, language="en", punkt_data_path=None):
     """
     Load the NLTK punkt sentence detector into self.sent_detector.

         language:        Key into the built-in language -> data path
                          table; used when punkt_data_path is not given.
         punkt_data_path: Explicit path handed to nltk.data.load.

     Failures are logged as errors instead of being raised; in that
     case self.sent_detector is not set.
     """
     self.lang2datapath = {"en": "tokenizers/punkt/english.pickle"}
     self.log = log.get_global_console_logger()
     try:
         import nltk.data
     except ImportError:
         self.log.error(
             "Cannot import NLTK data for the sentence splitter. Please "
             "check if the 'punkt' NLTK-package is installed correctly.")
         # Without nltk the load below could only fail with a second,
         # misleading error.
         return
     try:
         if not punkt_data_path:
             punkt_data_path = self.lang2datapath[language]
         self.sent_detector = nltk.data.load(punkt_data_path)
     except KeyError:
         self.log.error(
             "No sentence splitter data for language {}.".format(language))
     except Exception:
         # Report the path that was actually tried; indexing the table by
         # language here could itself raise for an unknown language.
         self.log.error(
             "Could not load sentence splitter data: {}".format(
                 punkt_data_path))
Ejemplo n.º 20
0
    def process(input_dir, output_dir, function):
        """
        Run function over the text of every entry listed in input_dir and
        store each result under the same file name in output_dir, which
        is created when it does not exist yet.

        """
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        logger = log.get_global_console_logger()
        logger.info("Processing files in {}.".format(input_dir))
        for file_name in os.listdir(input_dir):
            logger.info("Processing {}.".format(file_name))
            source_path = os.path.join(input_dir, file_name)
            with codecs.open(source_path, "r", encoding="UTF-8") as reader:
                original_text = reader.read()
            transformed = function(original_text)
            target_path = os.path.join(output_dir, file_name)
            with codecs.open(target_path, "w", encoding="UTF-8") as writer:
                writer.write(transformed)
        logger.info("Saved processed files to {}.".format(output_dir))
Ejemplo n.º 21
0
    def __init__(self, rouge_dir=None, verbose=True, rouge_args=None):
        """
        Build a Rouge155 object.

            rouge_dir:  Directory containing Rouge-1.5.5.pl
            verbose:    When false, the logger level is raised to 30 so
                        that "info" messages are no longer shown.
            rouge_args: Arguments handed to ROUGE in place of the
                        default pyrouge arguments.

        """
        console_logger = log.get_global_console_logger()
        if not verbose:
            # Level 30 hides all "info" logs.
            console_logger.setLevel(30)
        self.log = console_logger

        self.__set_dir_properties()
        self._config_file = None
        self._settings_file = self.__get_config_path()
        self.__set_rouge_dir(rouge_dir)
        self.args = self.__clean_rouge_args(rouge_args)
        self._system_filename_pattern = self._model_filename_pattern = None
Ejemplo n.º 22
0
    def process(input_dir, output_dir, function):
        """
        Run function over the text of every entry listed in input_dir and
        store each result under the same file name in output_dir, which
        is created when it does not exist yet.

        """
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        logger = log.get_global_console_logger()
        logger.info("Processing files in {}.".format(input_dir))
        for file_name in os.listdir(input_dir):
            logger.info("Processing {}.".format(file_name))
            # Use forward slashes throughout the input path.
            source_path = os.path.join(input_dir, file_name).replace(
                '\\', '/')
            # errors='ignore' skips bytes that are not valid UTF-8.
            with codecs.open(source_path, "r", encoding="UTF-8",
                             errors='ignore') as reader:
                original_text = reader.read()
            transformed = function(original_text)
            target_path = os.path.join(output_dir, file_name)
            with codecs.open(target_path, "w", encoding="UTF-8") as writer:
                writer.write(transformed)
        logger.info("Saved processed files to {}.".format(output_dir))