Ejemplo n.º 1
0
def otpl_to_text(configuration):
    """
    Extract the text using the tokens of the OTPL files and store the results
    into separate plain-text files.

    For every input file, one line is written per segment: the values found
    in the token column of each row of that segment, separated by single
    spaces.

    If ``configuration.colspec`` is ``None``, it is guessed from the segments
    of the first successfully opened file and stored back on
    ``configuration``, so the same column specification is used for all
    remaining input files.

    :param configuration: a :class:`otplc.settings.Configuration` object
    :return: The number of input files for which the conversion failed.
    :raises AssertionError: if the output text file path is identical to the
                            path of the input OTPL file
    """
    errors = 0

    for otpl_file in configuration.input_files:
        text_file = make_path_to(otpl_file, configuration.text_suffix)

        # Checked with an explicit raise rather than an ``assert`` statement:
        # asserts are removed when Python runs with -O, and without this
        # guard the input file would be opened for writing (and truncated)
        # below. AssertionError is kept so the exception type is unchanged.
        if otpl_file == text_file:
            msg = "output text file and input OTPL file have the same path " \
                  "(ensure the OTPL file does not use the extension '{}')"
            raise AssertionError(msg.format(configuration.text_suffix))

        segments = configure_reader(otpl_file, configuration)

        # A missing reader counts as a failed conversion for this file.
        if segments is None:
            errors += 1
            continue

        # NOTE: this mutates the caller's configuration object; the guessed
        # colspec persists for subsequent files (and after this call).
        if configuration.colspec is None:
            configuration.colspec = guess_colspec(segments)

        token = configuration.colspec.token

        try:
            with open(text_file,
                      encoding=configuration.encoding,
                      mode='wt') as out_stream:
                for seg in segments:
                    # print() joins the tokens with spaces and terminates
                    # the segment with a newline.
                    print(*[row[token] for row in seg], file=out_stream)
        except IOError as e:
            L.error('I/O error while extracting %s to %s: %s',
                    otpl_file, text_file, e)
            errors += 1

    return errors
Ejemplo n.º 2
0
 def setUp(self):
     """
     Run the parent fixture setup, then create the segment reader under test.

     The reader is configured with a whitespace regular expression as the
     column separator and is stored in ``self.segments``.
     """
     super(TestConverter, self).setUp()

     # The Configuration is given this test module's own path as its only
     # argument entry; only the separator is overridden afterwards.
     reader_config = Configuration([__file__])
     reader_config.separator = r"\s+"

     # ``self.otpl_file`` is presumably created by the parent setUp()
     # -- TODO confirm against the base test class.
     otpl_path = self.otpl_file.name
     self.segments = configure_reader(otpl_path, reader_config)