Ejemplo n.º 1
0
 def guessColspec(self, otpl_text, header):
     """
     Check that the column specification guessed for `otpl_text` matches
     the one described by `header`.

     :param otpl_text: content written to ``self.otpl_file``
     :param header: string handed to ``ColumnSpecification.from_string`` to
                    build the expected specification
     """
     # Persist the sample and close the file before guessing.
     # NOTE(review): presumably self.segments reads from self.otpl_file;
     # confirm against the fixture set-up.
     self.otpl_file.write(otpl_text)
     self.otpl_file.close()

     wanted_spec = ColumnSpecification.from_string(header)
     guessed_spec = guess_colspec(self.segments)

     # Compare the string forms first, then the specification objects.
     self.assertSequenceEqual(str(wanted_spec), str(guessed_spec))
     self.assertEqual(wanted_spec, guessed_spec)
Ejemplo n.º 2
0
def otpl_to_brat(configuration):
    """
    For each path in ``configuration.input_files``, read the associated OTPL
    file and write the converted brat file.

    The OTPL and brat paths are derived from each text file path via
    ``make_path_to`` with ``configuration.otpl_suffix`` and
    ``configuration.brat_suffix``. If ``configuration.colspec`` is None, it
    is guessed from the first segments that could be read and stored back on
    the configuration. When every conversion succeeded, a brat configuration
    file named ``configuration.config`` is written into the directory of the
    last input file, unless such a file already exists. With no input files
    at all, nothing is written and 0 is returned.

    :type configuration: Configuration
    :return: the error count (number of failed conversions)
    """
    converter = OtplBratConverter()
    converter.set_colspec(configuration.colspec)
    errors = 0

    if configuration.name_labels is not None:
        converter.set_name_dict(configuration.name_labels)

    for text_file in configuration.input_files:
        otpl_file = make_path_to(text_file, configuration.otpl_suffix)
        brat_file = make_path_to(text_file, configuration.brat_suffix)

        if not exists(otpl_file):
            L.error('could not locate OTPL file "%s" for "%s"',
                    otpl_file, text_file)
            errors += 1
            continue

        segments = configure_reader(otpl_file, configuration)

        if segments is None:
            # No reader could be set up for this file; count it as failed.
            errors += 1
            continue

        if configuration.colspec is None:
            # Guess once and keep the result on the configuration, so later
            # files are converted with the same column specification.
            configuration.colspec = guess_colspec(segments)
            converter.set_colspec(configuration.colspec)

        if not converter.convert(segments, text_file, brat_file):
            L.error('conversion for "%s" failed', text_file)
            errors += 1

    if errors:
        L.debug('conversion of %s file%s failed',
                errors, '' if errors == 1 else 's')
    elif configuration.input_files:
        # Only look at the last input file if there is one; indexing an
        # empty input list with [-1] would raise IndexError.
        brat_config_file = join(dirname(configuration.input_files[-1]),
                                configuration.config)

        if not exists(brat_config_file):
            converter.write_config_file(brat_config_file)

    return errors