Esempio n. 1
0
def parse_incr(in_file,
               fields=None,
               field_parsers=None,
               metadata_parsers=None):
    """Lazily parse an opened CoNLL-U file, yielding one TokenList per sentence."""
    # When no field layout is supplied, detect it from the file header
    # (CoNLL-U Plus declares its columns there).
    fields = fields or parse_conllu_plus_fields(in_file,
                                                metadata_parsers=metadata_parsers)

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(
            raw_sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers,
        )
        yield TokenList(*parsed)
Esempio n. 2
0
    def test_simple(self):
        """Two sentences separated by a blank line come back as two strings."""
        sentence = '1\thej\n2\tdå\n3\thej'
        data = sentence + '\n\n' + sentence + '\n'
        result = list(parse_sentences(string_to_file(data)))
        self.assertEqual(result, [sentence, sentence])
Esempio n. 3
0
def parse_incr(in_file,
               fields=None,
               field_parsers=None,
               metadata_parsers=None):
    """Incrementally parse an opened CoNLL-U file into TokenList objects.

    Raises FileNotFoundError when given something that is not an opened
    file-like object (e.g. a plain path string).
    """
    # Guard: callers sometimes pass a filename instead of a file handle.
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    # Fall back to auto-detecting the column layout from the file itself.
    fields = fields or parse_conllu_plus_fields(in_file,
                                                metadata_parsers=metadata_parsers)

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(
            raw_sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers,
        )
        yield TokenList(*parsed)
Esempio n. 4
0
def parse_incr(
    in_file: T.TextIO,
    fields: T.Optional[T.Sequence[str]] = None,
    field_parsers: T.Optional[T.Dict[str, _FieldParserType]] = None,
    metadata_parsers: T.Optional[T.Dict[str, _MetadataParserType]] = None
) -> T.Iterator[TokenList]:
    """Incrementally parse an opened CoNLL-U file, yielding one TokenList
    per sentence.

    Parameters:
        in_file: an opened text-mode file-like object (must expose ``read``).
        fields: column names to use; auto-detected from the file
            (CoNLL-U Plus header) when omitted.
        field_parsers: optional per-field parser overrides.
        metadata_parsers: optional per-key metadata parser overrides.

    Raises:
        FileNotFoundError: if ``in_file`` is not an opened file-like object.
    """
    # NOTE: field_parsers was annotated `T.Dict[...] = None` — an implicit
    # Optional, which PEP 484 disallows; made the Optional explicit.
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    if not fields:
        fields = parse_conllu_plus_fields(in_file,
                                          metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield parse_token_and_metadata(sentence,
                                       fields=fields,
                                       field_parsers=field_parsers,
                                       metadata_parsers=metadata_parsers)
Esempio n. 5
0
    def test_multiple_newlines(self):
        """A run of several blank lines still acts as a single separator."""
        pair = '1\thej\n2\tdå'
        # Two blank lines between the first sentences, three before the last.
        data = pair + '\n\n\n' + pair + '\n\n\n\n' + pair + '\n'
        self.assertEqual(
            list(parse_sentences(string_to_file(data))),
            [pair, pair, pair])
Esempio n. 6
0
 def test_ends_without_newline(self):
     """Input lacking a trailing newline still yields its final sentence."""
     raw = "1\thej\n2\tdå"
     result = list(parse_sentences(string_to_file(raw)))
     self.assertEqual(result, [raw])
Esempio n. 7
0
 def test_empty(self):
     """Both an empty string and None produce no sentences at all."""
     for raw in ("", None):
         self.assertEqual(list(parse_sentences(string_to_file(raw))), [])
Esempio n. 8
0
 def test_empty(self):
     """StringIO-backed empty input (empty string or None) yields nothing."""
     for initial in ("", None):
         self.assertEqual(list(parse_sentences(StringIO(initial))), [])
Esempio n. 9
0
def parse_incr(in_file, fields=None, field_parsers=None):
    """Yield a TokenList for every sentence found in the opened file."""
    for raw in parse_sentences(in_file):
        tokens = parse_token_and_metadata(raw,
                                          fields=fields,
                                          field_parsers=field_parsers)
        yield TokenList(*tokens)