Esempio n. 1
0
 def test_simple(self):
     # A populated "global.columns" header is expected to come back as the
     # column names in lower case, in the order they appear in the header.
     conllu_plus = dedent("""\
         # global.columns = ID FORM UPOS HEAD DEPREL MISC PARSEME:MWE
         1\tDer\tDET\t2\tdet\t_\t*
     """)
     expected_fields = [
         "id", "form", "upos", "head", "deprel", "misc", "parseme:mwe",
     ]

     parsed_fields = parse_conllu_plus_fields(string_to_file(conllu_plus))

     self.assertEqual(parsed_fields, expected_fields)
Esempio n. 2
0
def parse_incr(in_file,
               fields=None,
               field_parsers=None,
               metadata_parsers=None):
    """Lazily yield one ``TokenList`` per sentence read from ``in_file``.

    When ``fields`` is falsy, the field names are obtained by calling
    ``parse_conllu_plus_fields`` on ``in_file`` before any sentence is
    parsed. ``field_parsers`` and ``metadata_parsers`` are forwarded
    unchanged to the parsing helpers.

    This is a generator: nothing is read until the first item is requested.
    """
    # Fall back to whatever the file itself declares when the caller did not
    # supply any field names.
    columns = fields or parse_conllu_plus_fields(
        in_file, metadata_parsers=metadata_parsers)

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(
            raw_sentence,
            fields=columns,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers,
        )
        yield TokenList(*parsed)
Esempio n. 3
0
def parse_incr(in_file,
               fields=None,
               field_parsers=None,
               metadata_parsers=None):
    """Lazily yield one ``TokenList`` per sentence read from ``in_file``.

    ``in_file`` must expose a ``read`` attribute; otherwise
    ``FileNotFoundError`` is raised. Because this is a generator, that error
    surfaces when the first item is requested, not when the function is
    called.

    When ``fields`` is falsy, the field names are obtained by calling
    ``parse_conllu_plus_fields`` on ``in_file``. ``field_parsers`` and
    ``metadata_parsers`` are forwarded unchanged to the parsing helpers.
    """
    looks_like_file = hasattr(in_file, 'read')
    if not looks_like_file:
        message = "Invalid file, 'parse_incr' needs an opened file as input"
        raise FileNotFoundError(message)

    # Fall back to whatever the file itself declares when the caller did not
    # supply any field names.
    columns = fields or parse_conllu_plus_fields(
        in_file, metadata_parsers=metadata_parsers)

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(
            raw_sentence,
            fields=columns,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers,
        )
        yield TokenList(*parsed)
Esempio n. 4
0
def parse_incr(
    in_file: T.TextIO,
    fields: T.Optional[T.Sequence[str]] = None,
    field_parsers: T.Optional[T.Dict[str, _FieldParserType]] = None,
    metadata_parsers: T.Optional[T.Dict[str, _MetadataParserType]] = None
) -> T.Iterator[TokenList]:
    """Lazily yield one parsed sentence at a time from ``in_file``.

    Args:
        in_file: An object exposing a ``read`` attribute (an opened file).
        fields: Field names to use. When falsy, they are obtained by calling
            ``parse_conllu_plus_fields`` on ``in_file``.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded unchanged to both
            ``parse_conllu_plus_fields`` and ``parse_token_and_metadata``.

    Yields:
        The result of ``parse_token_and_metadata`` for each sentence produced
        by ``parse_sentences(in_file)``.

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute. Because
            this is a generator, the error surfaces when the first item is
            requested, not when the function is called.
    """
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    # Fall back to whatever the file itself declares when the caller did not
    # supply any field names.
    if not fields:
        fields = parse_conllu_plus_fields(in_file,
                                          metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield parse_token_and_metadata(sentence,
                                       fields=fields,
                                       field_parsers=field_parsers,
                                       metadata_parsers=metadata_parsers)
Esempio n. 5
0
 def test_empty_columns(self):
     # A "global.columns" header with nothing after "=" is expected to
     # produce None instead of a list of field names.
     conllu_plus = dedent("""\
         # global.columns =
         1\tDer\tDET\t2\tdet\t_\t*
     """)

     parsed_fields = parse_conllu_plus_fields(string_to_file(conllu_plus))

     self.assertEqual(parsed_fields, None)
Esempio n. 6
0
 def test_empty(self):
     # Both an empty string and None as the source are expected to yield
     # None for the field list.
     for raw_source in ("", None):
         parsed_fields = parse_conllu_plus_fields(string_to_file(raw_source))
         self.assertEqual(parsed_fields, None)
Esempio n. 7
0
 def test_empty(self):
     # A StringIO built from an empty string or from None is expected to
     # yield None for the field list.
     for initial_value in ("", None):
         parsed_fields = parse_conllu_plus_fields(StringIO(initial_value))
         self.assertEqual(parsed_fields, None)