def test_simple(self):
    """A ``# global.columns`` header yields the lowercased column names."""
    # The header and the token row must sit on separate lines: the expected
    # list below contains only the seven names declared in the header.
    data = dedent("""\
        # global.columns = ID FORM UPOS HEAD DEPREL MISC PARSEME:MWE
        1\tDer\tDET\t2\tdet\t_\t*
    """)
    expected_fields = [
        "id", "form", "upos", "head", "deprel", "misc", "parseme:mwe",
    ]
    self.assertEqual(
        parse_conllu_plus_fields(string_to_file(data)),
        expected_fields)
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Yield one ``TokenList`` per sentence found in ``in_file``.

    This is a generator, so nothing (including the input validation below)
    runs until the first item is requested.

    Args:
        in_file: An opened file-like object (anything exposing ``read``).
        fields: Column names to use. When falsy, they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)``.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded to ``parse_conllu_plus_fields`` and
            ``parse_token_and_metadata``.

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute, e.g. a
            plain string or path was passed instead of an opened file.
    """
    # Fail with an explicit message instead of an unrelated error from the
    # helpers when a string/path is passed where an opened file is required.
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    if not fields:
        fields = parse_conllu_plus_fields(in_file, metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield TokenList(
            *parse_token_and_metadata(
                sentence,
                fields=fields,
                field_parsers=field_parsers,
                metadata_parsers=metadata_parsers))
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Generate a ``TokenList`` for each sentence read from ``in_file``.

    Being a generator, the body (validation included) only executes once
    iteration starts.

    Args:
        in_file: Opened file-like object; it must expose ``read``.
        fields: Column names; when falsy they come from
            ``parse_conllu_plus_fields``.
        field_parsers: Passed through to ``parse_token_and_metadata``.
        metadata_parsers: Passed through to ``parse_conllu_plus_fields`` and
            ``parse_token_and_metadata``.

    Raises:
        FileNotFoundError: When ``in_file`` lacks a ``read`` attribute.
    """
    is_file_like = hasattr(in_file, 'read')
    if not is_file_like:
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    # Keep caller-supplied fields; otherwise ask the helper for them.
    fields = fields or parse_conllu_plus_fields(
        in_file, metadata_parsers=metadata_parsers)

    # The same keyword arguments are used for every sentence.
    parse_options = {
        "fields": fields,
        "field_parsers": field_parsers,
        "metadata_parsers": metadata_parsers,
    }
    for raw_sentence in parse_sentences(in_file):
        token_list_args = parse_token_and_metadata(raw_sentence, **parse_options)
        yield TokenList(*token_list_args)
def parse_incr(
    in_file: T.TextIO,
    fields: T.Optional[T.Sequence[str]] = None,
    field_parsers: T.Optional[T.Dict[str, _FieldParserType]] = None,
    metadata_parsers: T.Optional[T.Dict[str, _MetadataParserType]] = None
) -> T.Iterator[TokenList]:
    """Yield the parsed result of each sentence found in ``in_file``.

    This is a generator, so nothing (including the input validation below)
    runs until the first item is requested.

    Args:
        in_file: An opened text file (anything exposing ``read``).
        fields: Column names to use. When falsy, they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)``.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded to ``parse_conllu_plus_fields`` and
            ``parse_token_and_metadata``.

    Yields:
        Whatever ``parse_token_and_metadata`` returns for each sentence
        (annotated as ``TokenList``).

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute, e.g. a
            plain string or path was passed instead of an opened file.
    """
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    if not fields:
        fields = parse_conllu_plus_fields(in_file, metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield parse_token_and_metadata(
            sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers)
def test_empty_columns(self):
    """A ``# global.columns =`` header that names no columns yields ``None``."""
    # The header must end before the token row starts; otherwise the row
    # would be read as part of the column declaration.
    data = dedent("""\
        # global.columns =
        1\tDer\tDET\t2\tdet\t_\t*
    """)
    self.assertEqual(parse_conllu_plus_fields(string_to_file(data)), None)
def test_empty(self):
    """Both ``""`` and ``None`` inputs are expected to produce ``None``."""
    for raw_input in ("", None):
        parsed_fields = parse_conllu_plus_fields(string_to_file(raw_input))
        self.assertEqual(parsed_fields, None)
def test_empty(self):
    """Streams built from ``""`` and from ``None`` are expected to give ``None``."""
    empty_stream = StringIO("")
    self.assertEqual(parse_conllu_plus_fields(empty_stream), None)

    # StringIO(None) is the stream created without an initial value.
    default_stream = StringIO(None)
    self.assertEqual(parse_conllu_plus_fields(default_stream), None)