def test_tmpreproc_en_filter_for_pos_none(tmpreproc_en):
    """Filtering for POS ``None`` must keep only tokens without a simplified tag.

    The document labels have to be the same before and after filtering, no
    document may gain tokens, and every tag that survives the filter must be
    mapped to ``None`` by ``simplified_pos``.
    """
    # Snapshot of the tagged tokens before any POS filter is applied.
    tagged = tmpreproc_en.tokenize().pos_tag()
    unfiltered = tagged.tokens_with_pos_tags

    # Same data after keeping only the "None" POS category.
    remaining = tmpreproc_en.filter_for_pos(None).tokens_with_pos_tags

    labels_before = set(unfiltered.keys())
    labels_after = set(remaining.keys())
    assert labels_before == labels_after

    for doc_label, original_pairs in unfiltered.items():
        kept_pairs = remaining[doc_label]

        # Filtering can only remove tokens, never add any.
        assert len(kept_pairs) <= len(original_pairs)

        simplified_tags = [simplified_pos(tag) for _token, tag in kept_pairs]
        assert all(simple is None for simple in simplified_tags)

    _check_save_load_state(tmpreproc_en)
def test_simplified_pos():
    """Check ``simplified_pos`` against a fixed table of tags.

    The first table is evaluated with the function's default tagset, the
    second one with ``tagset='penn'``. An expected value of ``None`` means the
    tag has no simplified form.
    """
    # (tag, expected simplified tag) using the default tagset
    default_tagset_cases = (
        ('', None),
        ('N', 'N'),
        ('V', 'V'),
        ('ADJ', 'ADJ'),
        ('ADV', 'ADV'),
        ('AD', None),
        ('ADX', None),
        ('PRP', None),
        ('XYZ', None),
        ('NN', 'N'),
        ('NNP', 'N'),
        ('VX', 'V'),
        ('ADJY', 'ADJ'),
        ('ADVZ', 'ADV'),
    )

    for tag, expected in default_tagset_cases:
        outcome = simplified_pos(tag)
        if expected is None:
            # Identity check: the function must return None itself.
            assert outcome is None
        else:
            assert outcome == expected

    # (tag, expected simplified tag) using the Penn tagset
    penn_tagset_cases = (
        ('NNP', 'N'),
        ('VFOO', 'V'),
        ('JJ', 'ADJ'),
        ('JJX', 'ADJ'),
        ('RB', 'ADV'),
        ('RBFOO', 'ADV'),
    )

    for tag, expected in penn_tagset_cases:
        assert simplified_pos(tag, tagset='penn') == expected