def test_id_to_sentence():
    """Round-trip an in-vocabulary sentence through the tokenizer.

    Every word of the input is present in the vocabulary, so converting
    the token ids back with ``to_sentence`` must give the original text.

    NOTE(review): this module defines a function with the same name again
    further down; the later definition rebinds the name, so this copy is
    shadowed at import time.
    """
    vocabulary = {
        'CUSTOM_MASK': 0,
        'aasdf': 1000,
        'basdf': 2000,
    }
    original_text = u'aasdf basdf basdf basdf'

    # 30 is the tokenizer's second positional argument
    # (presumably the fixed sentence length -- confirm).
    tokenizer = SentenceTokenizer(vocabulary, 30)

    # tokenize_sentences returns three values; only the first is needed.
    token_ids, _second, _third = tokenizer.tokenize_sentences([original_text])
    restored_text = tokenizer.to_sentence(token_ids[0])

    assert restored_text == original_text
def test_id_to_sentence_with_unknown():
    """Round-trip a sentence containing an out-of-vocabulary word.

    The word ``ccc`` is not in the vocabulary, so the text rebuilt by
    ``to_sentence`` is expected to show ``CUSTOM_UNKNOWN`` in its place
    while the known words are preserved.

    NOTE(review): this module defines a function with the same name again
    further down; the later definition rebinds the name, so this copy is
    shadowed at import time.
    """
    vocabulary = {
        'CUSTOM_MASK': 0,
        'CUSTOM_UNKNOWN': 1,
        'aasdf': 1000,
        'basdf': 2000,
    }
    input_text = u'aasdf basdf ccc'
    expected_text = u'aasdf basdf CUSTOM_UNKNOWN'

    # 30 is the tokenizer's second positional argument
    # (presumably the fixed sentence length -- confirm).
    tokenizer = SentenceTokenizer(vocabulary, 30)

    # tokenize_sentences returns three values; only the first is needed.
    token_ids, _second, _third = tokenizer.tokenize_sentences([input_text])
    restored_text = tokenizer.to_sentence(token_ids[0])

    assert restored_text == expected_text
def test_id_to_sentence():
    """Check that tokenizing a known-word sentence is reversible.

    All words are vocabulary entries, so mapping the produced ids back to
    text via ``to_sentence`` should reproduce the input exactly.

    NOTE(review): an earlier function in this module has the same name;
    this later definition is the one bound after import.
    """
    text = u'aasdf basdf basdf basdf'
    word_to_id = {'CUSTOM_MASK': 0, 'aasdf': 1000, 'basdf': 2000}

    # Second positional argument is 30 (presumably the fixed length
    # of a tokenized sentence -- confirm against SentenceTokenizer).
    sentence_tokenizer = SentenceTokenizer(word_to_id, 30)

    # Three values are returned; the trailing two are not used here.
    ids, _unused_a, _unused_b = sentence_tokenizer.tokenize_sentences([text])

    assert sentence_tokenizer.to_sentence(ids[0]) == text
def test_id_to_sentence_with_unknown():
    """Check the round trip when the sentence has an unknown word.

    ``ccc`` has no vocabulary entry; the reconstructed sentence is
    expected to contain ``CUSTOM_UNKNOWN`` where it stood, with the
    known words unchanged.

    NOTE(review): an earlier function in this module has the same name;
    this later definition is the one bound after import.
    """
    text = u'aasdf basdf ccc'
    text_after_round_trip = u'aasdf basdf CUSTOM_UNKNOWN'
    word_to_id = {'CUSTOM_MASK': 0, 'CUSTOM_UNKNOWN': 1, 'aasdf': 1000, 'basdf': 2000}

    # Second positional argument is 30 (presumably the fixed length
    # of a tokenized sentence -- confirm against SentenceTokenizer).
    sentence_tokenizer = SentenceTokenizer(word_to_id, 30)

    # Three values are returned; the trailing two are not used here.
    ids, _unused_a, _unused_b = sentence_tokenizer.tokenize_sentences([text])

    assert sentence_tokenizer.to_sentence(ids[0]) == text_after_round_trip