def run_test():
    """
    Run every check from ``problem_unittests`` against this project's functions.

    Each ``test_*`` helper receives the implementation it validates. Those
    implementations are looked up by name when this function runs, so they
    must already be defined in the surrounding scope.
    """
    import problem_unittests as unit_tests

    # Vocabulary and batching helpers.
    unit_tests.test_create_lookup_tables(create_lookup_tables)
    unit_tests.test_get_batches(get_batches)
    unit_tests.test_tokenize(token_lookup)
    # Network-building helpers.
    unit_tests.test_get_inputs(get_inputs)
    unit_tests.test_get_init_cell(get_init_cell)
    unit_tests.test_get_embed(get_embed)
    unit_tests.test_build_rnn(build_rnn)
    unit_tests.test_build_nn(build_nn)
    # Generation helpers.
    unit_tests.test_get_tensors(get_tensors)
    unit_tests.test_pick_word(pick_word)
    Create lookup tables for vocabulary
    :param text: The text of tv scripts split into words
    :return: A tuple of dicts (vocab_to_int, int_to_vocab)
    """
    word_counts = Counter(text)
    
    # sorting the words from most to least frequent in text occurrence
    sorted_vocab = sorted(word_counts, key=word_counts.get, reverse=True)
    
    # create int_to_vocab dictionaries
    int_to_vocab = {ii: word for ii, word in enumerate(sorted_vocab)}
    vocab_to_int = {word: ii for ii, word in int_to_vocab.items()}

    # return tuple
    return (vocab_to_int, int_to_vocab)
# Check create_lookup_tables with the matching helper from `tests`
# (presumably the project's unit-test module, imported elsewhere -- confirm).
tests.test_create_lookup_tables(create_lookup_tables)

def token_lookup():
    """
    Generate a dict to turn punctuation into a token.

    A fresh dictionary is built on every call, so callers may mutate the
    result without affecting later calls.

    NOTE(review): only the eight symbols below are mapped; there are no
    entries for dashes or newlines -- confirm whether callers expect them.

    :return: Tokenized dictionary where the key is the punctuation and the value is the token
    """
    tokens = {
        '.': '<PERIOD>',
        ',': '<COMMA>',
        '"': '<QUOTATION_MARK>',
        ';': '<SEMICOLON>',
        '!': '<EXCLAMATION_MARK>',
        '?': '<QUESTION_MARK>',
        '(': '<LEFT_PAREN>',
        ')': '<RIGHT_PAREN>',
    }
    # Return the mapping explicitly; without this the function yields None
    # even though the docstring promises the dictionary.
    return tokens
Exemple #3
0
 def test_lookup_tables(self):
     """
     Run ``test_create_lookup_tables`` against ``create_maps``.

     ``create_maps`` is passed as the ``create_lookup_tables`` keyword
     argument. Both names are resolved from outside this method; ``self``
     is not used.
     """
     test_create_lookup_tables(create_lookup_tables=create_maps)