import itertools
from timeit import default_timer as timer  # timer() below assumed to be timeit's default_timer

# read_vocabulary, tokenize_files, RNNExtended, SkipGram and the constants
# (MAX_VOCAB_SIZE, HIDDEN_LAYER_SIZE, WINDOW_SIZE, NUM_ITER, MAX_SENTENCES)
# are assumed to be defined elsewhere in this project.

def testRNN(vocabulary_file, training_dir):
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)
    print("Reading sentences and training RNN...")
    start = timer()

    rnn = RNNExtended(len(words), HIDDEN_LAYER_SIZE)
    num_words = 0
    for i in range(NUM_ITER):
        sentences = tokenize_files(dictionary, training_dir)  # fresh sentence iterator for each pass
        for sentence in itertools.islice(sentences, MAX_SENTENCES):
            # TODO: create a context window for each sentence? (see sketch below)
            rnn.train(sentence)
            num_words += len(sentence)

        print("Iteration " + str(i + 1) + "/" + str(NUM_ITER) + " finished (" + str(num_words) + " words)")
        num_words = 0

    print("- Took %.2f sec" % (timer() - start))
def testSkipGram(vocabulary_file, training_dir):
    last_sentence = None
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)
    print("Reading sentences and training SkipGram...")
    start = timer()
    skip_gram = SkipGram(len(words), WINDOW_SIZE, HIDDEN_LAYER_SIZE)
    num_words = 0
    for i in range(NUM_ITER):
        sentences = tokenize_files(dictionary, training_dir)  # fresh sentence iterator for each pass
        for sentence in itertools.islice(sentences, MAX_SENTENCES):
            last_sentence = sentence  # keep the most recent sentence for the log-likelihood check below
            skip_gram.train(sentence)
            num_words += len(sentence)

        # Estimate the log-likelihood on the most recent sentence; note that
        # this performs one extra training step on that sentence.
        ll = skip_gram.train(last_sentence, compute_ll=True)
        print("Iteration %d/%d finished (%d words)" % (i + 1, NUM_ITER, num_words))
        print("Log-likelihood: %s" % ll)

        num_words = 0

    print("- Took %.2f sec" % (timer() - start))