def predict(string, vocabulary=VOCABULARY, dataset=DATASET, restore=RESTORE):
    """Run the trained LSTM on `string` and return per-character results."""
    # print('BEGIN' + string + 'END')
    if vocabulary is None:
        # No vocabulary given: rebuild it from the training corpus.
        if dataset is not None:
            with open(dataset, 'r') as f:
                text = f.read()
            vocabulary = create_vocabulary(text)
    else:
        # `vocabulary` is a path to a saved vocabulary file.
        vocabulary = load_vocabulary(vocabulary)
    vocabulary_size = len(vocabulary)
    # print('(typos.predict)vocabulary_size:', vocabulary_size)
    # print('(typos.predict)vocabulary:\n', vocabulary)

    env = Environment(Lstm, LstmBatchGenerator, vocabulary=vocabulary)
    valid_add_feed = [
        # {'placeholder': 'sampling_prob', 'value': 1.},
        {'placeholder': 'dropout', 'value': 1.},  # no dropout at inference
    ]
    env.build(batch_size=64,
              num_layers=2,
              num_nodes=[1300, 1300],
              num_output_layers=2,
              num_output_nodes=[2048],
              vocabulary_size=vocabulary_size,
              embedding_size=512,
              num_unrollings=100,
              init_parameter=3.,
              regime='inference',
              num_gpus=1)
    _, example_res = env.test(restore_path=restore,
                              additions_to_feed_dict=valid_add_feed,
                              validation_dataset_texts=[string],
                              printed_result_types=[],
                              example_length=len(string),
                              vocabulary=vocabulary,
                              print_results=False,
                              verbose=False)
    # Drop the first character: it has no preceding context.
    return (example_res[0]['input'][1:],
            example_res[0]['output'][1:],
            example_res[0]['prob_vecs'][1:])
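# A hedged usage sketch for predict(): it assumes the module-level
# VOCABULARY, DATASET, and RESTORE defaults point at a saved vocabulary, a
# training corpus, and a checkpoint, respectively. The test string is
# illustrative.
if __name__ == '__main__':
    inp, out, prob_vecs = predict('Helo wrold')
    # `inp`/`out` are the input and predicted character sequences (the first
    # character is dropped above because it has no left context); each entry
    # of `prob_vecs` is a probability distribution over the vocabulary.
    for i, (c_in, c_out) in enumerate(zip(inp, out)):
        if c_in != c_out:
            print('position %d: read %r, model prefers %r' % (i, c_in, c_out))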
from some_useful_functions import create_vocabulary as _create_vocabulary


def create_vocabulary(texts):
    """Concatenate `texts` and build one shared vocabulary from the result."""
    # Delegate to the imported helper (aliased so this wrapper does not
    # recurse into itself).
    return _create_vocabulary(''.join(texts))
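# Example (paths taken from the snippets below): build a single vocabulary
# shared across several corpora, so train and valid data use the same
# character ids.
texts = []
for path in ['datasets/ted.txt', 'datasets/scipop_v3.0/scipop_train.txt']:
    with open(path, 'r', encoding='utf-8') as f:
        texts.append(f.read())
joint_vocabulary = create_vocabulary(texts)
print(len(joint_vocabulary), 'characters in the joint vocabulary')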
from some_useful_functions import create_vocabulary, get_positions_in_vocabulary

with open('datasets/ted.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Hold out a validation slice starting at `offset`; the rest is training data.
offset = 10000
valid_size = 1000
valid_text = text[offset:offset + valid_size]
train_text = text[offset + valid_size:]
train_size = len(train_text)

vocabulary = create_vocabulary(text)
vocabulary_size = len(vocabulary)

env = Environment(Lstm, LstmBatchGenerator)
cpiv = get_positions_in_vocabulary(vocabulary)

evaluation = dict(
    save_path='residuals_no_authors_no_sampling/parameter_tuning/just_lstm_go',
    result_types=['perplexity', 'loss', 'bpc', 'accuracy'],
    datasets={'train': None,
              'default_1': [valid_text, 'default_1']},
    batch_gen_class=LstmBatchGenerator,
    batch_kwargs={'vocabulary': vocabulary},
    batch_size=1,
)
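# A minimal sketch of what the two helpers are assumed to do here: a
# character-level vocabulary is a sorted list of the unique characters, and
# `get_positions_in_vocabulary` maps each character to its index. The real
# implementations in some_useful_functions may differ in detail.
def _create_vocabulary_sketch(text):
    # Unique characters, in a stable (sorted) order.
    return sorted(set(text))


def _get_positions_in_vocabulary_sketch(vocabulary):
    # char -> integer id, used to encode characters as indices.
    return {char: i for i, char in enumerate(vocabulary)}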
import re

from environment import Environment
# from gru_par import Gru, BatchGenerator
from lstm_par import Lstm, LstmBatchGenerator
from some_useful_functions import create_vocabulary, get_positions_in_vocabulary

with open('datasets/scipop_v3.0/scipop_train.txt', 'r', encoding='utf-8') as f:
    train_text = re.sub('<[^>]*>', '', f.read())  # strip markup tags

with open('datasets/scipop_v3.0/scipop_valid.txt', 'r', encoding='utf-8') as f:
    # Use only the first 10 lines of the validation file.
    valid_text = re.sub('<[^>]*>', '', ''.join(f.readlines()[:10]))

vocabulary = create_vocabulary(train_text + valid_text)
vocabulary_size = len(vocabulary)

env = Environment(Lstm, LstmBatchGenerator, vocabulary=vocabulary)
# env = Environment(Gru, BatchGenerator)
cpiv = get_positions_in_vocabulary(vocabulary)

connection_interval = 8
connection_visibility = 5
subsequence_length_in_intervals = 10

add_feed = [
    {'placeholder': 'dropout', 'value': 0.9}
    # ,
]
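# `add_feed` injects a dropout keep probability of 0.9 during training; at
# evaluation time the same placeholder is fed 1.0 (dropout disabled), exactly
# as in the valid_add_feed list inside predict() above:
valid_add_feed = [
    # {'placeholder': 'sampling_prob', 'value': 1.},
    {'placeholder': 'dropout', 'value': 1.},
]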