# コード例 #1 (Code example #1)
# 0
def predict(string, vocabulary=VOCABULARY, dataset=DATASET, restore=RESTORE):
    """Run the LSTM model on ``string`` and return its per-character results.

    Args:
        string: input text fed to the network as one validation example.
        vocabulary: path to a saved vocabulary (loaded with
            ``load_vocabulary``). If None, the vocabulary is built from
            ``dataset`` instead.
        dataset: path to a plain-text file used to build a vocabulary when
            ``vocabulary`` is None.
        restore: checkpoint path passed to ``env.test`` as ``restore_path``.

    Returns:
        Tuple ``(input, output, prob_vecs)`` taken from the first test
        example, each sequence with its first element dropped.
        NOTE(review): the purpose of dropping element 0 is not visible
        here — presumably a seed/start position; confirm against env.test.

    Raises:
        ValueError: if both ``vocabulary`` and ``dataset`` are None.
    """
    if vocabulary is None:
        if dataset is None:
            # Previously this fell through with vocabulary=None and crashed
            # later on len(vocabulary); fail early with a clear message.
            raise ValueError(
                "either 'vocabulary' or 'dataset' must be provided")
        # encoding added for consistency with the other file reads in this
        # module, which all open text as UTF-8.
        with open(dataset, 'r', encoding='utf-8') as f:
            text = f.read()
        vocabulary = create_vocabulary(text)
    else:
        vocabulary = load_vocabulary(vocabulary)
    vocabulary_size = len(vocabulary)

    env = Environment(Lstm, LstmBatchGenerator, vocabulary=vocabulary)

    # Inference-time feed: keep probability 1. on the dropout placeholder
    # (i.e. dropout disabled).
    valid_add_feed = [
        {
            'placeholder': 'dropout',
            'value': 1.
        }
    ]

    env.build(batch_size=64,
              num_layers=2,
              num_nodes=[1300, 1300],
              num_output_layers=2,
              num_output_nodes=[2048],
              vocabulary_size=vocabulary_size,
              embedding_size=512,
              num_unrollings=100,
              init_parameter=3.,
              regime='inference',
              num_gpus=1)

    _, example_res = env.test(restore_path=restore,
                              additions_to_feed_dict=valid_add_feed,
                              validation_dataset_texts=[string],
                              printed_result_types=[],
                              example_length=len(string),
                              vocabulary=vocabulary,
                              print_results=False,
                              verbose=False)
    first = example_res[0]
    return first['input'][1:], first['output'][1:], first['prob_vecs'][1:]
 def create_vocabulary(texts):
     """Concatenate *texts* into one string and build a vocabulary from it.

     NOTE(review): if this definition lives at module level, it shadows the
     ``create_vocabulary`` imported from ``some_useful_functions`` and the
     call below recurses infinitely for any non-empty input. The one-space
     indentation suggests it was originally a method of a class not visible
     here, in which case the call resolves to the imported helper —
     confirm the intended target before relying on this function.
     """
     # NOTE(review): quadratic string concatenation; ''.join(texts) would
     # be linear.
     text = ''
     for t in texts:
         text += t
     return create_vocabulary(text)
# コード例 #3 (Code example #3)
# 0
from some_useful_functions import create_vocabulary, get_positions_in_vocabulary

# Load the corpus; the context manager guarantees the file handle is closed
# even if read() raises (the original open/read/close pattern leaked the
# handle on error).
with open('datasets/ted.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# different
offset = 10000
valid_size = 1000
# Validation slice is carved out of the middle of the text; everything after
# it becomes the training set.
valid_text = text[offset:offset + valid_size]
train_text = text[offset + valid_size:]
train_size = len(train_text)

# In[5]:

vocabulary = create_vocabulary(text)
vocabulary_size = len(vocabulary)

env = Environment(Lstm, LstmBatchGenerator)
cpiv = get_positions_in_vocabulary(vocabulary)

evaluation = dict(
    save_path='residuals_no_authors_no_sampling/parameter_tuning/just_lstm_go',
    result_types=['perplexity', 'loss', 'bpc', 'accuracy'],
    datasets={
        'train': None,
        'default_1': [valid_text, 'default_1']
    },
    batch_gen_class=LstmBatchGenerator,
    batch_kwargs={'vocabulary': vocabulary},
    batch_size=1,
# コード例 #4 (Code example #4)
# 0
import re
from environment import Environment
# from gru_par import Gru, BatchGenerator
from lstm_par import Lstm, LstmBatchGenerator
from some_useful_functions import create_vocabulary, get_positions_in_vocabulary

# Training corpus: strip XML/HTML-like tags before building the vocabulary.
# Context managers replace the open/close pairs so the handles are released
# even when re.sub or read raises.
with open('datasets/scipop_v3.0/scipop_train.txt', 'r', encoding='utf-8') as f:
    train_text = re.sub(r'<[^>]*>', '', f.read())

# Validation corpus: only the first 10 lines are used, tags stripped as above.
with open('datasets/scipop_v3.0/scipop_valid.txt', 'r', encoding='utf-8') as f:
    valid_text = re.sub(r'<[^>]*>', '', ''.join(f.readlines()[:10]))

# Build the vocabulary over both splits so validation characters are covered.
vocabulary = create_vocabulary(train_text + valid_text)
vocabulary_size = len(vocabulary)

env = Environment(Lstm, LstmBatchGenerator, vocabulary=vocabulary)

cpiv = get_positions_in_vocabulary(vocabulary)

connection_interval = 8
connection_visibility = 5
subsequence_length_in_intervals = 10
add_feed = [
    {
        'placeholder': 'dropout',
        'value': 0.9
    }  # ,