import os

from learning_to_learn.useful_functions import create_vocabulary


def load_text_dataset(path, train_size, valid_size, test_size):
    file_name = os.path.join('..', 'datasets', path)
    # Resolve the relative dataset path from this script's directory,
    # then restore the original working directory.
    old_dir = os.getcwd()
    abspath = os.path.abspath(__file__)
    dname = os.path.dirname(abspath)
    os.chdir(dname)
    with open(file_name, 'r') as f:
        text = f.read()
    os.chdir(old_dir)
    vocabulary = create_vocabulary(text)
    # Carve the test and validation slices off the front of the text; whatever
    # remains (optionally capped at train_size) becomes the training text.
    if test_size is not None:
        test_text = text[:test_size]
        text = text[test_size:]
    else:
        test_text = None
    if valid_size is not None:
        valid_text = text[:valid_size]
        text = text[valid_size:]
    else:
        valid_text = None
    if train_size is not None:
        train_text = text[:train_size]
    else:
        train_text = text
    return vocabulary, train_text, valid_text, test_text
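A minimal usage sketch (not part of the original example; the sizes are hypothetical), assuming a datasets/text8.txt file one level above the script's directory:

vocabulary, train_text, valid_text, test_text = load_text_dataset(
    'text8.txt', train_size=None, valid_size=500, test_size=1000)
print(len(vocabulary), len(train_text), len(valid_text), len(test_text))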
Example No. 2
import os

# create_vocabulary is imported as in the other examples; load_vocabulary_with_unk
# is assumed to live in the same module.
from learning_to_learn.useful_functions import create_vocabulary, load_vocabulary_with_unk


def get_vocab_by_given_path(file_name, text, create=False):
    if os.path.isfile(file_name) and not create:
        # Reuse a previously saved vocabulary (which includes an <UNK> token).
        vocabulary = load_vocabulary_with_unk(file_name)
    else:
        # Build the vocabulary from the text and cache it at file_name.
        vocabulary = create_vocabulary(text, with_unk=True)
        dir_name = os.path.dirname(file_name)
        if len(dir_name) > 0 and not os.path.exists(dir_name):
            os.makedirs(dir_name)
        with open(file_name, 'w') as f:
            f.write(''.join(vocabulary))
    vocabulary_size = len(vocabulary)
    return vocabulary, vocabulary_size
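A hypothetical usage sketch (the vocabulary file path below is made up): on the first call the vocabulary is built from the text and written to the given path; later calls reload it from disk.

with open('datasets/text8.txt', 'r') as f:
    text = f.read()
vocabulary, vocabulary_size = get_vocab_by_given_path('vocabs/text8_vocab.txt', text)
print(vocabulary_size)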
Example No. 3
import os

import tensorflow as tf

from learning_to_learn.environment import Environment
from learning_to_learn.pupils.lstm_for_meta import Lstm, LstmFastBatchGenerator as BatchGenerator
from learning_to_learn.useful_functions import create_vocabulary

# ResNet4Lstm (the meta-optimizer class) and ROOT_HEIGHT (how many directory levels
# separate this script from the repository root) are assumed to be imported or
# defined earlier in the original script.

# Work from the script's own directory so the relative dataset path resolves.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
#
# with open(conf_file, 'r') as f:
#     lines = f.read().split('\n')

dataset_path = os.path.join(*(['..'] * ROOT_HEIGHT +
                              ['datasets', 'text8.txt']))
with open(dataset_path, 'r') as f:
    text = f.read()

valid_size = 500

# Hold out the first valid_size characters for validation; the rest is for training.
valid_text = text[:valid_size]
train_text = text[valid_size:]

vocabulary = create_vocabulary(text)
vocabulary_size = len(vocabulary)
print(vocabulary_size)
tf.set_random_seed(1)  # fix TensorFlow's graph-level random seed for reproducibility
env = Environment(pupil_class=Lstm,
                  meta_optimizer_class=ResNet4Lstm,
                  batch_generator_classes=BatchGenerator,
                  vocabulary=vocabulary)

add_metrics = ['bpc', 'perplexity', 'accuracy']  # bpc: bits per character
NUM_EXERCISES = 10
NUM_UNROLLINGS = 4
OPT_INF_NAME = 'COLD'
OPT_INF_RESTORE_PUPIL_PATHS = [(OPT_INF_NAME, None)]

env.build_pupil(batch_size=32,
Example No. 4
# Apparently a batch generator helper: it concatenates the texts and builds one
# vocabulary via the module-level create_vocabulary (aliased so the wrapper does
# not recurse into itself when taken out of its original class).
from learning_to_learn.useful_functions import create_vocabulary as _create_vocabulary


def create_vocabulary(texts):
    text = ''
    for t in texts:
        text += t
    return _create_vocabulary(text)
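A hypothetical usage sketch: build a single vocabulary over several text splits at once, following the train/validation split used in the other examples.

with open('datasets/text8.txt', 'r') as f:
    text = f.read()
valid_text, train_text = text[:500], text[500:]
vocabulary = create_vocabulary([train_text, valid_text])
print(len(vocabulary))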
Example No. 5
import tensorflow as tf

from learning_to_learn.environment import Environment
from learning_to_learn.pupils.lstm_for_meta import Lstm, LstmFastBatchGenerator as BatchGenerator
from learning_to_learn.useful_functions import create_vocabulary, get_positions_in_vocabulary

with open('datasets/text8.txt', 'r') as f:
    text = f.read()

valid_size = 500
valid_text = text[:valid_size]
train_text = text[valid_size:]

vocabulary = create_vocabulary(train_text + valid_text)
vocabulary_size = len(vocabulary)

env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)

cpiv = get_positions_in_vocabulary(vocabulary)  # maps each character to its position (index) in the vocabulary

# Extra placeholder values fed during training; only a dropout value is active here,
# the scheduled-sampling and loss options below are commented out.
add_feed = [
    {
        'placeholder': 'dropout',
        'value': 0.9
    }  #,
    # {'placeholder': 'sampling_prob',
    #  'value': {'type': 'linear', 'start': 0., 'end': 1., 'interval': 3000}},
    # {'placeholder': 'loss_comp_prob',
    #  'value': {'type': 'linear', 'start': 1., 'end': 0., 'interval': 3000}}
]
valid_add_feed = [  # {'placeholder': 'sampling_prob', 'value': 1.},