def generator_cfg():
    # `fonts`, `bgs`, `d` and `file_ids` are expected to be defined at module level.
    cfg = AttrDict()
    cfg.fonts = fonts
    # cfg.dict = lines
    cfg.bgs = bgs
    cfg.fs = [n for n in range(15, 50, 3)]
    cfg.sw = np.linspace(1, 2.5, 10)
    cfg.d = d
    cfg.fnames = file_ids
    # Collect the alphabet of all characters that occur in the data.
    _alph = ''
    for l in cfg.d:
        for c in l:
            if c not in _alph:
                _alph += c
    cfg.alph = ''.join(sorted(_alph))
    cfg.colors = 'black,yellow,red,green,magenta,blue'
    return cfg
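# Illustrative sketch (not part of the original code): one way the generator
# configuration above could be consumed, drawing a random font, background,
# font size, stroke width and colour for a synthetic text sample. The sampling
# logic is an assumption; only the cfg fields come from the code above.
import random

def sample_render_params(cfg):
    return AttrDict(
        font=random.choice(cfg.fonts),
        bg=random.choice(cfg.bgs),
        font_size=random.choice(cfg.fs),
        stroke_width=float(random.choice(cfg.sw)),
        color=random.choice(cfg.colors.split(',')),
    )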
import tensorflow as tf

from helpers import AttrDict


def get_params():
    checkpoint_dir = './arxiv-predictive-coding'
    max_length = 50
    sampling_temperature = 0.7
    rnn_cell = tf.nn.rnn_cell.GRUCell
    rnn_hidden = 200
    rnn_layers = 2
    learning_rate = 0.002
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradient_clipping = 5
    batch_size = 100
    epochs = 20
    epoch_size = 200
    return AttrDict(**locals())
import tensorflow as tf

from helpers import AttrDict


def get_params():
    corpus_name = "tiny-shakespeare.txt"
    checkpoint_dir = './shakespeare_model'
    max_length = 50
    sampling_temperature = 0.7
    rnn_cell = tf.nn.rnn_cell.GRUCell
    rnn_hidden = 200
    rnn_layers = 2
    learning_rate = 0.002
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradient_clipping = 5
    batch_size = 100
    epochs = 20
    epoch_size = 200
    return AttrDict(**locals())
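# Sketch (not part of the original code) of what the AttrDict(**locals()) idiom
# used by both get_params() variants above does: every local variable defined
# inside the function ends up as an entry of the returned config. A plain dict
# keeps the example self-contained; the real AttrDict from helpers additionally
# allows attribute access such as params.batch_size.
def demo_params():
    batch_size = 100
    epochs = 20
    return dict(**locals())

assert demo_params() == {'batch_size': 100, 'epochs': 20}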
import tensorflow as tf

from helpers import AttrDict
from Embedding import Embedding
from ImdbMovieReviews import ImdbMovieReviews
from preprocess_batched import preprocess_batched
from SequenceClassificationModel import SequenceClassificationModel

IMDB_DOWNLOAD_DIR = './imdb'
WIKI_VOCAB_DIR = '../01_wikipedia/wikipedia'
WIKI_EMBED_DIR = '../01_wikipedia/wikipedia'

params = AttrDict(
    rnn_cell=tf.nn.rnn_cell.GRUCell,
    rnn_hidden=300,
    optimizer=tf.train.RMSPropOptimizer(0.002),
    batch_size=20,
)

reviews = ImdbMovieReviews(IMDB_DOWNLOAD_DIR)
# Length of the longest review determines the fixed sequence length of the graph.
length = max(len(x[0]) for x in reviews)
embedding = Embedding(
    WIKI_VOCAB_DIR + '/vocabulary.bz2',
    WIKI_EMBED_DIR + '/embeddings.npy', length)
batches = preprocess_batched(reviews, length, embedding, params.batch_size)

data = tf.placeholder(tf.float32, [None, length, embedding.dimensions])
target = tf.placeholder(tf.float32, [None, 2])
model = SequenceClassificationModel(data, target, params)
sess = tf.Session()
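# Illustrative training loop (not in the original snippet), assuming each batch
# yields a (data, target) pair and that SequenceClassificationModel exposes
# `optimize` and `error` ops; that interface is an assumption, as it is defined
# in SequenceClassificationModel.py and not shown here.
sess.run(tf.global_variables_initializer())
for index, batch in enumerate(batches):
    feed = {data: batch[0], target: batch[1]}
    error, _ = sess.run([model.error, model.optimize], feed)
    print('{}: {:3.1f}% error'.format(index + 1, 100 * error))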
import tensorflow as tf
import numpy as np

from helpers import AttrDict
from OcrDataset import OcrDataset
from BidirectionalSequenceLabellingModel import \
    BidirectionalSequenceLabellingModel
from batched import batched

params = AttrDict(
    rnn_cell=tf.nn.rnn_cell.GRUCell,
    rnn_hidden=300,
    optimizer=tf.train.RMSPropOptimizer(0.002),
    gradient_clipping=5,
    batch_size=10,
    epochs=5,
    epoch_size=50)


def get_dataset():
    dataset = OcrDataset('./ocr')
    # Flatten images into vectors.
    dataset.data = dataset.data.reshape(dataset.data.shape[:2] + (-1,))
    # One-hot encode targets.
    target = np.zeros(dataset.target.shape + (26,))
    for index, letter in np.ndenumerate(dataset.target):
        if letter:
            target[index][ord(letter) - ord('a')] = 1
    dataset.target = target
    # Shuffle order of examples.
    order = np.random.permutation(len(dataset.data))
    dataset.data = dataset.data[order]
    dataset.target = dataset.target[order]
    return dataset
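# Illustrative sketch (assumption, not part of the original file): splitting the
# shuffled OCR dataset into training and test portions before building the
# labelling model. The 66/34 split ratio is arbitrary.
dataset = get_dataset()
split = int(0.66 * len(dataset.data))
train_data, test_data = dataset.data[:split], dataset.data[split:]
train_target, test_target = dataset.target[:split], dataset.target[split:]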
import tensorflow as tf
import numpy as np

from batched import batched
from EmbeddingModel import EmbeddingModel
from skipgrams import skipgrams
from Wikipedia import Wikipedia
from helpers import AttrDict

WIKI_DOWNLOAD_DIR = './wikipedia'

params = AttrDict(
    vocabulary_size=10000,
    max_context=10,
    embedding_size=200,
    contrastive_examples=100,
    learning_rate=0.5,
    momentum=0.5,
    batch_size=1000,
)

data = tf.placeholder(tf.int32, [None])
target = tf.placeholder(tf.int32, [None])
model = EmbeddingModel(data, target, params)

corpus = Wikipedia(
    'https://dumps.wikimedia.org/enwiki/20160501/'
    'enwiki-20160501-pages-meta-current1.xml-p000000010p000030303.bz2',
    WIKI_DOWNLOAD_DIR,
    params.vocabulary_size)
examples = skipgrams(corpus, params.max_context)
batches = batched(examples, params.batch_size)
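# Illustrative training loop (not in the original snippet) consuming the
# skipgram batches, assuming each batch is a (data, target) pair and that
# EmbeddingModel exposes `cost` and `optimize` ops; that interface is an
# assumption, as EmbeddingModel.py is not shown here.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for index, batch in enumerate(batches):
    feed = {data: batch[0], target: batch[1]}
    cost, _ = sess.run([model.cost, model.optimize], feed)
    if (index + 1) % 1000 == 0:
        print('batch {}: cost {:5.2f}'.format(index + 1, cost))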
def trainer_cfg():
    cfg = AttrDict()
    cfg.DATANAME = './data2/data.csv'
    cfg.bs = 256       # batch size
    cfg.epochs = 60
    cfg.lr = 1e-3      # learning rate
    cfg.wl2 = 1e-7     # L2 regularisation weight
    cfg.pivot = 15
    # Running-best metrics, initialised to Inf.
    cfg.valid_loss = float('Inf')
    cfg.train_loss = float('Inf')
    cfg.val_acc = float('Inf')
    cfg.train_acc = float('Inf')
    return cfg
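# Illustrative sketch (not part of the original code) of why the loss fields
# above start at Inf: any finite validation loss improves on the initial value,
# so the running best can be updated without a special first-epoch case. The
# dummy loss values below are made up for the example.
cfg = trainer_cfg()
for epoch, val_loss in enumerate([2.3, 1.8, 1.9, 1.5]):
    if val_loss < cfg.valid_loss:
        cfg.valid_loss = val_loss
        print('epoch {}: new best validation loss {:.2f}'.format(epoch, val_loss))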