# Example setup: build a masked OCR minibatch for a CTC-trained
# GRU recurrent network (dagbldr/Theano example script).
from dagbldr.datasets import make_ocr
from dagbldr.nodes import log_ctc_cost
from dagbldr.nodes import gru_recurrent_layer, softmax_layer
from dagbldr.optimizers import adadelta
from dagbldr.utils import TrainingLoop
from dagbldr.utils import add_datasets_to_graph, make_masked_minibatch
from dagbldr.utils import get_params_and_grads
import numpy as np
import theano
from collections import OrderedDict

# Fixed seed so the example run is reproducible.
random_state = np.random.RandomState(1999)
graph = OrderedDict()

# Synthetic OCR task: rendered character sequences for two words.
data = make_ocr(["Hello", "humans"])
X = data["data"]
y = data["target"]
vocab_size = data["vocabulary_size"]
vocab = data["vocabulary"]
train_indices = data["train_indices"]
# Toy example: "validate" on the training split itself.
valid_indices = train_indices

# Pad the entire dataset into one masked minibatch each for inputs/targets.
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X)))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y)))

n_hid = 256
# CTC requires one extra output class for the "blank" symbol.
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
# Example setup: permuted-string OCR data pipeline for an attention-based
# recurrent transcription network (dagbldr/Theano example script).
#
# Bug fix: the original used `convert_to_one_hot` and `minibatch_iterator`
# without importing them anywhere in the file, raising NameError at runtime.
# NOTE(review): module paths below assumed from dagbldr conventions
# (make_ocr lives in dagbldr.datasets, utility helpers in dagbldr.utils) —
# confirm against the installed dagbldr version.
from dagbldr.datasets import make_ocr, minibatch_iterator
from dagbldr.nodes import location_attention_tanh_recurrent_layer
from dagbldr.nodes import sigmoid_layer
from dagbldr.nodes import binary_crossentropy
from dagbldr.nodes import masked_cost
from dagbldr.utils import convert_to_one_hot
from dagbldr.utils import add_datasets_to_graph
import theano
import itertools
from collections import OrderedDict
import numpy as np

# Fixed seed so the example run is reproducible.
random_state = np.random.RandomState(1999)
graph = OrderedDict()

# All unique permutations of the base string, sorted for determinism.
base_string = "cat"
true_strings = sorted(list(set(["".join(i) for i in [
    s for s in itertools.permutations(base_string)]])))
ocr = make_ocr(true_strings)
X = ocr["data"]
vocab = ocr["vocabulary"]
# One-hot targets, cast to Theano's configured float dtype.
y = convert_to_one_hot(ocr["target"], n_classes=len(vocab)).astype(
    theano.config.floatX)

minibatch_size = mbs = 2
train_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)
# Peek one minibatch to size the symbolic graph, then rewind the iterator.
X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
train_itr.reset()
valid_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
# Register the sample minibatch as graph inputs (also used as test values).
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph, list_of_test_values=datasets_list)
n_hid = 256