コード例 #1
0
ファイル: ocr_ctc.py プロジェクト: kastnerkyle/dagbldr
from dagbldr.datasets import make_ocr
from dagbldr.nodes import log_ctc_cost
from dagbldr.nodes import gru_recurrent_layer, softmax_layer
from dagbldr.optimizers import adadelta
from dagbldr.utils import TrainingLoop
from dagbldr.utils import add_datasets_to_graph, make_masked_minibatch
from dagbldr.utils import get_params_and_grads

import numpy as np
import theano
from collections import OrderedDict

# Seeded RNG and the ordered container that will hold the graph.
graph = OrderedDict()
random_state = np.random.RandomState(1999)

# Build the OCR dataset from two strings and pull out the fields used below.
data = make_ocr(["Hello", "humans"])
X, y = data["data"], data["target"]
vocab_size, vocab = data["vocabulary_size"], data["vocabulary"]
# Validation reuses the training indices (both names refer to the same object).
train_indices = valid_indices = data["train_indices"]

# One minibatch per array, with the slice spanning every example in it.
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X)))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y)))

# Hidden size, and an output size one larger than the vocabulary
# (presumably the extra class is the CTC blank symbol -- TODO confirm).
n_hid, n_out = 256, vocab_size + 1

# Parallel lists: names_list[i] is the name for datasets_list[i].
names_list = ["X", "X_mask", "y", "y_mask"]
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
コード例 #2
0
from dagbldr.nodes import location_attention_tanh_recurrent_layer
from dagbldr.nodes import sigmoid_layer
from dagbldr.nodes import binary_crossentropy
from dagbldr.nodes import masked_cost
import theano
import itertools
from collections import OrderedDict
import numpy as np


# Seeded RNG and the ordered container that will hold the graph.
graph = OrderedDict()
random_state = np.random.RandomState(1999)

# Every distinct ordering of the letters in base_string, in sorted order.
base_string = "cat"
true_strings = sorted(
    {"".join(letters) for letters in itertools.permutations(base_string)})

# Build the OCR dataset for those strings; targets are converted to one-hot
# with one class per vocabulary entry and cast to theano's configured float.
ocr = make_ocr(true_strings)
X, vocab = ocr["data"], ocr["vocabulary"]
y = convert_to_one_hot(
    ocr["target"], n_classes=len(vocab)).astype(theano.config.floatX)

mbs = minibatch_size = 2

# Pull one masked minibatch from the training iterator to use as example
# data, then reset that iterator; the validation iterator is built from the
# same [X, y] arrays with the same settings.
train_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)
X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
train_itr.reset()
valid_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)

# Parallel lists: names_list[i] is the name for datasets_list[i]. The same
# minibatch arrays are also passed as the test values.
names_list = ["X", "X_mask", "y", "y_mask"]
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph, list_of_test_values=datasets_list)

n_hid = 256
コード例 #3
0
ファイル: ocr_ctc.py プロジェクト: hdubey/dagbldr
from dagbldr.datasets import make_ocr
from dagbldr.nodes import log_ctc_cost
from dagbldr.nodes import gru_recurrent_layer, softmax_layer
from dagbldr.optimizers import adadelta
from dagbldr.utils import TrainingLoop
from dagbldr.utils import add_datasets_to_graph, make_masked_minibatch
from dagbldr.utils import get_params_and_grads

import numpy as np
import theano
from collections import OrderedDict

# Seeded RNG and the ordered container that will hold the graph.
graph = OrderedDict()
random_state = np.random.RandomState(1999)

# Build the OCR dataset from two strings and pull out the fields used below.
data = make_ocr(["Hello", "humans"])
X, y = data["data"], data["target"]
vocab_size, vocab = data["vocabulary_size"], data["vocabulary"]
# Validation reuses the training indices (both names refer to the same object).
train_indices = valid_indices = data["train_indices"]

# One minibatch per array, with the slice spanning every example in it.
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X)))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y)))

# Hidden size, and an output size one larger than the vocabulary
# (presumably the extra class is the CTC blank symbol -- TODO confirm).
n_hid, n_out = 256, vocab_size + 1

# Parallel lists: names_list[i] is the name for datasets_list[i].
names_list = ["X", "X_mask", "y", "y_mask"]
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]