Ejemplo n.º 1
0
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# At module scope locals() is the module's global namespace, so this call
# turns every key of the `config` dict into a module-level name. The names
# used below without a visible definition (hdf5_file, batch_size,
# hidden_size, num_layers, model, dropout) presumably arrive this way --
# TODO confirm against config.py.
# NOTE(review): this only works at module level; inside a function
# locals().update() would not create local variables.
locals().update(config)

# DATA
# get_metadata(hdf5_file) is unpacked into three values; judging by the
# names these are index<->character lookup tables and the vocabulary size
# (verify in utils).
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
# One data stream per split, both built with the same batch_size.
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
# Symbolic uint8 matrices named 'features' and 'targets'.
# NOTE(review): `tensor` is not imported in the visible lines; presumably
# theano.tensor -- confirm.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
# nn_fprop builds the network graph from x and y and returns three values:
# the prediction variable, the cost and `cells`.
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
# Wrap the cost in a computation graph so that its variables can be
# filtered and replaced below.
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    # The filter keeps the graph variables whose Theano name matches the
    # regex; apply_dropout is then called with those variables and `dropout`
    # (presumably the drop probability) and its result replaces `cg`.
    # NOTE(review): `cost` still refers to the variable built before this
    # replacement -- confirm that later code reads the cost from `cg`.
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
    cg = apply_dropout(cg, inputs, dropout)
Ejemplo n.º 2
0
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from blocks.extensions import saveload
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# Copies every key of the `config` dict into the module namespace (at module
# scope locals() and globals() are the same dict). hdf5_file, batch_size,
# hidden_size, num_layers, model and dropout are not defined anywhere in the
# visible lines, so they presumably come from here -- TODO confirm.
# NOTE(review): this trick silently stops working if the code is moved into
# a function.
locals().update(config)

# DATA
# Three values are unpacked from get_metadata(hdf5_file); by their names,
# index->char and char->index mappings plus the vocabulary size (verify in
# utils).
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
# Streams over the 'train' and 'dev' splits, using the same batch size.
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
# Symbolic uint8 input/target matrices named 'features' and 'targets'.
# NOTE(review): `tensor` is not imported in the visible lines; presumably
# theano.tensor -- confirm.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
# In this version nn_fprop returns exactly two values: the prediction
# variable and the cost.
y_hat, cost = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
# Computation graph rooted at the cost; needed for the variable filtering
# below.
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    # Select the variables whose Theano name matches '.*apply_input.*' and
    # rebind `cg` to the result of apply_dropout for them; `dropout` is
    # presumably the drop probability.
    # NOTE(review): `cost` itself is not rebound here -- confirm that later
    # code takes the cost from `cg`.
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
    cg = apply_dropout(cg, inputs, dropout)
Ejemplo n.º 3
0
from blocks.monitoring import aggregation
# from blocks.extras.extensions.plot import Plot
from utils import get_stream, track_best, MainLoop, Dropout, apply_dropout, SetTrainFlag, load_encoder
from model import nn_fprop
from config import config
from blocks.bricks.conv import ConvolutionalSequence, Convolutional
from blocks.bricks import MLP
from blocks.bricks.recurrent.base import BaseRecurrent
from blocks.roles import PARAMETER, FILTER, INPUT
from blocks import roles
import operator

# Load config parameters
# At module scope this injects every key of `config` as a module-level name.
# hdf5_file, batch_size, output_columns, input_columns and cost_mode are not
# defined in the visible lines and presumably come from here -- TODO confirm.
locals().update(config)
# DATA
# Streams over the 'train' and 'test' splits of the same HDF5 file.
train_stream = get_stream(hdf5_file, 'train', batch_size)
test_stream = get_stream(hdf5_file, 'test', batch_size)

# MODEL
# Two symbolic 3-D tensors named 'features' and 'targets'; x is declared with
# no broadcastable axes, and the dtype string 'floatX' is passed for both.
# NOTE(review): `T` and `theano` are not imported in the visible lines;
# presumably theano.tensor and theano -- confirm.
x = T.TensorType('floatX', [False] * 3)('features')
y = T.tensor3('targets', dtype='floatX')
# One-element list wrapping a shared variable initialised to 0.
# NOTE(review): presumably toggled by the SetTrainFlag extension imported
# from utils -- confirm.
train_flag = [theano.shared(0)]
# Swap the first two axes of both tensors.
# NOTE(review): presumably batch-major -> time-major -- confirm against
# get_stream.
x = x.swapaxes(0, 1)
y = y.swapaxes(0, 1)
# The 'RL-MDN' cost mode uses one output fewer than there are output columns;
# every other mode uses one output per column.
out_size = len(output_columns) - 1 if cost_mode == 'RL-MDN' else len(
    output_columns)
# load_encoder returns a pair; only its second element is used here.
_, latent_size = load_encoder()
# Network input width: latent size plus one entry per input column.
in_size = latent_size + len(input_columns)
# Disabled code kept from the original:
# mean = x[:,:,0:latent_size]
# var = T.clip(T.exp(x[:,:,latent_size:latent_size*2]), .0001, 1000)
# rrng = MRG_RandomStreams(seed)
Ejemplo n.º 4
0
                        type=int, help='number of characters to sample')
    parser.add_argument('-seed', default=None,
                        type=int, help='seed for random number generator')
    parser.add_argument('-temperature', type=float,
                        default=1.0, help='temperature of sampling')
    args = parser.parse_args()

    # Define primetext
    # Build x_curr, the seed sequence fed to the network, as a column of
    # uint8 character indices. hdf5_file, batch_size and seq_length are
    # defined outside the visible lines.
    ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
    if args.primetext:
        # Keep only characters present in char_to_ix. Membership is tested on
        # the dict itself: under Python 2 `.keys()` would build a fresh list
        # for every character and turn each test into a linear scan.
        primetext = ''.join(
            [ch for ch in args.primetext if ch in char_to_ix])
        # NOTE(review): if every character is filtered out, x_curr is an
        # empty (0, 1) array -- confirm the sampling code copes with that.
        x_curr = numpy.expand_dims(
            numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)
    else:
        # No prime text supplied: seed from the first batch of the dev stream.
        dev_stream = get_stream(hdf5_file, 'dev', batch_size)
        # Builtin next() works on both Python 2.6+ and Python 3 iterators.
        x_curr, y_curr = next(dev_stream.get_epoch_iterator())
        # Keep only the last column of the batch, reshaped to (seq_length, 1).
        x_curr = x_curr[:, -1].reshape(seq_length, 1)

    print 'Loading model from {0}...'.format(args.model)
    # NOTE(review): `load` is imported outside the visible lines; if it is a
    # pickle-based loader, only trusted model files should be opened.
    main_loop = load(args.model)
    print 'Model loaded. Building prediction function...'
    model = main_loop.model
    # NOTE(review): relies on model.inputs holding exactly two variables in
    # (targets, features) order -- confirm this ordering is guaranteed.
    y, x = model.inputs
    softmax = NDimensionalSoftmax()
    # First graph variable named 'linear_output'; raises IndexError if the
    # model has no variable with that name.
    linear_output = [
        v for v in model.variables if v.name == 'linear_output'][0]
    # Apply the softmax brick to linear_output, declaring one extra leading
    # dimension (extra_ndim=1).
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    # Compiled function mapping a value for x to the value of y_hat.
    predict = theano.function([x], y_hat)

    print 'Starting sampling'
Ejemplo n.º 5
0
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# Module-level locals() is the global namespace, so each key of `config`
# becomes a module-level name. hdf5_file, batch_size, hidden_size,
# num_layers, model and dropout have no visible definition and presumably
# originate here -- TODO confirm against config.py.
# NOTE(review): has no effect on local variables if moved inside a function.
locals().update(config)

# DATA
# get_metadata(hdf5_file) is unpacked into three values; by their names,
# index<->character lookup tables and the vocabulary size (verify in utils).
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
# Data streams for the "train" and "dev" splits, sharing one batch size.
train_stream = get_stream(hdf5_file, "train", batch_size)
dev_stream = get_stream(hdf5_file, "dev", batch_size)


# MODEL
# Symbolic uint8 matrices named "features" and "targets".
# NOTE(review): `tensor` is not imported in the visible lines; presumably
# theano.tensor -- confirm.
x = tensor.matrix("features", dtype="uint8")
y = tensor.matrix("targets", dtype="uint8")
# nn_fprop returns three values: the prediction variable, the cost and
# `cells`.
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
# Computation graph of the cost, used below to locate the dropout targets.
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    # Variables are selected by Theano name (regex ".*apply_input.*") and
    # passed to apply_dropout together with `dropout` (presumably the drop
    # probability); its result replaces `cg`.
    # NOTE(review): `cost` is not rebound here -- confirm that later code
    # takes the cost from `cg`.
    inputs = VariableFilter(theano_name_regex=r".*apply_input.*")(cg.variables)
    cg = apply_dropout(cg, inputs, dropout)
Ejemplo n.º 6
0
from blocks.model import Model
from blocks.graph import ComputationGraph, apply_dropout
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp, Adam
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload, ProgressBar
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# At module scope this injects every key of `config` as a module-level name.
# hdf5_file, batch_size, hidden_size, network_mode and num_features are not
# defined in the visible lines and presumably come from here -- TODO confirm.
locals().update(config)
# DATA
# hdf5_file and batch_size are indexed by network_mode, i.e. the config
# holds one file and one batch size per mode.
train_stream = get_stream(hdf5_file[network_mode], 'train',
                          batch_size[network_mode])
test_stream = get_stream(hdf5_file[network_mode], 'test',
                         batch_size[network_mode])

# MODEL
# Symbolic 3-D tensors named 'features' and 'targets', created with the
# dtype string 'floatX'.
# NOTE(review): `T` is not imported in the visible lines; presumably
# theano.tensor -- confirm.
x = T.tensor3('features', dtype='floatX')
y = T.tensor3('targets', dtype='floatX')
# Swap the first two axes of both tensors.
# NOTE(review): presumably batch-major -> time-major -- confirm against
# get_stream.
x = x.swapaxes(0, 1)
y = y.swapaxes(0, 1)
# Input and output widths are both set to num_features.
in_size = num_features
out_size = num_features
linear_output, cost, cells = nn_fprop(x,
                                      y,
                                      in_size,
                                      out_size,
                                      hidden_size[network_mode],
Ejemplo n.º 7
0
    parser.add_argument('-temperature',
                        type=float,
                        default=1.0,
                        help='temperature of sampling')
    args = parser.parse_args()

    # Define primetext
    # Build x_curr, the seed sequence fed to the network, as a column of
    # uint8 character indices. hdf5_file, batch_size and seq_length are
    # defined outside the visible lines.
    ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
    if args.primetext:
        # Keep only characters present in char_to_ix. Testing membership on
        # the dict itself avoids the per-character list that `.keys()`
        # builds under Python 2.
        primetext = ''.join(
            [ch for ch in args.primetext if ch in char_to_ix])
        # NOTE(review): if every character is filtered out, x_curr is an
        # empty (0, 1) array -- confirm the sampling code copes with that.
        x_curr = numpy.expand_dims(numpy.array(
            [char_to_ix[ch] for ch in primetext], dtype='uint8'),
                                   axis=1)
    else:
        # No prime text supplied: seed from the first batch of the dev stream.
        dev_stream = get_stream(hdf5_file, 'dev', batch_size)
        # Builtin next() works on both Python 2.6+ and Python 3 iterators.
        x_curr, y_curr = next(dev_stream.get_epoch_iterator())
        # Keep only the last column of the batch, reshaped to (seq_length, 1).
        x_curr = x_curr[:, -1].reshape(seq_length, 1)

    print 'Loading model from {0}...'.format(args.model)
    # NOTE(review): `load` is imported outside the visible lines; if it is a
    # pickle-based loader, only trusted model files should be opened.
    main_loop = load(args.model)
    print 'Model loaded. Building prediction function...'
    model = main_loop.model
    # NOTE(review): assumes model.inputs contains exactly two variables,
    # ordered (targets, features) -- confirm this ordering is guaranteed.
    y, x = model.inputs
    softmax = NDimensionalSoftmax()
    # Take the first graph variable named 'linear_output'; an IndexError is
    # raised when no such variable exists.
    linear_output = [v for v in model.variables
                     if v.name == 'linear_output'][0]
    # Apply the softmax brick to linear_output, declaring one extra leading
    # dimension (extra_ndim=1).
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    # Compiled function mapping a value for x to the value of y_hat.
    predict = theano.function([x], y_hat)

    print 'Starting sampling'