Example #1
def Dropout(p_drop, inputs):
    """
    Drop each input randomly with probability `p_drop`, and scale the remaining
    ones to preserve the overall variance. This op doesn't yet support a
    test-time mode (where all inputs are kept).
    """
    srng = RandomStreams(seed=234)
    scaled_inputs = inputs / swft.floatX(1 - p_drop)
    return scaled_inputs * srng.binomial(
        inputs.shape, p=swft.floatX(1 - p_drop), dtype=theano.config.floatX)
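This is the "inverted dropout" formulation: the surviving activations are divided by 1 - p_drop at train time, so their expected value matches the no-dropout case. A hedged usage sketch follows; the hidden layer and variable names are assumptions for illustration, not code from the source.

# Hypothetical usage: apply dropout to the activations of a hidden layer.
hidden = ReLULayer('Model.Hidden', 784, 1200, images)
hidden = Dropout(0.5, hidden)  # each unit zeroed with p=0.5, survivors scaled by 1/(1 - 0.5)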
Example #2
def Dropout(p_drop, inputs):
    """
    Drop each input randomly with probability `p_drop`, and scale the remaining
    ones to preserve the overall variance. This op doesn't yet support a
    test-time mode (where all inputs are kept).
    """
    srng = RandomStreams(seed=234)
    scaled_inputs = inputs / swft.floatX(1-p_drop)
    return scaled_inputs * srng.binomial(
        inputs.shape,
        p=swft.floatX(1-p_drop),
        dtype=theano.config.floatX
    )
Example #3
def generator(n_samples):
    noise = theano_srng.uniform(
        size=(n_samples, 100), 
        low=-swft.floatX(numpy.sqrt(3)),
        high=swft.floatX(numpy.sqrt(3))
    )

    output = ReLULayer('Generator.1', 100, 1200, noise)
    output = ReLULayer('Generator.2', 1200, 1200, output)
    output = ReLULayer('Generator.3', 1200, 1200, output)
    output = ReLULayer('Generator.4', 1200, 1200, output)
    
    return T.nnet.sigmoid(
        swft.ops.Linear('Generator.5', 1200, 784, output, initialization=('uniform', 0.05))
    )
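The generator maps 100-dimensional uniform noise through four ReLU layers to 784 sigmoid outputs, one MNIST-sized image per row. A hedged sketch of turning it into a sampler, mirroring the pattern used under Example #12 of this listing:

# Compile the generator into a callable that draws 100 samples per call.
sample_fn = theano.function([], generator(100))
samples = sample_fn()  # array of shape (100, 784), values in (0, 1)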
Example #4
    def step(current_processed_input, last_hidden):
        gates = T.nnet.sigmoid(
            swft.ops.Linear(
                name+'.Recurrent_Gates', 
                hidden_dim, 
                2 * hidden_dim, 
                last_hidden,
                biases=False
            ) + current_processed_input[:, :2*hidden_dim]
        )

        update = gates[:, :hidden_dim]
        reset  = gates[:, hidden_dim:]

        scaled_hidden = reset * last_hidden

        candidate = T.tanh(
            swft.ops.Linear(
                name+'.Recurrent_Candidate', 
                hidden_dim, 
                hidden_dim, 
                scaled_hidden,
                biases=False,
                initialization='orthogonal'
            ) + current_processed_input[:, 2*hidden_dim:]
        )

        one = swft.floatX(1.0)
        return (update * candidate) + ((one - update) * last_hidden)
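`step` is one GRU update: a single 2*hidden_dim projection of the previous hidden state yields the update and reset gates, and the candidate state is blended with the previous state through the update gate. In Theano this step function would normally be iterated over the sequence with theano.scan; the sketch below is an assumption about the surrounding wiring, with processed_inputs (time-major, already projected to 3*hidden_dim per step) and h0 as hypothetical names, and it would sit inside the same enclosing recurrent-layer function as step.

    # Hedged sketch: iterate the GRU step over a time-major input sequence.
    # processed_inputs: (seq_len, batch, 3 * hidden_dim); h0: (batch, hidden_dim)
    hiddens, _ = theano.scan(
        step,
        sequences=[processed_inputs],
        outputs_info=[h0],
    )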
Example #5
def generator(n_samples):
    noise = theano_srng.uniform(size=(n_samples, 100),
                                low=-swft.floatX(numpy.sqrt(3)),
                                high=swft.floatX(numpy.sqrt(3)))

    output = ReLULayer('Generator.1', 100, 1200, noise)
    output = ReLULayer('Generator.2', 1200, 1200, output)
    output = ReLULayer('Generator.3', 1200, 1200, output)
    output = ReLULayer('Generator.4', 1200, 1200, output)

    return T.nnet.sigmoid(
        swft.ops.Linear('Generator.5',
                        1200,
                        784,
                        output,
                        initialization=('uniform', 0.05)))
Example #6
def BatchNormalize(name, input_dim, inputs, stepwise=False):
    """
    Batch normalization. By default, normalizes across all but the last axis.
    Set `stepwise` to true if you're batch-norming an RNN and want to normalize
    each timestep separately (e.g. for a language model, where you can't let
    information from step `t+1` leak into step `t`).
    """
    if stepwise:
        means = inputs.mean(axis=1, keepdims=True)
        variances = inputs.var(axis=1, keepdims=True)
    else:
        means = inputs.reshape((-1, input_dim)).mean(axis=0)
        variances = inputs.reshape((-1, input_dim)).var(axis=0)

    beta = swft.param(
        name + '.beta',
        numpy.zeros(input_dim, dtype='float32')
    )

    gamma = swft.param(
        name + '.gamma',
        numpy.ones(input_dim, dtype='float32')
    )

    stdevs = T.sqrt(variances + swft.floatX(1e-4))

    return (inputs - means) * (gamma / stdevs) + beta
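A hedged usage sketch covering both modes; the variable names are illustrative rather than taken from the source. Since the stepwise branch reduces over axis 1 with keepdims, the recurrent case assumes time-major inputs of shape (timesteps, batch, features).

# Feed-forward case: normalize each feature over the whole batch.
h = BatchNormalize('Model.BN1', 1200, pre_activation)

# Recurrent case: normalize each timestep separately, so no statistics are
# shared across steps (inputs assumed time-major).
h_rnn = BatchNormalize('RNN.BN1', hidden_dim, rnn_pre_activation, stepwise=True)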
Example #7
    def step(current_processed_input, last_hidden):
        gates = T.nnet.sigmoid(
            swft.ops.Linear(name + ".Recurrent_Gates", hidden_dim, 2 * hidden_dim, last_hidden, biases=False)
            + current_processed_input[:, : 2 * hidden_dim]
        )

        update = gates[:, :hidden_dim]
        reset = gates[:, hidden_dim:]

        scaled_hidden = reset * last_hidden

        candidate = T.tanh(
            swft.ops.Linear(
                name + ".Recurrent_Candidate",
                hidden_dim,
                hidden_dim,
                scaled_hidden,
                biases=False,
                initialization="orthogonal",
            )
            + current_processed_input[:, 2 * hidden_dim :]
        )

        one = swft.floatX(1.0)
        return (update * candidate) + ((one - update) * last_hidden)
Example #8
def evaluate(fakes):
    real_images = T.matrix()
    fake_images = T.matrix()

    cost  = T.nnet.binary_crossentropy(_evaluator(real_images), swft.floatX(1)).mean()
    cost += T.nnet.binary_crossentropy(_evaluator(fake_images), swft.floatX(0)).mean()

    real_accuracy = T.ge(_evaluator(real_images), swft.floatX(0.5)).mean()
    fake_accuracy = T.lt(_evaluator(fake_images), swft.floatX(0.5)).mean()
    accuracy = (real_accuracy + fake_accuracy) / swft.floatX(2)

    real_train, real_dev, real_test = swft.mnist.load(BATCH_SIZE)

    assert(len(fakes) == 60000)
    fakes_train = fakes[:50000]
    fakes_dev   = fakes[50000:]

    def train_epoch():
        numpy.random.shuffle(fakes_train)
        batched = fakes_train.reshape(-1, BATCH_SIZE, 784)
        for i, (real_images, _) in enumerate(real_train()):
            yield [real_images, batched[i]]

    def dev_epoch():
        yield [real_dev().next()[0], fakes_dev]

    swft.train(
        [real_images, fake_images],
        [cost],
        train_epoch,
        dev_data=dev_epoch,
        epochs=EPOCHS,
        print_every=1000
    )

    fn = theano.function([real_images, fake_images], cost)
    result = fn(real_dev().next()[0], fakes_dev)

    swft.delete_params('Evaluator')

    return result
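evaluate trains a fresh binary classifier (the _evaluator network, with parameters under 'Evaluator') to separate real MNIST images from the supplied fakes, returns its dev-set cross-entropy, and then deletes the classifier's parameters so repeated calls start from scratch. A hedged usage sketch, where generate_fakes is a hypothetical helper that samples 60,000 images from a generator:

fakes = generate_fakes(60000)  # hypothetical; array of shape (60000, 784)
score = evaluate(fakes)        # higher dev loss means the fakes are harder to tell apart from real images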
Example #9
def evaluate(fakes):
    real_images = T.matrix()
    fake_images = T.matrix()

    cost = T.nnet.binary_crossentropy(_evaluator(real_images),
                                      swft.floatX(1)).mean()
    cost += T.nnet.binary_crossentropy(_evaluator(fake_images),
                                       swft.floatX(0)).mean()

    real_accuracy = T.ge(_evaluator(real_images), swft.floatX(0.5)).mean()
    fake_accuracy = T.lt(_evaluator(fake_images), swft.floatX(0.5)).mean()
    accuracy = (real_accuracy + fake_accuracy) / swft.floatX(2)

    real_train, real_dev, real_test = swft.mnist.load(BATCH_SIZE)

    assert (len(fakes) == 60000)
    fakes_train = fakes[:50000]
    fakes_dev = fakes[50000:]

    def train_epoch():
        numpy.random.shuffle(fakes_train)
        batched = fakes_train.reshape(-1, BATCH_SIZE, 784)
        for i, (real_images, _) in enumerate(real_train()):
            yield [real_images, batched[i]]

    def dev_epoch():
        yield [real_dev().next()[0], fakes_dev]

    swft.train([real_images, fake_images], [cost],
               train_epoch,
               dev_data=dev_epoch,
               epochs=EPOCHS,
               print_every=1000)

    fn = theano.function([real_images, fake_images], cost)
    result = fn(real_dev().next()[0], fakes_dev)

    swft.delete_params('Evaluator')

    return result
Example #10
def BatchNormalize(name, input_dim, inputs, stepwise=False):
    """
    Batch normalization. By default, normalizes across all but the last axis.
    Set `stepwise` to true if you're batch-norming an RNN and want to normalize
    each timestep separately (e.g. for a language model, where you can't let
    information from step `t+1` leak into step `t`).
    """
    if stepwise:
        means = inputs.mean(axis=1, keepdims=True)
        variances = inputs.var(axis=1, keepdims=True)
    else:
        means = inputs.reshape((-1, input_dim)).mean(axis=0)
        variances = inputs.reshape((-1, input_dim)).var(axis=0)

    beta = swft.param(name + '.beta', numpy.zeros(input_dim, dtype='float32'))

    gamma = swft.param(name + '.gamma', numpy.ones(input_dim, dtype='float32'))

    stdevs = T.sqrt(variances + swft.floatX(1e-4))

    return (inputs - means) * (gamma / stdevs) + beta
Example #11
def rectify(x):
    """ReLU nonlinearity: max(0, x)"""
    return (x + abs(x)) / swft.floatX(2.0)
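The identity (x + |x|) / 2 = max(0, x) is simply an elementwise way of writing ReLU that works on Theano tensors. A quick self-contained check of the identity with numpy:

import numpy as np
x = np.array([-2.0, -0.5, 0.0, 1.5])
assert np.allclose((x + np.abs(x)) / 2.0, np.maximum(0.0, x))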
Example #12
    output = swft.ops.Dropout(0.5, output)

    # We apply the sigmoid in a later step
    return swft.ops.Linear('Discriminator.Output', 240, 1, output, initialization=('uniform', 0.005)).flatten()

symbolic_inputs = swft.mnist.symbolic_inputs()
images, targets = symbolic_inputs

generator_output = generator(BATCH_SIZE)

disc_out = discriminator(T.concatenate([generator_output, images], axis=0))
disc_gen_out = T.nnet.sigmoid(disc_out[:BATCH_SIZE])
disc_inputs  = T.nnet.sigmoid(disc_out[BATCH_SIZE:])

# Gen objective:  push D(G) to one
gen_cost      = T.nnet.binary_crossentropy(disc_gen_out, swft.floatX(1)).mean()
gen_cost.name = 'gen_cost'

# Discrim objective: push D(G) to zero, and push D(real) to one
discrim_cost  = T.nnet.binary_crossentropy(disc_gen_out, swft.floatX(0)).mean()
discrim_cost += T.nnet.binary_crossentropy(disc_inputs, swft.floatX(1)).mean()
discrim_cost /= swft.floatX(2.0)
discrim_cost.name = 'discrim_cost'

train_data, dev_data, test_data = swft.mnist.load(BATCH_SIZE)

gen_params     = swft.search(gen_cost,     lambda x: hasattr(x, 'param') and 'Generator' in x.name)
discrim_params = swft.search(discrim_cost, lambda x: hasattr(x, 'param') and 'Discriminator' in x.name)

_sample_fn = theano.function([], generator(100))
def generate_image(epoch):
Example #13
                        output).flatten())


def noise(n_samples):
    output = theano_srng.normal(size=(n_samples, LATENT_DIM))
    return swft.floatX(LATENT_STDEV) * output


images, targets = swft.mnist.symbolic_inputs()

latents = encoder(images)
reconstructions = decoder(latents)

# Encoder objective:  push D(latents) to one...
reg_cost = T.nnet.binary_crossentropy(discriminator(latents),
                                      swft.floatX(1)).mean()
reg_cost.name = 'reg_cost'

# ... and minimize reconstruction error
reconst_cost = T.sqr(reconstructions - images).mean()
reconst_cost.name = 'reconst_cost'

# this seems to be an important hyperparam, maybe try playing with it more.
full_enc_cost = (swft.floatX(100) * reconst_cost) + reg_cost

# Decoder objective: minimize reconstruction loss
dec_cost = reconst_cost

# Discrim objective: push D(latents) to zero, D(noise) to one
discrim_cost = T.nnet.binary_crossentropy(discriminator(latents),
                                          swft.floatX(0)).mean()
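The snippet is cut off after the first discriminator term. The parallel adversarial-autoencoder snippet later in this listing completes the objective by also pushing D(noise) toward one:

discrim_cost += T.nnet.binary_crossentropy(discriminator(noise(BATCH_SIZE)),
                                           swft.floatX(1)).mean()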
Example #14
                           1,
                           output,
                           initialization=('uniform', 0.005)).flatten()


symbolic_inputs = swft.mnist.symbolic_inputs()
images, targets = symbolic_inputs

generator_output = generator(BATCH_SIZE)

disc_out = discriminator(T.concatenate([generator_output, images], axis=0))
disc_gen_out = T.nnet.sigmoid(disc_out[:BATCH_SIZE])
disc_inputs = T.nnet.sigmoid(disc_out[BATCH_SIZE:])

# Gen objective:  push D(G) to one
gen_cost = T.nnet.binary_crossentropy(disc_gen_out, swft.floatX(1)).mean()
gen_cost.name = 'gen_cost'

# Discrim objective: push D(G) to zero, and push D(real) to one
discrim_cost = T.nnet.binary_crossentropy(disc_gen_out, swft.floatX(0)).mean()
discrim_cost += T.nnet.binary_crossentropy(disc_inputs, swft.floatX(1)).mean()
discrim_cost /= swft.floatX(2.0)
discrim_cost.name = 'discrim_cost'

train_data, dev_data, test_data = swft.mnist.load(BATCH_SIZE)

gen_params = swft.search(
    gen_cost, lambda x: hasattr(x, 'param') and 'Generator' in x.name)
discrim_params = swft.search(
    discrim_cost, lambda x: hasattr(x, 'param') and 'Discriminator' in x.name)
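With one parameter set per cost, training would plausibly be launched through swft.train, whose signature appears under Example #18. The call below is a hedged sketch rather than code from the source: EPOCHS is assumed to be defined elsewhere in the script, and one optimizer is passed per cost because swft.train zips costs, param_sets, and optimizers together.

swft.train(
    symbolic_inputs,
    [gen_cost, discrim_cost],
    train_data,
    dev_data=dev_data,
    param_sets=[gen_params, discrim_params],
    optimizers=[lasagne.updates.adam, lasagne.updates.adam],
    epochs=EPOCHS,
)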
Example #15
def noise(n_samples):
    output = theano_srng.normal(size=(n_samples, LATENT_DIM))
    return swft.floatX(LATENT_STDEV) * output
Example #16
def rectify(x):
    """ReLU nonlinearity: max(0, x)"""
    return (x + abs(x)) / swft.floatX(2.0)
Example #17
    output = Layer('Discriminator.Layer2', HIDDEN_DIM, HIDDEN_DIM, True,  output)
    return T.nnet.sigmoid(
        swft.ops.Linear('Discriminator.Layer3', HIDDEN_DIM, 1, output).flatten()
    )

def noise(n_samples):
    output = theano_srng.normal(size=(n_samples,LATENT_DIM))
    return swft.floatX(LATENT_STDEV) * output

images, targets = swft.mnist.symbolic_inputs()

latents = encoder(images)
reconstructions = decoder(latents)

# Encoder objective:  push D(latents) to one...
reg_cost = T.nnet.binary_crossentropy(discriminator(latents), swft.floatX(1)).mean()
reg_cost.name = 'reg_cost'

# ... and minimize reconstruction error
reconst_cost = T.sqr(reconstructions - images).mean()
reconst_cost.name = 'reconst_cost'

# this seems to be an important hyperparam, maybe try playing with it more.
full_enc_cost = (swft.floatX(100)*reconst_cost) + reg_cost

# Decoder objective: minimize reconstruction loss
dec_cost = reconst_cost

# Discrim objective: push D(latents) to zero, D(noise) to one
discrim_cost  = T.nnet.binary_crossentropy(discriminator(latents),           swft.floatX(0)).mean()
discrim_cost += T.nnet.binary_crossentropy(discriminator(noise(BATCH_SIZE)), swft.floatX(1)).mean()
Example #18
def train(symbolic_inputs,
          costs,
          train_data,
          dev_data=None,
          test_data=None,
          param_sets=None,
          optimizers=[lasagne.updates.adam],
          print_vars=None,
          epochs=10,
          print_every=10,
          callback=None):
    # TODO write documentation

    if param_sets is None:
        param_sets = [swft.search(costs[0], lambda x: hasattr(x, 'param'))]

    assert len(costs) == len(param_sets), "train() needs 1 param set per cost!"

    _print_paramsets_info(costs, param_sets)

    print "Building updates..."

    if print_vars is None:
        print_vars = [c for c in costs]
    for cost in costs:
        print_vars += swft.search(cost, lambda x: hasattr(x, '_print'))
    # Remove duplicate values in print_vars
    print_vars = list(set(print_vars))

    all_updates = []
    for cost, params, optimizer in zip(costs, param_sets, optimizers):
        grads = T.grad(cost, wrt=params)
        # Clip gradients elementwise
        grads = [T.clip(g, swft.floatX(-1.0), swft.floatX(1.0)) for g in grads]

        cost_updates = optimizer(grads, params)
        for k, v in cost_updates.items():
            all_updates.append((k, v))

    print "Compiling train function..."

    train_ = theano.function(symbolic_inputs,
                             print_vars,
                             updates=all_updates,
                             on_unused_input='warn')

    print "Compiling evaluate function..."

    evaluate = theano.function(symbolic_inputs,
                               print_vars,
                               on_unused_input='warn')

    print "Training!"

    splits = [('train', train_, train_data)]
    if dev_data is not None:
        splits.append(('dev', evaluate, dev_data))
    if test_data is not None:
        splits.append(('test', evaluate, test_data))

    for epoch in xrange(epochs):
        for title, fn, data in splits:

            epoch_totals = []
            since_last_print = []
            n_inputs = 0

            for iteration, inputs in enumerate(data(), start=1):
                n_inputs += 1

                start_time = time.time()

                outputs_ = fn(*inputs)

                if iteration == 1:
                    epoch_totals = [o.copy() for o in outputs_]
                    since_last_print = [o.copy() for o in outputs_]
                else:
                    for i, o in enumerate(outputs_):
                        epoch_totals[i] += o
                        since_last_print[i] += o

                if iteration % print_every == 0:

                    new_time = time.time()

                    values_to_print = [('epoch', epoch), ('input', iteration),
                                       ('time_per_input',
                                        (time.time() - start_time))]

                    for symbolic, totalval in zip(print_vars,
                                                  since_last_print):
                        values_to_print.append(
                            (str(symbolic), totalval / print_every))

                    print "{0}\t".format(title) + "\t".join([
                        "{0}:{1}".format(name, val)
                        for name, val in values_to_print
                    ])

                    last_print_time = new_time

                    for i, t in enumerate(since_last_print):
                        since_last_print[i].fill(0)

            values_to_print = [('epoch', epoch), ('n_inputs', n_inputs)]

            for symbolic_var, total_val in zip(print_vars, epoch_totals):
                values_to_print.append(
                    (str(symbolic_var), total_val / n_inputs))

            print "{0} summary\t".format(title) + "\t".join(
                ["{0}:{1}".format(name, val) for name, val in values_to_print])

        if callback:
            callback(epoch)
Example #19
def noise(n_samples):
    output = theano_srng.normal(size=(n_samples,LATENT_DIM))
    return swft.floatX(LATENT_STDEV) * output
Example #20
    last_hidden = T.concatenate([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)

sequences   = T.imatrix('sequences')
transcripts = T.imatrix('transcripts')
h0          = T.matrix('h0')

frame_level_outputs, new_h0 = predict(sequences, h0)

cost = T.nnet.categorical_crossentropy(
    T.nnet.softmax(frame_level_outputs[:, :-1].reshape((-1, Q_LEVELS))),
    sequences[:, 1:].flatten()
).mean()

cost = cost * swft.floatX(1.44269504089)  # log2(e): report the cross-entropy in bits rather than nats
cost.name = 'cost'

params = swft.search(cost, lambda x: hasattr(x, 'param'))
swft._train._print_paramsets_info([cost], [params])

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [T.clip(g, swft.floatX(-GRAD_CLIP), swft.floatX(GRAD_CLIP)) for g in grads]

updates = lasagne.updates.adam(grads, params)

train_fn = theano.function(
    [sequences, transcripts, h0],
    [cost, new_h0],
    updates=updates,
    on_unused_input='warn'
Example #21
def train(
        symbolic_inputs,
        costs,
        train_data,
        dev_data=None,
        test_data=None,
        param_sets=None,
        optimizers=[lasagne.updates.adam],
        print_vars=None,
        epochs=10,
        print_every=10,
        callback=None
    ):
    # TODO write documentation

    if param_sets is None:
        param_sets = [ swft.search(costs[0], lambda x: hasattr(x, 'param')) ]

    assert len(costs)==len(param_sets), "train() needs 1 param set per cost!"

    _print_paramsets_info(costs, param_sets)

    print "Building updates..."

    if print_vars is None:
        print_vars = [c for c in costs]
    for cost in costs:
        print_vars += swft.search(cost, lambda x: hasattr(x, '_print'))
    # Remove duplicate values in print_vars
    print_vars = list(set(print_vars))

    all_updates = []
    for cost, params, optimizer in zip(costs, param_sets, optimizers):
        grads = T.grad(cost, wrt=params)
        # Clip gradients elementwise
        grads = [
            T.clip(g, swft.floatX(-1.0), swft.floatX(1.0))
            for g in grads
        ]

        cost_updates = optimizer(grads, params)
        for k, v in cost_updates.items():
            all_updates.append((k,v))

    print "Compiling train function..."

    train_ = theano.function(
        symbolic_inputs, 
        print_vars,
        updates=all_updates,
        on_unused_input='warn'
    )

    print "Compiling evaluate function..."

    evaluate = theano.function(
        symbolic_inputs, 
        print_vars,
        on_unused_input='warn'
    )

    print "Training!"

    splits = [
        ('train', train_, train_data)
    ]
    if dev_data is not None:
        splits.append(('dev', evaluate, dev_data))
    if test_data is not None:
        splits.append(('test', evaluate, test_data))

    for epoch in xrange(epochs):
        for title, fn, data in splits:

            epoch_totals      = []
            since_last_print  = []
            n_inputs = 0

            for iteration, inputs in enumerate(data(), start=1):
                n_inputs += 1

                start_time = time.time()

                outputs_ = fn(*inputs)

                if iteration == 1:
                    epoch_totals     = [o.copy() for o in outputs_]
                    since_last_print = [o.copy() for o in outputs_]
                else:
                    for i, o in enumerate(outputs_):
                        epoch_totals[i]     += o
                        since_last_print[i] += o

                if iteration % print_every == 0:

                    new_time = time.time()

                    values_to_print = [
                        ('epoch', epoch),
                        ('input', iteration),
                        ('time_per_input', (time.time() - start_time))
                    ]

                    for symbolic, totalval in zip(print_vars, since_last_print):
                        values_to_print.append(
                            (str(symbolic), totalval / print_every)
                        )

                    print "{0}\t".format(title) + "\t".join([
                        "{0}:{1}".format(name, val)
                        for name, val in values_to_print
                    ])

                    last_print_time = new_time

                    for i, t in enumerate(since_last_print):
                        since_last_print[i].fill(0)

            values_to_print = [
                ('epoch', epoch),
                ('n_inputs', n_inputs)
            ]

            for symbolic_var, total_val in zip(print_vars, epoch_totals):
                values_to_print.append(
                    (str(symbolic_var), total_val / n_inputs)
                )

            print "{0} summary\t".format(title) + "\t".join(
                ["{0}:{1}".format(name, val) for name, val in values_to_print]
            )

        if callback:
            callback(epoch)
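A hedged usage sketch of the single-cost case, showing the per-epoch callback hook (the function is called as callback(epoch) at the end of every epoch). The variable names and generate_image mirror the GAN script earlier in this listing but are assumptions here.

train(
    [images, targets],
    [cost],
    train_data,
    dev_data=dev_data,
    epochs=10,
    print_every=100,
    callback=generate_image,
)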