예제 #1
0
def build(P, input_size, hidden_sizes):
    """Build a ReLU feed-forward binary classifier with p=0.5 dropout.

    Returns ``predict(X, test=False)``: sigmoid scores for a batch, with
    stochastic dropout while training and deterministic 0.5 scaling of the
    ReLU activations at test time (standard, non-inverted dropout).
    """
    # One-element list so the nested closures share one mutable flag.
    # BUG FIX: the original used `global test_time` inside the nested
    # functions, which binds a *module-level* name — `activation` raised
    # NameError if the graph was built before `predict` was called, and
    # every classifier built by this function shared a single flag.
    test_time = [False]

    def activation(X):
        # Train: random binary keep-mask applied to ReLU output.
        # Test: scale activations by the keep probability instead.
        if not test_time[0]:
            mask = U.theano_rng.binomial(size=X.shape, p=0.5)
            return T.switch(mask, T.nnet.relu(X), 0)
        else:
            return 0.5 * T.nnet.relu(X)

    classifier = feedforward.build_classifier(
        P, name="classifier",
        input_sizes=[input_size],
        hidden_sizes=hidden_sizes,
        output_size=1,
        initial_weights=feedforward.relu_init,
        output_initial_weights=lambda x, y: np.zeros((x, y)),
        activation=activation,
        output_activation=T.nnet.sigmoid)

    def predict(X, test=False):
        # Set the dropout mode *before* classifier([X]) rebuilds the graph,
        # so `activation` sees the right flag at graph-construction time.
        test_time[0] = test
        return classifier([X])[:, 0]
    return predict
예제 #2
0
def build_decoder(P, latent_size, hidden_size, output_size):
    """Construct a one-hidden-layer decoder network on parameter store P.

    Returns a function mapping a latent batch to sigmoid outputs.
    """
    network = feedforward.build_classifier(
        P,
        name='decoder',
        input_sizes=[latent_size],
        hidden_sizes=[hidden_size],
        output_size=output_size,
        initial_weights=feedforward.relu_init,
        activation=T.nnet.softplus,
        output_activation=T.nnet.sigmoid)

    def decode(X):
        # build_classifier yields (hiddens, outputs); keep only the outputs.
        hiddens_and_outputs = network([X])
        return hiddens_and_outputs[1]

    return decode
예제 #3
0
def build(P,structure,weights_file,training=True):
    """Build a sigmoid feed-forward classifier from a layer-size list.

    `structure` is [input_size] + hidden sizes + [output_size];
    `weights_file` (if non-empty) is loaded into P after construction.
    """
    size_in, sizes_hidden, size_out = \
        structure[0], structure[1:-1], structure[-1]

    classifier = feedforward.build_classifier(
            P, "classifier",
            [size_in], sizes_hidden, size_out,
            activation=T.nnet.sigmoid
        )

    def predict(X):
        # Expose both the per-layer activations and the network output.
        layer_outs, preds = classifier([X])
        return layer_outs, preds

    if weights_file != "":
        P.load(weights_file)
    return predict
예제 #4
0
def build(P, structure, weights_file, training=True):
    """Create a sigmoid MLP classifier; optionally restore saved weights.

    The first and last entries of `structure` give the input and output
    sizes; everything in between becomes a hidden layer.
    """
    input_size = structure[0]
    output_size = structure[-1]
    hidden = structure[1:-1]

    classifier = feedforward.build_classifier(P,
                                              "classifier", [input_size],
                                              hidden,
                                              output_size,
                                              activation=T.nnet.sigmoid)

    if weights_file != "":
        P.load(weights_file)

    def predict(X):
        # classifier returns a (hidden activations, outputs) pair.
        hidden_states, outputs = classifier([X])
        return hidden_states, outputs

    return predict
예제 #5
0
def build(P, name,
          input_size=200, z_size=200,
          hidden_layer_size=2500,
          x_extractor_layers=[600] * 4,
          z_extractor_layers=[500] * 4,
          prior_layers=[500] * 4,
          generation_layers=[600] * 4,
          inference_layers=[500] * 4):
    """Build a variational recurrent sequence model (VRNN-style).

    Wires x/z feature extractors, prior/inference/generation networks and
    an LSTM recurrence onto parameter store P, and returns two functions:

      extract(X, l) -> [Z_prior_mean, Z_prior_logvar,
                        Z_mean, Z_logvar, X_mean, X_logvar]
          per-time-step distribution parameters for a (time, batch, dim)
          input X, zeroed past each sequence's length l.
      sample() -> 40 generated x means drawn from the prior (batch of 1).
    """
    # NOTE(review): mutable list defaults are shared across calls — callers
    # must not mutate them.  `inference_layers` is accepted but never used
    # in this body (see `infer` below); confirm whether that is intended.
    def weight_init(x,y):
        # Small uniform initialisation in [-0.08, 0.08].
        return np.random.uniform(-0.08, 0.08, (x,y))


    # Feature extractor for observations x (all-ReLU MLP).
    X_extractor = feedforward.build_classifier(
        P, "x_extractor",
        input_sizes=[input_size],
        hidden_sizes=x_extractor_layers[:-1],
        output_size=x_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu
    )

    # Feature extractor for latent samples z (all-ReLU MLP).
    Z_extractor = feedforward.build_classifier(
        P, "z_extractor",
        input_sizes=[z_size],
        hidden_sizes=z_extractor_layers[:-1],
        output_size=z_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu
    )

    # Prior over z conditioned on the recurrent hidden state.
    prior = vae.build_inferer(
        P, "prior",
        input_sizes=[hidden_layer_size],
        hidden_sizes=prior_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False
    )

    # Generator p(x | h, z-features).
    generate = vae.build_inferer(
        P, "generator",
        input_sizes=[hidden_layer_size, z_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=input_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False
    )

    # Learned initial LSTM state (hidden passed through tanh at use sites).
    P.init_recurrence_hidden = np.zeros((hidden_layer_size,))
    P.init_recurrence_cell = np.zeros((hidden_layer_size,))
    recurrence = lstm.build_step(
        P, "recurrence",
        input_sizes=[x_extractor_layers[-1],z_extractor_layers[-1]],
        hidden_size=hidden_layer_size
    )

    # Approximate posterior q(z | h, x-features).
    # NOTE(review): hidden_sizes is generation_layers, not inference_layers —
    # possibly a copy-paste bug; confirm against the training code.
    infer = vae.build_inferer(
        P, "infer",
        input_sizes=[hidden_layer_size, x_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False
    )

    def sample():
        """Generate 40 steps of x means from the prior (batch size 1)."""
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, 1, hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, 1, hidden_layer_size)
        # 40 time steps x batch 1 x z_size standard-normal noise.
        noise = U.theano_rng.normal(size=(40,1,z_size))

        def _step(eps, prev_cell, prev_hidden):
           # Reparameterised draw from the prior, then generate x and
           # advance the LSTM on the (x, z) features.
           _, z_prior_mean, z_prior_logvar = prior([prev_hidden])
           z_sample = z_prior_mean + eps * T.exp(0.5 * z_prior_logvar)
           z_feat = Z_extractor([z_sample])
           _, x_mean, _ = generate([prev_hidden, z_feat])
           x_feat = X_extractor([x_mean])
           curr_cell, curr_hidden = recurrence(x_feat, z_feat, prev_cell, prev_hidden)
           return curr_cell, curr_hidden, x_mean

        [cells,hiddens,x_means],_ = theano.scan(
                _step,
                sequences=[noise],
                outputs_info=[init_cell_batch,init_hidden_batch,None],
            )
        return x_means


    def extract(X,l):
        """Run inference over X (time x batch x input), masked by lengths l."""
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, X.shape[1], hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, X.shape[1], hidden_layer_size)
        noise = U.theano_rng.normal(size=(X.shape[0],X.shape[1],z_size))
        # NOTE(review): p=0.00 makes this mask always zero, so the recurrent
        # state is never actually reset below — confirm this is intended.
        reset_init_mask = U.theano_rng.binomial(size=(X.shape[0],X.shape[1]),p=0.00)

        # Extract x features for the whole sequence in one pass.
        X_feat = X_extractor([X])

        def _step(t,x_feat, eps, reset_mask, prev_cell, prev_hidden):
            reset_mask = reset_mask.dimshuffle(0,'x')

            # Prior and posterior over z; sample z via reparameterisation.
            _, z_prior_mean, z_prior_logvar = prior([prev_hidden])
            _, z_mean, z_logvar = infer([prev_hidden, x_feat])
            z_sample = z_mean + eps * T.exp(0.5 * z_logvar)
            z_feat = Z_extractor([z_sample])
            _, x_mean, x_logvar = generate([prev_hidden, z_feat])

            curr_cell, curr_hidden = recurrence(x_feat, z_feat, prev_cell, prev_hidden)
            # Optionally reset state back to the learned initial values.
            curr_cell = T.switch(
                    reset_mask, init_cell_batch, curr_cell)
            curr_hidden = T.switch(
                    reset_mask, init_hidden_batch, curr_hidden)

            # Zero every output beyond each sequence's true length l.
            mask = (t < l).dimshuffle(0,'x')
            return tuple(
                T.switch(mask,out,0)
                for out in (
                    curr_cell, curr_hidden,
                    z_prior_mean, z_prior_logvar,
                    z_sample, z_mean, z_logvar,
                    x_mean, x_logvar
                ))

        [_, _,
         Z_prior_mean, Z_prior_logvar,
         Z_sample, Z_mean, Z_logvar,
         X_mean, X_logvar], _ = theano.scan(
            _step,
            sequences=[T.arange(X_feat.shape[0]),X_feat,noise,reset_init_mask],
            outputs_info=[init_cell_batch, init_hidden_batch] +
            [None] * 7,
        )
        return [
            Z_prior_mean, Z_prior_logvar,
            Z_mean, Z_logvar,
            X_mean, X_logvar,
        ]
    return extract, sample
예제 #6
0
def build(P, input_size, output_size, mem_size, mem_width, controller_size):
    """Build a Neural Turing Machine with one read head and one write head.

    Registers the memory matrix and per-head initial attention weights on
    parameter store P, builds the controller MLP and the NTM memory-update
    step, and returns process(X) mapping a batch of input sequences
    (batch x time x input_size) to controller outputs (batch x time x
    output_size).  Python 2 code (`print` statement, `xrange`).
    """
    head_count = 1
    P.memory_init = np.random.randn(mem_size, mem_width)

    # Initial (pre-softmax) read/write attention weights, one pair per head.
    weight_init_params = []
    for i in xrange(head_count):
        P['read_weight_init_%d' % i] = 0.01 * np.random.randn(mem_size)
        P['write_weight_init_%d' % i] = 0.01 * np.random.randn(mem_size)
        weight_init_params.append((P['read_weight_init_%d' % i],
                                   P['write_weight_init_%d' % i]))

    heads_size, head_activations = head.build(
            head_count=head_count,
            mem_width=mem_width,
            shift_width=3
        )
    print "Size of heads:", heads_size

    def controller_activation(X):
        # Split the controller's raw output: first `heads_size` units drive
        # the head parameters, the rest are the external output.
        return (head_activations(X[:, :heads_size]), X[:, heads_size:])

    def output_inits(ins, outs):
        # Zero the initial weights feeding the external-output slice so the
        # network starts by emitting zeros there.
        init = feedforward.initial_weights(ins, outs)
        init[:, heads_size:] = 0
        return init

    controller = feedforward.build_classifier(
            P, "controller",
            input_sizes=[input_size, mem_width],
            hidden_sizes=[controller_size],
            output_size=heads_size + output_size,
            activation=T.tanh,
            output_activation=controller_activation,
            output_initial_weights=output_inits
        )

    ntm_step = ntm.build(mem_size, mem_width)

    def process(X):
        # input_sequences: batch_size x sequence_length x input_size
        # Row-normalise the initial memory (unit L2 norm per slot).
        memory_init = P.memory_init / T.sqrt(T.sum(T.sqr(P.memory_init),
                                                   axis=1, keepdims=True))
        batch_size = X.shape[0]
        batch_size.name = 'batch_size'  # Theano debug name only
        ones = T.ones_like(X[:, 0, 0])
        batch_memory_init = T.alloc(memory_init, batch_size, mem_size, mem_width)
        batch_memory_init.name = 'batch_memory_init'

        # NOTE(review): `head` is already used at module scope above; this
        # local import looks redundant — confirm before removing.
        import head
        # Softmax the learned init weights and broadcast them to the batch.
        batch_weight_inits = [
                (
                    head.softmax(r) * ones.dimshuffle(0, 'x'),
                    head.softmax(w) * ones.dimshuffle(0, 'x')
                ) for r, w in weight_init_params]

        def step(X, M_prev, *heads):
            X.name = 'x[t]'
            # weights [ batch_size x mem_size ]
            # M_prev  [ batch_size x mem_size x mem_width ]
            # scan passes reads first, then writes — re-pair them here.
            weights_prev = zip(heads[0*head_count:1*head_count],
                               heads[1*head_count:2*head_count])
            for r, w in weights_prev:
                r.name = 'read_prev'
                w.name = 'write_prev'

            # Content read: attention-weighted sum over memory rows.
            reads_prev = [T.sum(r.dimshuffle(0, 1, 'x') * M_prev, axis=1)
                          for r, _ in weights_prev]

            # NOTE(review): this rebinds the `heads` parameter name to the
            # controller's head-activation output (intentional shadowing).
            heads, output = controller([X] + reads_prev)
            M_curr, weights_curr = ntm_step(M_prev, heads, weights_prev)

            # Flat output list: memory, all reads, all writes, then output.
            return [M_curr] + \
                   [r for r, _ in weights_curr] +\
                   [w for _, w in weights_curr] +\
                   [output]

        scan_outs, _ = theano.scan(
                step,
                sequences=[X.dimshuffle(1, 0, 2)],
                outputs_info=[batch_memory_init] +
                             [r for r, _ in batch_weight_inits] +
                             [w for _, w in batch_weight_inits] +
                             [None]
            )
        # Last scan output is the external output; back to batch-major.
        outputs = scan_outs[-1]
        return outputs.dimshuffle(1, 0, 2)
    return process
예제 #7
0
def build(P,
          name,
          input_size=200,
          z_size=200,
          hidden_layer_size=2500,
          x_extractor_layers=[600] * 4,
          z_extractor_layers=[500] * 4,
          prior_layers=[500] * 4,
          generation_layers=[600] * 4,
          inference_layers=[500] * 4):
    """Build a variational recurrent sequence model (VRNN-style).

    Unlike the logvar variant elsewhere in this file, the inferer networks
    here return standard deviations directly (z_sample = mean + eps * std).
    Returns (extract, sample):

      extract(X, l) -> [Z_prior_mean, Z_prior_std, Z_mean, Z_std,
                        X_mean, X_std] per time step, zeroed past each
                        sequence's length l.
      sample() -> 40 generated x means drawn from the prior (batch of 1).
    """
    # NOTE(review): mutable list defaults are shared across calls; and
    # `inference_layers` is accepted but never used (see `infer` below).
    def weight_init(x, y):
        # Small uniform initialisation in [-0.08, 0.08].
        return np.random.uniform(-0.08, 0.08, (x, y))

    # Feature extractor for observations x (all-ReLU MLP).
    X_extractor = feedforward.build_classifier(
        P,
        "x_extractor",
        input_sizes=[input_size],
        hidden_sizes=x_extractor_layers[:-1],
        output_size=x_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    # Feature extractor for latent samples z (all-ReLU MLP).
    Z_extractor = feedforward.build_classifier(
        P,
        "z_extractor",
        input_sizes=[z_size],
        hidden_sizes=z_extractor_layers[:-1],
        output_size=z_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    # Prior over z conditioned on the recurrent hidden state.
    prior = vae.build_inferer(P,
                              "prior",
                              input_sizes=[hidden_layer_size],
                              hidden_sizes=prior_layers,
                              output_size=z_size,
                              initial_weights=weight_init,
                              activation=T.nnet.relu,
                              initialise_outputs=True)

    # Generator p(x | h, z-features).
    generate = vae.build_inferer(
        P,
        "generator",
        input_sizes=[hidden_layer_size, z_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=input_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=True)

    # Learned initial LSTM state (hidden passed through tanh at use sites).
    P.init_recurrence_hidden = np.zeros((hidden_layer_size, ))
    P.init_recurrence_cell = np.zeros((hidden_layer_size, ))
    recurrence = lstm.build_step(
        P,
        "recurrence",
        input_sizes=[x_extractor_layers[-1], z_extractor_layers[-1]],
        hidden_size=hidden_layer_size)

    # Approximate posterior q(z | h, x-features).
    # NOTE(review): hidden_sizes is generation_layers, not inference_layers —
    # possibly a copy-paste bug; confirm against the training code.
    infer = vae.build_inferer(
        P,
        "infer",
        input_sizes=[hidden_layer_size, x_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=True)

    def sample():
        """Generate 40 steps of x means from the prior (batch size 1)."""
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, 1, hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, 1, hidden_layer_size)
        # 40 time steps x batch 1 x z_size standard-normal noise.
        noise = U.theano_rng.normal(size=(40, 1, z_size))

        def _step(eps, prev_cell, prev_hidden):
            # Reparameterised draw from the prior (std form), generate x,
            # then advance the LSTM on the (x, z) features.
            _, z_prior_mean, z_prior_std = prior([prev_hidden])
            z_sample = z_prior_mean + eps * z_prior_std
            z_feat = Z_extractor([z_sample])
            _, x_mean, _ = generate([prev_hidden, z_feat])
            x_feat = X_extractor([x_mean])
            curr_cell, curr_hidden = recurrence(x_feat, z_feat, prev_cell,
                                                prev_hidden)
            return curr_cell, curr_hidden, x_mean

        [cells, hiddens, x_means], _ = theano.scan(
            _step,
            sequences=[noise],
            outputs_info=[init_cell_batch, init_hidden_batch, None],
        )
        return x_means

    def extract(X, l):
        """Run inference over X (time x batch x input), masked by lengths l."""
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, X.shape[1], hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, X.shape[1], hidden_layer_size)
        noise = U.theano_rng.normal(size=(X.shape[0], X.shape[1], z_size))
        # With prob 0.025 per (step, sequence), the recurrent state is reset
        # to the learned initial state inside _step below.
        reset_init_mask = U.theano_rng.binomial(size=(X.shape[0], X.shape[1]),
                                                p=0.025)

        # Extract x features for the whole sequence in one pass.
        X_feat = X_extractor([X])

        def _step(t, x_feat, eps, reset_mask, prev_cell, prev_hidden):
            reset_mask = reset_mask.dimshuffle(0, 'x')

            # Prior and posterior over z; sample z via reparameterisation.
            _, z_prior_mean, z_prior_std = prior([prev_hidden])
            _, z_mean, z_std = infer([prev_hidden, x_feat])
            z_sample = z_mean + eps * z_std
            z_feat = Z_extractor([z_sample])
            _, x_mean, x_std = generate([prev_hidden, z_feat])

            curr_cell, curr_hidden = recurrence(x_feat, z_feat, prev_cell,
                                                prev_hidden)
            # Randomly reset state back to the learned initial values.
            curr_cell = T.switch(reset_mask, init_cell_batch, curr_cell)
            curr_hidden = T.switch(reset_mask, init_hidden_batch, curr_hidden)

            # Zero every output beyond each sequence's true length l.
            mask = (t < l).dimshuffle(0, 'x')
            return tuple(
                T.switch(mask, out, 0)
                for out in (curr_cell, curr_hidden, z_prior_mean, z_prior_std,
                            z_sample, z_mean, z_std, x_mean, x_std))

        [
            _, _, Z_prior_mean, Z_prior_std, Z_sample, Z_mean, Z_std, X_mean,
            X_std
        ], _ = theano.scan(
            _step,
            sequences=[
                T.arange(X_feat.shape[0]), X_feat, noise, reset_init_mask
            ],
            outputs_info=[init_cell_batch, init_hidden_batch] + [None] * 7,
        )
        return [
            Z_prior_mean,
            Z_prior_std,
            Z_mean,
            Z_std,
            X_mean,
            X_std,
        ]

    return extract, sample