Example #1
    def copy_coherence_function(self,
                                input_a=None,
                                input_b=None,
                                arg_idx_input=None,
                                salience_input=None):
        """
        Build a new coherence function, copying all weights and such from
        this network, replacing components given as kwargs. Note that this
        uses the same shared variables and any other non-replaced components
        as the network's original expression graph: bear in mind if you use
        it to update weights or combine with other graphs.

        """
        input_a = input_a or self.input_a
        input_b = input_b or self.input_b
        arg_idx_input = arg_idx_input or self.arg_idx_input
        salience_input = salience_input or self.salience_input

        # Build the new coherence function, concatenating the two event
        # projections with the argument index and salience features
        input_vector = T.concatenate(
            [input_a, input_b,
             arg_idx_input.dimshuffle(0, 'x'), salience_input],
            axis=input_a.ndim - 1)

        # Initialize each layer as an autoencoder.
        # We'll then set its weights and never use it as an autoencoder
        layers = []
        layer_outputs = []
        input_size = \
            self.event_vector_network.layer_sizes[-1] * 2 + \
            1 + self.num_salience_features
        layer_input = input_vector
        for layer_size in self.layer_sizes:
            layers.append(
                DenoisingAutoencoder(x=layer_input,
                                     n_visible=input_size,
                                     n_hidden=layer_size,
                                     non_linearity='tanh'))
            input_size = layer_size
            layer_input = layers[-1].hidden_layer
            layer_outputs.append(layer_input)
        final_projection = layer_input

        # Set the weights of all layers to the ones trained in the base network
        for layer, layer_weights in zip(layers, self.get_weights()):
            layer.set_weights(layer_weights)

        # Add a final layer
        # This is simply a logistic regression layer to predict
        # a coherence score for the input pair
        activation = \
            T.dot(final_projection, self.prediction_weights) + \
            self.prediction_bias
        # Remove the last dimension, which should now just be of size 1
        activation = activation.reshape(activation.shape[:-1],
                                        ndim=activation.ndim - 1)
        prediction = T.nnet.sigmoid(activation)

        return prediction, input_vector, layers, layer_outputs, activation
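A minimal usage sketch (not from the source: network, score_pair and the assumption that the salience input is the last element of pair_inputs are all illustrative): replace the salience input with a fresh symbolic variable and compile the rebuilt prediction into a callable Theano function.

# Hypothetical usage sketch for copy_coherence_function()
import theano
import theano.tensor as T

new_salience = T.matrix('new_salience')
prediction, input_vector, layers, layer_outputs, activation = \
    network.copy_coherence_function(salience_input=new_salience)

# The remaining inputs are reused from the original network's graph, since
# copy_coherence_function() falls back to the network's own variables
score_pair = theano.function(
    inputs=network.pair_inputs[:-1] + [new_salience],
    outputs=prediction,
    on_unused_input='ignore',
)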
Example #2
    def build_projection_layer(pred_input, subj_input, obj_input, pobj_input,
                               vectors, empty_subj_vector, empty_obj_vector,
                               empty_pobj_vector, input_size, layer_sizes):
        # Rearrange these so we can test for -1 indices
        # In the standard case, this does dimshuffle((0, "x")), which changes
        # a 1D vector into a column vector
        shuffled_dims = tuple(list(range(subj_input.ndim)) + ['x'])
        subj_input_col = subj_input.dimshuffle(shuffled_dims)
        obj_input_col = obj_input.dimshuffle(shuffled_dims)
        pobj_input_col = pobj_input.dimshuffle(shuffled_dims)

        # Make the input to the first autoencoder by selecting the appropriate
        # vectors from the given matrices
        input_vector = T.concatenate([
            vectors[pred_input],
            T.switch(T.neq(subj_input_col, -1), vectors[subj_input],
                     empty_subj_vector),
            T.switch(T.neq(obj_input_col, -1), vectors[obj_input],
                     empty_obj_vector),
            T.switch(T.neq(pobj_input_col, -1), vectors[pobj_input],
                     empty_pobj_vector),
        ], axis=pred_input.ndim)

        # Build and initialize each layer of the autoencoder
        previous_output = input_vector
        layers = []
        layer_outputs = []
        for layer_size in layer_sizes:
            layers.append(
                DenoisingAutoencoder(
                    x=previous_output,
                    n_hidden=layer_size,
                    n_visible=input_size,
                    non_linearity='tanh',
                ))
            input_size = layer_size
            previous_output = layers[-1].hidden_layer
            layer_outputs.append(previous_output)
        projection_layer = previous_output

        return input_vector, layers, layer_outputs, projection_layer
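A rough sketch of wiring up and compiling this projection. Everything below is an assumption made for illustration: the embedding dimensionality, vocabulary size, layer sizes and the way build_projection_layer is exposed (e.g. as a static method callable directly) are not taken from the source.

# Sketch only: shapes, names and sizes are placeholders
import numpy
import theano
import theano.tensor as T

dim = 50
vectors = theano.shared(
    numpy.random.uniform(-0.1, 0.1, (1000, dim)).astype(theano.config.floatX),
    name='vectors')
# One "empty argument" vector per optional slot; zeros here, learned in practice
empty_subj = theano.shared(numpy.zeros(dim, dtype=theano.config.floatX), 'empty_subj')
empty_obj = theano.shared(numpy.zeros(dim, dtype=theano.config.floatX), 'empty_obj')
empty_pobj = theano.shared(numpy.zeros(dim, dtype=theano.config.floatX), 'empty_pobj')

# Integer index vectors, one entry per event; -1 marks a missing argument
pred_input = T.ivector('pred')
subj_input = T.ivector('subj')
obj_input = T.ivector('obj')
pobj_input = T.ivector('pobj')

input_vector, layers, layer_outputs, projection = build_projection_layer(
    pred_input, subj_input, obj_input, pobj_input,
    vectors, empty_subj, empty_obj, empty_pobj,
    input_size=4 * dim, layer_sizes=[100, 100])

project = theano.function(
    [pred_input, subj_input, obj_input, pobj_input], projection)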
Example #3
        # =============================================================================
        if arg == 'ncomp':
            feed_list = [2**x for x in range(2, 12)]
            for i in feed_list:
                print "\n Evaluating for ncomp=" + str(i)
                t = 'hlayer=' + str(i)
                dae = DAE(model_name='hidden_layers',
                          pickle_name=arg,
                          test_name=t,
                          n_components=i,
                          main_dir='hidden_layers/',
                          enc_act_func='sigmoid',
                          dec_act_func='sigmoid',
                          loss_func='mean_squared',
                          num_epochs=31,
                          batch_size=12,
                          dataset='cifar10',
                          xavier_init=1,
                          opt='adam',
                          learning_rate=0.001,
                          momentum=0.5,
                          corr_type='gaussian',
                          corr_frac=0.6,
                          verbose=1,
                          seed=-1)
                dae.fit(trX, val_dict, teX, restore_previous_model=False)
                dae.reset()

        # =============================================================================
        #         Testing learning rates
        # =============================================================================
Example #4
    def __init__(self,
                 event_vector_network,
                 layer_sizes,
                 use_salience=True,
                 salience_features=None):
        self.event_vector_network = event_vector_network
        self.layer_sizes = layer_sizes

        self.input_a, self.input_b = \
            self.event_vector_network.get_projection_pair()

        self.arg_idx_input = T.vector('arg_type')
        self.neg_arg_idx_input = T.vector('neg_arg_type')

        self.input_vector = T.concatenate(
            (self.input_a, self.input_b,
             self.arg_idx_input.dimshuffle(0, 'x')),
            axis=1)

        self.use_salience = use_salience
        if self.use_salience:
            self.salience_input = T.matrix('salience')
            # variables for negative entity salience
            self.neg_salience_input = T.matrix('neg_salience')

            self.input_vector = T.concatenate(
                (self.input_vector, self.salience_input), axis=1)

        # Initialize each layer as an autoencoder,
        # allowing us to initialize it by pretraining
        self.layers = []
        self.layer_outputs = []
        input_size = \
            self.event_vector_network.layer_sizes[-1] * 2 + 1

        self.num_salience_features = 0
        self.salience_features = []
        if self.use_salience:
            assert salience_features is not None
            self.salience_features = salience_features
            self.num_salience_features = len(self.salience_features)
            input_size += self.num_salience_features

        layer_input = self.input_vector
        for layer_size in layer_sizes:
            self.layers.append(
                DenoisingAutoencoder(input=layer_input,
                                     n_visible=input_size,
                                     n_hidden=layer_size,
                                     non_linearity="tanh"))
            input_size = layer_size
            layer_input = self.layers[-1].hidden_layer
            self.layer_outputs.append(layer_input)
        self.final_projection = layer_input

        # Add a final layer, which will only ever be trained with
        # a supervised objective
        # This is simply a logistic regression layer to predict
        # a coherence score for the input pair
        self.prediction_weights = theano.shared(
            # Initialize the weights uniformly in a small, Glorot-style range;
            # the bias below starts at zero
            numpy.asarray(
                numpy.random.uniform(
                    low=-2. * numpy.sqrt(6. / (layer_sizes[-1] + 1)),
                    high=2. * numpy.sqrt(6. / (layer_sizes[-1] + 1)),
                    size=(layer_sizes[-1], 1),
                ),
                dtype=theano.config.floatX),
            name="prediction_w",
            borrow=True)
        self.prediction_bias = theano.shared(
            value=numpy.zeros(1, dtype=theano.config.floatX),
            name="prediction_b",
            borrow=True)
        self.prediction = T.nnet.sigmoid(
            T.dot(self.final_projection, self.prediction_weights) +
            self.prediction_bias)

        self.pair_inputs = [
            self.event_vector_network.pred_input_a,
            self.event_vector_network.subj_input_a,
            self.event_vector_network.obj_input_a,
            self.event_vector_network.pobj_input_a,
            self.event_vector_network.pred_input_b,
            self.event_vector_network.subj_input_b,
            self.event_vector_network.obj_input_b,
            self.event_vector_network.pobj_input_b, self.arg_idx_input
        ]
        if self.use_salience:
            self.pair_inputs.append(self.salience_input)

        self.triple_inputs = [
            self.event_vector_network.pred_input_a,
            self.event_vector_network.subj_input_a,
            self.event_vector_network.obj_input_a,
            self.event_vector_network.pobj_input_a,
            self.event_vector_network.pred_input_b,
            self.event_vector_network.subj_input_b,
            self.event_vector_network.obj_input_b,
            self.event_vector_network.pobj_input_b,
            self.event_vector_network.pred_input_c,
            self.event_vector_network.subj_input_c,
            self.event_vector_network.obj_input_c,
            self.event_vector_network.pobj_input_c, self.arg_idx_input,
            self.neg_arg_idx_input
        ]
        if self.use_salience:
            self.triple_inputs.append(self.salience_input)
            self.triple_inputs.append(self.neg_salience_input)

        self._coherence_fn = None
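Once this constructor has run, the pair prediction above can be compiled into a scoring function. A hedged sketch: the class name CoherenceScorer and the salience feature names are placeholders, not from the source.

# Sketch only: class and feature names are hypothetical
import theano

model = CoherenceScorer(event_vector_network,
                        layer_sizes=[100, 50],
                        use_salience=True,
                        salience_features=['feature_a', 'feature_b'])

# pair_inputs collects the word-index inputs of both events plus the
# argument-index and salience inputs declared above
coherence_fn = theano.function(
    inputs=model.pair_inputs,
    outputs=model.prediction,
    on_unused_input='ignore',
)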
Example #5
    def __init__(self, arg_comp_model, layer_sizes):
        self.arg_comp_model = arg_comp_model
        self.input_a, self.input_b = self.arg_comp_model.get_projection_pair()
        self.input_arg_type = T.vector("arg_type", dtype="int32")
        self.input_vector = T.concatenate(
            (self.input_a, self.input_b,
             self.input_arg_type.dimshuffle(0, 'x')),
            axis=1)
        self.layer_sizes = layer_sizes

        # Initialize each layer as an autoencoder,
        # allowing us to initialize it by pretraining
        self.layers = []
        self.layer_outputs = []
        input_size = self.arg_comp_model.layer_sizes[-1] * 2 + 1
        layer_input = self.input_vector
        for layer_size in layer_sizes:
            self.layers.append(
                DenoisingAutoencoder(input=layer_input,
                                     n_visible=input_size,
                                     n_hidden=layer_size,
                                     non_linearity="tanh"))
            input_size = layer_size
            layer_input = self.layers[-1].hidden_layer
            self.layer_outputs.append(layer_input)
        self.final_projection = layer_input

        # Add a final layer, which will only ever be trained with
        # a supervised objective
        # This is simply a logistic regression layer to predict
        # a coherence score for the input pair
        self.prediction_weights = theano.shared(
            # Initialize the weights uniformly in a small, Glorot-style range;
            # the bias below starts at zero
            numpy.asarray(
                numpy.random.uniform(
                    low=-2. * numpy.sqrt(6. / (layer_sizes[-1] + 1)),
                    high=2. * numpy.sqrt(6. / (layer_sizes[-1] + 1)),
                    size=(layer_sizes[-1], 1),
                ),
                dtype=theano.config.floatX),
            name="prediction_w",
            borrow=True)
        self.prediction_bias = theano.shared(
            value=numpy.zeros(1, dtype=theano.config.floatX),
            name="prediction_b",
            borrow=True)
        self.prediction = T.nnet.sigmoid(
            T.dot(self.final_projection, self.prediction_weights) +
            self.prediction_bias)

        self.pair_inputs = [
            self.arg_comp_model.pred_input_a, self.arg_comp_model.subj_input_a,
            self.arg_comp_model.obj_input_a, self.arg_comp_model.pobj_input_a,
            self.arg_comp_model.pred_input_b, self.arg_comp_model.subj_input_b,
            self.arg_comp_model.obj_input_b, self.arg_comp_model.pobj_input_b,
            self.input_arg_type
        ]
        self.triple_inputs = [
            self.arg_comp_model.pred_input_a, self.arg_comp_model.subj_input_a,
            self.arg_comp_model.obj_input_a, self.arg_comp_model.pobj_input_a,
            self.arg_comp_model.pred_input_b, self.arg_comp_model.subj_input_b,
            self.arg_comp_model.obj_input_b, self.arg_comp_model.pobj_input_b,
            self.arg_comp_model.pred_input_c, self.arg_comp_model.subj_input_c,
            self.arg_comp_model.obj_input_c, self.arg_comp_model.pobj_input_c,
            self.input_arg_type
        ]

        self._coherence_fn = None
Example #6
import numpy as np

# Toy binary training data: the first five rows use only the first ten
# features, the last five rows only the last ten.
# (The DenoisingAutoencoder import and the training_epochs, learning_rate
# and corruption_level settings are assumed to be defined above this excerpt.)
data = np.array(
    [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]
)

rng = np.random.RandomState(123)

# construct dA
da = DenoisingAutoencoder(input=data, n_visible=20, n_hidden=5, np_rng=rng)

# train
for epoch in range(training_epochs):
    da.train(lr=learning_rate, corruption_level=corruption_level)
    # cost = da.negative_log_likelihood(corruption_level=corruption_level)
    # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
    # learning_rate *= 0.95

# test
x = np.array([[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
              [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0]])

print(da.get_hidden_values(x).shape)
print(x.shape)
x = da.reconstruct(x)
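As a small follow-up (not in the original snippet), the reconstruction can be printed and compared with the inputs to check that the two block patterns were learned.

# x now holds the reconstruction returned by da.reconstruct(); values close
# to 1 should line up with the block of features active in each test row
print(np.round(x, 2))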
Example #7
import sys
import os
sys.path.insert(0, '../utils/')
import numpy as np

import getdata
from LeNet5 import LeNet

from autoencoder import DenoisingAutoencoder as DAE

if sys.argv[1] == 'dae':
    dae = DAE(model_name='dae_svm', pickle_name='svm', test_name='svm',
              n_components=256, main_dir='dae/',
              enc_act_func='sigmoid', dec_act_func='none',
              loss_func='mean_squared', num_epochs=50, batch_size=20,
              dataset='cifar10', xavier_init=1, opt='adam',
              learning_rate=0.0001, momentum=0.5, corr_type='gaussian',
              corr_frac=0.5, verbose=1, seed=1)

    trX, trY, teX, teY = getdata.load_cifar10_dataset(
        '../cifar-10-batches-py/', mode='supervised')
    val_dict = {}
    dae.fit(trX, val_dict, teX, restore_previous_model=True)

    # dae.load_model(256, 'models/dae/dae_svm')
    dae_svm_train = dae.transform(trX, name='dae_svm_train_na', save=True)
    dae_svm_test = dae.transform(teX, name='dae_svm_test_na', save=True)

    
elif sys.argv[1] == 'cnn':
    cifar_train = getdata.get_train()
    cifar_test = getdata.get_test()