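# -----------------------------------------------------------------------------
# Separate example: training a small CNN image classifier.  The layer classes
# (conv, ReLU, MaxPooling, FC_Layer, Softmax) and Solver are assumed to come
# from project-local modules, and `data` is assumed to be a dict of numpy
# arrays loaded earlier; neither is shown in this snippet.
# -----------------------------------------------------------------------------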
x_train = data['x_train']
y_train = data['y_train']
x_val = data['x_val']
y_val = data['y_val']
x_test = data['x_test']
y_test = data['y_test']

N = x_train.shape[0]
epoch = 10
batch_size = 64
stride = 1
padding = 1
kernel_size = 3
lr = 4e-1

# two conv -> ReLU -> max-pool stages; the input is a 3-channel 32x32 image and
# each pooling step halves the spatial size (32 -> 16 -> 8)
conv1_1 = conv((3, 32, 32), 32, kernel_size, stride, padding)
ReLu1_1 = ReLU()
MaxPooling1 = MaxPooling()
conv2_1 = conv((32, 16, 16), 32, kernel_size, stride, padding)
ReLu2_1 = ReLU()
MaxPooling2 = MaxPooling()
# two fully connected layers map the 32 * 8 * 8 feature volume to 10 class scores
FC1 = FC_Layer(32 * 8 * 8, 256)
FC2 = FC_Layer(256, 10)
Softmax_classifier = Softmax()

model = [
    conv1_1, ReLu1_1, MaxPooling1, conv2_1, ReLu2_1, MaxPooling2, FC1, FC2
]
solver = Solver.Solver(model)

solver.train(x_train, y_train, epoch, batch_size, lr, 1)


import time

import numpy
import theano
import theano.tensor as T

# Softmax (the softmax / logistic-regression layer instantiated below) and
# get_f1_and_classification_report are assumed to be importable from
# project-local modules that are not shown here.


class SoftmaxPredictor(object):

    def __init__(self, data_set, n_next_words, learning_rate, input_size):
        """
        Wrapper class for a Softmax layer that predicts right-context words from target-word embeddings and adjusts
        the embeddings on the basis of the prediction error. At each epoch, the accuracy of a 10-NN classifier
        trained on the current embeddings is computed; training stops as soon as that accuracy no longer increases.

        :type data_set:                 dict
        :param data_set:                data set with embeddings that are to be further modified during training.

        :type n_next_words:             int
        :param n_next_words:            number of words to be predicted, i.e. the number of output classes for the
                                        softmax model

        :type learning_rate:            float
        :param learning_rate:           learning rate for the softmax model

        :type input_size:               int
        :param input_size:              size of the word embeddings
        """
        self.data_set = data_set
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.n_next_words = n_next_words

        self.parameters = []                        # model parameters -- compute gradient with respect to them
        self.vocabulary = None
        self.embeddings_matrix = None
        self.softmax_layer = None
        self.cost_function = None

        # symbolic theano variables needed for the theano train function
        self.y = T.ivector('y')                    # true class label
        self.y_idx = T.lscalar('y_idx')            # index pointing to true class label
        self.embedding_index = T.lscalar('index')  # index pointing to a row in the embeddings matrix

        self.training_error_over_epochs = []
        self.f1_over_epochs = []                   # micro f1 scores over epochs
        self.embeddings_over_epochs = []
        self.epochs = None                         # number of epochs the model was trained

        self._build_model()


    def _build_model(self):
        self.vocabulary = self.data_set['embeddings_dict'].keys()
        self.embeddings_matrix = self.data_set['embeddings_dict'].values()

        # cast embeddings matrix to theano type
        self.embeddings_matrix = theano.shared(numpy.asarray(self.embeddings_matrix, dtype=theano.config.floatX),
                                               borrow=True)
        # add embeddings matrix to list of parameters, to compute gradient later on
        self.parameters.append(self.embeddings_matrix)
        # a training example is a row in the embeddings matrix
        self.input_ = self.embeddings_matrix[self.embedding_index]
        # instantiate softmax model
        self.softmax_layer = Softmax(input=self.input_, n_in=self.input_size, n_out=self.n_next_words)
        # add softmax parameters (weight matrix) to self.parameters, to compute gradient later on
        self.parameters.extend(self.softmax_layer.params)
        self.cost_function = self.softmax_layer.negative_log_likelihood(self.y)


    def _get_train_function(self):
        # map each word from the vocabulary to a unique integer
        # (the index of the word's embedding in self.embeddings_matrix)
        l_to_idx = dict(zip(self.vocabulary, range(len(self.vocabulary))))
        train_set_idxs = [l_to_idx[w] for w in self.data_set['target_words']]

        # softmax classes to be predicted -- each class is a word from the right context
        train_set_y = [l_to_idx[w] for w in self.data_set['right_context_words']]

        # cast to theano type
        train_set_y = theano.shared(numpy.asarray(train_set_y, dtype=theano.config.floatX), borrow=True)
        train_set_y = T.cast(train_set_y, 'int32')

        gr1 = T.grad(cost=self.cost_function, wrt=self.input_)
        gr2 = T.grad(cost=self.cost_function, wrt=self.softmax_layer.W)

        # this calculates the gradient only for the row of the embeddings matrix that corresponds to the current
        # input, rather than for the entire matrix, where every row but one would have a zero gradient
        # see http://deeplearning.net/software/theano/tutorial/faq_tutorial.html, "How to update a subset of weights?"
        updates = [(self.embeddings_matrix, T.inc_subtensor(self.input_, - self.learning_rate * gr1)),
                   (self.softmax_layer.W, self.softmax_layer.W - self.learning_rate * gr2)]
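
        # A minimal, self-contained sketch of the same "update only one row" idiom from the Theano FAQ
        # (hypothetical names, for illustration only):
        #
        #     E = theano.shared(numpy.zeros((5, 3), dtype=theano.config.floatX))
        #     i = T.lscalar('i')
        #     row = E[i]
        #     cost = T.sum(row ** 2)
        #     g = T.grad(cost=cost, wrt=row)
        #     step = theano.function([i], cost, updates=[(E, T.inc_subtensor(row, -0.1 * g))])
        #
        # step(2) then touches only row 2 of E instead of writing a mostly-zero gradient over the whole matrix.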

        # train function takes an index pointing to a row in the embeddings matrix (self.embedding_index)
        # and an index pointing to an integer (true class label) in 'train_set_y' (self.y_idx)
        train_fn = theano.function(
            inputs=[self.embedding_index, self.y_idx],
            outputs=self.cost_function,
            givens={self.y: train_set_y[self.y_idx: self.y_idx + 1]},
            updates=updates,
            name='train'
        )

        return train_set_idxs, train_fn


    def train(self):

        print "There are %s training examples." % self.embeddings_matrix.get_value(borrow=True).shape[0]
        print

        # list of indices, each pointing to a row in the embeddings matrix
        train_set_idxs, train_fn = self._get_train_function()

        start_time = time.time()
        epoch = -1
        previous_accuracy = -numpy.inf
        while True:
            epoch += 1

            costs_over_batches = []
            for y_idx, index in enumerate(train_set_idxs):
                cost = train_fn(index, y_idx)
                costs_over_batches.append(cost)

            # store embeddings and evaluation metrics at each epoch
            training_loss = numpy.mean(costs_over_batches)
            embeddings = dict(zip(self.vocabulary, self.embeddings_matrix.get_value()))
            micro_f1, macro_f1, classification_report = get_f1_and_classification_report(embeddings, '10-NN')

            print 'epoch: %s -- loss: %s -- 10-NN accuracy: %s' % (epoch, training_loss, micro_f1)
            print classification_report
            print

            if micro_f1 <= previous_accuracy:
                print 'Accuracy did not increase relative to last epoch.'
                print 'Aborting training and discarding the results from the current epoch.'
                break

            self.training_error_over_epochs.append(training_loss)
            self.f1_over_epochs.append(micro_f1)
            embeddings = dict(zip(self.vocabulary, self.embeddings_matrix.get_value()))
            self.embeddings_over_epochs.append(embeddings)
            self.epochs = epoch
            previous_accuracy = micro_f1

            print 'Time since beginning of training: %s minutes' % ((time.time() - start_time) / 60)
            print

        print 'Training took: %s minutes' % ((time.time() - start_time) / 60)
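

# -----------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): how SoftmaxPredictor is
# expected to be driven, inferred from the dictionary keys accessed in
# _build_model and _get_train_function.  The toy vocabulary, embedding size and
# word lists below are hypothetical; actually calling train() additionally
# requires get_f1_and_classification_report to know gold labels for these words.
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    toy_vocabulary = ['cat', 'dog', 'mouse', 'bird', 'fish', 'horse']
    embedding_size = 50
    toy_data_set = {
        # word -> embedding vector; these become the rows of the shared embeddings matrix
        'embeddings_dict': dict((w, numpy.random.randn(embedding_size)) for w in toy_vocabulary),
        # one training example per target word ...
        'target_words': ['cat', 'dog', 'mouse', 'bird', 'fish'],
        # ... paired with the right-context word that should be predicted for it
        'right_context_words': ['dog', 'mouse', 'bird', 'fish', 'horse'],
    }

    predictor = SoftmaxPredictor(data_set=toy_data_set, n_next_words=len(toy_vocabulary),
                                 learning_rate=0.01, input_size=embedding_size)
    # predictor.train()  # uncomment once the evaluation helper can score this toy vocabulary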