Example #1
    def create_cost_fun(self):
        # create a cost function that takes each prediction
        # at every timestep and guesses the next timestep's value:
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we place masks where
        # the sentences end (`for_how_long` is zero-indexed, e.g. an
        # example spanning `[2, 3)` has this value set to 0, hence
        # the subtraction by 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        self.lstm_cost = masked_loss(self.lstm_predictions,
                                     what_to_predict,
                                     for_how_long,
                                     starting_when).sum()

        # zero out the entropy terms at masked (padding) positions:
        zero_entropy = T.zeros_like(self.entropy)
        real_entropy = T.switch(self.mask_matrix, self.entropy, zero_entropy)
        zero_key_entropy = T.zeros_like(self.key_entropy)
        real_key_entropy = T.switch(self.mask_matrix, self.key_entropy, zero_key_entropy)

        self.final_cost = (masked_loss(self.final_predictions,
                                       what_to_predict,
                                       for_how_long,
                                       starting_when).sum()
                           + self.entropy_reg * real_entropy.sum()
                           + self.key_entropy_reg * real_key_entropy.sum())
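Both cost terms above lean on `masked_loss` from theano_lstm to ignore padded timesteps. As a point of reference, here is a minimal NumPy sketch of the masking idea (an illustrative reimplementation of the semantics implied by these call sites, not theano_lstm's actual code): it sums the negative log-likelihood of each gold token, counting only timesteps inside each example's `[start, start + length)` window.

import numpy as np

def masked_nll(predictions, targets, lengths, starts):
    # predictions: (batch, time, vocab) softmax outputs
    # targets:     (batch, time) gold next-token ids
    # lengths:     (batch,) number of timesteps that count per example
    # starts:      (batch,) first timestep that counts per example
    batch, time, _ = predictions.shape
    t = np.arange(time)[None, :]
    mask = (t >= starts[:, None]) & (t < starts[:, None] + lengths[:, None])
    # probability assigned to each gold token at each timestep:
    p = predictions[np.arange(batch)[:, None], t, targets]
    return -(np.log(p) * mask).sum()

# toy check: one example, three timesteps, only timestep 0 counts
preds = np.array([[[0.9, 0.1], [0.5, 0.5], [0.5, 0.5]]])
print(masked_nll(preds, np.array([[0, 1, 1]]),
                 lengths=np.array([1]), starts=np.array([0])))  # ~0.105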
Example #2
    def create_cost_fun(self):
        # create a cost function that
        # takes each prediction at every timestep
        # and guesses next timestep's value:
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we
        # place masks where the sentences end:
        # (`for_how_long` is zero-indexed, e.g. an example spanning
        # `[2, 3)` has this value set to 0, hence the subtraction by 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        self.cost = masked_loss(self.predictions, what_to_predict,
                                for_how_long, starting_when).sum()
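In all of these variants, the target matrix is just the input shifted left by one position: the prediction made while reading token `t` is scored against token `t + 1`. A tiny NumPy illustration of that alignment (the token ids are made up):

import numpy as np

input_mat = np.array([[5, 3, 8, 2, 9]])   # one sentence of token ids
what_to_predict = input_mat[:, 1:]        # target at step t is token t + 1
print(what_to_predict)                    # [[3 8 2 9]]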
Example #3
    def create_cost_fun(self):
        # create a cost function that takes each prediction
        # at every timestep and guesses the next timestep's value
        # (every token of each sentence except the first; i.e., given
        # the first token, the rest of the sentence is predicted):
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we place masks where
        # the sentences end (`for_how_long` is zero-indexed, e.g. an
        # example spanning `[2, 3)` has this value set to 0, hence
        # the subtraction by 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        # predict every subsequent word of the full sentences; note that
        # `predictions` is evaluated in a single pass, i.e. one pass
        # covers a whole mini-batch:
        self.cost = masked_loss(self.predictions,
                                what_to_predict,
                                for_how_long,
                                starting_when).sum()
    def __init__(self,
                 word_size,
                 vocabulary_size,
                 stack_size,
                 hidden_size,
                 hidden_price_size,
                 price_stack_size,
                 output_vocabulary,
                 index2word,
                 word2index,
                 index2category,
                 category2index,
                 memory_sparsity = 0.0001,
                 rho = 0.95,
                 verbose=False,
                 theano_mode = "FAST_RUN"):
        

        self.index2word = index2word
        self.word2index = word2index
        self.index2category = index2category
        self.category2index = category2index

        self.memory_sparsity = theano.shared(np.float64(memory_sparsity), name="memory_sparsity")
        self.theano_mode = theano_mode
        self.word_size = word_size
        self.vocabulary_size = theano.shared(np.int32(vocabulary_size), name="vocabulary_size")
        self.stack_size = stack_size
        self.hidden_size = hidden_size
        self.output_vocabulary = output_vocabulary
        
        ### CREATE THE CELLS:
        
        model = theano_lstm.StackedCells(word_size,
                                         layers=[hidden_size] * stack_size,
                                         celltype=theano_lstm.LSTM,
                                         activation=T.tanh)
        # add a softmax layer at the end (non-recurrent); the extra output
        # class (`+ 1`) is a special end token:
        model.layers.append(theano_lstm.Layer(hidden_size, output_vocabulary + 1, to_softmax))
        
        # add an embedding layer at the beginning (non-recurrent):
        model.layers = [theano_lstm.Embedding(vocabulary_size + output_vocabulary + 1, word_size),
                        theano_lstm.GatedInput(word_size, hidden_size, T.nnet.sigmoid)] + model.layers
        self.model = model
        
        model2 = theano_lstm.StackedCells(hidden_size,
                                          layers=[hidden_price_size] * (price_stack_size - 1) + [1],
                                          celltype=theano_lstm.Layer,
                                          activation=T.tanh)

        # the price output passes through an exponential, keeping it positive:
        model2.layers[-1].activation = T.exp
        
        self.price_model = model2
        
        
        ### CONSTRUCT THE PREDICTION / WIRING:
        
        def step(word_id, *prev_hiddens):
            # take the output half of the top layer's state (theano_lstm LSTMs
            # concatenate memory cells and hidden output) to gate the input:
            if prev_hiddens[-1].ndim > 1:
                top_level_activ = prev_hiddens[-1][:, self.hidden_size:]
            else:
                top_level_activ = prev_hiddens[-1][self.hidden_size:]
            
            new_state = model.forward(word_id, [None, top_level_activ] + list(prev_hiddens), [])
            # return every layer's output except the embedding; the gate
            # activations are kept so memory usage can be monitored:
            return new_state[1:]
        
        def pred_step(word_id, *prev_hiddens):
            if prev_hiddens[-1].ndim > 1:
                top_level_activ = prev_hiddens[-1][:, self.hidden_size:]
            else:
                top_level_activ = prev_hiddens[-1][self.hidden_size:]
            new_state = model.forward(word_id, [None, top_level_activ] + list(prev_hiddens), [])
            # feed the argmax prediction back as the next input token; predicted
            # ids live above the input vocabulary in the shared embedding table,
            # hence the `+ vocabulary_size` offset:
            return [T.cast(new_state[-1].argmax() + self.vocabulary_size, dtype='int32')] + new_state[2:-1]
        
        def predict_sequence(x, lengths, return_all=False, return_memory=False):
            # tile each layer's learned initial hidden state across the batch
            # (matrix input), or use it directly for a single sequence:
            if x.ndim > 1:
                outputs_info = [None] + [dict(initial=T.repeat(T.shape_padleft(layer.initial_hidden_state),
                                                               x.shape[0], axis=0),
                                              taps=[-1])
                                         for layer in model.layers
                                         if hasattr(layer, 'initial_hidden_state')]
            else:
                outputs_info = [None] + [dict(initial=layer.initial_hidden_state, taps=[-1])
                                         for layer in model.layers
                                         if hasattr(layer, 'initial_hidden_state')]
            outputs_info = outputs_info + [None]
            result, updates = theano.scan(step,
                                          sequences = [x.T if x.ndim > 1 else x],
                                          outputs_info = outputs_info)
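            # `result` mirrors `outputs_info`: result[0] collects the input-gate
            # activations at each step (the leading None slot), result[1:-1] the
            # recurrent layers' hidden states, and result[-1] the softmax
            # predictions (the trailing None slot).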

            if return_all:
                return result
            else:
                res = result[-1].dimshuffle(1, 0, 2) if x.ndim > 1 else result[-1]
                
                # read the price off the top recurrent layer's output at each
                # sentence's final position (batched) or the last step (single):
                price_preds = self.price_model.forward(
                                self.model.layers[-2].postprocess_activation(
                                    result[-2][lengths, T.arange(0, lengths.shape[0])]
                                ), None, []
                            )[-1][:,0] if x.ndim > 1 else \
                            self.price_model.forward(
                                self.model.layers[-2].postprocess_activation(
                                    result[-2][-1]
                            ), None, [])[-1][0]
                # gate values can be obtained by asking for them from the stacked cells
                if return_memory:
                    return result[0], res, price_preds
                else:
                    return res, price_preds
        
        
        # every sequence is a series of indices
        # for words:
        input_sentences    = T.imatrix()
        
        # some sequences are shorter than others, so we'll note where they
        # end in a zero-indexed fashion
        sequence_lengths  = T.ivector()
        sequence_starts   = T.ivector()
        # the labels are integers in the range of the dictionary
        
        self.input_sentences = input_sentences
        self.sequence_lengths = sequence_lengths
        self.sequence_starts = sequence_starts
        
        self.prices = T.vector()
        
        memory_usage, self.predictions, self.price_predictions = predict_sequence(input_sentences, self.sequence_starts, return_memory=True)
        
        # total objective: masked next-token loss, a sparsity penalty on the
        # input-gate activations, and squared error on the price predictions:
        self.error = (
            theano_lstm.masked_loss(
                self.predictions,
                input_sentences[:,1:] - self.vocabulary_size,
                sequence_lengths,
                sequence_starts).mean() +
            (memory_usage.sum() * self.memory_sparsity) / input_sentences.shape[0] +
            ((self.price_predictions - self.prices)**2).mean()
        )
        
        self.memory_fun = theano.function([input_sentences], memory_usage,
                                           allow_input_downcast=True,
                                           mode=self.theano_mode)
        
        self.price_predict_fun = theano.function([input_sentences, sequence_starts],
                                           self.price_predictions,
                                           allow_input_downcast=True,
                                           mode=self.theano_mode)
        
        self.predict_fun = theano.function([input_sentences],
                                           self.predictions,
                                           allow_input_downcast=True,
                                           mode=self.theano_mode)
        self.error_fun = theano.function([input_sentences, sequence_lengths, sequence_starts, self.prices],
                                         self.error,
                                         allow_input_downcast=True,
                                         mode=self.theano_mode)
        
        self.input_sentence = T.ivector()
        
        prep_result = predict_sequence(self.input_sentence, None, return_all=True)
        
        pred_outputs_info = [dict(initial=self.input_sentence[-1], taps=[-1])] + [dict(initial=prep_hidden[-1], taps=[-1]) for prep_hidden in prep_result[1:-1]]
        
        prediction_steps = T.iscalar()
        pred_result, _ = theano.scan(pred_step,
                                     n_steps = prediction_steps,
                                     outputs_info = pred_outputs_info)
        
        self.reconstruct_fun = theano.function([self.input_sentence, prediction_steps],
                                               pred_result[0],
                                               allow_input_downcast=True,
                                               mode=self.theano_mode)
        self.input_labels = theano.function([input_sentences],
                                            input_sentences[:,1:] - self.vocabulary_size,
                                            mode=self.theano_mode)
        
        if verbose:
            print("created prediction & error functions")
        updates, gsums, xsums, lr, max_norm = theano_lstm.create_optimization_updates(
            self.error, model.params + model2.params,
            max_norm=None, rho=rho, method="adadelta")
        self.lr = lr
        if verbose:
            print("took the gradient")
        
        self.gsums = gsums
        self.xsums = xsums
        
        self.update_fun = theano.function([input_sentences, sequence_lengths, sequence_starts, self.prices],
                                          outputs=None,
                                          updates=updates,
                                          mode=self.theano_mode)
        if verbose:
            print("created the gradient descent function")