Example #1
    def test_single_var(self):
        # Test `is_same_graph` with some trivial graphs (one Variable).

        x, y, z = tensor.vectors('x', 'y', 'z')
        self.check([
                   (x, x, (({}, True), )),
                   (x, y, (({}, False), ({y: x}, True), )),
                   (x, tensor.neg(x), (({}, False), )),
                   (x, tensor.neg(y), (({}, False), )),
                   ])
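Each tuple above pairs two graphs with the expected result of is_same_graph under the listed givens mapping. A minimal stand-alone sketch of those assertions (an illustration only, assuming the function under test is Theano's theano.gof.graph.is_same_graph):

import theano.tensor as tensor
from theano.gof.graph import is_same_graph

x, y = tensor.vectors('x', 'y')
assert is_same_graph(x, x)                   # ({}, True)
assert not is_same_graph(x, y)               # ({}, False)
assert is_same_graph(x, y, givens={y: x})    # ({y: x}, True)
assert not is_same_graph(x, tensor.neg(x))   # ({}, False)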
Example #2
 def test_single_var(self):
     """
     Test `is_same_graph` with some trivial graphs (one Variable).
     """
     x, y, z = tensor.vectors('x', 'y', 'z')
     self.check([
         (x, x, (({}, True), )),
         (x, y, (({}, False), ({y: x}, True), )),
         (x, tensor.neg(x), (({}, False), )),
         (x, tensor.neg(y), (({}, False), )),
         ])
Example #3
 def test_single_var(self):
     """
     Test `is_same_graph` with some trivial graphs (one Variable).
     """
     x, y, z = tensor.vectors("x", "y", "z")
     self.check(
         [
             (x, x, (({}, True),)),
             (x, y, (({}, False), ({y: x}, True))),
             (x, tensor.neg(x), (({}, False),)),
             (x, tensor.neg(y), (({}, False),)),
         ]
     )
Example #4
    def test_single_var(self):
        # Test `is_same_graph` with some trivial graphs (one Variable).

        x, y, z = tensor.vectors("x", "y", "z")
        self.check([
            (x, x, (({}, True), )),
            (x, y, (
                ({}, False),
                ({
                    y: x
                }, True),
            )),
            (x, tensor.neg(x), (({}, False), )),
            (x, tensor.neg(y), (({}, False), )),
        ])
Example #5
    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a matrix where 1 indicates which class the sample belongs to

        """

        return T.mean(T.neg(y) * T.log(self.p_y_given_x) - (1+T.neg(y))*T.log(1-self.p_y_given_x)) + self.lambda_reg * T.sum(self.W ** 2)
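For reference, the hand-written expression above is the mean binary cross-entropy plus an L2 weight penalty. A sketch of the same objective using Theano's built-in helper (the function name and explicit arguments here are illustrative, not part of the original class):

import theano
import theano.tensor as T

def negative_log_likelihood_bce(y, p_y_given_x, W, lambda_reg):
    # T.nnet.binary_crossentropy(p, y) == -y*log(p) - (1 - y)*log(1 - p),
    # so its mean plus the L2 term matches the expression above
    return T.mean(T.nnet.binary_crossentropy(p_y_given_x, y)) + lambda_reg * T.sum(W ** 2)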
Example #6
 def error(self, y):
     if y.ndim != self.y_pred.ndim:
         raise TypeError('y should have the same shape as self.y_pred',
                         ('y', y.type, 'y_pred', self.y_pred.type))
     if y.dtype.startswith("int"):
         # T.neq marks each position where the prediction differs from the target,
         # so the mean is the misclassification rate
         return T.mean(T.neq(self.y_pred, y))
     else:
         raise NotImplementedError
Example #7
def minus_corr(u, v):
    um = T.sub(u, T.mean(u))
    vm = T.sub(v, T.mean(v))
    r_num = T.sum(T.mul(um, vm))
    r_den = T.sqrt(T.mul(T.sum(T.sqr(um)), T.sum(T.sqr(vm))))
    r = T.true_div(r_num, r_den)
    r = T.neg(r)
    return r
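A minimal usage sketch for minus_corr (assuming the standard Theano and NumPy imports); perfectly correlated inputs give a value close to -1:

import numpy as np
import theano
import theano.tensor as T

u, v = T.dvectors('u', 'v')
neg_corr = theano.function([u, v], minus_corr(u, v))
print(neg_corr(np.array([1., 2., 3., 4.]), np.array([2., 4., 6., 8.])))  # approx. -1.0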
Example #8
def objective(y_true, y_pred):
    active_notes = T.shape_padright(y_true[:, :, :, 0])
    mask = T.concatenate([T.ones_like(active_notes), active_notes], axis=3)

    log_likelihoods = mask * T.log(2 * y_pred * y_true - y_pred - y_true + 1 +
                                   EPSILON)

    return T.neg(T.sum(log_likelihoods))
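The term 2 * y_pred * y_true - y_pred - y_true + 1 inside the log is the Bernoulli likelihood of each observation: it equals y_pred where y_true == 1 and 1 - y_pred where y_true == 0. A quick NumPy check of that identity (illustration only, not part of the objective):

import numpy as np

p = np.array([0.9, 0.9, 0.2, 0.2])   # predicted probabilities
x = np.array([1.0, 0.0, 1.0, 0.0])   # observed binary targets
print(2 * p * x - p - x + 1)         # [0.9 0.1 0.2 0.8]: p where x == 1, 1 - p where x == 0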
Example #9
    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a matrix where 1 indicates which class the sample belongs to

        """

        return T.mean(
            T.neg(y) * T.log(self.p_y_given_x) -
            (1 + T.neg(y)) * T.log(1 - self.p_y_given_x))
Example #10
 def dtw(i, q_p, b_p, Q, D, inf):
   i0 = T.eq(i, 0)
   # inf = T.cast(1e10,'float32') * T.cast(T.switch(T.eq(self.n,0), T.switch(T.eq(i,0), 0, 1), 1), 'float32')
   penalty = T.switch(T.and_(T.neg(n0), i0), big, T.constant(0.0, 'float32'))
   loop = T.constant(0.0, 'float32') + q_p
   forward = T.constant(0.0, 'float32') + T.switch(T.or_(n0, i0), 0, Q[i - 1])
   opt = T.stack([loop, forward])
   k_out = T.cast(T.argmin(opt, axis=0), 'int32')
   return opt[k_out, T.arange(opt.shape[1])] + D[i] + penalty, k_out
Example #11
 def dtw(i, q_p, b_p, Q, D, inf):
   i0 = T.eq(i, 0)
   # inf = T.cast(1e10,'float32') * T.cast(T.switch(T.eq(self.n,0), T.switch(T.eq(i,0), 0, 1), 1), 'float32')
   penalty = T.switch(T.and_(T.neg(n0), i0), big, T.constant(0.0, 'float32'))
   loop = T.constant(0.0, 'float32') + q_p
   forward = T.constant(0.0, 'float32') + T.switch(T.or_(n0, i0), 0, Q[i - 1])
   opt = T.stack([loop, forward])
   k_out = T.cast(T.argmin(opt, axis=0), 'int32')
   return opt[k_out, T.arange(opt.shape[1])] + D[i] + penalty, k_out
Example #12
    def get_loss(adjusted_output, prediction):
        epsilon = 1e-7

        active_notes = T.shape_padright(adjusted_output[:, :, :, 0])
        masks = T.concatenate([T.ones_like(active_notes), active_notes], axis=3)

        log_likelihoods = T.log(2 * prediction * adjusted_output - prediction - adjusted_output + 1 + epsilon)
        masked_log_likelihoods = masks * log_likelihoods

        return T.neg(T.sum(masked_log_likelihoods))
Example #13
 def loss_func(self, y_true, y_predict):
     active_notes = T.shape_padright(y_true[:, :, :, 0])
     mask = T.concatenate([
         T.ones_like(active_notes), active_notes,
         T.repeat(T.ones_like(active_notes), self.output_size - 2, -1)
     ],
                          axis=-1)
     loglikelihoods = mask * T.log(2 * y_predict * y_true - y_predict -
                                   y_true + 1 + self.epsilon)
     return T.neg(T.sum(loglikelihoods))
Example #14
    def predict(self, y):

        # check if y has the same dimensions as self.y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.neq(self.y_pred, y)
        else:
            raise NotImplementedError()
Example #15
    def loss(self, n_samples, regularization_strength, mix, mu, sigma):
        log_sum_loss = -tensor.sum(tensor.log(
                            tensor.sum(mix * tensor.inv(np.sqrt(2 * np.pi) * sigma) *
                                       tensor.exp(tensor.neg(tensor.sqr(mu - self.target_vector)) *
                                                  tensor.inv(2 * tensor.sqr(sigma))), axis=0)
        ))

        # reg_loss = tensor.sum(tensor.sqr(self.layers.values()[0].W))
        # for layer in self.layers.values()[1:]:
        #     reg_loss += tensor.sum(tensor.sqr(layer.W))

        # regularization = 1/n_samples * regularization_strength/2 * reg_loss

        return log_sum_loss #+ regularization
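The quantity inside the outer tensor.log above is the density of a univariate Gaussian mixture evaluated at the target, summed over components (tensor.inv is the elementwise reciprocal, tensor.neg the elementwise negation). A plain NumPy sketch of that density, assuming mix, mu and sigma each carry one row per mixture component:

import numpy as np

def mixture_density(mix, mu, sigma, target):
    # sum_k  mix_k * N(target | mu_k, sigma_k)
    return np.sum(mix * np.exp(-(mu - target) ** 2 / (2 * sigma ** 2))
                  / (np.sqrt(2 * np.pi) * sigma), axis=0)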
Example #16
    def loss(self, n_samples, regularization_strength, mix, mu, sigma):
        log_sum_loss = -tensor.sum(
            tensor.log(
                tensor.sum(
                    mix * tensor.inv(np.sqrt(2 * np.pi) * sigma) * tensor.exp(
                        tensor.neg(tensor.sqr(mu - self.target_vector)) *
                        tensor.inv(2 * tensor.sqr(sigma))),
                    axis=0)))

        # reg_loss = tensor.sum(tensor.sqr(self.layers.values()[0].W))
        # for layer in self.layers.values()[1:]:
        #     reg_loss += tensor.sum(tensor.sqr(layer.W))

        # regularization = 1/n_samples * regularization_strength/2 * reg_loss

        return log_sum_loss  #+ regularization
Example #17
    def compute_loss(probs, absolute_melody, extra_info=False):
        """
        Compute loss between probs and an absolute melody

        Parameters:
            probs: A theano tensor of shape (batch, time, 2+high_bound-low_bound)
            absolute_melody: A tensor of shape (batch, time) with correct indices
            extra_info: If True, return extra info

        Returns
            A theano tensor loss value.
            Also, if extra_info is true, an additional info dict.
        """
        n_batch, n_time, prob_width = probs.shape
        correct_encoded_form = T.reshape(
            T.extra_ops.to_one_hot(T.flatten(absolute_melody), prob_width),
            probs.shape)
        loglikelihoods = T.log(probs +
                               constants.EPSILON) * correct_encoded_form
        full_loss = T.neg(T.sum(loglikelihoods))

        if extra_info:
            loss_per_timestep = full_loss / T.cast(n_batch * n_time,
                                                   theano.config.floatX)
            accuracy_per_timestep = T.exp(-loss_per_timestep)

            loss_per_batch = full_loss / T.cast(n_batch, theano.config.floatX)
            accuracy_per_batch = T.exp(-loss_per_batch)

            num_jumps = T.sum(correct_encoded_form[:, :, 2:])
            loss_per_jump = full_loss / T.cast(num_jumps, theano.config.floatX)
            accuracy_per_jump = T.exp(-loss_per_jump)

            return full_loss, {
                "loss_per_timestep": loss_per_timestep,
                "accuracy_per_timestep": accuracy_per_timestep,
                "loss_per_batch": loss_per_batch,
                "accuracy_per_batch": accuracy_per_batch,
                "loss_per_jump": loss_per_jump,
                "accuracy_per_jump": accuracy_per_jump
            }
        else:
            return full_loss
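Because the loss is a sum of negative log-probabilities, accuracy_per_timestep = exp(-loss_per_timestep) is the geometric mean of the probabilities assigned to the correct notes (and similarly for the per-batch and per-jump variants). A quick NumPy check of that identity:

import numpy as np

p = np.array([0.5, 0.8, 0.9])        # probabilities of the correct choices
loss_per_step = -np.log(p).sum() / p.size
print(np.exp(-loss_per_step))        # ~0.711, the geometric mean of p
print(p.prod() ** (1.0 / p.size))    # same value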
Example #18
    def logsum_loss(self, n_samples, l1_regularization_strength, l2_regularization_strength):
        log_sum_loss = -tensor.sum(tensor.log(
                            tensor.sum(self.mix * tensor.inv(np.sqrt(2 * np.pi) * self.sigma) *
                                       tensor.exp(tensor.neg(tensor.sqr(self.mu - self.target_vector)) *
                                                  tensor.inv(2 * tensor.sqr(self.sigma))), axis=0)
        ))

        l1_reg_loss = tensor.sum(np.abs(self.layers.values()[0].W))
        for layer in self.layers.values()[1:]:
            l1_reg_loss += tensor.sum(np.abs(layer.W))

        l2_reg_loss = tensor.sum(tensor.sqr(self.layers.values()[0].W))
        for layer in self.layers.values()[1:]:
            l2_reg_loss += tensor.sum(tensor.sqr(layer.W))

        l1_regularization = 1/n_samples * l1_regularization_strength/2 * l1_reg_loss

        l2_regularization = 1/n_samples * l2_regularization_strength/2 * l2_reg_loss

        return log_sum_loss + l1_regularization + l2_regularization
Example #19
    def compute_loss(probs, absolute_melody, extra_info=False):
        """
        Compute loss between probs and an absolute melody

        Parameters:
            probs: A theano tensor of shape (batch, time, 2+high_bound-low_bound)
            absolute_melody: A tensor of shape (batch, time) with correct indices
            extra_info: If True, return extra info

        Returns
            A theano tensor loss value.
            Also, if extra_info is true, an additional info dict.
        """
        n_batch, n_time, prob_width = probs.shape
        correct_encoded_form = T.reshape(T.extra_ops.to_one_hot(T.flatten(absolute_melody), prob_width), probs.shape)
        loglikelihoods = T.log( probs + constants.EPSILON )*correct_encoded_form
        full_loss = T.neg(T.sum(loglikelihoods))

        if extra_info:
            loss_per_timestep = full_loss/T.cast(n_batch*n_time, theano.config.floatX)
            accuracy_per_timestep = T.exp(-loss_per_timestep)

            loss_per_batch = full_loss/T.cast(n_batch, theano.config.floatX)
            accuracy_per_batch = T.exp(-loss_per_batch)

            num_jumps = T.sum(correct_encoded_form[:,:,2:])
            loss_per_jump = full_loss/T.cast(num_jumps, theano.config.floatX)
            accuracy_per_jump = T.exp(-loss_per_jump)

            return full_loss, {
                "loss_per_timestep":loss_per_timestep,
                "accuracy_per_timestep":accuracy_per_timestep,
                "loss_per_batch":loss_per_batch,
                "accuracy_per_batch":accuracy_per_batch,
                "loss_per_jump":loss_per_jump,
                "accuracy_per_jump":accuracy_per_jump
            }
        else:
            return full_loss
Example #20
    def logsum_loss(self, n_samples, l1_regularization_strength,
                    l2_regularization_strength):
        log_sum_loss = -tensor.sum(
            tensor.log(
                tensor.sum(self.mix * tensor.inv(
                    np.sqrt(2 * np.pi) * self.sigma) * tensor.exp(
                        tensor.neg(tensor.sqr(self.mu - self.target_vector)) *
                        tensor.inv(2 * tensor.sqr(self.sigma))),
                           axis=0)))

        l1_reg_loss = tensor.sum(np.abs(self.layers.values()[0].W))
        for layer in self.layers.values()[1:]:
            l1_reg_loss += tensor.sum(np.abs(layer.W))

        l2_reg_loss = tensor.sum(tensor.sqr(self.layers.values()[0].W))
        for layer in self.layers.values()[1:]:
            l2_reg_loss += tensor.sum(tensor.sqr(layer.W))

        l1_regularization = 1 / n_samples * l1_regularization_strength / 2 * l1_reg_loss

        l2_regularization = 1 / n_samples * l2_regularization_strength / 2 * l2_reg_loss

        return log_sum_loss + l1_regularization + l2_regularization
Example #21
    def LMmulcloss(self,kth,x,y,label,nextwords):
        #multiple label loss + language model loss
        #nextwords  = START + words + END
        hidden = self.hidden_k(x,self.w,self.dicw,kth)
        print "hidden  type :  "+str(hidden.type)
        size = y.ndim
        y = T.addbroadcast(y,size - 1)
        embedding = T.sum(hidden*y,0)/T.addbroadcast(T.cast(T.sum(y,0), 'int16'), size - 2)
        #embedding = T.sum(hidden*y,0)/ T.addbroadcast(T.sum(y,0), size-2)
        print "embedding type  :  "+str(embedding.type)
        mulloss = (0. - T.sum(T.log(1. / (1. + T.exp(0. - (T.dot(embedding, self.w["mulw"])+self.w["mulb"])*label)))))/embedding.shape[0]


        if self.hsoftmax:
            #pos language model
            hshape = self.hshape
            newhidden = hidden[:,:,:hidden.shape[2]/2].reshape((hidden.shape[0]*hidden.shape[1],hidden.shape[2]/2))

            smax_group = T.nnet.h_softmax(newhidden, newhidden.shape[0],self.wordnum, hshape[0], hshape[1], self.w["posLMw1"], self.b["posLMb1"], self.w["posLMw2"], self.w["posLMb2"], nextwords[2:].ravel())
            losslist = T.neg(T.log(smax_group.reshape(nextwords[2:].shape)))
            mask = T.cast(T.neq(nextwords[2:], self.padding_id), theano.config.floatX)
            losslist = losslist*mask
            posLMloss = T.cast(T.mean(T.sum(losslist,axis=0)), theano.config.floatX)

            #neg language model

            newhidden = hidden[:,:,hidden.shape[2]/2:].reshape((hidden.shape[0]*hidden.shape[1],hidden.shape[2]/2))

            smax_group = T.nnet.h_softmax(newhidden, newhidden.shape[0],self.wordnum, hshape[0], hshape[1], self.w["negLMw1"], self.b["negLMb1"], self.w["negLMw2"], self.w["negLMb2"], nextwords[:-2].ravel())
            losslist = T.neg(T.log(smax_group.reshape(nextwords[:-2].shape)))
            mask = T.cast(T.neq(nextwords[:-2], self.padding_id), theano.config.floatX)
            losslist = losslist*mask
            negLMloss = T.cast(T.mean(T.sum(losslist,axis=0)), theano.config.floatX)

        else:


            def categorical_loss(ihidden,words,w,b):
                scores = T.dot(ihidden,w)+b
                prep = T.exp(scores)/T.sum(T.exp(scores),1).dimshuffle(0,'x')
                loss = T.nnet.categorical_crossentropy(prep, words)
                return loss
            #newhidden = hidden.reshape((hidden.shape[0]*hidden.shape[1], hidden.shape[2]))
            #pos language model
            #prep = T.exp(T.dot(newhidden[:,:newhidden.shape[1]/2],self.w["posLMw"])+self.w["posLMb"])/T.sum(T.exp(T.dot(newhidden[:,:newhidden.shape[1]/2],self.w["posLMw"])+self.w["posLMb"]), 1).dimshuffle(0,'x')
            
            scores = T.dot(hidden[:,:,:hidden.shape[2]/2], self.w["posLMw"])+self.w["posLMb"]
            scores = scores.reshape((scores.shape[0]*scores.shape[1], scores.shape[2]))
            prep = T.exp(scores)/T.sum(T.exp(scores), scores.ndim - 1).dimshuffle((0,'x'))
            #(len*batch)
            losslist = T.nnet.categorical_crossentropy(prep,nextwords[2:].ravel())
            losslist = losslist.reshape(nextwords[2:].shape)
            
            #losslist, _ = theano.scan(fn = categorical_loss, sequences = [hidden[:,:,:hidden.shape[2]/2], nextwords[2:]], outputs_info = None, 
            #    non_sequences = [self.w["posLMw"], self.w["posLMb"]]) 



            mask = T.cast(T.neq(nextwords[2:], self.padding_id), theano.config.floatX)
            losslist = losslist*mask
            posLMloss = T.cast(T.mean(T.sum(losslist,axis=0)), theano.config.floatX)


            #neg language model
            #prep = T.exp(T.dot(newhidden[:,newhidden.shape[1]/2:],self.w["negLMw"])+self.w["negLMb"])/T.sum(T.exp(T.dot(newhidden[:,newhidden.shape[1]/2:],self.w["negLMw"])+self.w["negLMb"]), 1).dimshuffle(0,'x')
            
            scores = T.dot(hidden[:,:,hidden.shape[2]/2:], self.w["negLMw"])+self.w["negLMb"]
            scores = scores.reshape((scores.shape[0]*scores.shape[1], scores.shape[2]))
            prep = T.exp(scores)/T.sum(T.exp(scores), scores.ndim - 1).dimshuffle((0,'x'))
            #(len*batch)
            losslist = T.nnet.categorical_crossentropy(prep,nextwords[0:-2].ravel())
            losslist = losslist.reshape(nextwords[0:-2].shape)
            

            

            #losslist, _ = theano.scan(fn = categorical_loss, sequences = [hidden[:,:,hidden.shape[2]/2:], nextwords[:-2]], outputs_info = None, 
            #    non_sequences = [self.w["negLMw"], self.w["negLMb"]]) 

            mask = T.cast(T.neq(nextwords[0:-2], self.padding_id), theano.config.floatX)
            losslist = losslist*mask
            negLMloss = T.cast(T.mean(T.sum(losslist,axis=0)), theano.config.floatX)


        return mulloss, posLMloss, negLMloss
Example #22
    def setup_train(self):

        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()
        
        self.epsilon = np.spacing(np.float32(1.0))

        def step_time(in_data, *other):
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states
        
        def step_note(in_data, *other):
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states
        
        # We generate an output for each input, so it doesn't make sense to use the last output as an input.
        # Note that we assume the sentinel start value is already present
        # TEMP CHANGE: NO SENTINEL
        input_slice = self.input_mat[:,0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape
        
        # time_inputs is a matrix (time, batch/note, input_per_note)
        time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
        num_time_parallel = time_inputs.shape[1]
        
        # apply dropout
        if self.dropout > 0:
            time_masks = theano_lstm.MultiDropout( [(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
        else:
            time_masks = []

        time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
        time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)
        
        self.time_thoughts = time_result
        
        # Now time_result is a list of matrices [layer](time, batch/note, hidden_states), one per layer, but we only care about
        # the hidden state of the last layer.
        # Transpose to be (note, batch/time, hidden_states)
        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = get_last_layer(time_result).reshape((n_time,n_batch,n_note,n_hidden)).transpose((2,1,0,3)).reshape((n_note,n_batch*n_time,n_hidden))
        
        # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
        # In (note, batch/time, 2) format
        # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
        start_note_values = T.alloc(np.array(0,dtype=np.int8), 1, time_final.shape[1], 2 )
        correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
        note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)
        
        # Together, this and the output from the last LSTM go to the new LSTM, but rotated, so that the batches in
        # one direction are the steps in the other, and vice versa.
        note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
        num_timebatch = note_inputs.shape[1]
        
        # apply dropout
        if self.dropout > 0:
            pitch_masks = theano_lstm.MultiDropout( [(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
        else:
            pitch_masks = []

        note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
        note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)
        
        self.note_thoughts = note_result
        
        # Now note_result is a list of matrices [layer](note, batch/time, onOrArticProb), one per layer, but we only care about
        # the hidden state of the last layer.
        # Transpose to be (batch, time, note, onOrArticProb)
        note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)
        
        # The cost of the entire procedure is the negative log likelihood of the events all happening.
        # For the purposes of training, if the output probability is P, then the likelihood of seeing a 1 is P, and
        # the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
        # Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
        # multiply the likelihoods, or, since we are logging them, add the logs.
        
        # Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
        # whether or not those are articulated.
        # The padright is there because self.output_mat[:,:,:,0] gives a 3D tensor with shape (b,x,y), but we need a
        # 4D tensor with shape (b,x,y,1) instead
        active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
        mask = T.concatenate([T.ones_like(active_notes),active_notes], axis=3)
        
        loglikelihoods = mask * T.log( 2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon )
        self.cost = T.neg(T.sum(loglikelihoods))
        
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs= ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
            allow_input_downcast=True)
Example #23
    def setup_train(self):

        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()

        self.epsilon = np.spacing(np.float32(1.0))

        print "model-setup-train::Trace-1"


        def step_time(in_data, *other):
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        def step_note(in_data, *other):
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        # We generate an output for each input, so it doesn't make sense to use the last output as an input.
        # Note that we assume the sentinel start value is already present
        # TEMP CHANGE: NO SENTINEL

        print "model-setup-train::Trace-2"

        input_slice = self.input_mat[:,0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape

        # time_inputs is a matrix (time, batch/note, input_per_note)
        time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
        num_time_parallel = time_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            time_masks = MultiDropout( [(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
        else:
            time_masks = []

        print "model-setup-train::Trace-3"

        time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
        time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)

        print "model-setup-train::Trace-4"


        self.time_thoughts = time_result

        # Now time_result is a list of matrices [layer](time, batch/note, hidden_states), one per layer, but we only care about
        # the hidden state of the last layer.
        # Transpose to be (note, batch/time, hidden_states)
        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = get_last_layer(time_result).reshape((n_time,n_batch,n_note,n_hidden)).transpose((2,1,0,3)).reshape((n_note,n_batch*n_time,n_hidden))

        # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
        # In (note, batch/time, 2) format
        # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
        start_note_values = T.alloc(0, 1, time_final.shape[1], 2 )
        correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
        note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)

        print "model-setup-train::Trace-5"


        # Together, this and the output from the last LSTM go to the new LSTM, but rotated, so that the batches in
        # one direction are the steps in the other, and vice versa.
        note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
        num_timebatch = note_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            pitch_masks = MultiDropout( [(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
        else:
            pitch_masks = []

        print "model-setup-train::Trace-6"


        note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
        note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)

        self.note_thoughts = note_result

        # Now note_result is a list of matrices [layer](note, batch/time, onOrArticProb), one per layer, but we only care about
        # the hidden state of the last layer.
        # Transpose to be (batch, time, note, onOrArticProb)
        note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)

        print "model-setup-train::Trace-7"


        # The cost of the entire procedure is the negative log likelihood of the events all happening.
        # For the purposes of training, if the output probability is P, then the likelihood of seeing a 1 is P, and
        # the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
        # Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
        # multiply the likelihoods, or, since we are logging them, add the logs.

        # Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
        # whether or not those are articulated.
        # The padright is there because self.output_mat[:,:,:,0] gives a 3D tensor with shape (b,x,y), but we need a
        # 4D tensor with shape (b,x,y,1) instead
        active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
        mask = T.concatenate([T.ones_like(active_notes),active_notes], axis=3)

        loglikelihoods = mask * T.log( 2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon )

        print "model-setup-train::Trace-8"

        self.cost = T.neg(T.sum(loglikelihoods))

        print "model-setup-train::Trace-9"

        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")

        print "model-setup-train::Trace-10"

        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)


        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs= ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
            allow_input_downcast=True)
Example #24
    def LMmulcloss(self, kth, x, y, label, nextwords):
        #multiple label loss + language model loss
        #nextwords  = START + words + END
        hidden = self.hidden_k(x, self.w, self.dicw, kth)
        print "hidden  type :  " + str(hidden.type)
        size = y.ndim
        y = T.addbroadcast(y, size - 1)
        embedding = T.sum(hidden * y, 0) / T.addbroadcast(
            T.cast(T.sum(y, 0), 'int16'), size - 2)
        #embedding = T.sum(hidden*y,0)/ T.addbroadcast(T.sum(y,0), size-2)
        print "embedding type  :  " + str(embedding.type)
        mulloss = (0. - T.sum(
            T.log(1. / (1. + T.exp(
                0. -
                (T.dot(embedding, self.w["mulw"]) + self.w["mulb"]) * label))))
                   ) / embedding.shape[0]

        if self.hsoftmax:
            #pos language model
            hshape = self.hshape
            newhidden = hidden[:, :, :hidden.shape[2] / 2].reshape(
                (hidden.shape[0] * hidden.shape[1], hidden.shape[2] / 2))

            smax_group = T.nnet.h_softmax(newhidden, newhidden.shape[0],
                                          self.wordnum, hshape[0], hshape[1],
                                          self.w["posLMw1"], self.b["posLMb1"],
                                          self.w["posLMw2"], self.w["posLMb2"],
                                          nextwords[2:].ravel())
            losslist = T.neg(T.log(smax_group.reshape(nextwords[2:].shape)))
            mask = T.cast(T.neq(nextwords[2:], self.padding_id),
                          theano.config.floatX)
            losslist = losslist * mask
            posLMloss = T.cast(T.mean(T.sum(losslist, axis=0)),
                               theano.config.floatX)

            #neg language model

            newhidden = hidden[:, :, hidden.shape[2] / 2:].reshape(
                (hidden.shape[0] * hidden.shape[1], hidden.shape[2] / 2))

            smax_group = T.nnet.h_softmax(newhidden, newhidden.shape[0],
                                          self.wordnum, hshape[0], hshape[1],
                                          self.w["negLMw1"], self.b["negLMb1"],
                                          self.w["negLMw2"], self.w["negLMb2"],
                                          nextwords[:-2].ravel())
            losslist = T.neg(T.log(smax_group.reshape(nextwords[:-2].shape)))
            mask = T.cast(T.neq(nextwords[:-2], self.padding_id),
                          theano.config.floatX)
            losslist = losslist * mask
            negLMloss = T.cast(T.mean(T.sum(losslist, axis=0)),
                               theano.config.floatX)

        else:

            def categorical_loss(ihidden, words, w, b):
                scores = T.dot(ihidden, w) + b
                prep = T.exp(scores) / T.sum(T.exp(scores), 1).dimshuffle(
                    0, 'x')
                loss = T.nnet.categorical_crossentropy(prep, words)
                return loss

            #newhidden = hidden.reshape((hidden.shape[0]*hidden.shape[1], hidden.shape[2]))
            #pos language model
            #prep = T.exp(T.dot(newhidden[:,:newhidden.shape[1]/2],self.w["posLMw"])+self.w["posLMb"])/T.sum(T.exp(T.dot(newhidden[:,:newhidden.shape[1]/2],self.w["posLMw"])+self.w["posLMb"]), 1).dimshuffle(0,'x')

            scores = T.dot(hidden[:, :, :hidden.shape[2] / 2],
                           self.w["posLMw"]) + self.w["posLMb"]
            scores = scores.reshape(
                (scores.shape[0] * scores.shape[1], scores.shape[2]))
            prep = T.exp(scores) / T.sum(T.exp(scores),
                                         scores.ndim - 1).dimshuffle((0, 'x'))
            #(len*batch)
            losslist = T.nnet.categorical_crossentropy(prep,
                                                       nextwords[2:].ravel())
            losslist = losslist.reshape(nextwords[2:].shape)

            #losslist, _ = theano.scan(fn = categorical_loss, sequences = [hidden[:,:,:hidden.shape[2]/2], nextwords[2:]], outputs_info = None,
            #    non_sequences = [self.w["posLMw"], self.w["posLMb"]])

            mask = T.cast(T.neq(nextwords[2:], self.padding_id),
                          theano.config.floatX)
            losslist = losslist * mask
            posLMloss = T.cast(T.mean(T.sum(losslist, axis=0)),
                               theano.config.floatX)

            #neg language model
            #prep = T.exp(T.dot(newhidden[:,newhidden.shape[1]/2:],self.w["negLMw"])+self.w["negLMb"])/T.sum(T.exp(T.dot(newhidden[:,newhidden.shape[1]/2:],self.w["negLMw"])+self.w["negLMb"]), 1).dimshuffle(0,'x')

            scores = T.dot(hidden[:, :, hidden.shape[2] / 2:],
                           self.w["negLMw"]) + self.w["negLMb"]
            scores = scores.reshape(
                (scores.shape[0] * scores.shape[1], scores.shape[2]))
            prep = T.exp(scores) / T.sum(T.exp(scores),
                                         scores.ndim - 1).dimshuffle((0, 'x'))
            #(len*batch)
            losslist = T.nnet.categorical_crossentropy(prep,
                                                       nextwords[0:-2].ravel())
            losslist = losslist.reshape(nextwords[0:-2].shape)

            #losslist, _ = theano.scan(fn = categorical_loss, sequences = [hidden[:,:,hidden.shape[2]/2:], nextwords[:-2]], outputs_info = None,
            #    non_sequences = [self.w["negLMw"], self.w["negLMb"]])

            mask = T.cast(T.neq(nextwords[0:-2], self.padding_id),
                          theano.config.floatX)
            losslist = losslist * mask
            negLMloss = T.cast(T.mean(T.sum(losslist, axis=0)),
                               theano.config.floatX)

        return mulloss, posLMloss, negLMloss
Example #25
def expit(v):
    return tt.inv(1. + tt.exp(tt.neg(v)))
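This is the logistic sigmoid written out with tt.inv (reciprocal) and tt.neg (negation). A minimal check against Theano's built-in sigmoid (assuming tt is theano.tensor):

import numpy as np
import theano
import theano.tensor as tt

v = tt.dvector('v')
f = theano.function([v], [expit(v), tt.nnet.sigmoid(v)])
a, b = f(np.array([-2.0, 0.0, 2.0]))
print(np.allclose(a, b))  # True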
Example #26
def neg(x):
  return T.neg(x)
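T.neg is simply elementwise negation, i.e. it builds the same computation as the unary minus operator on a tensor. A minimal check (assuming T is theano.tensor):

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
f = theano.function([x], [neg(x), -x])
a, b = f(np.array([1.0, -2.5, 0.0]))
print(np.allclose(a, b))  # True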