Example #1
0
    def _initialize_update_function(self):
        """Build the training graph and compile it into ``self.update``.

        Two int8 4-D symbolic tensors are created, one for the model input and
        one for the target output.  The input drops its last entry along
        axis 1 and the target drops its first entry along axis 1, so every
        remaining input entry is paired with the target one position later.
        The time model is evaluated first; its result is combined with the
        shifted input/target to feed the note model.

        Side effect: sets ``self.update`` to a compiled function that takes
        ``(input, output)``, returns the loss and applies the updates obtained
        from ``create_optimization_updates``.
        """
        def time_step(step_input, *previous_hidden_state):
            # One recurrence step of the time model.
            return self.time_model.forward(step_input, prev_hiddens=previous_hidden_state)

        def note_step(step_input, *previous_hidden_state):
            # One recurrence step of the note model.
            return self.note_model.forward(step_input, prev_hiddens=previous_hidden_state)

        # Symbolic placeholders (named so they do not shadow the builtin ``input``).
        input_var = T.btensor4()
        output_var = T.btensor4()

        # Shift by one along axis 1: inputs exclude the last entry, targets the first.
        adjusted_input = input_var[:, :-1]
        adjusted_output = output_var[:, 1:]

        # Time model pass.
        time_model_input = self.get_time_model_input(adjusted_input)
        time_model_output = self.get_output(
            time_step,
            time_model_input,
            self.get_outputs_info(time_model_input, self.time_model.layers),
        )

        # Note model pass, fed with the time model result.
        note_model_input = self.get_note_model_input(
            adjusted_input, adjusted_output, time_model_output)
        note_model_output = self.get_output(
            note_step,
            note_model_input,
            self.get_outputs_info(note_model_input, self.note_model.layers),
        )

        loss = self.get_loss(
            adjusted_output,
            self.get_prediction(adjusted_input, note_model_output),
        )

        # Only the first of the five returned values is needed here.
        updates, _, _, _, _ = create_optimization_updates(loss, self.params)

        self.update = theano.function(
            inputs=[input_var, output_var],
            outputs=loss,
            updates=updates,
            allow_input_downcast=True,
        )
Example #2
0
def build_iterators(dset, network_output, batchsize=1, lr=0.1, momentum=0.3):
    """Compile per-batch training, validation and test functions.

    Args:
        dset: mapping with 'train', 'valid' and 'test' entries; element 0 of
            each entry is used as the inputs and element 1 as the targets.
            Both are sliced with a symbolic index, so they are presumably
            Theano shared variables -- confirm against the caller.
        network_output: output layer of the Lasagne network.
        batchsize: number of samples per batch.
        lr: learning rate passed to Nesterov momentum.
        momentum: momentum coefficient passed to Nesterov momentum.

    Returns:
        dict with keys 'train', 'valid' and 'test'.  Each value is a compiled
        function taking a batch index; 'train' returns the training loss and
        applies the parameter updates, the other two return
        ``[deterministic loss, accuracy]``.
    """
    batch_index = T.iscalar('ind')
    # Each image is 3-D; the batch dimension makes the input 4-D.
    X_b = T.btensor4('Xb')
    # The FCN outputs a 2-D image with one channel per class.
    y_b = T.btensor4('yb')

    batch_slice = slice(batch_index * batchsize, (batch_index + 1) * batchsize)

    def batch_givens(split):
        # Substitute the symbolic batch with the matching slice of one split.
        return {
            X_b: dset[split][0][batch_slice],
            y_b: dset[split][1][batch_slice],
        }

    objective = lasagne.objectives.Objective(
        network_output, loss_function=lasagne.objectives.mse)
    train_loss = objective.get_loss(X_b, target=y_b)
    eval_loss = objective.get_loss(X_b, target=y_b, deterministic=True)

    # argmax over axis 1, which the original author believed to be the channel axis.
    deterministic_output = ll.get_output(network_output, X_b, deterministic=True)
    pred_func = T.argmax(deterministic_output, axis=1)
    acc_func = T.mean(T.eq(pred_func, y_b), dtype=theano.config.floatX)

    # Training schedule: Nesterov momentum over all network parameters.
    weights = lasagne.layers.get_all_params(network_output)
    updates = lasagne.updates.nesterov_momentum(train_loss, weights, lr, momentum)

    iterators = {}
    iterators['train'] = theano.function(
        [batch_index], train_loss, updates=updates, givens=batch_givens('train'))
    for split in ('valid', 'test'):
        iterators[split] = theano.function(
            [batch_index], [eval_loss, acc_func], givens=batch_givens(split))
    return iterators
Example #3
0
def ndim_btensor(ndim, name=None):
    """Return a symbolic int8 tensor variable with ``ndim`` dimensions.

    Ranks 2, 3 and 4 map to ``T.bmatrix``, ``T.btensor3`` and ``T.btensor4``.
    Any other value falls back to ``T.imatrix`` (an int32 matrix).
    NOTE(review): the int32 fallback looks unintended for a "btensor" helper
    -- confirm with the callers before changing it.
    """
    byte_constructors = (
        (2, T.bmatrix),
        (3, T.btensor3),
        (4, T.btensor4),
    )
    for rank, make_variable in byte_constructors:
        if ndim == rank:
            return make_variable(name)
    return T.imatrix(name)
Example #4
0
def ndim_btensor(ndim, name=None):
    """Create a symbolic int8 variable of rank ``ndim`` (2, 3 or 4).

    For every other ``ndim`` the result is ``T.imatrix(name)``, i.e. an int32
    matrix.  NOTE(review): that fallback dtype/rank may be a leftover from a
    sibling helper -- verify before relying on it.
    """
    if ndim == 2:
        constructor = T.bmatrix
    elif ndim == 3:
        constructor = T.btensor3
    elif ndim == 4:
        constructor = T.btensor4
    else:
        constructor = T.imatrix
    return constructor(name)
Example #5
0
    def setup_train(self):
        """Build the symbolic training graph and compile the training functions.

        Attributes set as side effects:
          * ``self.input_mat`` / ``self.output_mat`` -- int8 4-D symbolic inputs
          * ``self.epsilon`` -- float32 spacing at 1.0, added inside the log
          * ``self.time_thoughts`` / ``self.note_thoughts`` -- raw scan results
          * ``self.cost`` -- negative sum of the masked log-likelihoods
          * ``self.update_fun`` -- compiled; returns the cost and applies updates
          * ``self.update_thought_fun`` -- compiled; returns both scan results
            followed by the cost, without applying updates
        """
        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()

        self.epsilon = np.spacing(np.float32(1.0))

        def split_extras(extras, layer_sizes):
            # scan hands the step function the recurrent states first and the
            # non-sequences (the dropout masks, when enabled) last.
            extras = list(extras)
            if self.dropout:
                boundary = -len(layer_sizes)
                # One mask per entry of layer_sizes, preceded by a None placeholder.
                return extras[:boundary], [None] + extras[boundary:]
            return extras, []

        def step_time(in_data, *other):
            hiddens, masks = split_extras(other, self.t_layer_sizes)
            return self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)

        def step_note(in_data, *other):
            hiddens, masks = split_extras(other, self.p_layer_sizes)
            return self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)

        def dropout_masks(layer_sizes, n_parallel):
            # No masks at all when dropout is disabled.
            if self.dropout > 0:
                mask_shapes = [(n_parallel, size) for size in layer_sizes]
                return theano_lstm.MultiDropout(mask_shapes, self.dropout)
            return []

        # We generate an output for each input, so it doesn't make sense to use
        # the last output as an input.  No sentinel start value is added here.
        input_slice = self.input_mat[:, 0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape

        # ---- time pass ----
        # time_inputs is (time, batch*note, input_per_note)
        time_major = input_slice.transpose((1, 0, 2, 3))
        time_inputs = time_major.reshape((n_time, n_batch * n_note, n_ipn))
        num_time_parallel = time_inputs.shape[1]

        time_masks = dropout_masks(self.t_layer_sizes, num_time_parallel)
        time_outputs_info = [
            initial_state_with_taps(layer, num_time_parallel)
            for layer in self.time_model.layers
        ]
        time_result, _ = theano.scan(
            fn=step_time,
            sequences=[time_inputs],
            non_sequences=time_masks,
            outputs_info=time_outputs_info)
        self.time_thoughts = time_result

        # Only the last layer's hidden state is used from here on.  It arrives
        # as (time, batch*note, hidden) and is rearranged to
        # (note, batch*time, hidden).
        n_hidden = get_last_layer(time_result).shape[2]
        time_by_axis = get_last_layer(time_result).reshape(
            (n_time, n_batch, n_note, n_hidden))
        time_final = time_by_axis.transpose((2, 1, 0, 3)).reshape(
            (n_note, n_batch * n_time, n_hidden))

        # ---- note pass ----
        # note_choices_inputs holds the previously chosen note in
        # (note, batch*time, 2) format: a leading [0, 0] row of shape (1, N, 2)
        # followed by every target note except the last one.
        start_note_values = T.alloc(
            np.array(0, dtype=np.int8), 1, time_final.shape[1], 2)
        shifted_targets = self.output_mat[:, 1:, 0:-1, :].transpose((2, 0, 1, 3))
        correct_choices = shifted_targets.reshape(
            (n_note - 1, n_batch * n_time, 2))
        note_choices_inputs = T.concatenate(
            [start_note_values, correct_choices], axis=0)

        # The time model output and the previous choices feed the second LSTM,
        # rotated so that the steps of one pass are the batches of the other.
        note_inputs = T.concatenate([time_final, note_choices_inputs], axis=2)
        num_timebatch = note_inputs.shape[1]

        pitch_masks = dropout_masks(self.p_layer_sizes, num_timebatch)
        note_outputs_info = [
            initial_state_with_taps(layer, num_timebatch)
            for layer in self.pitch_model.layers
        ]
        note_result, _ = theano.scan(
            fn=step_note,
            sequences=[note_inputs],
            non_sequences=pitch_masks,
            outputs_info=note_outputs_info)
        self.note_thoughts = note_result

        # Last layer again: (note, batch*time, onOrArticProb) rearranged to
        # (batch, time, note, onOrArticProb).
        note_final = get_last_layer(note_result).reshape(
            (n_note, n_batch, n_time, 2)).transpose(1, 2, 0, 3)

        # ---- cost ----
        # The cost is the negative log likelihood of all events.  With output
        # probability P and target x in {0, 1}, the likelihood of x is
        # (1-P)(1-x) + Px = 2Px - P - x + 1.  Every decision is a probability
        # conditioned on the previous ones, so the logs are simply summed.
        #
        # Articulation terms are masked out for notes that are not played,
        # because their articulation does not matter.  shape_padright turns the
        # 3-D slice (b, x, y) into the 4-D (b, x, y, 1) needed to concatenate.
        active_notes = T.shape_padright(self.output_mat[:, 1:, :, 0])
        mask = T.concatenate([T.ones_like(active_notes), active_notes], axis=3)

        targets = self.output_mat[:, 1:]
        likelihoods = 2 * note_final * targets - note_final - targets + 1 + self.epsilon
        loglikelihoods = mask * T.log(likelihoods)
        self.cost = T.neg(T.sum(loglikelihoods))

        # ---- compiled functions ----
        updates, _, _, _, _ = create_optimization_updates(
            self.cost, self.params, method="adadelta")
        graph_inputs = [self.input_mat, self.output_mat]

        self.update_fun = theano.function(
            inputs=graph_inputs,
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        thought_outputs = (ensure_list(self.time_thoughts)
                           + ensure_list(self.note_thoughts)
                           + [self.cost])
        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=thought_outputs,
            allow_input_downcast=True)
Example #6
0
    def setup_train(self):
        """Build the symbolic training graph and compile the training functions.

        Progress is traced on stdout with ``model-setup-train::Trace-N`` lines.
        They are written as single-argument ``print(...)`` calls so the method
        parses and prints the same text on both Python 2 and Python 3.

        Attributes set as side effects:
          * ``self.input_mat`` / ``self.output_mat`` -- int8 4-D symbolic inputs
          * ``self.epsilon`` -- float32 spacing at 1.0, added inside the log
          * ``self.time_thoughts`` / ``self.note_thoughts`` -- raw scan results
          * ``self.cost`` -- negative sum of the masked log-likelihoods
          * ``self.update_fun`` -- compiled; returns the cost and applies updates
          * ``self.update_thought_fun`` -- compiled; returns both scan results
            followed by the cost, without applying updates
        """
        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()

        self.epsilon = np.spacing(np.float32(1.0))

        print("model-setup-train::Trace-1")

        def step_time(in_data, *other):
            # ``other`` holds the previous hidden states followed, when dropout
            # is enabled, by one mask per entry of t_layer_sizes.
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        def step_note(in_data, *other):
            # Same layout as step_time, using the pitch model and p_layer_sizes.
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        # We generate an output for each input, so it doesn't make sense to use
        # the last output as an input.  No sentinel start value is added here.

        print("model-setup-train::Trace-2")

        input_slice = self.input_mat[:,0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape

        # time_inputs is a matrix (time, batch/note, input_per_note)
        time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
        num_time_parallel = time_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            time_masks = MultiDropout( [(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
        else:
            time_masks = []

        print("model-setup-train::Trace-3")

        time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
        time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)

        print("model-setup-train::Trace-4")

        self.time_thoughts = time_result

        # Now time_result is a list of matrix [layer](time, batch/note, hidden_states) for each layer but we only care about
        # the hidden state of the last layer.
        # Transpose to be (note, batch/time, hidden_states)
        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = last_layer.reshape((n_time,n_batch,n_note,n_hidden)).transpose((2,1,0,3)).reshape((n_note,n_batch*n_time,n_hidden))

        # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
        # In (note, batch/time, 2) format
        # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
        start_note_values = T.alloc(0, 1, time_final.shape[1], 2 )
        correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
        note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)

        print("model-setup-train::Trace-5")

        # Together, this and the output from the last LSTM goes to the new LSTM, but rotated, so that the batches in
        # one direction are the steps in the other, and vice versa.
        note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
        num_timebatch = note_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            pitch_masks = MultiDropout( [(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
        else:
            pitch_masks = []

        print("model-setup-train::Trace-6")

        note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
        note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)

        self.note_thoughts = note_result

        # Now note_result is a list of matrix [layer](note, batch/time, onOrArticProb) for each layer but we only care about
        # the hidden state of the last layer.
        # Transpose to be (batch, time, note, onOrArticProb)
        note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)

        print("model-setup-train::Trace-7")

        # The cost of the entire procedure is the negative log likelihood of the events all happening.
        # For the purposes of training, if the outputted probability is P, then the likelihood of seeing a 1 is P, and
        # the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
        # Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
        # multiply the likelihoods, or, since we are logging them, add the logs.

        # Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
        # whether or not those are articulated.
        # The padright is there because self.output_mat[:,1:,:,0] is a 3-D tensor (b,x,y), but we need a 4-D tensor
        # (b,x,y,1) to concatenate along axis 3.
        active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
        mask = T.concatenate([T.ones_like(active_notes),active_notes], axis=3)

        loglikelihoods = mask * T.log( 2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon )

        print("model-setup-train::Trace-8")

        self.cost = T.neg(T.sum(loglikelihoods))

        print("model-setup-train::Trace-9")

        # Only the first of the five returned values (the updates) is needed.
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")

        print("model-setup-train::Trace-10")

        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs= ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
            allow_input_downcast=True)
    def setup_train(self):
        """Build the symbolic training graph and compile ``self.update_fun``.

        Prints a left-aligned "Setup Train" label (no newline) when it starts
        and "Done" when it finishes.

        Attributes set as side effects:
          * ``self.input_mat`` / ``self.output_mat`` -- int8 4-D symbolic inputs
          * ``self.time_thoughts`` / ``self.note_thoughts`` -- raw scan results
          * ``self.cost`` -- ``self.loss_func`` applied to the targets (first
            entry along axis 1 dropped) and the pitch model output
          * ``self.update_fun`` -- compiled; returns the cost and applies updates
        """
        print('{:25}'.format("Setup Train"), end='', flush=True)

        self.input_mat = T.btensor4()
        self.output_mat = T.btensor4()

        def make_step(model, layer_sizes):
            # Returns the scan step for ``model``.  The step receives the
            # previous hidden states followed, when dropout is enabled, by one
            # mask per entry of ``layer_sizes``.
            def step(in_data, *other):
                extras = list(other)
                if self.dropout:
                    boundary = -len(layer_sizes)
                    hiddens = extras[:boundary]
                    masks = [None] + extras[boundary:]
                else:
                    hiddens = extras
                    masks = []
                return model.forward(in_data,
                                     prev_hiddens=hiddens,
                                     dropout=masks)
            return step

        def build_masks(layer_sizes, n_rows):
            # Dropout masks of shape (n_rows, size) per layer, or no masks at
            # all when dropout is disabled.
            if not self.dropout > 0:
                return []
            mask_shapes = [(n_rows, size) for size in layer_sizes]
            return theano_lstm.MultiDropout(mask_shapes, self.dropout)

        step_time = make_step(self.time_model, self.t_layer_sizes)
        step_note = make_step(self.pitch_model, self.p_layer_sizes)

        # TIME PASS
        # Drop the last entry along axis 1, then flatten to
        # (time, batch * note, input_per_note).
        input_slice = self.input_mat[:, 0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape
        time_major = input_slice.transpose((1, 0, 2, 3))
        time_inputs = time_major.reshape((n_time, n_batch * n_note, n_ipn))

        time_masks = build_masks(self.t_layer_sizes, time_inputs.shape[1])
        time_outputs_info = []
        for layer in self.time_model.layers:
            time_outputs_info.append(
                initial_state_with_taps(layer, time_inputs.shape[1]))
        time_result, _ = theano.scan(fn=step_time,
                                     sequences=[time_inputs],
                                     non_sequences=time_masks,
                                     outputs_info=time_outputs_info)
        self.time_thoughts = time_result

        # Rearrange the last layer from (time, batch * note, hidden) to
        # (note, batch * time, hidden).
        n_hidden = get_last_layer(time_result).shape[2]
        time_by_axis = get_last_layer(time_result).reshape(
            (n_time, n_batch, n_note, n_hidden))
        time_final = time_by_axis.transpose((2, 1, 0, 3)).reshape(
            (n_note, n_batch * n_time, n_hidden))

        # PITCH PASS
        # Previous-note inputs: one all-zero row followed by the targets of
        # every note except the last.
        n_rows = n_batch * n_time
        start_note_values = T.alloc(np.array(0, dtype=np.int8), 1,
                                    time_final.shape[1], self.output_size)
        shifted_targets = self.output_mat[:, 1:, 0:-1, :].transpose(
            (2, 0, 1, 3))
        correct_choices = shifted_targets.reshape(
            (n_note - 1, n_rows, self.output_size))
        note_choices_inputs = T.concatenate(
            [start_note_values, correct_choices], axis=0)

        note_inputs = T.concatenate([time_final, note_choices_inputs], axis=2)

        note_masks = build_masks(self.p_layer_sizes, note_inputs.shape[1])
        note_outputs_info = []
        for layer in self.pitch_model.layers:
            note_outputs_info.append(
                initial_state_with_taps(layer, note_inputs.shape[1]))
        note_result, _ = theano.scan(fn=step_note,
                                     sequences=[note_inputs],
                                     non_sequences=note_masks,
                                     outputs_info=note_outputs_info)
        self.note_thoughts = note_result

        # Back to (batch, time, note, output_size).
        note_by_axis = get_last_layer(note_result).reshape(
            (n_note, n_batch, n_time, self.output_size))
        note_final = note_by_axis.transpose(1, 2, 0, 3)

        self.cost = self.loss_func(self.output_mat[:, 1:], note_final)

        # Only the first of the five returned values (the updates) is needed.
        updates, _, _, _, _ = create_optimization_updates(self.cost,
                                                          self.params,
                                                          method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        print("Done")
Example #8
0
# floatX symbolic variables of rank 2 (matrix), 3 (tensor3) and 4 (tensor4)
for make_float_variable in (T.matrix, T.tensor3, T.tensor4):
    v = make_float_variable(name=None, dtype=T.config.floatX)
    report(v)

# constructors with fixed data type. (examples with tensor4)
# b: byte, w: word(16bit), l: int64, i: int32
# d:float64, f: float32, c: complex64, z: complex128
for make_typed_variable in (T.btensor4, T.wtensor4, T.itensor4,
                            T.ltensor4, T.dtensor4):
    v = make_typed_variable(name="v")
    report(v)

# The float32 variant is only constructed here, not reported.
v = T.ftensor4(name="v")
Example #9
0
# Each entry pairs a constructor with the keyword arguments it is called with.
# The first three build floatX variables of rank 2, 3 and 4.  The rest use
# constructors with a fixed data type (examples with tensor4):
# b: byte, w: word(16bit), l: int64, i: int32
# d:float64, f: float32, c: complex64, z: complex128
reported_constructors = [
    (T.matrix, dict(name=None, dtype=T.config.floatX)),
    (T.tensor3, dict(name=None, dtype=T.config.floatX)),
    (T.tensor4, dict(name=None, dtype=T.config.floatX)),
    (T.btensor4, dict(name='v')),
    (T.wtensor4, dict(name='v')),
    (T.itensor4, dict(name='v')),
    (T.ltensor4, dict(name='v')),
    (T.dtensor4, dict(name='v')),
]
for constructor, constructor_kwargs in reported_constructors:
    v = constructor(**constructor_kwargs)
    report(v)

# The float32 variant is only constructed here, not reported.
v = T.ftensor4(name='v')