Example #1
    def mask_chorale(self, chorale, constraints_location=None):
        """
        (batch_size, num_voices, chorale_length)
        :param chorale:
        :return:
        """
        # each position is kept as a constraint with probability p ~ U[0, 0.5)
        p = random.random() * 0.5
        if constraints_location is None:
            constraints_location = cuda_variable(
                (torch.rand(*chorale.size()) < p).long())
        else:
            assert constraints_location.size() == chorale.size()
            constraints_location = cuda_variable(constraints_location)

        batch_size, num_voices, chorale_length = chorale.size()
        # the "no constraint" symbol of each voice is the index one past that
        # voice's note vocabulary
        no_constraint = torch.from_numpy(
            np.array([
                len(note2index)
                for note2index in self.chorale_dataset.note2index_dicts
            ]))
        no_constraint = no_constraint[None, :, None]
        no_constraint = no_constraint.long().clone().repeat(
            batch_size, 1, chorale_length)
        no_constraint = cuda_variable(no_constraint)
        return chorale * constraints_location + no_constraint * (
            1 - constraints_location)
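
A minimal, self-contained sketch of the masking idea above, with made-up sizes and vocabularies (the dataset and the cuda_variable helper are left out): positions drawn under the keep-probability retain their note, the rest get a per-voice "no constraint" index equal to that voice's vocabulary size.

import torch

# hypothetical sizes: 2 chorales, 4 voices, 8 ticks; per-voice vocabulary
# sizes, so that index == vocab size is the voice's "no constraint" symbol
batch_size, num_voices, length = 2, 4, 8
vocab_sizes = torch.tensor([10, 12, 11, 9])

chorale = torch.randint(0, 9, (batch_size, num_voices, length))

# keep each position as a constraint with probability 0.3
keep = (torch.rand(batch_size, num_voices, length) < 0.3).long()
no_constraint = vocab_sizes[None, :, None].expand(batch_size, -1, length)

masked = chorale * keep + no_constraint * (1 - keep)
print(masked[0, 0])  # masked entries of voice 0 show its NC index, 10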
Example #2
    def loss_and_acc_on_epoch(self, data_loader, train=True):
        """
        :param data_loader:
        :param train:
        :return: (float, float)
        """
        mean_loss = 0
        mean_accuracy = 0

        for sample_id, (chorale, metadata) in tqdm(enumerate(data_loader)):

            chorale, metadata = (cuda_variable(chorale.long()),
                                 cuda_variable(metadata.long()))

            self.optimizer.zero_grad()
            weights = self.forward(chorale=chorale, metadata=metadata)

            # index of the central tick of the sequence
            t = int((self.chorale_dataset.sequences_size *
                     self.chorale_dataset.subdivision / 2))

            targets = chorale[:, :, t]
            targets = targets.transpose(0, 1)
            # targets is now (num_voices, batch)
            weights = [
                weight_per_voice[:, t, :] for weight_per_voice in weights
            ]
            # list of (batch, num_notes)

            loss = self.mean_crossentropy_loss(weights=weights,
                                               targets=targets)
            if train:
                loss.backward()
                self.optimizer.step()

            # accumulate batch loss and accuracy
            mean_loss += to_numpy(loss.mean())[0]
            accuracy = self.mean_accuracy(weights=weights, targets=targets)

            mean_accuracy += to_numpy(accuracy)[0]

        mean_loss /= len(data_loader)
        mean_accuracy /= len(data_loader)
        return (mean_loss, mean_accuracy)
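
The helpers mean_crossentropy_loss and mean_accuracy are not part of this listing; the toy sketch below, with made-up sizes, shows what they plausibly compute: a per-voice cross-entropy and argmax accuracy, averaged over voices.

import torch
import torch.nn.functional as F

# toy stand-ins: 3 voices, batch of 5, 10 notes per voice
num_voices, batch, num_notes = 3, 5, 10
weights = [torch.randn(batch, num_notes) for _ in range(num_voices)]  # logits
targets = torch.randint(0, num_notes, (num_voices, batch))

# mean cross-entropy over voices
loss = sum(F.cross_entropy(w, t) for w, t in zip(weights, targets)) / num_voices

# mean fraction of correct argmax predictions over voices
acc = sum((w.argmax(1) == t).float().mean()
          for w, t in zip(weights, targets)) / num_voices
print(float(loss), float(acc))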
Example #3
    def forward(self, chorale: Variable, metadata: Variable):
        """
        :param chorale: (batch, num_voices, length in ticks)
        :param metadata: (batch, num_voices, length in ticks, num_metadatas)
        :return: list of logits per voice, each (batch, chorale_length, num_notes)
        """
        batch_size, num_voices, chorale_length = chorale.size()
        sequence_length = num_voices * chorale_length

        # === embed as wrapped sequence ===
        # --- chorale
        x = self.embed_chorale(chorale)

        # --- metadata
        m = self.embed_metadata(metadata, chorale)

        # === LSTM on constraints ===
        output_constraints = self.output_lstm_constraints(m)

        # === LSTM on notes ===
        # shift the note embeddings one step right (zero start frame) so the
        # prediction at position t only depends on positions < t
        offset_seq = torch.cat([
            cuda_variable(torch.zeros(batch_size, 1, self.note_embedding_dim)),
            x[:, :sequence_length - 1, :]
        ], 1)

        if self.dropout_input_prob > 0:
            offset_seq = self.drop_input(offset_seq)

        input_seq = torch.cat([offset_seq, output_constraints], 2)

        hidden = self.init_hidden(batch_size=batch_size, type='generation')

        output_gen, hidden = self.lstm_generation(input_seq, hidden)

        # time-distributed linear layer applied to each LSTM output step
        weights = [
            F.relu(self.linear_1(time_slice))
            for time_slice in output_gen.split(split_size=1, dim=1)
        ]
        weights = torch.cat(weights, 1)
        weights = weights.view(batch_size, chorale_length, num_voices,
                               self.num_units_linear)

        # no softmax here: CrossEntropyLoss already includes LogSoftmax
        weights = [
            linear_layer(voice[:, :, 0, :]) for voice, linear_layer in zip(
                weights.split(split_size=1, dim=2), self.linear_ouput_notes)
        ]
        return weights
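
The core trick in forward is the one-step right shift of the note embeddings (offset_seq): the LSTM that predicts position t only ever sees embeddings of positions before t (teacher forcing with a zero start frame). A standalone check of the shift, with arbitrary sizes:

import torch

batch_size, seq_len, emb_dim = 2, 6, 4
x = torch.randn(batch_size, seq_len, emb_dim)

# prepend a zero frame and drop the last step
offset_seq = torch.cat(
    [torch.zeros(batch_size, 1, emb_dim), x[:, :seq_len - 1, :]], 1)

assert offset_seq.shape == x.shape
assert torch.equal(offset_seq[:, 1:, :], x[:, :-1, :])
assert (offset_seq[:, 0, :] == 0).all()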
Example #4
    def output_lstm_constraints(self, flat_embedded_metadata):
        """

        :param flat_embedded_metadata: (batch_size, length, total_embedding_dim)
        :return:
        """
        batch_size = flat_embedded_metadata.size(0)
        hidden = self.init_hidden(batch_size=batch_size, type='constraint')
        # reverse the time axis so each output summarizes the constraints at
        # the current and all future positions
        idx = list(range(flat_embedded_metadata.size(1) - 1, -1, -1))
        idx = cuda_variable(torch.LongTensor(idx))
        flat_embedded_metadata = flat_embedded_metadata.index_select(1, idx)
        output_constraints, hidden = self.lstm_constraint(
            flat_embedded_metadata, hidden)
        # un-reverse the outputs back to forward time order
        output_constraints = output_constraints.index_select(1, idx)
        return output_constraints
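
Running the constraint LSTM on the reversed sequence makes each output a summary of the constraints at the current and later positions. On recent PyTorch the index_select reversal is equivalent to torch.flip; a quick standalone check:

import torch

batch, length, dim = 2, 5, 3
seq = torch.randn(batch, length, dim)

idx = torch.arange(length - 1, -1, -1)
reversed_seq = seq.index_select(1, idx)

# same result as torch.flip, and reversing twice restores the input
assert torch.equal(reversed_seq, torch.flip(seq, dims=[1]))
assert torch.equal(reversed_seq.index_select(1, idx), seq)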
Example #5
    def fill(self, ascii_input):
        """
        Generate a chorale satisfying the unary constraints given in
        ascii_input, where 'NC' marks an unconstrained position.

        :param ascii_input: one sequence of note symbols per voice
        :return: (score, gen_chorale, tensor_metadata)
        """
        self.eval()
        # constants
        num_voices = self.chorale_dataset.num_voices
        padding_size = self.chorale_dataset.num_voices * 8 * self.chorale_dataset.subdivision
        temperature = 1.
        chorale_length = len(ascii_input[0])
        # preprocessing: map note symbols to indices, and 'NC' to the
        # no-constraint index (one past each voice's vocabulary)
        constraint_metadata = [[
            d[c] if c != 'NC' else len(d) for c in ascii_voice
        ] for d, ascii_voice in zip(self.chorale_dataset.note2index_dicts,
                                    ascii_input)]

        constraint_metadata = torch.from_numpy(
            np.array(constraint_metadata)).long()

        constraint_metadata = self.chorale_dataset.extract_metadata_with_padding(
            constraint_metadata[:, :, None],
            -padding_size,
            end_tick=chorale_length + padding_size)[:, :, 0]
        constraint_metadata = cuda_variable(constraint_metadata, volatile=True)
        constraint_metadata = self.embed_chorale(
            constraint_metadata[None, :, :])

        other_metadata = cuda_variable(torch.from_numpy(
            np.array([
                metadata.generate(chorale_length + 2 * padding_size)
                for metadata in self.chorale_dataset.metadatas
            ])),
                                       volatile=True)
        # add voice index?!
        other_metadata = torch.cat(
            [other_metadata, torch.zeros_like(other_metadata)], 0)
        other_metadata = other_metadata.transpose(0, 1)
        other_metadata = other_metadata[None, None, :, :]
        other_metadata = self.embed_metadata(other_metadata)

        tensor_metadata = torch.cat([
            other_metadata,
            constraint_metadata,
        ], 2)

        # generated chorale
        gen_chorale = self.chorale_dataset.empty_chorale(chorale_length)

        output_constraints = self.output_lstm_constraints(tensor_metadata)

        hidden = self.init_hidden(batch_size=1, type='generation')

        # warm up the generation LSTM on the padding of start symbols
        for tick_index in range(padding_size):
            voice_index = tick_index % self.chorale_dataset.num_voices
            # embed the current note for this voice
            time_slice = gen_chorale[voice_index, 0]
            time_slice = torch.from_numpy(np.array([time_slice]))[None, :]
            note = self.note_embeddings[voice_index](cuda_variable(
                time_slice, volatile=True))
            time_slice = note
            # concat with first metadata
            time_slice_cat = torch.cat(
                (time_slice, output_constraints[:,
                                                tick_index:tick_index + 1, :]),
                2)

            output_gen, hidden = self.lstm_generation(time_slice_cat, hidden)

        output_constraints = output_constraints[:,
                                                padding_size:-padding_size, :]
        # generation:
        for tick_index in range(-1, chorale_length * num_voices - 1):
            voice_index = tick_index % num_voices
            time_index = (tick_index - voice_index) // num_voices
            next_voice_index = (tick_index + 1) % num_voices
            next_time_index = (tick_index + 1 - next_voice_index) // num_voices

            # tick_index == -1 is the step just before the first generated
            # note; feed the last voice's start symbol
            if tick_index == -1:
                last_start_symbol = gen_chorale[-1, 0]
                last_start_symbol = torch.from_numpy(
                    np.array([last_start_symbol]))[None, :]
                time_slice = self.note_embeddings[-1](cuda_variable(
                    (last_start_symbol), volatile=True))
            else:
                time_slice = gen_chorale[voice_index, time_index]
                time_slice = torch.from_numpy(np.array([time_slice]))[None, :]
                note = self.note_embeddings[voice_index](cuda_variable(
                    time_slice, volatile=True))
                time_slice = note

            time_slice_cat = torch.cat(
                (time_slice,
                 output_constraints[:, tick_index + 1:tick_index + 2, :]), 2)

            output_gen, hidden = self.lstm_generation(time_slice_cat, hidden)

            weights = F.relu(self.linear_1(output_gen[:, 0, :]))
            weights = self.linear_ouput_notes[next_voice_index](weights)

            # temperature scaling: the logits are multiplied by temperature,
            # so values above 1 sharpen the distribution
            weights = weights * temperature
            preds = F.softmax(weights)

            # sample the next note from the distribution (first batch element)
            preds = to_numpy(preds[0])
            new_pitch_index = np.random.choice(np.arange(
                self.num_notes_per_voice[next_voice_index]),
                                               p=preds)

            gen_chorale[next_voice_index,
                        next_time_index] = int(new_pitch_index)

        score = self.chorale_dataset.tensor_chorale_to_score(
            tensor_chorale=gen_chorale)
        return score, gen_chorale, tensor_metadata
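
Both fill and generate sample the next pitch from a temperature-scaled softmax. Note the convention used here: the logits are multiplied by the temperature, so values above 1 make the distribution sharper (the opposite of the more common logits / T). A standalone sketch with a made-up logit vector:

import numpy as np
import torch
import torch.nn.functional as F

logits = torch.randn(12)  # stand-in for the output of linear_ouput_notes
temperature = 1.5         # > 1 sharpens, since the logits are multiplied

preds = F.softmax(logits * temperature, dim=0).double().numpy()
preds /= preds.sum()  # guard against float rounding for np.random.choice
new_pitch_index = np.random.choice(np.arange(len(preds)), p=preds)
print(new_pitch_index)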
Example #6
    def generate(self,
                 original_tensor_chorale,
                 tensor_metadata,
                 constraints_location,
                 temperature=1.):
        """
        Regenerate a chorale given the original chorale, its metadata and a
        binary mask marking the constrained positions.

        :param original_tensor_chorale: (num_voices, chorale_length)
        :param tensor_metadata: (num_voices, chorale_length, num_metadatas)
        :param constraints_location: binary mask over the chorale entries
        :param temperature: sampling temperature (logits are multiplied by it)
        :return: (score, gen_chorale, tensor_metadata)
        """
        self.eval()
        original_tensor_chorale = cuda_variable(original_tensor_chorale,
                                                volatile=True)

        num_voices, chorale_length, num_metadatas = tensor_metadata.size()

        # generated chorale
        gen_chorale = self.chorale_dataset.empty_chorale(chorale_length)

        m = cuda_variable(tensor_metadata[None, :, :, :], volatile=True)
        m = self.embed_metadata(
            m,
            original_tensor_chorale[None, :, :],
            constraints_location=constraints_location[None, :, :])

        output_constraints = self.output_lstm_constraints(m)

        hidden = self.init_hidden(batch_size=1, type='generation')

        # warm up the generation LSTM on start symbols before the actual
        # generation loop
        for tick_index in range(self.chorale_dataset.num_voices * 4 *
                                self.chorale_dataset.subdivision - 1):
            voice_index = tick_index % self.chorale_dataset.num_voices
            # embed the current note for this voice
            time_slice = gen_chorale[voice_index, 0]
            time_slice = torch.from_numpy(np.array([time_slice]))[None, :]
            note = self.note_embeddings[voice_index](cuda_variable(
                time_slice, volatile=True))
            time_slice = note
            time_slice_cat = torch.cat(
                (time_slice,
                 output_constraints[:, tick_index + 1:tick_index + 2, :]), 2)

            output_gen, hidden = self.lstm_generation(time_slice_cat, hidden)

        # generation:
        for tick_index in range(-1, chorale_length * num_voices - 1):
            voice_index = tick_index % num_voices
            time_index = (tick_index - voice_index) // num_voices
            next_voice_index = (tick_index + 1) % num_voices
            next_time_index = (tick_index + 1 - next_voice_index) // num_voices

            # tick_index == -1 is the step just before the first generated
            # note; feed the last voice's start symbol
            if tick_index == -1:
                last_start_symbol = gen_chorale[-1, 0]
                last_start_symbol = torch.from_numpy(
                    np.array([last_start_symbol]))[None, :]
                time_slice = self.note_embeddings[-1](cuda_variable(
                    (last_start_symbol), volatile=True))
            else:
                time_slice = gen_chorale[voice_index, time_index]
                time_slice = torch.from_numpy(np.array([time_slice]))[None, :]
                note = self.note_embeddings[voice_index](cuda_variable(
                    time_slice, volatile=True))
                time_slice = note

            time_slice_cat = torch.cat(
                (time_slice,
                 output_constraints[:, tick_index + 1:tick_index + 2, :]), 2)

            output_gen, hidden = self.lstm_generation(time_slice_cat, hidden)

            weights = F.relu(self.linear_1(output_gen[:, 0, :]))
            weights = self.linear_ouput_notes[next_voice_index](weights)

            # temperature scaling: the logits are multiplied by temperature,
            # so values above 1 sharpen the distribution
            weights = weights * temperature
            preds = F.softmax(weights)

            # sample the next note from the distribution (first batch element)
            preds = to_numpy(preds[0])
            new_pitch_index = np.random.choice(np.arange(
                self.num_notes_per_voice[next_voice_index]),
                                               p=preds)

            gen_chorale[next_voice_index,
                        next_time_index] = int(new_pitch_index)

        score = self.chorale_dataset.tensor_chorale_to_score(
            tensor_chorale=gen_chorale)
        return score, gen_chorale, tensor_metadata
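
The generation loops in fill and generate walk a single tick index over the flattened (time, voice) grid, voice-major within each time step; tick_index == -1 stands for the step just before the first generated note. The index arithmetic in isolation:

num_voices, chorale_length = 4, 3

for tick_index in range(-1, chorale_length * num_voices - 1):
    next_voice_index = (tick_index + 1) % num_voices
    next_time_index = (tick_index + 1 - next_voice_index) // num_voices
    print(tick_index, '->', (next_voice_index, next_time_index))
# -1 -> (0, 0), 0 -> (1, 0), ..., 3 -> (0, 1), ..., 10 -> (3, 2)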