# NOTE: these snippets rely on numpy, tqdm, Keras and the repository's own
# helpers (e.g. all_features, all_metadatas, load_models, BACH_DATASET, ...);
# only the unambiguous standard-library imports are listed here.
import argparse
import os
import pickle

import numpy as np
from tqdm import tqdm


def main():
    # parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--timesteps',
                        help="model's range (default: %(default)s)",
                        type=int,
                        default=16)
    parser.add_argument(
        '-b',
        '--batch_size_train',
        help='batch size used during training phase (default: %(default)s)',
        type=int,
        default=128)
    parser.add_argument(
        '-s',
        '--samples_per_epoch',
        help='number of samples per epoch (default: %(default)s)',
        type=int,
        default=12800 * 7)
    parser.add_argument(
        '--num_val_samples',
        help='number of validation samples (default: %(default)s)',
        type=int,
        default=1280)
    parser.add_argument('-u',
                        '--num_units_lstm',
                        nargs='+',
                        help='number of lstm units (default: %(default)s)',
                        type=int,
                        default=[200, 200])
    parser.add_argument(
        '-d',
        '--num_dense',
        help='size of non-recurrent hidden layers (default: %(default)s)',
        type=int,
        default=200)
    parser.add_argument('-n',
                        '--name',
                        help='model name (default: %(default)s)',
                        choices=['deepbach', 'skip', 'norelu'],
                        type=str,
                        default='skip')
    parser.add_argument(
        '-i',
        '--num_iterations',
        help='number of Gibbs iterations (default: %(default)s)',
        type=int,
        default=20000)
    parser.add_argument('-t',
                        '--train',
                        nargs='?',
                        help='train models for N epochs (default: 15)',
                        default=0,
                        const=15,
                        type=int)
    parser.add_argument('-p',
                        '--parallel',
                        nargs='?',
                        help='number of parallel updates (default: 16)',
                        type=int,
                        const=16,
                        default=1)
    parser.add_argument('--overwrite',
                        help='overwrite previously computed models',
                        action='store_true')
    parser.add_argument('-m',
                        '--midi_file',
                        nargs='?',
                        help='relative path to midi file',
                        type=str,
                        const='datasets/god_save_the_queen.mid')
    parser.add_argument('-l',
                        '--length',
                        help='length of unconstrained generation',
                        type=int,
                        default=160)
    parser.add_argument('--ext',
                        help='extension of model name',
                        type=str,
                        default='')
    parser.add_argument('-o',
                        '--output_file',
                        nargs='?',
                        help='path to output file',
                        type=str,
                        default='',
                        const='generated_examples/example.mid')
    parser.add_argument('--dataset',
                        nargs='?',
                        help='path to dataset folder',
                        type=str,
                        default='')
    parser.add_argument(
        '-r',
        '--reharmonization',
        nargs='?',
        help='reharmonization of a melody from the corpus identified by its id',
        type=int)
    args = parser.parse_args()
    print(args)

    if args.ext:
        ext = '_' + args.ext
    else:
        ext = ''

    dataset_path = None
    pickled_dataset = BACH_DATASET

    # metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas(), KeyMetadatas(window_size=1)]
    metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas()]

    timesteps = args.timesteps
    batch_size = args.batch_size_train
    samples_per_epoch = args.samples_per_epoch
    nb_val_samples = args.num_val_samples
    num_units_lstm = args.num_units_lstm
    model_name = args.name.lower() + ext
    sequence_length = args.length
    batch_size_per_voice = args.parallel
    num_dense = args.num_dense
    if args.output_file:
        output_file = args.output_file
    else:
        output_file = None

    parallel = batch_size_per_voice > 1
    train = args.train > 0
    num_epochs = args.train
    overwrite = args.overwrite

    # Create pickled dataset
    if not os.path.exists(pickled_dataset):
        initialization(dataset_path,
                       metadatas=metadatas,
                       voice_ids=[SOP_INDEX],
                       BACH_DATASET=BACH_DATASET)

    # load dataset
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))

    # dataset-dependent variables
    NUM_VOICES = len(voice_ids)
    num_voices = NUM_VOICES
    num_pitches = list(map(len, index2notes))
    num_iterations = args.num_iterations // batch_size_per_voice // num_voices

    # Create, train or load models
    if not os.path.exists('models/' + model_name + '_' + str(NUM_VOICES - 1) +
                          '.yaml'):
        create_models(model_name,
                      create_new=overwrite,
                      num_units_lstm=num_units_lstm,
                      num_dense=num_dense,
                      pickled_dataset=pickled_dataset,
                      num_voices=num_voices,
                      metadatas=metadatas,
                      timesteps=timesteps)
    if train:
        models = train_models(model_name=model_name,
                              samples_per_epoch=samples_per_epoch,
                              num_epochs=num_epochs,
                              nb_val_samples=nb_val_samples,
                              timesteps=timesteps,
                              pickled_dataset=pickled_dataset,
                              num_voices=NUM_VOICES,
                              metadatas=metadatas,
                              batch_size=batch_size)
    else:
        models = load_models(model_name, num_voices=NUM_VOICES)

    # todo to remove
    # model_name = 'skip_large'
    # timesteps = 32
    #
    # test_autoencoder(model_name='models/' + model_name + '_0',
    #                  timesteps=timesteps,
    #                  pickled_dataset=pickled_dataset)

    distance_model = load_model('models/seq2seq_masking')
    # distance_model.compile(optimizer='adam', loss='categorical_crossentropy',
    #                        metrics=['accuracy'])

    hidden_repr_model = Model(input=distance_model.input,
                              output=distance_model.layers[1].output)
    hidden_repr_model.compile(optimizer='adam',
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # create target
    left_features, _, _, _ = all_features(np.transpose(X[21], axes=(1, 0)),
                                          voice_index=0,
                                          time_index=16 * 4,
                                          timesteps=32,
                                          num_pitches=num_pitches,
                                          num_voices=num_voices)
    left_metas, central_metas, _ = all_metadatas(X_metadatas[21],
                                                 time_index=16 * 4,
                                                 timesteps=32,
                                                 metadatas=metadatas)

    inputs_target_chorale = {
        'left_features': np.array([left_features]),
        'left_metas': np.array([left_metas]),
        'central_metas': np.array([central_metas])
    }

    # show target
    score = indexed_chorale_to_score(X[21][:, 16 * 4 - 32:16 * 4],
                                     pickled_dataset=pickled_dataset)
    score.show()

    generated_chorale = gibbs(generation_models=models,
                              hidden_repr_model=hidden_repr_model,
                              inputs_target_chorale=inputs_target_chorale,
                              chorale_metas=X_metadatas[12][:150],
                              num_iterations=200,
                              pickled_dataset=pickled_dataset,
                              timesteps=timesteps)

    # convert
    score = indexed_chorale_to_score(np.transpose(generated_chorale,
                                                  axes=(1, 0)),
                                     pickled_dataset=pickled_dataset)
    score.show()
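

# A minimal entry point is assumed here (the original snippet does not show
# how main() is invoked), so the file can be run directly as a script.
if __name__ == '__main__':
    main()
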
def gibbs(generation_models=None,
          hidden_repr_model=None,
          inputs_target_chorale=None,
          chorale_metas=None,
          sequence_length=50,
          num_iterations=1000,
          timesteps=16,
          temperature=1.,
          batch_size_per_voice=16,
          pickled_dataset=BACH_DATASET):
    """
    samples from models in model_base_name
    """

    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)
    # generation models must be provided
    if generation_models is None:
        raise ValueError('generation_models must be provided')

    # initialization sequence
    if chorale_metas is not None:
        sequence_length = len(chorale_metas[0])

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbol + random init
        seq[:timesteps,
            expert_index] = [note2indexes[expert_index][START_SYMBOL]
                             ] * timesteps
        seq[timesteps:-timesteps,
            expert_index] = np.random.randint(num_pitches[expert_index],
                                              size=sequence_length)

        seq[-timesteps:,
            expert_index] = [note2indexes[expert_index][END_SYMBOL]
                             ] * timesteps

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [
            np.concatenate((np.zeros(
                (timesteps, )), chorale_meta, np.zeros((timesteps, ))),
                           axis=0) for chorale_meta in chorale_metas
        ]

    else:
        raise NotImplementedError

    # # set target
    # hidden_repr_target = hidden_repr_model.predict({'input_seq': inputs_target_chorale['left_features']}, batch_size=1)[0]

    min_temperature = temperature
    temperature = 2.0
    min_voice = 0
    std_dist = 0.5
    # Main loop
    for iteration in tqdm(range(num_iterations)):

        temperature = max(min_temperature, temperature * 0.995)  # simulated annealing
        std_dist = min(2., std_dist * 1.010)
        print(std_dist)
        print(temperature)

        time_indexes = {}
        probas = {}

        # recompute target
        left_features_target, _, _, _ = all_features(seq,
                                                     voice_index=0,
                                                     time_index=16 * 9,
                                                     timesteps=32,
                                                     num_pitches=num_pitches,
                                                     num_voices=num_voices)
        hidden_repr_target = \
            hidden_repr_model.predict(
                {'input_seq': np.array([left_features_target])}, batch_size=1)[
                0]

        for voice_index in range(min_voice, num_voices):
            batch_input_features = []

            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps,
                                               sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas
                }

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            # make all estimations
            probas[voice_index] = generation_models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

            # update
            for batch_index in range(batch_size_per_voice):
                probas_pitch = probas[voice_index][batch_index]
                dists = np.zeros_like(probas_pitch)

                # tweak probas with distance model
                if time_indexes[voice_index][batch_index] in range(
                        16 * 6 - 32, 16 * 6):
                    # test all indexes
                    # create batch for parallel updates
                    batch_current = []
                    for pitch_index in range(num_pitches[voice_index]):
                        # compute current

                        # todo copy only the portion needed
                        seq_current = seq.copy()
                        seq_current[time_indexes[voice_index][batch_index],
                                    voice_index] = pitch_index

                        left_features_current, _, _, _ = all_features(
                            seq_current,
                            voice_index=0,
                            time_index=16 * 6,
                            timesteps=32,
                            num_pitches=num_pitches,
                            num_voices=num_voices)

                        left_metas_current, central_metas_current, _ = all_metadatas(
                            chorale_metadatas=extended_chorale_metas,
                            metadatas=metadatas,
                            time_index=16 * 6,
                            timesteps=32)

                        input_features_current = {
                            'input_seq': left_features_current,
                        }

                        batch_current.append(input_features_current)

                    # convert input_features
                    batch_current = {
                        key: np.array([
                            input_features[key]
                            for input_features in batch_current
                        ])
                        for key in batch_current[0].keys()
                    }

                    # predict all hidden_repr in parallel
                    # batch_current is a dict here, so pass the true batch size
                    hidden_reprs_current = hidden_repr_model.predict(
                        batch_current, batch_size=num_pitches[voice_index])
                    # predict all distances
                    dists = np.array(
                        list(
                            map(
                                lambda hidden_repr_current: spearman_rho(
                                    hidden_repr_current, hidden_repr_target),
                                hidden_reprs_current)))
                    # todo add sigma
                    # print('Before')
                    # print(dists.argmin(), probas_pitch.argmax())
                    # print(np.amin(dists), np.amax(dists))
                    normalized_dists = (
                        dists - np.mean(dists)) / np.std(dists) * std_dist
                    exp_dist = np.exp(-normalized_dists)

                    exp_dist /= np.sum(exp_dist)
                    exp_dist -= 1e-10
                    # print(np.min(exp_dist), np.max(exp_dist))
                else:
                    exp_dist = np.ones_like(probas_pitch)
                    exp_dist /= np.sum(exp_dist) - 1e-7

                # todo two temperatures!
                # use temperature
                probas_pitch = np.log(probas_pitch) / temperature
                probas_pitch = np.exp(probas_pitch) / np.sum(
                    np.exp(probas_pitch)) - 1e-7

                # combine both probability distributions
                probas_pitch *= exp_dist
                # probas_pitch = np.power(probas_pitch, 0.5) * np.power(exp_dist / np.sum(exp_dist), 0.5)
                probas_pitch = probas_pitch / np.sum(probas_pitch) - 1e-7

                # todo to remove

                # pitch can include slur_symbol
                pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                seq[time_indexes[voice_index][batch_index],
                    voice_index] = pitch

    return seq[timesteps:-timesteps, :]
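

# Illustrative sketch (not part of the original code): the update loops above
# repeatedly apply the same temperature-scaled sampling of a pitch index from
# a softmax output before writing it back into seq.
def sample_with_temperature(probas_pitch, temperature=1.0):
    """Draw one index from a probability vector sharpened or flattened by temperature."""
    probas_pitch = np.log(probas_pitch) / temperature
    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
    # the drawn index can also correspond to the slur symbol
    return np.argmax(np.random.multinomial(1, probas_pitch))
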
Example #3
def canon(
    models=None,
    chorale_metas=None,
    sequence_length=50,
    num_iterations=1000,
    timesteps=16,
    model_base_name='models/raw_dataset/tmp/',
    temperature=1.,
    batch_size_per_voice=16,
    pickled_dataset=BACH_DATASET,
    intervals=[7],
    delays=[32],
):
    """
    samples from models in model_base_name
    """
    # load dataset
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))

    # variables
    num_voices = len(voice_ids)
    assert num_voices == 2

    num_pitches = list(map(len, index2notes))
    max_delay = max(delays)
    delays = np.array([0] + delays)
    intervals = np.array([0] + intervals)

    # compute tables
    diatonic_note_names2indexes = _diatonic_note_names2indexes(index2notes)
    print(diatonic_note_names2indexes)
    # load models if not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)

            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    seq = np.zeros(shape=(2 * timesteps + max_delay + sequence_length,
                          num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbol + random init
        seq[:timesteps,
            expert_index] = [note2indexes[expert_index][START_SYMBOL]
                             ] * timesteps
        seq[timesteps:-timesteps - max_delay,
            expert_index] = np.random.randint(num_pitches[expert_index],
                                              size=sequence_length)

        seq[-timesteps - max_delay:,
            expert_index] = [note2indexes[expert_index][END_SYMBOL]
                             ] * (timesteps + max_delay)

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [
            np.concatenate((np.zeros((timesteps, )), chorale_meta,
                            np.zeros((timesteps + max_delay, ))),
                           axis=0) for chorale_meta in chorale_metas
        ]

    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.5

    # Main loop
    for iteration in tqdm(range(num_iterations)):

        temperature = max(min_temperature, temperature * 0.9995)  # simulated annealing
        print(temperature)

        time_indexes = {}
        probas = {}

        for voice_index in range(num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                # soprano based
                if voice_index == 0:
                    time_index = np.random.randint(timesteps,
                                                   sequence_length + timesteps)
                else:
                    # time_index = sequence_length + timesteps * 2 - time_indexes[0][batch_index]
                    time_index = time_indexes[0][batch_index] + delays[
                        voice_index]

                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas
                }

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

        # parallel updates
        for batch_index in range(batch_size_per_voice):
            # create list of masks for each note name
            proba_sop = probas[SOP][batch_index]
            proba_bass = probas[BASS][batch_index]

            proba_sop_split = _split_proba(proba_sop,
                                           diatonic_note_names2indexes[SOP])
            proba_bass_split = _split_proba(proba_bass,
                                            diatonic_note_names2indexes[BASS])

            interval = intervals[1]

            # multiply probas
            canon_product_probas, index_merge2pitches = _merge_probas_canon(
                proba_sop_split, proba_bass_split, interval,
                diatonic_note_names2indexes)

            # draw
            # use temperature
            canon_product_probas /= np.sum(canon_product_probas)
            canon_product_probas = np.log(canon_product_probas) / temperature
            canon_product_probas = np.exp(canon_product_probas) / np.sum(
                np.exp(canon_product_probas)) - 1e-7

            # pitch can include slur_symbol
            index_drawn_pitches = np.argmax(
                np.random.multinomial(1, canon_product_probas))
            pitches = index_merge2pitches[index_drawn_pitches]
            for voice_index, pitch in enumerate(pitches):
                seq[time_indexes[voice_index][batch_index],
                    voice_index] = pitch

    return seq[timesteps:-timesteps, :]
Example #4
def parallel_gibbs(models=None,
                   melody=None,
                   chorale_metas=None,
                   sequence_length=50,
                   num_iterations=1000,
                   timesteps=16,
                   model_base_name='models/raw_dataset/tmp/',
                   temperature=1.,
                   initial_seq=None,
                   batch_size_per_voice=16,
                   parallel_updates=True,
                   pickled_dataset=BACH_DATASET):
    """
    samples from models in model_base_name
    """

    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)
    # load models if not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)

            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    # initialization sequence
    if melody is not None:
        sequence_length = len(melody)
        if chorale_metas is not None:
            sequence_length = min(sequence_length, len(chorale_metas[0]))
    elif chorale_metas is not None:
        sequence_length = len(chorale_metas[0])

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbol + random init
        seq[:timesteps,
            expert_index] = [note2indexes[expert_index][START_SYMBOL]
                             ] * timesteps
        seq[timesteps:-timesteps,
            expert_index] = np.random.randint(num_pitches[expert_index],
                                              size=sequence_length)

        seq[-timesteps:,
            expert_index] = [note2indexes[expert_index][END_SYMBOL]
                             ] * timesteps

    if initial_seq is not None:
        seq = initial_seq
        min_voice = 1
        # works only with reharmonization

    if melody is not None:
        seq[timesteps:-timesteps, 0] = melody
        min_voice = 1
    else:
        min_voice = 0

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [
            np.concatenate((np.zeros(
                (timesteps, )), chorale_meta, np.zeros((timesteps, ))),
                           axis=0) for chorale_meta in chorale_metas
        ]

    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.5

    # Main loop
    for iteration in tqdm(range(num_iterations)):

        temperature = max(min_temperature, temperature * 0.9992)  # simulated annealing
        print(temperature)

        time_indexes = {}
        probas = {}
        for voice_index in range(min_voice, num_voices):
            batch_input_features = []

            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps,
                                               sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas
                }

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)
            if not parallel_updates:
                # update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

        if parallel_updates:
            # update
            for voice_index in range(min_voice, num_voices):
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

    return seq[timesteps:-timesteps, :]
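

# Hypothetical usage sketch (not from the original snippet): reharmonize a
# soprano melody with the models loaded in main(); argument names mirror the
# signature of parallel_gibbs above, and the numeric values are placeholders.
def reharmonize_example(models, melody, chorale_metas):
    return parallel_gibbs(models=models,
                          melody=melody,
                          chorale_metas=chorale_metas,
                          num_iterations=5000,
                          timesteps=16,
                          batch_size_per_voice=16,
                          parallel_updates=True,
                          pickled_dataset=BACH_DATASET)
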
Example #5
def parallel_gibbs(models=None,
                   melody=None,
                   chorale_metas=None,
                   sequence_length=50,
                   num_iterations=1000,
                   timesteps=16,
                   model_base_name='models/raw_dataset/tmp/',
                   temperature=1.,
                   initial_seq=None,
                   batch_size_per_voice=16,
                   parallel_updates=True,
                   pickled_dataset=ARASHI_DATASET):
    """
    samples from models in model_base_name
    """

    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas, chordname2chordset = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)
    # load models if not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)

            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    # initialization sequence
    if melody is not None:
        sequence_length = len(melody)
        if chorale_metas is not None:
            sequence_length = min(sequence_length, len(chorale_metas[0]))
    elif chorale_metas is not None:
        sequence_length = len(chorale_metas[0])

    #print(chorale_metas)
    #print(sequence_length)

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    #print(seq)
    for expert_index in range(num_voices):
        # Add start and end symbol + random init
        seq[:timesteps,
            expert_index] = [note2indexes[expert_index][START_SYMBOL]
                             ] * timesteps
        # fill the first timesteps entries with the index corresponding to START_SYMBOL
        seq[timesteps:-timesteps,
            expert_index] = np.random.randint(num_pitches[expert_index],
                                              size=sequence_length)
        if expert_index == 1:
            # addition: the chord part can reasonably be initialized like this;
            # fill everything except the eighth-note positions with the slur
            # symbol from the start
            insert_seq = np.full(sequence_length, note2indexes[1]['__'])
            for i in range(len(insert_seq)):
                if i % 2 == 0:
                    insert_seq[i] = np.random.randint(
                        num_pitches[expert_index])
            seq[timesteps:-timesteps, expert_index] = insert_seq
        # end of addition
        # the middle part (what ends up in the score) is filled with random indexes for now
        seq[-timesteps:,
            expert_index] = [note2indexes[expert_index][END_SYMBOL]
                             ] * timesteps
        # fill the last timesteps entries with the index corresponding to END_SYMBOL

    # print(seq)  # still just the initial values at this point

    if initial_seq is not None:
        seq = initial_seq
        min_voice = 1
        # works only with reharmonization

    if melody is not None:
        seq[timesteps:-timesteps, 0] = melody
        min_voice = 1
    else:
        min_voice = 0

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [
            np.concatenate((np.zeros(
                (timesteps, )), chorale_meta, np.zeros((timesteps, ))),
                           axis=0) for chorale_meta in chorale_metas
        ]
    else:
        raise NotImplementedError
    # extended_chorale_metas is chorale_metas zero-padded with timesteps zeros on each side

    min_temperature = temperature
    temperature = 1.5

    # Main loop: num_iterations is the value given with -i divided by the number of parts in the generated piece
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * 0.999)  # simulated annealing
        #print(temperature)

        time_indexes = {}
        probas = {}
        for voice_index in range(min_voice, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps,
                                               sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)
                #print(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_local_seq, right_local_seq = make_local_sequences(
                    seq, voice_index, time_index, num_pitches, note2indexes)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas,
                    'left_local_seqs': left_local_seq[:, :],
                    'right_local_seqs': right_local_seq[:, :]
                }
                #print('='*10, voice_index, '='*10)
                #print(input_features)

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            #print(batch_input_features)

            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features,
                batch_size=batch_size_per_voice)  # softmax output of the model
            #print(probas[voice_index])

            if not parallel_updates:
                # update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

        if parallel_updates:
            # update

            for voice_index in range(min_voice, num_voices):
                for batch_index in range(batch_size_per_voice):
                    #print(batch_index)
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

        # addition: melody : chords = 1 : 2, so the chord part is resampled once more
        for voice_index in range(num_voices - 1, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps,
                                               sequence_length + timesteps)
                time_index = time_index  # - time_index % 2  # addition (disabled): sample only at multiples of 4
                time_indexes[voice_index].append(time_index)
                #print(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)
                left_local_seq, right_local_seq = make_local_sequences(
                    seq, voice_index, time_index, num_pitches, note2indexes)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas,
                    'left_local_seqs': left_local_seq[:, :],
                    'right_local_seqs': right_local_seq[:, :]
                }
                #print('='*10, voice_index, '='*10)
                #print(input_features)

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            #print(batch_input_features)

            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features,
                batch_size=batch_size_per_voice)  # softmax output of the model
            #print(probas[voice_index])

            if not parallel_updates:
                # update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

        if parallel_updates:
            # update

            for voice_index in range(num_voices - 1, num_voices):
                for batch_index in range(batch_size_per_voice):
                    #print(batch_index)
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch
        # end of addition

    #print(seq)

    return seq[timesteps:-timesteps, :]
Example #6
def parallel_gibbs_server(models=None,
                          start_tick=None,
                          end_tick=None,
                          start_voice_index=None,
                          end_voice_index=None,
                          chorale_metas=None,
                          num_iterations=1000,
                          timesteps=16,
                          num_voices=None,
                          temperature=1.,
                          input_chorale=None,
                          batch_size_per_voice=16,
                          parallel_updates=True,
                          metadatas=None):
    """
    input_chorale is time major
    Returns (time, num_voices) matrix of indexes

    """
    assert models is not None
    assert input_chorale is not None

    print(models)
    print(type(models))

    sequence_length = len(input_chorale[:, 0])

    # init
    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    seq[timesteps:-timesteps, :] = input_chorale

    # note2indexes and num_pitches below are assumed to be module-level globals
    # loaded from the pickled dataset (they are not passed to this function)
    for expert_index in range(num_voices):
        # Add start and end symbol
        seq[:timesteps,
            expert_index] = [note2indexes[expert_index][START_SYMBOL]
                             ] * timesteps
        seq[-timesteps:,
            expert_index] = [note2indexes[expert_index][END_SYMBOL]
                             ] * timesteps
    for expert_index in range(start_voice_index, end_voice_index + 1):
        # Randomize selected zone
        seq[timesteps + start_tick:timesteps + end_tick,
            expert_index] = np.random.randint(num_pitches[expert_index],
                                              size=end_tick - start_tick)

    if chorale_metas is not None:
        # chorale_metas is a list
        # todo how to specify chorale_metas from musescore
        extended_chorale_metas = [
            np.concatenate((np.zeros(
                (timesteps, )), chorale_meta, np.zeros((timesteps, ))),
                           axis=0) for chorale_meta in chorale_metas
        ]

    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.3
    discount_factor = np.power(1. / temperature, 3 / 2 / num_iterations)
    # Main loop
    for iteration in tqdm(range(num_iterations)):

        temperature = max(min_temperature,
                          temperature * discount_factor)  # Simulated annealing

        time_indexes = {}
        probas = {}
        for voice_index in range(start_voice_index, end_voice_index + 1):

            batch_input_features = []

            time_indexes[voice_index] = []

            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps + start_tick,
                                               timesteps + end_tick)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature,
                 label) = all_features(seq, voice_index, time_index, timesteps,
                                       num_pitches, num_voices)

                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {
                    'left_features': left_feature[:, :],
                    'central_features': central_feature[:],
                    'right_features': right_feature[:, :],
                    'left_metas': left_metas,
                    'central_metas': central_metas,
                    'right_metas': right_metas
                }

                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {
                key: np.array([
                    input_features[key]
                    for input_features in batch_input_features
                ])
                for key in batch_input_features[0].keys()
            }
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)
            if not parallel_updates:
                # update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

        if parallel_updates:
            # update
            for voice_index in range(start_voice_index, end_voice_index + 1):
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]

                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(
                        np.exp(probas_pitch)) - 1e-7

                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))

                    seq[time_indexes[voice_index][batch_index],
                        voice_index] = pitch

    return seq[timesteps:-timesteps, :]
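

# Illustrative sketch (not from the original code): the annealing schedule used
# in parallel_gibbs_server starts hot and decays geometrically at every Gibbs
# iteration, never dropping below the caller-supplied minimum temperature.
def annealing_schedule(num_iterations, start_temperature=1.3, min_temperature=1.0):
    """Yield the temperature applied at each Gibbs iteration."""
    discount_factor = np.power(1. / start_temperature, 3 / 2 / num_iterations)
    temperature = start_temperature
    for _ in range(num_iterations):
        temperature = max(min_temperature, temperature * discount_factor)
        yield temperature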