def main():
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--timesteps',
                        help="model's range (default: %(default)s)",
                        type=int, default=16)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size used during training phase (default: %(default)s)',
                        type=int, default=128)
    parser.add_argument('-s', '--samples_per_epoch',
                        help='number of samples per epoch (default: %(default)s)',
                        type=int, default=12800 * 7)
    parser.add_argument('--num_val_samples',
                        help='number of validation samples (default: %(default)s)',
                        type=int, default=1280)
    parser.add_argument('-u', '--num_units_lstm', nargs='+',
                        help='number of lstm units (default: %(default)s)',
                        type=int, default=[200, 200])
    parser.add_argument('-d', '--num_dense',
                        help='size of non recurrent hidden layers (default: %(default)s)',
                        type=int, default=200)
    parser.add_argument('-n', '--name',
                        help='model name (default: %(default)s)',
                        choices=['deepbach', 'skip', 'norelu'],
                        type=str, default='skip')
    parser.add_argument('-i', '--num_iterations',
                        help='number of gibbs iterations (default: %(default)s)',
                        type=int, default=20000)
    parser.add_argument('-t', '--train', nargs='?',
                        help='train models for N epochs (default: 15)',
                        default=0, const=15, type=int)
    parser.add_argument('-p', '--parallel', nargs='?',
                        help='number of parallel updates (default: 16)',
                        type=int, const=16, default=1)
    parser.add_argument('--overwrite',
                        help='overwrite previously computed models',
                        action='store_true')
    parser.add_argument('-m', '--midi_file', nargs='?',
                        help='relative path to midi file',
                        type=str, const='datasets/god_save_the_queen.mid')
    parser.add_argument('-l', '--length',
                        help='length of unconstrained generation',
                        type=int, default=160)
    parser.add_argument('--ext',
                        help='extension of model name',
                        type=str, default='')
    parser.add_argument('-o', '--output_file', nargs='?',
                        help='path to output file',
                        type=str, default='',
                        const='generated_examples/example.mid')
    parser.add_argument('--dataset', nargs='?',
                        help='path to dataset folder',
                        type=str, default='')
    parser.add_argument('-r', '--reharmonization', nargs='?',
                        help='reharmonization of a melody from the corpus identified by its id',
                        type=int)
    args = parser.parse_args()
    print(args)

    if args.ext:
        ext = '_' + args.ext
    else:
        ext = ''

    dataset_path = None
    pickled_dataset = BACH_DATASET

    # metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas(), KeyMetadatas(window_size=1)]
    metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas()]

    timesteps = args.timesteps
    batch_size = args.batch_size_train
    samples_per_epoch = args.samples_per_epoch
    nb_val_samples = args.num_val_samples
    num_units_lstm = args.num_units_lstm
    model_name = args.name.lower() + ext
    sequence_length = args.length
    batch_size_per_voice = args.parallel
    num_dense = args.num_dense

    if args.output_file:
        output_file = args.output_file
    else:
        output_file = None

    parallel = batch_size_per_voice > 1
    train = args.train > 0
    num_epochs = args.train
    overwrite = args.overwrite

    # Create the pickled dataset if it does not exist yet
    if not os.path.exists(pickled_dataset):
        initialization(dataset_path,
                       metadatas=metadatas,
                       voice_ids=[SOP_INDEX],
                       BACH_DATASET=BACH_DATASET)

    # load dataset
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))

    # dataset-dependent variables
    NUM_VOICES = len(voice_ids)
    num_voices = NUM_VOICES
    num_pitches = list(map(len, index2notes))
    num_iterations = args.num_iterations // batch_size_per_voice // num_voices

    # Create, train, or load models
    if not os.path.exists('models/' + model_name + '_' + str(NUM_VOICES - 1) + '.yaml'):
        create_models(model_name,
                      create_new=overwrite,
                      num_units_lstm=num_units_lstm,
                      num_dense=num_dense,
                      pickled_dataset=pickled_dataset,
                      num_voices=num_voices,
                      metadatas=metadatas,
                      timesteps=timesteps)
    if train:
        models = train_models(model_name=model_name,
                              samples_per_epoch=samples_per_epoch,
                              num_epochs=num_epochs,
                              nb_val_samples=nb_val_samples,
                              timesteps=timesteps,
                              pickled_dataset=pickled_dataset,
                              num_voices=NUM_VOICES,
                              metadatas=metadatas,
                              batch_size=batch_size)
    else:
        models = load_models(model_name, num_voices=NUM_VOICES)

    # todo to remove
    # model_name = 'skip_large'
    # timesteps = 32
    #
    # test_autoencoder(model_name='models/' + model_name + '_0',
    #                  timesteps=timesteps,
    #                  pickled_dataset=pickled_dataset)

    distance_model = load_model('models/seq2seq_masking')
    # distance_model.compile(optimizer='adam', loss='categorical_crossentropy',
    #                        metrics=['accuracy'])

    hidden_repr_model = Model(input=distance_model.input,
                              output=distance_model.layers[1].output)
    hidden_repr_model.compile(optimizer='adam',
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # create target
    left_features, _, _, _ = all_features(np.transpose(X[21], axes=(1, 0)),
                                          voice_index=0,
                                          time_index=16 * 4,
                                          timesteps=32,
                                          num_pitches=num_pitches,
                                          num_voices=num_voices)
    left_metas, central_metas, _ = all_metadatas(X_metadatas[21],
                                                 time_index=16 * 4,
                                                 timesteps=32,
                                                 metadatas=metadatas)
    inputs_target_chorale = {
        'left_features': np.array([left_features]),
        'left_metas': np.array([left_metas]),
        'central_metas': np.array([central_metas])
    }

    # show target
    score = indexed_chorale_to_score(X[21][:, 16 * 4 - 32:16 * 4],
                                     pickled_dataset=pickled_dataset)
    score.show()

    generated_chorale = gibbs(generation_models=models,
                              hidden_repr_model=hidden_repr_model,
                              inputs_target_chorale=inputs_target_chorale,
                              chorale_metas=X_metadatas[12][:150],
                              num_iterations=200,
                              pickled_dataset=pickled_dataset,
                              timesteps=timesteps)

    # convert and display the generated chorale
    score = indexed_chorale_to_score(np.transpose(generated_chorale, axes=(1, 0)),
                                     pickled_dataset=pickled_dataset)
    score.show()
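# Example invocation (sketch; the script filename `deepBach.py` and the location
# of the dataset pickle are assumptions, not confirmed by this file):
#
#     python deepBach.py --train 15 -i 20000 -p 16 --ext test
#
# This trains one model per voice for 15 epochs, then runs Gibbs sampling with
# 20000 total iterations split across 16 parallel updates per voice.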
def gibbs(generation_models=None,
          hidden_repr_model=None,
          inputs_target_chorale=None,
          chorale_metas=None,
          sequence_length=50,
          num_iterations=1000,
          timesteps=16,
          temperature=1.,
          batch_size_per_voice=16,
          pickled_dataset=BACH_DATASET):
    """
    samples from models in model_base_name
    """
    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)

    # generation models must be provided
    if generation_models is None:
        raise ValueError

    # initialization sequence
    if chorale_metas is not None:
        sequence_length = len(chorale_metas[0])

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbols + random init
        seq[:timesteps, expert_index] = [note2indexes[expert_index][START_SYMBOL]] * timesteps
        seq[timesteps:-timesteps, expert_index] = np.random.randint(num_pitches[expert_index],
                                                                    size=sequence_length)
        seq[-timesteps:, expert_index] = [note2indexes[expert_index][END_SYMBOL]] * timesteps

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [np.concatenate((np.zeros((timesteps,)),
                                                  chorale_meta,
                                                  np.zeros((timesteps,))),
                                                 axis=0)
                                  for chorale_meta in chorale_metas]
    else:
        raise NotImplementedError

    # # set target
    # hidden_repr_target = hidden_repr_model.predict(
    #     {'input_seq': inputs_target_chorale['left_features']}, batch_size=1)[0]

    min_temperature = temperature
    temperature = 2.0
    min_voice = 0
    std_dist = 0.5

    # Main loop
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * 0.995)  # simulated annealing
        std_dist = min(2., std_dist * 1.010)
        print(std_dist)
        print(temperature)
        time_indexes = {}
        probas = {}

        # recompute target
        left_features_target, _, _, _ = all_features(seq,
                                                     voice_index=0,
                                                     time_index=16 * 9,
                                                     timesteps=32,
                                                     num_pitches=num_pitches,
                                                     num_voices=num_voices)
        hidden_repr_target = hidden_repr_model.predict(
            {'input_seq': np.array([left_features_target])}, batch_size=1)[0]

        for voice_index in range(min_voice, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps, sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas}
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # make all estimations
            probas[voice_index] = generation_models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

            # update
            for batch_index in range(batch_size_per_voice):
                probas_pitch = probas[voice_index][batch_index]
                dists = np.zeros_like(probas_pitch)

                # tweak probas with the distance model
                if time_indexes[voice_index][batch_index] in range(16 * 6 - 32, 16 * 6):
                    # test all indexes
                    # create batch for parallel updates
                    batch_current = []
                    for pitch_index in range(num_pitches[voice_index]):
                        # compute current
                        # todo copy only the portion needed
                        seq_current = seq.copy()
                        seq_current[time_indexes[voice_index][batch_index], voice_index] = pitch_index

                        left_features_current, _, _, _ = all_features(
                            seq_current,
                            voice_index=0,
                            time_index=16 * 6,
                            timesteps=32,
                            num_pitches=num_pitches,
                            num_voices=num_voices)
                        left_metas_current, central_metas_current, _ = all_metadatas(
                            chorale_metadatas=extended_chorale_metas,
                            metadatas=metadatas,
                            time_index=16 * 6,
                            timesteps=32)

                        input_features_current = {'input_seq': left_features_current}
                        batch_current.append(input_features_current)

                    # convert input_features
                    batch_current = {key: np.array([input_features[key]
                                                    for input_features in batch_current])
                                     for key in batch_current[0].keys()}
                    # predict all hidden_reprs in parallel (one entry per candidate pitch)
                    hidden_reprs_current = hidden_repr_model.predict(
                        batch_current, batch_size=num_pitches[voice_index])
                    # predict all distances
                    dists = np.array(list(map(
                        lambda hidden_repr_current: spearman_rho(hidden_repr_current,
                                                                 hidden_repr_target),
                        hidden_reprs_current)))
                    # todo add sigma
                    # print('Before')
                    # print(dists.argmin(), probas_pitch.argmax())
                    # print(np.amin(dists), np.amax(dists))
                    normalized_dists = (dists - np.mean(dists)) / np.std(dists) * std_dist
                    exp_dist = np.exp(-normalized_dists)
                    exp_dist /= np.sum(exp_dist)
                    exp_dist -= 1e-10
                    # print(np.min(exp_dist), np.max(exp_dist))
                else:
                    exp_dist = np.ones_like(probas_pitch)
                    exp_dist /= np.sum(exp_dist) - 1e-7

                # todo two temperatures!
                # use temperature
                probas_pitch = np.log(probas_pitch) / temperature
                probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7

                # combine both probability distributions
                probas_pitch *= exp_dist
                # probas_pitch = np.power(probas_pitch, 0.5) * np.power(exp_dist / np.sum(exp_dist), 0.5)
                probas_pitch = probas_pitch / np.sum(probas_pitch) - 1e-7  # todo to remove

                # pitch can include slur_symbol
                pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                seq[time_indexes[voice_index][batch_index], voice_index] = pitch

    return seq[timesteps:-timesteps, :]
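# Minimal standalone sketch of the reweighting step used in `gibbs` above: the
# model's softmax output is sharpened with a temperature, and multiplied by a
# prior derived from the distances to the target hidden representation. The
# helper name `_sketch_reweight_probas` is illustrative only and is not called
# elsewhere; numpy is assumed imported as `np`, as in the rest of this module.
def _sketch_reweight_probas(probas_pitch, dists, temperature=1., std_dist=0.5):
    # distance prior: standardize, scale by std_dist, then softmax-like weight
    normalized_dists = (dists - np.mean(dists)) / np.std(dists) * std_dist
    exp_dist = np.exp(-normalized_dists)
    exp_dist /= np.sum(exp_dist)
    # temperature-sharpened model distribution
    probas = np.exp(np.log(probas_pitch) / temperature)
    probas /= np.sum(probas)
    # combine both distributions and renormalize before the multinomial draw
    combined = probas * exp_dist
    return combined / np.sum(combined)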
def canon(models=None,
          chorale_metas=None,
          sequence_length=50,
          num_iterations=1000,
          timesteps=16,
          model_base_name='models/raw_dataset/tmp/',
          temperature=1.,
          batch_size_per_voice=16,
          pickled_dataset=BACH_DATASET,
          intervals=[7],
          delays=[32]):
    """
    samples from models in model_base_name
    """
    # load dataset
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))

    # variables
    num_voices = len(voice_ids)
    assert num_voices == 2
    num_pitches = list(map(len, index2notes))
    max_delay = max(delays)
    delays = np.array([0] + delays)
    intervals = np.array([0] + intervals)

    # compute tables
    diatonic_note_names2indexes = _diatonic_note_names2indexes(index2notes)
    print(diatonic_note_names2indexes)

    # load models if they were not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)
            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    seq = np.zeros(shape=(2 * timesteps + max_delay + sequence_length, num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbols + random init
        seq[:timesteps, expert_index] = [note2indexes[expert_index][START_SYMBOL]] * timesteps
        seq[timesteps:-timesteps - max_delay, expert_index] = np.random.randint(
            num_pitches[expert_index], size=sequence_length)
        seq[-timesteps - max_delay:, expert_index] = \
            [note2indexes[expert_index][END_SYMBOL]] * (timesteps + max_delay)

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [np.concatenate((np.zeros((timesteps,)),
                                                  chorale_meta,
                                                  np.zeros((timesteps + max_delay,))),
                                                 axis=0)
                                  for chorale_meta in chorale_metas]
    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.5

    # Main loop
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * 0.9995)  # simulated annealing
        print(temperature)
        time_indexes = {}
        probas = {}
        for voice_index in range(num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                # soprano-based: the other voice follows at a fixed delay
                if voice_index == 0:
                    time_index = np.random.randint(timesteps, sequence_length + timesteps)
                else:
                    # time_index = sequence_length + timesteps * 2 - time_indexes[0][batch_index]
                    time_index = time_indexes[0][batch_index] + delays[voice_index]
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas}
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

        # parallel updates
        for batch_index in range(batch_size_per_voice):
            # create list of masks for each note name
            proba_sop = probas[SOP][batch_index]
            proba_bass = probas[BASS][batch_index]
            proba_sop_split = _split_proba(proba_sop, diatonic_note_names2indexes[SOP])
            proba_bass_split = _split_proba(proba_bass, diatonic_note_names2indexes[BASS])
            interval = intervals[1]

            # multiply probas
            canon_product_probas, index_merge2pitches = _merge_probas_canon(
                proba_sop_split, proba_bass_split, interval, diatonic_note_names2indexes)

            # draw, using temperature
            canon_product_probas /= np.sum(canon_product_probas)
            canon_product_probas = np.log(canon_product_probas) / temperature
            canon_product_probas = np.exp(canon_product_probas) / np.sum(
                np.exp(canon_product_probas)) - 1e-7

            # pitch can include slur_symbol
            index_drawn_pitches = np.argmax(np.random.multinomial(1, canon_product_probas))
            pitches = index_merge2pitches[index_drawn_pitches]

            for voice_index, pitch in enumerate(pitches):
                seq[time_indexes[voice_index][batch_index], voice_index] = pitch

    return seq[timesteps:-timesteps, :]
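# Example usage of `canon` (sketch): generate a two-voice canon at the fifth
# with a two-bar delay. The model name, chorale index and iteration count are
# illustrative; a two-voice dataset pickle and matching trained models are
# assumed to exist.
def _example_canon():
    _, X_metadatas, voice_ids, _, _, _ = pickle.load(open(BACH_DATASET, 'rb'))
    models = load_models('skip', num_voices=len(voice_ids))
    seq = canon(models=models,
                chorale_metas=X_metadatas[0],
                intervals=[7],
                delays=[32],
                num_iterations=500,
                pickled_dataset=BACH_DATASET)
    score = indexed_chorale_to_score(np.transpose(seq, axes=(1, 0)),
                                     pickled_dataset=BACH_DATASET)
    score.show()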
def parallel_gibbs(models=None,
                   melody=None,
                   chorale_metas=None,
                   sequence_length=50,
                   num_iterations=1000,
                   timesteps=16,
                   model_base_name='models/raw_dataset/tmp/',
                   temperature=1.,
                   initial_seq=None,
                   batch_size_per_voice=16,
                   parallel_updates=True,
                   pickled_dataset=BACH_DATASET):
    """
    samples from models in model_base_name
    """
    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)

    # load models if they were not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)
            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    # initialization sequence
    if melody is not None:
        sequence_length = len(melody)
        if chorale_metas is not None:
            sequence_length = min(sequence_length, len(chorale_metas[0]))
    elif chorale_metas is not None:
        sequence_length = len(chorale_metas[0])

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    for expert_index in range(num_voices):
        # Add start and end symbols + random init
        seq[:timesteps, expert_index] = [note2indexes[expert_index][START_SYMBOL]] * timesteps
        seq[timesteps:-timesteps, expert_index] = np.random.randint(num_pitches[expert_index],
                                                                    size=sequence_length)
        seq[-timesteps:, expert_index] = [note2indexes[expert_index][END_SYMBOL]] * timesteps

    if initial_seq is not None:
        seq = initial_seq
        min_voice = 1
        # works only with reharmonization

    if melody is not None:
        seq[timesteps:-timesteps, 0] = melody
        min_voice = 1
    else:
        min_voice = 0

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [np.concatenate((np.zeros((timesteps,)),
                                                  chorale_meta,
                                                  np.zeros((timesteps,))),
                                                 axis=0)
                                  for chorale_meta in chorale_metas]
    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.5

    # Main loop
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * 0.9992)  # simulated annealing
        print(temperature)
        time_indexes = {}
        probas = {}
        for voice_index in range(min_voice, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps, sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas}
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

            if not parallel_updates:
                # sequential update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

        if parallel_updates:
            # parallel update
            for voice_index in range(min_voice, num_voices):
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

    return seq[timesteps:-timesteps, :]
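# Example usage of `parallel_gibbs` (sketch): reharmonize the soprano line of
# the first chorale in the pickled dataset. The chorale index, model name and
# iteration count are illustrative; trained models matching the dataset are
# assumed to be available via `load_models`.
def _example_reharmonization():
    X, X_metadatas, voice_ids, _, _, _ = pickle.load(open(BACH_DATASET, 'rb'))
    models = load_models('skip', num_voices=len(voice_ids))
    melody = X[0][0, :]  # soprano indexes of chorale 0 (chorales are stored voice-major)
    seq = parallel_gibbs(models=models,
                         melody=melody,
                         chorale_metas=X_metadatas[0],
                         num_iterations=1000,
                         timesteps=16,
                         pickled_dataset=BACH_DATASET)
    score = indexed_chorale_to_score(np.transpose(seq, axes=(1, 0)),
                                     pickled_dataset=BACH_DATASET)
    score.show()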
# NOTE: this definition shares its name with the `parallel_gibbs` above and
# overrides it at import time; it is the variant used with ARASHI_DATASET
# (melody + chord voice), with local-sequence inputs and an extra chord-voice
# resampling pass per iteration.
def parallel_gibbs(models=None,
                   melody=None,
                   chorale_metas=None,
                   sequence_length=50,
                   num_iterations=1000,
                   timesteps=16,
                   model_base_name='models/raw_dataset/tmp/',
                   temperature=1.,
                   initial_seq=None,
                   batch_size_per_voice=16,
                   parallel_updates=True,
                   pickled_dataset=ARASHI_DATASET):
    """
    samples from models in model_base_name
    """
    X, X_metadatas, voices_ids, index2notes, note2indexes, metadatas, chordname2chordset = pickle.load(
        open(pickled_dataset, 'rb'))
    num_pitches = list(map(len, index2notes))
    num_voices = len(voices_ids)

    # load models if they were not provided
    if models is None:
        models = []
        for expert_index in range(num_voices):
            model_name = model_base_name + str(expert_index)
            model = load_model(model_name=model_name, yaml=False)
            models.append(model)

    # initialization sequence
    if melody is not None:
        sequence_length = len(melody)
        if chorale_metas is not None:
            sequence_length = min(sequence_length, len(chorale_metas[0]))
    elif chorale_metas is not None:
        sequence_length = len(chorale_metas[0])
    # print(chorale_metas)
    # print(sequence_length)

    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    # print(seq)
    for expert_index in range(num_voices):
        # Add start and end symbols + random init
        # fill the first `timesteps` entries with the index of START_SYMBOL
        seq[:timesteps, expert_index] = [note2indexes[expert_index][START_SYMBOL]] * timesteps
        # fill the middle part (the part that appears in the score) with random indexes
        seq[timesteps:-timesteps, expert_index] = np.random.randint(num_pitches[expert_index],
                                                                    size=sequence_length)
        if expert_index == 1:
            # addition: initialize the chord voice with the slur symbol everywhere
            # except on the eighth-note ticks, which keep random draws
            insert_seq = np.full(sequence_length, note2indexes[1]['__'])
            for i in range(len(insert_seq)):
                if i % 2 == 0:
                    insert_seq[i] = np.random.randint(num_pitches[expert_index])
            seq[timesteps:-timesteps, expert_index] = insert_seq
            # end of addition
        # fill the last `timesteps` entries with the index of END_SYMBOL
        seq[-timesteps:, expert_index] = [note2indexes[expert_index][END_SYMBOL]] * timesteps
    # print(seq)  # still the initial values at this point

    if initial_seq is not None:
        seq = initial_seq
        min_voice = 1
        # works only with reharmonization

    if melody is not None:
        seq[timesteps:-timesteps, 0] = melody
        min_voice = 1
    else:
        min_voice = 0

    if chorale_metas is not None:
        # chorale_metas is a list
        extended_chorale_metas = [np.concatenate((np.zeros((timesteps,)),
                                                  chorale_meta,
                                                  np.zeros((timesteps,))),
                                                 axis=0)
                                  for chorale_meta in chorale_metas]
    else:
        raise NotImplementedError
    # extended_chorale_metas is chorale_metas zero-padded on both sides

    min_temperature = temperature
    temperature = 1.5

    # Main loop
    # num_iterations is the value passed with -i divided by the number of voices
    # (and by the number of parallel updates)
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * 0.999)  # simulated annealing
        # print(temperature)
        time_indexes = {}
        probas = {}
        for voice_index in range(min_voice, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps, sequence_length + timesteps)
                time_indexes[voice_index].append(time_index)
                # print(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_local_seq, right_local_seq = make_local_sequences(
                    seq, voice_index, time_index, num_pitches, note2indexes)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas,
                                  'left_local_seqs': left_local_seq[:, :],
                                  'right_local_seqs': right_local_seq[:, :]}
                # print('=' * 10, voice_index, '=' * 10)
                # print(input_features)
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # print(batch_input_features)
            # make all estimations (softmax outputs of the model)
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)
            # print(probas[voice_index])

            if not parallel_updates:
                # sequential update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

        if parallel_updates:
            # parallel update
            for voice_index in range(min_voice, num_voices):
                for batch_index in range(batch_size_per_voice):
                    # print(batch_index)
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

        # addition: melody:chord update ratio = 1:2 -- resample the chord voice once more
        for voice_index in range(num_voices - 1, num_voices):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps, sequence_length + timesteps)
                time_index = time_index  # - time_index % 2  # addition (disabled): sample only on even ticks
                time_indexes[voice_index].append(time_index)
                # print(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)
                left_local_seq, right_local_seq = make_local_sequences(
                    seq, voice_index, time_index, num_pitches, note2indexes)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas,
                                  'left_local_seqs': left_local_seq[:, :],
                                  'right_local_seqs': right_local_seq[:, :]}
                # print('=' * 10, voice_index, '=' * 10)
                # print(input_features)
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # print(batch_input_features)
            # make all estimations (softmax outputs of the model)
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)
            # print(probas[voice_index])

            if not parallel_updates:
                # sequential update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

        if parallel_updates:
            # parallel update of the chord voice
            for voice_index in range(num_voices - 1, num_voices):
                for batch_index in range(batch_size_per_voice):
                    # print(batch_index)
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch
        # end of addition
    # print(seq)
    return seq[timesteps:-timesteps, :]
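# Minimal sketch of the chord-voice initialization used above: random indexes on
# even (eighth-note) ticks, the slur symbol '__' everywhere else. The helper name
# `_sketch_init_chord_voice` is illustrative only and is not called elsewhere.
def _sketch_init_chord_voice(sequence_length, num_pitches_voice, slur_index):
    init = np.full(sequence_length, slur_index)
    # overwrite every other tick with a random pitch index
    init[::2] = np.random.randint(num_pitches_voice, size=len(init[::2]))
    return init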
def parallel_gibbs_server(models=None,
                          start_tick=None,
                          end_tick=None,
                          start_voice_index=None,
                          end_voice_index=None,
                          chorale_metas=None,
                          num_iterations=1000,
                          timesteps=16,
                          num_voices=None,
                          temperature=1.,
                          input_chorale=None,
                          batch_size_per_voice=16,
                          parallel_updates=True,
                          metadatas=None):
    """
    input_chorale is time major
    Returns a (time, num_voices) matrix of indexes
    """
    assert models is not None
    assert input_chorale is not None
    print(models)
    print(type(models))
    sequence_length = len(input_chorale[:, 0])

    # NOTE: `note2indexes` and `num_pitches` are not parameters of this function;
    # they appear to be module-level globals set by the calling server code.

    # init
    seq = np.zeros(shape=(2 * timesteps + sequence_length, num_voices))
    seq[timesteps:-timesteps, :] = input_chorale
    for expert_index in range(num_voices):
        # Add start and end symbols
        seq[:timesteps, expert_index] = [note2indexes[expert_index][START_SYMBOL]] * timesteps
        seq[-timesteps:, expert_index] = [note2indexes[expert_index][END_SYMBOL]] * timesteps

    for expert_index in range(start_voice_index, end_voice_index + 1):
        # Randomize the selected zone
        seq[timesteps + start_tick:timesteps + end_tick, expert_index] = np.random.randint(
            num_pitches[expert_index], size=end_tick - start_tick)

    if chorale_metas is not None:
        # chorale_metas is a list
        # todo how to specify chorale_metas from musescore
        extended_chorale_metas = [np.concatenate((np.zeros((timesteps,)),
                                                  chorale_meta,
                                                  np.zeros((timesteps,))),
                                                 axis=0)
                                  for chorale_meta in chorale_metas]
    else:
        raise NotImplementedError

    min_temperature = temperature
    temperature = 1.3
    discount_factor = np.power(1. / temperature, 3 / 2 / num_iterations)

    # Main loop
    for iteration in tqdm(range(num_iterations)):
        temperature = max(min_temperature, temperature * discount_factor)  # simulated annealing
        time_indexes = {}
        probas = {}
        for voice_index in range(start_voice_index, end_voice_index + 1):
            batch_input_features = []
            time_indexes[voice_index] = []
            for batch_index in range(batch_size_per_voice):
                time_index = np.random.randint(timesteps + start_tick, timesteps + end_tick)
                time_indexes[voice_index].append(time_index)

                (left_feature, central_feature, right_feature, label) = all_features(
                    seq, voice_index, time_index, timesteps, num_pitches, num_voices)
                left_metas, central_metas, right_metas = all_metadatas(
                    chorale_metadatas=extended_chorale_metas,
                    metadatas=metadatas,
                    time_index=time_index,
                    timesteps=timesteps)

                input_features = {'left_features': left_feature[:, :],
                                  'central_features': central_feature[:],
                                  'right_features': right_feature[:, :],
                                  'left_metas': left_metas,
                                  'central_metas': central_metas,
                                  'right_metas': right_metas}
                # list of dicts: predict needs a dict of numpy arrays
                batch_input_features.append(input_features)

            # convert input_features
            batch_input_features = {key: np.array([input_features[key]
                                                   for input_features in batch_input_features])
                                    for key in batch_input_features[0].keys()}
            # make all estimations
            probas[voice_index] = models[voice_index].predict(
                batch_input_features, batch_size=batch_size_per_voice)

            if not parallel_updates:
                # sequential update
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

        if parallel_updates:
            # parallel update
            for voice_index in range(start_voice_index, end_voice_index + 1):
                for batch_index in range(batch_size_per_voice):
                    probas_pitch = probas[voice_index][batch_index]
                    # use temperature
                    probas_pitch = np.log(probas_pitch) / temperature
                    probas_pitch = np.exp(probas_pitch) / np.sum(np.exp(probas_pitch)) - 1e-7
                    # pitch can include slur_symbol
                    pitch = np.argmax(np.random.multinomial(1, probas_pitch))
                    seq[time_indexes[voice_index][batch_index], voice_index] = pitch

    return seq[timesteps:-timesteps, :]
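# Example usage of `parallel_gibbs_server` (sketch): resample the inner voices of
# bars 3-4 of an existing (time, num_voices) index matrix. `models`, `chorale`,
# `chorale_metas` and `metadatas` are assumed to be supplied by the calling
# server code; the tick range and voice indexes below are illustrative.
def _example_server_region(models, chorale, chorale_metas, metadatas):
    return parallel_gibbs_server(models=models,
                                 start_tick=16 * 2,
                                 end_tick=16 * 4,
                                 start_voice_index=1,
                                 end_voice_index=2,
                                 chorale_metas=chorale_metas,
                                 num_iterations=500,
                                 timesteps=16,
                                 num_voices=chorale.shape[1],
                                 input_chorale=chorale,
                                 metadatas=metadatas)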