def load_glove_embeddings(dim, glove_path):
    """Populate the module-level ``_glove_embeddings`` cache with GloVe
    vectors of dimension ``dim`` read from ``glove_path``.

    Special tokens are remapped to numeric indices before sorting:
    TRACK_START -> 129, <unk> -> 130, and a "rest" vector is inserted at
    index 128.  After sorting by index, the index column is stripped so
    each entry is a plain ``dim``-length vector.

    Args:
        dim: embedding dimensionality; selects ``vectors_d{dim}.txt``.
        glove_path: directory containing the vectors and ``rest.txt`` files.
    """
    # The global declaration must precede any use of the name, otherwise
    # Python 3 raises "SyntaxError: name used prior to global declaration".
    global _glove_embeddings
    # skip if already loaded
    if _glove_embeddings:
        return
    _glove_embeddings = []
    # parse glove embeddings, remapping TRACK_START to 129 and <unk> to 130
    with open(os.path.join(glove_path, 'vectors_d{}.txt'.format(dim)), 'r') as f:
        for line in f:
            split = line.split()
            if split[0] == '<unk>':
                split[0] = 130
            elif split[0] == 'TRACK_START':
                split[0] = 129
            _glove_embeddings.append(np.asarray([float(x) for x in split]))
    # add the rest vector as row index 128 (as an ndarray, consistent with
    # the rows above so np.delete below behaves uniformly)
    with open(os.path.join(glove_path, 'rest.txt')) as f:
        vec = [128] + [float(x) for x in f.read().split(' ')][0:dim]
        _glove_embeddings.append(np.asarray(vec))
    # sort the list by the leading index column (numeric key)
    _glove_embeddings.sort(key=lambda x: x[0])
    # strip the index column, leaving only the dim-length vectors
    for i, _ in enumerate(_glove_embeddings):
        _glove_embeddings[i] = np.delete(_glove_embeddings[i], 0)
    log('loaded GloVe vector embeddings with dimension: {}'.format(dim), 'VERBOSE')
def _train_model(model, callbacks):
    """Train ``model`` with the configured callbacks and return the Keras
    History object.

    Uses the generator path (``fit_generator``) when both ``train_gen`` and
    ``val_gen`` are set; otherwise trains on the in-memory ``train_data`` /
    ``val_data`` arrays.  Relies on module-level configuration:
    ``num_epochs``, ``batch_size``, ``num_midi_files``, ``train_gen``,
    ``val_gen``, ``train_data``, ``val_data``.
    """
    start_time = time.time()
    kwargs = {
        "epochs": num_epochs,
        "callbacks": callbacks,
        "verbose": 1,
    }
    if train_gen and val_gen:
        # this is a somewhat magic number which is the average number of
        # length-20 windows calculated from ~5K MIDI files from the Lakh
        # MIDI Dataset.
        magic_number = 827
        kwargs['generator'] = train_gen
        kwargs['validation_data'] = val_gen
        # Keras expects integer step counts; truncate the float division.
        kwargs['steps_per_epoch'] = int(num_midi_files * magic_number / batch_size)
        kwargs['validation_steps'] = int(num_midi_files * 0.2 * magic_number / batch_size)
        history = model.fit_generator(**kwargs)
    else:
        kwargs['x'] = train_data[0]
        kwargs['y'] = train_data[1]
        kwargs['validation_data'] = val_data
        kwargs['batch_size'] = batch_size
        # NOTE: removed a leftover pudb.set_trace() debugger breakpoint that
        # would halt any non-generator training run.
        history = model.fit(**kwargs)
    log('Finished training model in {:.2f} seconds'.format(
        time.time() - start_time), 'NOTICE')
    return history
def save(self, experiment_dir):
    """Serialize each model's architecture to ``model_<i>.json`` inside
    ``experiment_dir``.

    Only the architecture (``model.to_json()``) is written here; weights
    are persisted separately via checkpoints.
    """
    for i, model in enumerate(self.models):
        model_path = os.path.join(experiment_dir, 'model_{}.json'.format(i))
        with open(model_path, 'w') as f:
            # write first, then log — the original logged "saved" before
            # the write had happened.  Log level uppercased to 'VERBOSE'
            # for consistency with the rest of the file.
            f.write(model.to_json())
            log('saved model {} to {}'.format(i, model_path), 'VERBOSE')
def parse_midi(path):
    """Parse a MIDI file with pretty_midi.

    Best-effort: returns the ``pretty_midi.PrettyMIDI`` object on success,
    or ``None`` if the file could not be parsed (the error is logged as a
    WARNING rather than propagated).
    """
    midi = None
    # open read-only in binary mode; no write access is needed
    with open(path, 'rb') as f:
        try:
            midi = pretty_midi.PrettyMIDI(f)
            midi.remove_invalid_notes()
        except Exception as e:
            # deliberate broad catch: corrupt MIDI files are expected in
            # bulk datasets, so log and fall through to return None
            log(e, 'WARNING')
    return midi
def generate(self, seeds, window_size, length, num_to_gen, encoding='one-hot'):
    """Generate ``num_to_gen`` sequences of ``length`` steps from each model.

    Each sequence is seeded with a randomly chosen window from ``seeds`` and
    extended one prediction at a time using a sliding window.  Returns a
    list (one entry per model) of lists of generated sequences, where each
    step is a one-hot vector.
    """
    def gen(model, seed, window_size, length):
        # Generate a single sequence of `length` one-hot vectors from `seed`.
        generated = []
        # ring buffer holding the current input window
        buf = np.copy(seed).tolist()
        if encoding == 'glove-embedding':
            # model consumes embeddings, so convert the one-hot seed window
            buf = data.input.one_hot_2_glove_embedding(buf)
        while len(generated) < length:
            # add a batch dimension: (window, features) -> (1, window, features)
            arr = np.expand_dims(np.asarray(buf), 0)
            # NOTE(review): unresolved error was observed here —
            # ValueError: Error when checking : expected lstm_1_input to have
            # 3 dimensions, but got array with shape (1, 20) — suggests the
            # seed window may sometimes arrive 1-D; confirm upstream shape.
            pred = model.predict(arr)
            # argmax sampling (NOT RECOMMENDED), or...
            # index = np.argmax(pred)
            # probability-distribution sampling from the softmax output
            index = np.random.choice(range(0, len(pred[0])), p=pred[0])
            # re-encode the sampled index as a one-hot vector
            pred = np.zeros(len(pred[0]))
            pred[index] = 1
            generated.append(pred)
            if encoding == 'glove-embedding':
                # feed the embedding (not the one-hot) back into the window
                pred = data.input.one_hot_2_glove_embedding([pred])[0]
            # slide the window forward by one step
            buf.pop(0)
            buf.append(pred)
        return generated
    per_model = []
    for model in self.models:
        generated = []
        for i in range(0, num_to_gen):
            # pick a random seed window for each generated sequence
            seed = seeds[random.randint(0, len(seeds) - 1)]
            generated.append(gen(model, seed, window_size, length))
            log('generated data of length {}'.format(length), 'VERBOSE')
        per_model.append(copy.deepcopy(generated))
    return per_model
def save_midi(pm_midis, folder):
    """Write each PrettyMIDI object in ``pm_midis`` to ``folder`` as a
    zero-padded numeric ``.mid`` filename (e.g. ``0001.mid``), continuing
    after the highest existing number so previous output is not overwritten.
    """
    existing_files = glob.glob(os.path.join(folder, '*.mid'))
    _max = 0
    for file in existing_files:
        try:
            # strip the '.mid' suffix, then parse the numeric basename
            _max = max(int(os.path.basename(file[0:-4])), _max)
        except ValueError:
            # ignore .mid files whose names aren't numeric
            pass
    if _max != 0:
        log('existing midi files found in {}, starting names after {} '\
            'to avoid collision'.format(folder, _max), 'VERBOSE')
    for i, midi in enumerate(pm_midis):
        file = os.path.join(folder, '{}.mid'.format(str(_max + i + 1).zfill(4)))
        midi.write(file)
        log('saved {} to disk'.format(file), 'VERBOSE')
def create_experiment_dir(self, note_representation, encoding, experiment_dir=None, midai_root=None, mode='develop'):
    """Create a new experiment directory (with ``checkpoints/`` and
    ``generated/`` subdirectories) and return its path.

    When ``experiment_dir`` is not supplied, the path is derived as
    ``<midai_root>/trained_models/<mode>/<self.name>/<note_representation>_<encoding>/<NNN>``
    where ``NNN`` is one greater than the highest existing numeric
    experiment directory.

    Raises:
        Exception: if neither ``experiment_dir`` nor ``midai_root`` is given.
    """
    log('creating experiment directory', 'VERBOSE')
    if not experiment_dir:
        if not midai_root:
            raise Exception('midai_root not set and experiment_dir'\
                            ' not provided as an argument')
        path = [
            midai_root, 'trained_models', mode, self.name,
            '{}_{}'.format(note_representation, encoding)
        ]
        parent_dir = os.path.join(*path)
        if not os.path.exists(parent_dir):
            # level uppercased to 'NOTICE' for consistency with the rest of the file
            log('{} does not already exist, creating.'.format(parent_dir), 'NOTICE')
            os.makedirs(parent_dir)
        # keep only subdirectories of parent_dir
        experiments = [dir_ for dir_ in os.listdir(parent_dir)
                       if os.path.isdir(os.path.join(parent_dir, dir_))]
        log('{} existing directories found in {}'\
            .format(len(experiments), parent_dir), 'VERBOSE')
        most_recent_exp = 0
        for dir_ in experiments:
            try:
                most_recent_exp = max(int(dir_), most_recent_exp)
            except ValueError:
                # ignore non-numeric folders in experiments/
                pass
        experiment_dir = os.path.join(
            parent_dir, str(most_recent_exp + 1).rjust(3, '0'))
    os.makedirs(experiment_dir)
    log('created {}'.format(experiment_dir), 'VERBOSE')
    for sub in ('checkpoints', 'generated'):
        sub_dir = os.path.join(experiment_dir, sub)
        os.makedirs(sub_dir)
        log('created {}'.format(sub_dir), 'VERBOSE')
    self.experiment_dir = experiment_dir
    return experiment_dir
def load(self, path, best=True, recent=False):
    """Load model architecture(s) and best checkpoint weights from an
    experiment directory found under ``path``.

    Args:
        path: root directory to search for experiments.
        best: pick the experiment containing the checkpoint with the highest
            validation accuracy encoded in its filename.
        recent: pick the most recently created experiment instead.

    Raises:
        Exception: if both ``best`` and ``recent`` are True, or if no
            ``model_*.json`` files exist in the chosen experiment dir.
    """
    def _find_experiment_dir(path, best, recent):
        # Walk the tree collecting model JSON files (recent mode) or
        # checkpoint files (best mode), then derive the experiment dir.
        models = []
        checkpoints = []
        for dirpath, dirnames, filenames in os.walk(path):
            if recent:
                if 'model_0.json' in filenames:
                    models.append(os.path.join(dirpath, 'model_0.json'))
            else:  # best
                checkpoints.extend(
                    os.path.join(dirpath, c) for c in filenames
                    if '.hdf5' in c and 'checkpoint' in c)
        if recent:
            return os.path.dirname(max(models, key=os.path.getctime))
        checkpoint = _get_best_checkpoint(checkpoints)
        # checkpoints live in <experiment_dir>/checkpoints/, hence two dirname calls
        return os.path.dirname(os.path.dirname(checkpoint))

    def _get_best_checkpoint(checkpoints):
        # Checkpoint filenames encode val_acc in characters [-10:-5];
        # return the path with the highest value, or None when no
        # parseable checkpoint exists (previously raised IndexError).
        ranked = []
        for check in checkpoints:
            try:
                ranked.append((float(check[-10:-5]), check))
            except ValueError:
                pass
        if not ranked:
            return None
        ranked.sort(key=lambda x: -x[0])
        return ranked[0][1]

    if self.ready:
        log('model ready is True, do you really mean to load?', 'WARNING')
    if len(self.models) > 0:
        # added the missing space between the two concatenated literals
        log('models list is not empty. '
            'Have you already loaded this model?', 'WARNING')
    if best and recent:
        message = '"best" and "recent" arguments are mutually exclusive'
        log(message, 'ERROR')
        raise Exception(message)
    experiment_dir = _find_experiment_dir(path, best, recent)
    self.models = []
    num_models = len(
        glob.glob(os.path.join(experiment_dir, 'model_*.json')))
    if num_models < 1:
        message = 'No models found in {}'.format(experiment_dir)
        log(message, 'ERROR')
        raise Exception(message)
    for i in range(num_models):
        with open(os.path.join(experiment_dir,
                               'model_{}.json'.format(i)), 'r') as f:
            model = model_from_json(f.read())
            log('loaded model {} from JSON'.format(i), 'VERBOSE')
        # restore weights from the best checkpoint for this model, if any
        checkpoint_glob = os.path.join(
            experiment_dir, 'checkpoints', 'model_{}*.hdf5'.format(i))
        best_checkpoint = _get_best_checkpoint(glob.glob(checkpoint_glob))
        if best_checkpoint:
            model.load_weights(best_checkpoint)
            log('loaded model {} weights from checkpoint {}'.format(
                i, best_checkpoint), 'VERBOSE')
        self.models.append(model)
    self.experiment_dir = experiment_dir
    self.ready = True