def main(unused_argv):
  """Configure hyperparameters, set up the train dir, and start training."""
  absl.flags.FLAGS.alsologtostderr = True
  # Hyperparameters are layered: config-module values first, then any
  # command-line JSON overrides, then library defaults for what remains.
  flags = lib_flags.Flags()
  if FLAGS.config:
    config_module = importlib.import_module(
        'magenta.models.gansynth.configs.{}'.format(FLAGS.config))
    flags.load(config_module.hparams)
  flags.load_json(FLAGS.hparams)
  lib_model.set_flags(flags)
  print('Flags:')
  flags.print_values()
  # Ensure the training directory exists before writing anything into it.
  flags['train_root_dir'] = util.expand_path(flags['train_root_dir'])
  if not tf.gfile.Exists(flags['train_root_dir']):
    tf.gfile.MakeDirs(flags['train_root_dir'])
  # Persist the resolved flags so the trained model can be reloaded later.
  fname = os.path.join(flags['train_root_dir'], 'experiment.json')
  with tf.gfile.Open(fname, 'w') as f:
    json.dump(flags, f)  # pytype: disable=wrong-arg-types
  # Run training.
  run(flags)
def load_from_path(cls, path, flags=None): """Instantiate a Model for eval using flags and weights from a saved model. Currently only supports models trained by the experiment runner, since Model itself doesn't save flags (so we rely the runner's experiment.json) Args: path: Path to model directory (which contains stage folders). flags: Additional flags for loading the model. Raises: ValueError: If folder of path contains no stage folders. Returns: model: Instantiated model with saved weights. """ # Read the flags from the experiment.json file # experiment.json is in the folder above # Remove last '/' if present path = path.rstrip('/') if not path.startswith('gs://'): path = util.expand_path(path) if flags is None: flags = lib_flags.Flags() flags['train_root_dir'] = path experiment_json_path = os.path.join(path, 'experiment.json') try: # Read json to dict with tf.gfile.GFile(experiment_json_path, 'r') as f: experiment_json = json.load(f) # Load dict as a Flags() object flags.load(experiment_json) except Exception as e: # pylint: disable=broad-except print("Warning! Couldn't load model flags from experiment.json") print(e) # Set default flags set_flags(flags) flags.print_values() # Get list_of_directories train_sub_dirs = sorted([ sub_dir for sub_dir in tf.gfile.ListDirectory(path) if sub_dir.startswith('stage_') ]) if not train_sub_dirs: raise ValueError( 'No stage folders found, is %s the correct model path?' % path) # Get last checkpoint last_stage_dir = train_sub_dirs[-1] stage_id = int(last_stage_dir.split('_')[-1].strip('/')) weights_dir = os.path.join(path, last_stage_dir) ckpt = tf.train.latest_checkpoint(weights_dir) print('Load model from {}'.format(ckpt)) # Load the model, use eval_batch_size if present batch_size = flags.get('eval_batch_size', train_util.get_batch_size(stage_id, **flags)) model = cls(stage_id, batch_size, flags) model.saver.restore(model.sess, ckpt) return model
def main(unused_argv):
  """Sample random instruments and render interpolated notes to wav files."""
  absl.flags.FLAGS.alsologtostderr = True
  # Load the model.
  flags = lib_flags.Flags({'batch_size_schedule': [FLAGS.batch_size]})
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  # Make an output directory if it doesn't exist.
  output_dir = util.expand_path(FLAGS.output_dir)
  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)
  # Draw 4 random latent "instrument" vectors and invent names for them.
  z_instruments = model.generate_z(4)
  instrument_names = [
      gen_instrument_name(random.randint(3, 8)) for _ in range(4)
  ]
  # Build the interpolation grid over the requested pitches.
  res = FLAGS.resolution
  pitches = parse_pitches(FLAGS.pitches)
  xy_grid = make_grid(res)
  print()
  print("resolution =", res)
  print("pitches =", pitches)
  print("z_instruments.shape =", z_instruments.shape)
  print("z_instruments =", z_instruments)
  print("instrument_names =", instrument_names)
  z_notes, note_metas = get_z_notes(z_instruments, instrument_names, xy_grid)
  print("z_notes.shape =", z_notes.shape)
  # Repeat every interpolated latent once per requested pitch.
  z_notes_rep = np.repeat(z_notes, len(pitches), axis=0)
  print("z_notes_rep.shape =", z_notes_rep.shape)
  pitches_rep = pitches * z_notes.shape[0]
  print("len(pitches_rep) =", len(pitches_rep))
  print("generating {} samples,,".format(len(z_notes_rep)))
  audio_notes = model.generate_samples_from_z(z_notes_rep, pitches_rep)
  # Attach the pitch to each note's metadata, mirroring the repeat above.
  audio_metas = []
  for note_meta in note_metas:
    for pitch in pitches:
      meta = dict(note_meta)
      meta["pitch"] = pitch
      audio_metas.append(meta)
  print("audio_notes.shape =", audio_notes.shape)
  print("len(audio_metas) =", len(audio_metas))
  # Write one wav per generated note, named from its metadata.
  for wave, meta in zip(audio_notes, audio_metas):
    name = meta_to_name(meta)
    fn = os.path.join(output_dir, "gen_{}.wav".format(name))
    gu.save_wav(wave, fn)
def main(unused_argv):
  """Generate audio from a MIDI file, or a batch of random samples."""
  absl.flags.FLAGS.alsologtostderr = True
  # Load the model.
  flags = lib_flags.Flags({
      'batch_size_schedule': [FLAGS.batch_size],
      'tfds_data_dir': FLAGS.tfds_data_dir
  })
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  # Make an output directory if it doesn't exist.
  output_dir = util.expand_path(FLAGS.output_dir)
  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)
  if not FLAGS.midi_file:
    # No MIDI provided: just generate a batch of random sounds.
    waves = model.generate_samples(FLAGS.batch_size)
    for i in range(len(waves)):
      fname = os.path.join(output_dir, 'generated_{}.wav'.format(i))
      gu.save_wav(waves[i], fname)
    return
  # A MIDI file was provided: synthesize interpolations across the clip.
  unused_ns, notes = gu.load_midi(FLAGS.midi_file)
  # Distribute latent vectors linearly in time.
  z_instruments, t_instruments = gu.get_random_instruments(
      model,
      notes['end_times'][-1],
      secs_per_instrument=FLAGS.secs_per_instrument)
  # Get latent vectors for each note.
  z_notes = gu.get_z_notes(notes['start_times'], z_instruments, t_instruments)
  # Generate audio for each note.
  print('Generating {} samples...'.format(len(z_notes)))
  audio_notes = model.generate_samples_from_z(z_notes, notes['pitches'])
  # Combine the notes into a single clip and write it out.
  audio_clip = gu.combine_notes(audio_notes, notes['start_times'],
                                notes['end_times'], notes['velocities'])
  fname = os.path.join(output_dir, 'generated_clip.wav')
  gu.save_wav(audio_clip, fname)
def generate_audio(notes: dict, seconds_per_instrument: int = 5,
                   batch_size: int = 16,
                   checkpoint_dir: str = "checkpoints/acoustic_only") \
        -> np.ndarray:
  """
  Render a single audio clip from a parsed MIDI notes dictionary.

  Random "instruments" (points in latent space, i.e. sounds of a given
  timbre) are sampled and spread linearly over the clip's duration; each
  note is then synthesized from the latent vector interpolated at its
  start time.

  :param notes: the notes dictionary, must come from
      magenta.models.gansynth.lib.generate_util.load_midi
  :param seconds_per_instrument: the number of seconds for each instrument
  :param batch_size: the batch size for the model
  :param checkpoint_dir: the checkpoint folder
  :return: the combined audio clip as a numpy array
  """
  model = lib_model.Model.load_from_path(
      checkpoint_dir, lib_flags.Flags({"batch_size_schedule": [batch_size]}))
  # Distribute latent vectors linearly in time.
  z_instruments, t_instruments = get_random_instruments(
      model,
      notes["end_times"][-1],
      secs_per_instrument=seconds_per_instrument)
  # Interpolate a latent vector for each note's start time.
  z_notes = get_z_notes(notes["start_times"], z_instruments, t_instruments)
  # Synthesize every note, then mix them into one clip.
  audio_notes = model.generate_samples_from_z(z_notes, notes["pitches"])
  return combine_notes(audio_notes, notes["start_times"], notes["end_times"],
                       notes["velocities"])
def main(unused_argv):
  """List model layers or fit a components estimator on layer activations.

  Exactly one of --list_layers or --random_z_count must be given. With
  --list_layers, the model's fake-data endpoints are printed. Otherwise,
  activations of --layer are sampled for --random_z_count random latents,
  the estimator named by --estimator is fit to them, and the resulting
  components are optionally pickled to --pca_out_file.
  """
  absl.flags.FLAGS.alsologtostderr = True
  if FLAGS.seed is not None:
    np.random.seed(FLAGS.seed)
    tf.random.set_random_seed(FLAGS.seed)
  # Exactly one of the two mode flags may be set.
  if int(bool(FLAGS.list_layers)) + int(FLAGS.random_z_count is not None) != 1:
    logging.info(
        "exactly one of --list_layers or --random_z_count must be specified"
    )
    sys.exit(1)
  # Load the model (needed by both modes; the guard above guarantees one of
  # them is active, so the load is unconditional).
  flags = lib_flags.Flags({"batch_size_schedule": [FLAGS.batch_size]})
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  if FLAGS.list_layers:
    for name, layer in model.fake_data_endpoints.items():
      logging.info(name)
      logging.info("  name: {}".format(layer.name))
      logging.info("  shape: {}".format(layer.shape))
      logging.info("  min. random_z_count: {}".format(
          product(layer.shape[1:])))
    return
  assert FLAGS.random_z_count is not None
  # Activation shape of the chosen layer, without the batch dimension.
  activation_shape = model.fake_data_endpoints[
      FLAGS.layer].shape.as_list()[1:]
  n_components = np.prod(activation_shape)
  logging.info("activation_shape = {}".format(activation_shape))
  logging.info("n_components = {}".format(n_components))
  # Only applies to the spca estimator, but the factory requires it anyway.
  sparsity = 1.0
  estimator = estimators.get_estimator(FLAGS.estimator, n_components,
                                       sparsity)
  if estimator.batch_support:
    # Incremental fitting: feed activations in chunks of pca_batch_size.
    pca_batch_size = FLAGS.pca_batch_size or n_components
    assert pca_batch_size >= n_components
    assert FLAGS.random_z_count % pca_batch_size == 0, (
        "random_z_count={} is not evenly divisible by "
        "pca_batch_size={}".format(FLAGS.random_z_count, pca_batch_size))
    for activations in generate_activations_batches(model, FLAGS.layer,
                                                    FLAGS.pitch,
                                                    pca_batch_size,
                                                    FLAGS.random_z_count):
      activations = reshape_for_pca(activations, activation_shape)
      logging.info("fit_partial()")
      if not estimator.fit_partial(activations):
        # fit_partial() should print an error if it fails, so just exit.
        sys.exit(1)
    # The incremental transformer tracks the mean itself.
    global_mean = estimator.transformer.mean_
  else:
    # One-shot fitting: generate all activations, center them, then fit.
    activations = generate_activations(model, FLAGS.layer, FLAGS.pitch,
                                       FLAGS.random_z_count)
    activations = reshape_for_pca(activations, activation_shape)
    assert activations.shape[0] >= n_components
    # Subtract the mean before fitting.
    global_mean = activations.mean(axis=0, keepdims=True, dtype=np.float32)
    activations -= global_mean
    logging.info("running estimator")
    estimator.fit(activations)
  logging.info("getting components")
  comp, stdev, var_ratio = estimator.get_components()
  # Normalize each component to unit length.
  comp /= np.linalg.norm(comp, axis=-1, keepdims=True)
  # Inflate flat components back to the original activation shape.
  comp = comp.reshape(-1, *activation_shape)
  global_mean = global_mean.reshape(activation_shape)
  pca_dict = {
      "layer": FLAGS.layer,
      "estimator": FLAGS.estimator,
      "comp": comp,
      "stdev": stdev,
      "var_ratio": var_ratio,
      "global_mean": global_mean
  }
  logging.info("pca_dict = {}".format(pca_dict))
  if FLAGS.pca_out_file is not None:
    logging.info("saving PCA result to {}".format(FLAGS.pca_out_file))
    with open(FLAGS.pca_out_file, "wb") as fp:
      pickle.dump(pca_dict, fp, pickle.DEFAULT_PROTOCOL)
return file_list # GLOBALS CKPT_DIR = '/content/gansynth/acoustic_only' output_dir = '/content/gansynth/samples' BATCH_SIZE = 16 SR = 16000 # Make an output directory if it doesn't exist OUTPUT_DIR = util.expand_path(output_dir) if not tf.gfile.Exists(OUTPUT_DIR): tf.gfile.MakeDirs(OUTPUT_DIR) # Load the model tf.reset_default_graph() flags = lib_flags.Flags({'batch_size_schedule': [BATCH_SIZE]}) model = lib_model.Model.load_from_path(CKPT_DIR, flags) # Helper functions def load_midi(midi_path, min_pitch=36, max_pitch=84): """Load midi as a notesequence.""" midi_path = util.expand_path(midi_path) ns = mm.midi_file_to_sequence_proto(midi_path) pitches = np.array([n.pitch for n in ns.notes]) velocities = np.array([n.velocity for n in ns.notes]) start_times = np.array([n.start_time for n in ns.notes]) end_times = np.array([n.end_time for n in ns.notes]) valid = np.logical_and(pitches >= min_pitch, pitches <= max_pitch) notes = {'pitches': pitches[valid], 'velocities': velocities[valid], 'start_times': start_times[valid],
def main(unused_argv):
  """Generate audio, optionally applying latent-layer edits from a PCA file."""
  absl.flags.FLAGS.alsologtostderr = True
  # Load the model; pass tfds_data_dir through only when it is set.
  flags = lib_flags.Flags({
      'batch_size_schedule': [FLAGS.batch_size],
      **({
          'tfds_data_dir': FLAGS.tfds_data_dir
      } if FLAGS.tfds_data_dir else {})
  })
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  # Make an output directory if it doesn't exist.
  output_dir = util.expand_path(FLAGS.output_dir)
  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)
  if FLAGS.seed is not None:
    np.random.seed(seed=FLAGS.seed)
    tf.random.set_random_seed(FLAGS.seed)
  layer_offsets = {}
  if FLAGS.edits_file:
    # The edits file is a pickled dict holding component directions ("comp")
    # for a specific layer ("layer"), as produced by the PCA tool.
    with open(FLAGS.edits_file, "rb") as fp:
      edits_dict = pickle.load(fp)
    assert "layer" in edits_dict
    assert "comp" in edits_dict
    directions = edits_dict["comp"]
    # Scale the first len(FLAGS.edits) directions by the requested amounts;
    # components without an explicit amount get weight 0.
    amounts = np.zeros(edits_dict["comp"].shape[:1], dtype=np.float32)
    edit_values = [float(e) for e in FLAGS.edits]
    amounts[:len(edit_values)] = edit_values
    scaled_directions = amounts.reshape(-1, 1, 1, 1) * directions
    linear_combination = np.sum(scaled_directions, axis=0)
    # Broadcast the single offset across the whole batch.
    linear_combination_batch = np.repeat(
        linear_combination.reshape(1, *linear_combination.shape),
        FLAGS.batch_size,
        axis=0)
    layer_offsets[edits_dict["layer"]] = linear_combination_batch
  if FLAGS.midi_file:
    # If a MIDI file is provided, synthesize interpolations across the clip.
    unused_ns, notes = gu.load_midi(FLAGS.midi_file)
    # Distribute latent vectors linearly in time.
    z_instruments, t_instruments = gu.get_random_instruments(
        model,
        notes['end_times'][-1],
        secs_per_instrument=FLAGS.secs_per_instrument)
    # Get latent vectors for each note.
    z_notes = gu.get_z_notes(notes['start_times'], z_instruments,
                             t_instruments)
    # Generate audio for each note.
    print('Generating {} samples...'.format(len(z_notes)))
    audio_notes = model.generate_samples_from_z(
        z_notes, notes['pitches'], layer_offsets=layer_offsets)
    # Make a single audio clip.
    audio_clip = gu.combine_notes(audio_notes, notes['start_times'],
                                  notes['end_times'], notes['velocities'])
    # Write the wave file.
    fname = os.path.join(output_dir, 'generated_clip.wav')
    gu.save_wav(audio_clip, fname)
  else:
    # Otherwise, just generate a batch of random sounds.
    waves = model.generate_samples(
        FLAGS.batch_size, pitch=FLAGS.pitch, layer_offsets=layer_offsets)
    # Write the wave files.
    for i in range(len(waves)):
      fname = os.path.join(output_dir, 'generated_{}.wav'.format(i))
      gu.save_wav(waves[i], fname)
import tensorflow as tf import sopilib.gansynth_protocol as gss from sopilib.utils import print_err, read_msg from handlers import handlers try: ckpt_dir = sys.argv[1] batch_size = int(sys.argv[2]) except IndexError: print_err("usage: {} checkpoint_dir batch_size".format( os.path.basename(__file__))) sys.exit(1) flags = lib_flags.Flags({"batch_size_schedule": [batch_size]}) model = lib_model.Model.load_from_path(ckpt_dir, flags) stdin = os.fdopen(sys.stdin.fileno(), "rb", 0) stdout = os.fdopen(sys.stdout.fileno(), "wb", 0) stdout.write(gss.to_tag_msg(gss.OUT_TAG_INIT)) audio_length = model.config['audio_length'] sample_rate = model.config['sample_rate'] info_msg = gss.to_info_msg(audio_length=audio_length, sample_rate=sample_rate) stdout.write(info_msg) stdout.flush() state = {} while True:
## Variables ## ckpt_dir, output_dir = sys.argv[1], sys.argv[2] batch_size = 16 sample_rate = SAMPLE_RATE # Make an output directory if it doesn't exist output_dir = util.expand_path(output_dir) if not tf.gfile.Exists(output_dir): tf.gfile.MakeDirs(output_dir) # Load the model tf.reset_default_graph() flags = lib_flags.Flags({'batch_size_schedule': [batch_size]}) model = lib_model.Model.load_from_path(ckpt_dir, flags) # Helper functions def load_midi(midi_path, min_pitch=36, max_pitch=84): """Load midi as a notesequence.""" midi_path = util.expand_path(midi_path) ns = music.midi_file_to_sequence_proto(midi_path) pitches = np.array([n.pitch for n in ns.notes]) velocities = np.array([n.velocity for n in ns.notes]) start_times = np.array([n.start_time for n in ns.notes]) end_times = np.array([n.end_time for n in ns.notes]) valid = np.logical_and(pitches >= min_pitch, pitches <= max_pitch) notes = {'pitches': pitches[valid], 'velocities': velocities[valid], 'start_times': start_times[valid],
def main(unused_argv):
  """Generate samples and flag near-duplicate outputs (debug variant)."""
  absl.flags.FLAGS.alsologtostderr = True
  # Load the model.
  flags = lib_flags.Flags({'batch_size_schedule': [FLAGS.batch_size]})
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  # Make an output directory if it doesn't exist.
  output_dir = util.expand_path(FLAGS.output_dir)
  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)
  if FLAGS.midi_file:
    # If a MIDI file is provided, synthesize interpolations across the clip.
    unused_ns, notes = gu.load_midi(FLAGS.midi_file)
    # Distribute latent vectors linearly in time.
    z_instruments, t_instruments = gu.get_random_instruments(
        model,
        notes['end_times'][-1],
        secs_per_instrument=FLAGS.secs_per_instrument)
    # Get latent vectors for each note.
    z_notes = gu.get_z_notes(notes['start_times'], z_instruments,
                             t_instruments)
    # Generate audio for each note.
    print('Generating {} samples...'.format(len(z_notes)))
    audio_notes = model.generate_samples_from_z(z_notes, notes['pitches'])
    # Make a single audio clip.
    audio_clip = gu.combine_notes(audio_notes, notes['start_times'],
                                  notes['end_times'], notes['velocities'])
    # Write the wave file.
    fname = os.path.join(output_dir, 'generated_clip.wav')
    gu.save_wav(audio_clip, fname)
  else:
    # DEBUG: generate on a single pitch (valid range: 24-84) and also get the
    # latent vectors back, instead of plain model.generate_samples(...).
    waves, z = model.generate_samples(FLAGS.batch_size, pitch=44)
    # Write the wave files.
    for i in range(len(waves)):
      fname = os.path.join(output_dir, 'generated_{}.wav'.format(i))
      gu.save_wav(waves[i], fname)
    # DEBUG: write z to file for later analysis. Use a context manager so the
    # file handle is closed even if pickling fails (original leaked it).
    fname = os.path.join(output_dir, 'z.p')
    with open(fname, 'wb') as f:
      pickle.dump(z, f)
    # DEBUG: flag samples based on similar latent variables.
    flagged = get_flagged_latents(z, n=10)
    print("\nflagged (z):")
    for i in flagged:
      print(i)
    # DEBUG: flag samples based on similar waveforms.
    flagged = get_flagged_waves_par(waves, n=10, frac=0.01)
    print("\nflagged (waves):")
    for i in flagged:
      print(i)
      # Concatenate each flagged pair back-to-back for listening comparison.
      fname = os.path.join(output_dir,
                           '_sim_{}-{}.wav'.format(i[0][0], i[0][1]))
      gu.save_wav(np.array(list(waves[i[0][0]]) + list(waves[i[0][1]])),
                  fname)
def main(unused_argv):
  """Plot a grid of spectrograms over two PCA edit axes and save the waves."""
  absl.flags.FLAGS.alsologtostderr = True
  # Load the model.
  flags = lib_flags.Flags({'batch_size_schedule': [FLAGS.batch_size]})
  model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)
  # Make an output directory if it doesn't exist.
  output_dir = util.expand_path(FLAGS.output_dir)
  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)
  with open(FLAGS.pca_file, "rb") as fp:
    pca = pickle.load(fp)
  if FLAGS.seed is not None:
    np.random.seed(seed=FLAGS.seed)
    tf.random.set_random_seed(FLAGS.seed)
  # Cartesian product of edit amounts along both axes, batched for the model.
  edits_axis = np.linspace(FLAGS.min, FLAGS.max, FLAGS.steps)
  edits_list = list(cartesian_product(edits_axis, edits_axis))
  edits_batches = batch(FLAGS.batch_size, edits_list, [])
  pitch_batches = [[FLAGS.pitch] * FLAGS.batch_size] * len(edits_batches)
  fig, ax = plt.subplots(nrows=FLAGS.steps, ncols=FLAGS.steps,
                         figsize=(15, 7.5))
  fig.set_tight_layout(True)

  def _plot():
    # Generate every batch, plot each spectrogram, and save each wave.
    j = 0
    for edits_batch, pitch_batch in zip(edits_batches, pitch_batches):
      waves = model.generate_samples_from_edits(pitch_batch, edits_batch,
                                                pca)
      for i, edits in enumerate(edits_batch):
        if j >= len(edits_list):
          # The last batch may be padded; stop once all grid cells are done.
          return
        row = j // FLAGS.steps
        col = j % FLAGS.steps
        wave = waves[i]
        x = edits[0]
        y = edits[1]
        # NOTE(review): indexing is ax[col][row] while titles use (x, y) —
        # looks transposed, but kept as-is to preserve existing output.
        subplot = ax[col][row]
        subplot.title.set_text("({}, {})".format(
            format_float(x), format_float(y)))
        plotstft(subplot, wave, 16000, binsize=2**8, colormap="magma")
        subplot.set_axis_off()
        # Save the wave for this grid cell.
        gu.save_wav(
            wave, os.path.join(output_dir, "wave_{},{}.wav".format(x, y)))
        j += 1

  _plot()
  plt.savefig(os.path.join(output_dir, "plot.svg"), bbox_inches="tight")
  if FLAGS.show:
    plt.show()
  # Record the parameters used alongside the outputs.
  meta = [
      "checkpoint name: {}".format(os.path.basename(FLAGS.ckpt_dir)),
      "pca name: {}".format(os.path.basename(FLAGS.pca_file)),
      "pitch: {}".format(FLAGS.pitch),
      "min: {}".format(FLAGS.min),
      "max: {}".format(FLAGS.max),
      "steps: {}".format(FLAGS.steps)
  ]
  with open(os.path.join(output_dir, "meta.txt"), "w") as fp:
    fp.write("\n".join(meta) + "\n")