class TestAudioReader(tf.test.TestCase):

    def setUp(self):
        self.coord = tf.train.Coordinator()
        self.reader = AudioReader(DATA_DIR,
                                  DATA_OUT_DIR,
                                  self.coord,
                                  sample_rate=SAMPLE_RATE,
                                  sample_size=SAMPLE_SIZE,
                                  silence_threshold=SILENCE_THRESHOLD)

    def testAudioThread(self):
        max_allowed_mse = 1.0
        with self.test_session() as sess:
            threads = tf.train.start_queue_runners(sess=sess,
                                                   coord=self.coord)
            self.reader.start_threads(sess)
            input_batch = self.reader.dequeue(1)
            input_audio, output_audio = sess.run(input_batch)
            print(input_audio)
            # Dequeue and print a few more batches to exercise the reader
            # thread.
            for _ in range(5):
                test = self.reader.dequeue(1)
                print(sess.run(test)[0])
            mse = mean_squared_error(input_audio.flatten(),
                                     output_audio.flatten())
            self.assertLess(mse, max_allowed_mse)
            self.coord.request_stop()
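
# The test above relies on module-level constants and helpers that this
# snippet does not define. A minimal sketch of plausible values follows; the
# names come from the test itself, but the concrete values and the sklearn
# import are assumptions.
import tensorflow as tf
from sklearn.metrics import mean_squared_error  # assumed source of the helper

DATA_DIR = './test_data'            # hypothetical input directory
DATA_OUT_DIR = './test_data_out'    # hypothetical output directory
SAMPLE_RATE = 16000
SAMPLE_SIZE = 1000
SILENCE_THRESHOLD = 0.05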
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            audio_dir=args.data_dir,
            coord=coord,
            sample_rate=wavenet_params["sample_rate"],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            mfsc_dim=wavenet_params["MFSC_channels"],
            ap_dim=wavenet_params["AP_channels"],
            F0_dim=wavenet_params["F0_channels"],
            phone_dim=wavenet_params["phones_channels"],
            phone_pos_dim=wavenet_params["phone_pos_channels"],
            silence_threshold=silence_threshold)
        ap_batch, lc_batch = reader.dequeue(args.batch_size)
        # print("mfsc_batch_shape:", mfsc_batch.get_shape().as_list())
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        MFSC_channels=wavenet_params["MFSC_channels"],
        F0_channels=wavenet_params["F0_channels"],
        phone_channels=wavenet_params["phones_channels"],
        phone_pos_channels=wavenet_params["phone_pos_channels"])

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    # pdb.set_trace()
    loss = net.loss(
        input_batch=ap_batch,  # shape: [receptive_field + sample_size, 1]
        lc_batch=lc_batch,
        global_condition_batch=gc_id_batch,  # gc_id_batch shape: scalar
        l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    print("========================================")
    print("Total number of parameters for mfsc model:",
          np.sum([np.prod(v.get_shape().as_list())
                  for v in tf.trainable_variables()]))
    # pdb.set_trace()

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            if step % 10 == 0:
                print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                    step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
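
# Every training script in this file calls save() and load(), but none of
# them defines the pair. The sketch below follows the usual checkpoint
# helpers from ibab/tensorflow-wavenet; treat the exact messages and the
# checkpoint-name parsing as assumptions.
import os
import sys

import tensorflow as tf


def save(saver, sess, logdir, step):
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(logdir, model_name)
    print('Storing checkpoint to {} ...'.format(logdir), end="")
    sys.stdout.flush()
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, checkpoint_path, global_step=step)
    print(' Done.')


def load(saver, sess, logdir):
    print('Trying to restore saved checkpoints from {} ...'.format(logdir))
    ckpt = tf.train.get_checkpoint_state(logdir)
    if ckpt:
        # The step number is assumed to be the suffix of the checkpoint file,
        # e.g. model.ckpt-1000.
        global_step = int(ckpt.model_checkpoint_path
                          .split('/')[-1]
                          .split('-')[-1])
        saver.restore(sess, ckpt.model_checkpoint_path)
        return global_step
    print(' No checkpoint found.')
    return None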
def main():
    args = get_arguments()

    # Load parameters from wavenet params json file.
    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)
    quantization_channels = wavenet_params['quantization_channels']

    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        # Lambda for white noise sampler.
        gi_sampler = get_generator_input_sampler()

        # Initialize generator WaveNet.
        G = WaveNetModel(
            batch_size=1,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            initial_filter_width=wavenet_params["initial_filter_width"])

        # White noise generator params.
        white_mean = 0
        white_sigma = 1
        white_length = ffnn.INPUT_SIZE

        white_noise = gi_sampler(white_mean, white_sigma, white_length)
        white_noise = process(white_noise, quantization_channels, 1)
        white_noise_t = tf.convert_to_tensor(white_noise)

        directory = './sampleTrue'
        reader = AudioReader(directory,
                             coord,
                             sample_rate=16000,
                             gc_enabled=False,
                             receptive_field=5117,
                             sample_size=15117,
                             silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        audio_batch = reader.dequeue(1)

        # Initialize generator.
        w_loss, w_prediction = G.loss(input_batch=white_noise_t,
                                      name='generator')
        # w_loss, w_prediction = G.loss(input_batch=audio_batch,
        #                               name='generator')

        G_variables = tf.trainable_variables(scope='wavenet')
        optimizer = optimizer_factory[args.optimizer](
            learning_rate=1e-3,
            momentum=args.momentum)
        optim = optimizer.minimize(w_loss, var_list=G_variables)

        init = tf.global_variables_initializer()
        sess.run(init)

        '''
        for step in range(300):
            loss_value, _ = sess.run([w_loss, optim])
            print('step {:d} - loss = {:.3f}'.format(step, loss_value))

        prediction = sess.run(w_prediction)
        '''

        '''
        maxs = []
        maxs_2 = []
        maxs_3 = []
        for i in range(0, 10000):
            temp = prediction[i]
            temp.sort()
            maxs_3.append(temp[253])
            maxs_2.append(temp[254])
            maxs.append(temp[255])
        plt.plot(maxs)
        plt.plot(maxs_2)
        plt.plot(maxs_3)
        plt.ylabel('Value')
        plt.xlabel('Sample')
        plt.savefig('logits_after.png')

        np.set_printoptions(threshold=np.nan)
        print(sess.run(tf.nn.softmax(w_prediction)))
        '''
        '''
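
# Neither get_generator_input_sampler nor process is defined in this snippet.
# Below is a minimal sketch of what they plausibly do, assuming the sampler
# returns Gaussian white noise and process applies mu-law quantization the
# way WaveNet encodes raw audio; both signatures are inferred from the call
# sites above and are assumptions.
import numpy as np


def get_generator_input_sampler():
    # Returns a lambda producing Gaussian noise of the requested length.
    return lambda mean, sigma, length: np.random.normal(mean, sigma, length)


def process(audio, quantization_channels, channels):
    # Mu-law companding followed by quantization to integer bins.
    # `channels` is kept only to match the call site; it is unused here.
    mu = quantization_channels - 1
    audio = np.clip(audio, -1.0, 1.0)
    magnitude = np.log1p(mu * np.abs(audio)) / np.log1p(mu)
    signal = np.sign(audio) * magnitude
    return ((signal + 1) / 2 * mu + 0.5).astype(np.int32)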
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # tfdbg notes left over from a debugging session:
    # sess = tf_debug.LocalCLIDebugWrapperSession(
    #     sess, thread_name_filter="MainThread$",
    #     dump_root="C:\\MProjects\\WaveNet\\tensorflow-wavenet-master\\debugDump")
    # run --node_name_filter wavenet_1/loss/Reshape_1:0   -- (36352, 256)
    # run --node_name_filter (.*loss.*)|(.*encode.*)
    # pt -a tensorName > C:/Users/russkov.alexander/Desktop/WaveNet/tensorflow-wavenet-master/myDebugInfo/file.txt
    # encoded_input = Tensor("wavenet_1/encode/ToInt32:0", shape=(1, ?, 1),
    #                        dtype=int32)   -- (1, 59901, 1)
    # encoded = Tensor("wavenet_1/one_hot_encode/Reshape:0", shape=(1, ?, 256),
    #                  dtype=float32)   -- (1, 59901, 256)
    # https://www.tensorflow.org/guide/debugger#frequently_asked_questions
    # Q: The model I am debugging is very large. The data dumped by tfdbg
    #    fills up the free space of my disk. What can I do?
    # https://github.com/tensorflow/tensorflow/issues/8753
    # sess = tf_debug.TensorBoardDebugWrapperSession(
    #     sess, "RUSSKOV-NB-W10:6064", send_traceback_and_source_code=False)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
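
# WaveNetModel.calculate_receptive_field is called by several scripts above
# but its body is not included. The sketch below follows the usual WaveNet
# receptive-field arithmetic; upstream it is a @staticmethod on WaveNetModel,
# and the exact implementation there may differ.
def calculate_receptive_field(filter_width, dilations, scalar_input,
                              initial_filter_width):
    # Each dilated layer widens the field by (filter_width - 1) * dilation.
    receptive_field = (filter_width - 1) * sum(dilations) + 1
    # The input layer widens it further: by the initial filter when raw
    # scalar samples are fed in, otherwise by one ordinary filter.
    if scalar_input:
        receptive_field += initial_filter_width - 1
    else:
        receptive_field += filter_width - 1
    return receptive_field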
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            max_samples=get_max_samples(args.data_dir,
                                        wavenet_params['sample_rate']),
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=args.silence_threshold
            if args.silence_threshold > EPSILON else None)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)

    learning_rate_placeholder = tf.placeholder(tf.float32, [])
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate_placeholder,
        momentum=args.momentum)
    train_op = optimizer.minimize(loss)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    loss_value = None
    update = 0
    last_saved_step = saved_global_step
    learning_rate = args.learning_rate
    print('learning_rate {:f}'.format(learning_rate))
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, train_op],
                    feed_dict={learning_rate_placeholder: learning_rate},
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run(
                    [summaries, loss, train_op],
                    feed_dict={learning_rate_placeholder: learning_rate})
                writer.add_summary(summary, step)

            # Manual two-stage learning-rate decay driven by the loss value.
            if 1.5 >= loss_value > 0.5 and update == 0:
                learning_rate = learning_rate * 0.1
                update += 1
                print('learning_rate {:f}'.format(learning_rate))
            elif loss_value <= 0.5 and update == 1:
                learning_rate = learning_rate * 0.1
                update += 1
                print('learning_rate {:f}'.format(learning_rate))

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
    # (The beginning of this snippet, including the AudioReader constructor
    # that these keyword arguments belong to, is missing from the source.)
                         receptive_field=1000,
                         sample_size=21000,
                         silence_threshold=0.05)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    init = tf.global_variables_initializer()
    sess.run(init)

    prevA = []
    for it in range(1000):
        batch_data = []
        start_time = time.time()

        data = sess.run(reader.dequeue(1))
        while len(data[0]) < w1:
            data = sess.run(reader.dequeue(1))
        data = np.array(data[0])
        samples = process(data)
        batch_data.append(samples)

        for g_batch in range(20):
            _, G_loss_curr = sess.run([G_solver, G_loss],
                                      feed_dict={Z: sample_Z(1, 100)})
        for d_batch in range(1):
            _, D_loss_curr = sess.run([D_solver, D_loss],
                                      feed_dict={X: batch_data,
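
# Z, X, G_solver, D_solver, G_loss, D_loss and w1 come from the surrounding
# GAN setup, which this fragment does not include. The noise sampler used in
# the generator feed_dict is commonly defined as below; this is a plausible
# sketch, not the snippet's actual definition.
import numpy as np


def sample_Z(m, n):
    # Uniform noise in [-1, 1), shaped [batch, noise_dim].
    return np.random.uniform(-1., 1., size=[m, n])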
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)

    tower_grads = []
    tower_losses = []
    for device_index in xrange(args.num_gpus):
        with tf.device('/gpu:%d' % device_index), tf.name_scope(
                'tower_%d' % device_index) as scope:
            audio_batch = reader.dequeue(args.batch_size)
            loss, optimizer, trainable = make_net(args,
                                                  wavenet_params,
                                                  audio_batch,
                                                  reuse_variables=True)
            grads = optimizer.compute_gradients(loss, var_list=trainable)
            tower_losses.append(loss)
            tower_grads.append(grads)
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
            tf.get_variable_scope().reuse_variables()

    if args.num_gpus == 1:
        optim = optimizer.minimize(loss, var_list=trainable)
    else:
        loss = tf.reduce_mean(tower_losses)
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            grads = []
            for g, _ in grad_and_vars:
                if g is None:
                    continue
                expanded_g = tf.expand_dims(g, 0)
                grads.append(expanded_g)
            # Variables are shared across towers, so the one from the first
            # tower is the one to update.
            v = grad_and_vars[0][1]
            if len(grads) == 0:
                average_grads.append((None, v))
                continue
            grad = tf.concat(0, grads)
            grad = tf.reduce_mean(grad, 0)
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        optim = optimizer.apply_gradients(average_grads)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_summary(summaries)
    # summaries = tf.merge_all_summaries()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            allow_soft_placement=True))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    try:
        last_saved_step = saved_global_step
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
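
# make_net is not defined in this snippet. Judging by its call site it builds
# one tower's WaveNetModel plus the optimizer and trainable-variable list.
# The sketch below is a guess at that shape; every default here is an
# assumption, and reuse_variables is accepted only for parity with the call
# site (variable reuse is handled by the caller's variable scope).
def make_net(args, wavenet_params, audio_batch, reuse_variables=False):
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"])
    loss = net.loss(audio_batch, args.l2_regularization_strength)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    trainable = tf.trainable_variables()
    return loss, optimizer, trainable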
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"])

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(audio_batch, args.l2_regularization_strength)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver()

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    try:
        last_saved_step = saved_global_step
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                  .format(step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"])

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(audio_batch, args.l2_regularization_strength)

    if args.optimizer == ADAM_OPTIMIZER:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif args.optimizer == SGD_OPTIMIZER:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=args.learning_rate,
            momentum=args.sgd_momentum)
    else:
        # This shouldn't happen, given the choices specified in argument
        # specification.
        raise RuntimeError('Invalid optimizer option.')
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    try:
        last_saved_step = saved_global_step
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
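
# ADAM_OPTIMIZER and SGD_OPTIMIZER are module-level constants that the
# optimizer branch above compares against; presumably the string values the
# --optimizer CLI flag accepts. The values below are an assumption.
ADAM_OPTIMIZER = 'adam'
SGD_OPTIMIZER = 'sgd'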
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    # aleix: this fork's net.loss also returns intermediate tensors for
    # debugging.
    (loss, global_condition_batch, gc_embedding, conv_filter, conv_filter0,
     conv_filter1, conv_gate, embedding_table, weights_gc_filter,
     input_batch) = net.loss(
        input_batch=audio_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    loss_plot = []  # store loss function (aleix)
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                # aleix: fetch the debug tensors alongside the training op.
                (summary, loss_value, global_condition_batch0, gc_embedding0,
                 conv_filter_end, conv_filter0_0, conv_filter0_1, conv_gate0,
                 embedding_table0, weights_gc_filter0, input_batch0,
                 _) = sess.run([summaries, loss, global_condition_batch,
                                gc_embedding, conv_filter, conv_filter0,
                                conv_filter1, conv_gate, embedding_table,
                                weights_gc_filter, input_batch, optim])
                # Debug prints, left commented out:
                # print('global_condition_batch:')
                # print(global_condition_batch0)
                # print(global_condition_batch0.shape)
                # print()
                # print('gc_embedding')
                # print(gc_embedding0)
                # print(gc_embedding0.shape)
                # print()
                # print('conv_filter')
                # print(conv_filter_end)
                # print(conv_filter_end.shape)
                # print()
                # print('conv_filter0')
                # print(conv_filter0_0)
                # print(conv_filter0_0.shape)
                # print()
                # print('conv_filter1')
                # print(conv_filter0_1)
                # print(conv_filter0_1.shape)
                # print()
                # print('conv_gate')
                # print(conv_gate0)
                # print(conv_gate0.shape)
                # print()
                # print('embedding_table')
                # print(embedding_table0)
                # print(embedding_table0.shape)
                # print(target_output00)
                # print(target_output00.shape)
                # print(target_output10)
                # print(target_output10.shape)
                # print()
                # print('weights_gc_filter')
                # print(weights_gc_filter0)
                # print(weights_gc_filter.shape)
                # print(input_batch0.shape)
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            loss_plot.append(loss_value)

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

        # Store the loss curve. (aleix)
        plt.figure(1)
        plt.plot(loss_plot)
        # plt.show()
        plt.savefig(os.path.join(args.data_dir, 'loss.png'))
        print()
        print('Loss plot saved')
        file00 = open(os.path.join(args.data_dir, 'loss.txt'), 'w')
        for item in loss_plot:
            file00.write("%s\n" % item)
        file00.close()
        print('Loss .txt saved')
        print()
    except KeyboardInterrupt:
        # Store the loss curve collected so far. (aleix)
        plt.figure(1)
        plt.plot(loss_plot)
        plt.savefig(os.path.join(args.data_dir, 'loss.png'))
        print()
        print('Loss plot saved')
        file00 = open(os.path.join(args.data_dir, 'loss.txt'), 'w')
        for item in loss_plot:
            file00.write("%s\n" % item)
        file00.close()
        print('Loss .txt saved')
        print()
        # plt.show()
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)
        # audio_batch, input_IDs = reader.dequeue(args.batch_size)
        # (single-GPU dequeue, replaced by the per-tower dequeue below)

    # Create network.
    batch_size_single_GPU = int(1.0 * args.batch_size / args.num_gpus)
    net = WaveNetModel(
        batch_size=batch_size_single_GPU,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        ID_channels=wavenet_params["ID_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],  # scalar or vector input?
        initial_filter_width=wavenet_params["initial_filter_width"])

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()

    tower_grads = []
    # for i in range(args.num_gpus):
    with tf.device('/gpu:0'):
        with tf.name_scope('losstower_0') as scope:
            audio_batch, input_IDs = reader.dequeue(batch_size_single_GPU)
            all_loss = net.loss(audio_batch, input_IDs,
                                args.l2_regularization_strength)
            loss, L1 = all_loss  # total loss
            tf.get_variable_scope().reuse_variables()
            grads_vars = optimizer.compute_gradients(loss,
                                                     var_list=trainable)
            tower_grads.append(grads_vars)

    update_wei_op = []
    with tf.device('/cpu:0'):
        for gv in tower_grads:
            app_grad = optimizer.apply_gradients(gv)
            update_wei_op.append(app_grad)
        with tf.control_dependencies(update_wei_op):
            train_op = tf.no_op()

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess, N_THREADS)

    step = None
    try:
        last_saved_step = saved_global_step
        avg_loss_value = 0.0
        avg_L1_value = 0.0
        start_time = time.time()
        for step in range(saved_global_step + 1, args.num_steps):
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, all_loss_value, _ = sess.run(
                    [summaries, all_loss, train_op],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                all_loss_value, _ = sess.run([all_loss, train_op])
                # writer.add_summary(summary, step)

            loss_value, L1_value = all_loss_value
            avg_loss_value += loss_value
            avg_L1_value += L1_value

            if step % args.checkloss_every == 0:
                avg_loss_value = avg_loss_value / args.checkloss_every
                avg_L1_value = avg_L1_value / args.checkloss_every
                duration = (time.time() - start_time) * 1.0 \
                    / args.checkloss_every
                print('step {:d} - avg_loss = {:.3f}, avg_L1 = {:.3f}, '
                      '({:.3f} sec/step)'.format(step, avg_loss_value,
                                                 avg_L1_value, duration))
                sys.stdout.flush()
                avg_loss_value = 0.0
                avg_L1_value = 0.0
                start_time = time.time()

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    with tf.device("/cpu:0"):
        # Create coordinator.
        coord = tf.train.Coordinator()

        # Load raw waveform from VCTK corpus.
        with tf.name_scope('create_inputs'):
            # Allow silence trimming to be skipped by specifying a threshold
            # near zero.
            silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                          EPSILON else None
            gc_enabled = args.gc_channels is not None
            reader = AudioReader(
                args.data_dir,
                coord,
                sample_rate=wavenet_params['sample_rate'],
                gc_enabled=gc_enabled,
                sample_size=args.sample_size,
                silence_threshold=silence_threshold)

        # Create network.
        net = WaveNetModel(
            batch_size=args.batch_size,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            scalar_input=wavenet_params["scalar_input"],
            initial_filter_width=wavenet_params["initial_filter_width"],
            histograms=args.histograms,
            global_condition_channels=args.gc_channels,
            global_condition_cardinality=reader.gc_category_cardinality)

        if args.l2_regularization_strength == 0:
            args.l2_regularization_strength = None

        global_step = tf.get_variable(
            "global_step", [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        optimizer = optimizer_factory[args.optimizer](
            learning_rate=args.learning_rate,
            momentum=args.momentum)

        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(args.gpu_nums):
                with tf.device("/gpu:%d" % i), \
                        tf.name_scope("tower_%d" % i) as scope:
                    audio_batch = reader.dequeue(args.batch_size)
                    if gc_enabled:
                        gc_id_batch = reader.dequeue_gc(args.batch_size)
                    else:
                        gc_id_batch = None
                    loss = net.loss(
                        input_batch=audio_batch,
                        global_condition_batch=gc_id_batch,
                        l2_regularization_strength=args.l2_regularization_strength)
                    tower_losses.append(loss)
                    trainable = tf.trainable_variables()
                    grads = optimizer.compute_gradients(loss,
                                                        var_list=trainable)
                    tower_grads.append(grads)
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)
                    tf.get_variable_scope().reuse_variables()

        # Calculate the mean of each gradient. This is the synchronization
        # point across all towers.
        grads = average_gradients(tower_grads)
        train_ops = optimizer.apply_gradients(grads, global_step=global_step)

        # Calculate the mean loss.
        loss = tf.reduce_mean(tower_losses)

        # Set up logging for TensorBoard.
        writer = tf.summary.FileWriter(logdir)
        writer.add_graph(tf.get_default_graph())
        run_metadata = tf.RunMetadata()
        summaries_ops = tf.summary.merge(summaries)

        # Set up session.
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                                allow_soft_placement=True))
        init = tf.global_variables_initializer()
        sess.run(init)

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.trainable_variables())

        try:
            saved_global_step = load(saver, sess, restore_from)
            if is_overwritten_training or saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1
        except:
            print("Something went wrong while restoring checkpoint. "
                  "We will terminate training to avoid accidentally "
                  "overwriting the previous model.")
            raise

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        step = None
        try:
            last_saved_step = saved_global_step
            for step in range(saved_global_step + 1, args.num_steps):
                start_time = time.time()
                if args.store_metadata and step % 50 == 0:
                    # Slow run that stores extra information for debugging.
                    print('Storing metadata')
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    summary, loss_value, _ = sess.run(
                        [summaries_ops, loss, train_ops],
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_summary(summary, step)
                    writer.add_run_metadata(run_metadata,
                                            'step_{:04d}'.format(step))
                    tl = timeline.Timeline(run_metadata.step_stats)
                    timeline_path = os.path.join(logdir, 'timeline.trace')
                    with open(timeline_path, 'w') as f:
                        f.write(tl.generate_chrome_trace_format(
                            show_memory=True))
                else:
                    summary, loss_value, _ = sess.run(
                        [summaries_ops, loss, train_ops])
                    writer.add_summary(summary, step)

                duration = time.time() - start_time
                print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                      .format(step, loss_value, duration))

                if step % args.checkpoint_every == 0:
                    save(saver, sess, logdir, step)
                    last_saved_step = step
        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print()
        finally:
            if step > last_saved_step:
                save(saver, sess, logdir, step)
            coord.request_stop()
            coord.join(threads)
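
# average_gradients is called above but never shown. The customary
# implementation (as in the TensorFlow multi-GPU CIFAR-10 example) averages
# each variable's gradients across towers; a sketch under that assumption:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0, var), (grad1, var), ...) for one variable.
        grads = [tf.expand_dims(g, 0)
                 for g, _ in grad_and_vars if g is not None]
        # Variables are shared across towers, so the first tower's suffices.
        v = grad_and_vars[0][1]
        if not grads:
            average_grads.append((None, v))
            continue
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, v))
    return average_grads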
def main():
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold,
            normalize_peak=args.normalize_peak,
            queue_size=32 * max(args.num_gpus, 1))
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    if args.num_gpus <= 1:
        print("Falling back to single computation unit.")
        audio_batch = reader.dequeue(args.batch_size)
        net = make_model(args, wavenet_params, reader)
        loss = net.loss(
            input_batch=audio_batch,
            global_condition_batch=gc_id_batch,
            l2_regularization_strength=args.l2_regularization_strength)
        optimizer = optimizer_factory[args.optimizer](
            learning_rate=args.learning_rate,
            momentum=args.momentum)
        trainable = tf.trainable_variables()
        gradients = optimizer.compute_gradients(loss, var_list=trainable)
        for gradient, variable in gradients:
            if gradient is not None:
                tf.summary.scalar(variable.name + '/gradient',
                                  tf.norm(gradient))
        optim = optimizer.apply_gradients(gradients)
    else:
        print("Using {} GPUs for computation.".format(args.num_gpus))
        with tf.device('/gpu:0'), tf.name_scope('tower_0'):
            optimizer = optimizer_factory[args.optimizer](
                learning_rate=args.learning_rate,
                momentum=args.momentum)

        losses = []
        gradients = []
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            for i in range(args.num_gpus):
                with tf.device('/gpu:%d' % i), \
                        tf.name_scope('tower_%d' % i):
                    audio_batch = reader.dequeue(args.batch_size)
                    net = make_model(args, wavenet_params, reader, i)
                    loss = net.loss(
                        input_batch=audio_batch,
                        global_condition_batch=gc_id_batch,
                        l2_regularization_strength=args.l2_regularization_strength)
                    trainable = tf.trainable_variables()
                    gradient = optimizer.compute_gradients(
                        loss, var_list=trainable)
                    losses.append(loss)
                    gradients.append(gradient)
                    scope.reuse_variables()

        with tf.device('/gpu:0'), tf.name_scope('tower_0'):
            loss = tf.reduce_mean(losses)
            tf.summary.scalar('mean_total_loss', loss)

            average_gradients = []
            for grouped_gradients in zip(*gradients):
                expanded_gradients = []
                for gradient, _ in grouped_gradients:
                    if gradient is not None:
                        expanded_gradients.append(tf.expand_dims(gradient, 0))

                # Since all GPUs share the same variable we can just use the
                # one from gpu:0.
                _, variable = grouped_gradients[0]
                if len(expanded_gradients) == 0:
                    print('No gradient for %s' % variable.name)
                    average_gradients.append((None, variable))
                    continue

                merged_gradients = tf.concat(expanded_gradients, 0)
                average_gradient = tf.reduce_mean(merged_gradients, 0)
                average_gradients.append((average_gradient, variable))
                tf.summary.scalar(variable.name + '/gradient',
                                  tf.norm(average_gradient))

            optim = optimizer.apply_gradients(average_gradients)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session.
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    # Workaround for avoiding allocating memory on all GPUs due to
    # tensorflow#8021.
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
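
# make_model is not defined here either. From its call sites it wraps
# WaveNetModel construction, with an optional tower index. A minimal sketch;
# every parameter choice below is an assumption, and the index is shown
# unused because variable sharing is handled by the caller's variable scope.
def make_model(args, wavenet_params, reader, index=0):
    return WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)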
def main():
    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        # Network and training hyperparameters.
        batch_size = 10
        hidden1_units = 5202
        hidden2_units = 2601
        hidden3_units = 1300
        hidden4_units = 650
        hidden5_units = 325
        max_steps = 1000

        # Earlier fixed-rate configuration, kept for reference:
        # learning_rate = 1e-2
        # print('Learning Rate:')
        # print(learning_rate)
        # print('Layers')
        # print(5)

        global_step = tf.Variable(0, name='global_step', trainable=False)
        initial_learning_rate = 4e-2
        learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                                   global_step,
                                                   100, 0.95, staircase=True)

        inputs_placeholder, labels_placeholder = placeholder_inputs(batch_size)
        logits = ffnn.inference(inputs_placeholder, hidden1_units, hidden2_units,
                                hidden3_units, hidden4_units, hidden5_units)
        loss = ffnn.loss(logits, labels_placeholder)
        train_op = ffnn.training(loss, learning_rate, global_step)
        eval_correct = ffnn.evaluation(logits, labels_placeholder)

        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter('./logdir', sess.graph)
        sess.run(init)

        args = get_arguments()
        if args.restore_from is not None:
            restore_from = args.restore_from
            print("Restoring from: ")
            print(restore_from)
        else:
            restore_from = ""

        try:
            saved_global_step = load(saver, sess, restore_from)
            if saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1
            else:
                counter = saved_global_step % label_batch_size
        except:
            print("Something went wrong while restoring checkpoint. "
                  "We will terminate training to avoid accidentally overwriting "
                  "the previous model.")
            raise

        # TODO: Find a more robust way to find different data sets.

        # Training data: one reader per class directory.
        directory = './sampleTrue'
        reader = AudioReader(directory, coord, sample_rate=16000,
                             gc_enabled=False, receptive_field=5117,
                             sample_size=10000, silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        directory = './sampleFalse'
        reader2 = AudioReader(directory, coord, sample_rate=16000,
                              gc_enabled=False, receptive_field=5117,
                              sample_size=10000, silence_threshold=0.05)
        threads2 = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader2.start_threads(sess)

        total_loss = 0
        for step in range(saved_global_step + 1, max_steps):
            start_time = time.time()
            batch_data = []
            label_data = []

            if step % 100 == 0:
                print('Current learning rate: %6f' % sess.run(learning_rate))

            for b in range(batch_size):
                # Pick a class at random, then dequeue clips from the matching
                # reader until one is long enough for the network input.
                label = randint(0, 1)
                if label == 1:
                    data = sess.run(reader.dequeue(1))
                    while len(data[0]) < ffnn.INPUT_SIZE:
                        data = sess.run(reader.dequeue(1))
                else:
                    data = sess.run(reader2.dequeue(1))
                    while len(data[0]) < ffnn.INPUT_SIZE:
                        data = sess.run(reader2.dequeue(1))
                data = np.array(data[0])

                # Standardize the clip to zero mean and unit variance.
                mean = np.mean(data)
                std = np.std(data)
                standardized = [float(d - mean) / std for d in data]

                batch_data.append(standardized)
                label_data.append(label)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            total_loss = total_loss + loss_value

            print('Step %d: loss = %.7f (%.3f sec)' % (step, loss_value, duration))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

            if step % 100 == 0 or (step + 1) == max_steps:
                average = total_loss / (step + 1)
                print('Cumulative average loss: %6f' % average)
                # TODO: Update train script to add data to new directory.
                checkpoint_file = os.path.join('./logdir/init-train/', 'model.ckpt')
                print("Generating checkpoint file...")
                saver.save(sess, checkpoint_file, global_step=step)
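# The helpers `placeholder_inputs` and `fill_feed_dict` are used by the
# training loops here but defined elsewhere. A minimal sketch of what they
# are assumed to look like, inferred only from the call sites above (one
# flattened clip of ffnn.INPUT_SIZE float samples per row, one integer
# label per clip); the actual shapes in ffnn may differ.

def placeholder_inputs(batch_size):
    # One row of ffnn.INPUT_SIZE samples per clip, one int label per clip.
    inputs_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, ffnn.INPUT_SIZE))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return inputs_placeholder, labels_placeholder


def fill_feed_dict(batch_data, label_data, inputs_pl, labels_pl):
    # Map the Python lists built in the training loop onto the placeholders.
    return {inputs_pl: np.asarray(batch_data, dtype=np.float32),
            labels_pl: np.asarray(label_data, dtype=np.int32)}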
def main():
    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        # Network and training hyperparameters.
        batch_size = 1
        hidden1_units = 5202
        hidden2_units = 2601
        hidden3_units = 1300
        hidden4_units = 650
        hidden5_units = 325
        max_training_steps = 1

        global_step = tf.Variable(0, name='global_step', trainable=False)
        initial_training_learning_rate = 3e-2
        training_learning_rate = tf.train.exponential_decay(
            initial_training_learning_rate, global_step, 100, 0.9,
            staircase=True)

        inputs_placeholder, labels_placeholder = placeholder_inputs(batch_size)
        logits = ffnn.inference(inputs_placeholder, hidden1_units, hidden2_units,
                                hidden3_units, hidden4_units, hidden5_units)
        loss = ffnn.loss(logits, labels_placeholder)
        train_op = ffnn.training(loss, training_learning_rate, global_step)
        eval_correct = ffnn.evaluation(logits, labels_placeholder)

        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter('./logdir', sess.graph)
        sess.run(init)

        args = get_arguments()

        # Load parameters from the WaveNet params JSON file.
        with open(args.wavenet_params, 'r') as f:
            wavenet_params = json.load(f)
        quantization_channels = wavenet_params['quantization_channels']

        if args.restore_from is not None:
            restore_from = args.restore_from
            print("Restoring from: ")
            print(restore_from)
        else:
            restore_from = ""

        try:
            saved_global_step = load(saver, sess, restore_from)
            if saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1
            else:
                counter = saved_global_step % label_batch_size
        except:
            print("Something went wrong while restoring checkpoint. "
                  "We will terminate training to avoid accidentally overwriting "
                  "the previous model.")
            raise

        # TODO: Find a more robust way to find different data sets.

        # Training data: one reader per class directory.
        directory = './sampleTrue'
        reader = AudioReader(directory, coord, sample_rate=16000,
                             gc_enabled=False, receptive_field=5117,
                             sample_size=15117, silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        directory = './sampleFalse'
        reader2 = AudioReader(directory, coord, sample_rate=16000,
                              gc_enabled=False, receptive_field=5117,
                              sample_size=15117, silence_threshold=0.05)
        threads2 = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader2.start_threads(sess)

        total_loss = 0
        for step in range(saved_global_step + 1, max_training_steps):
            start_time = time.time()
            batch_data = []
            label_data = []

            if step % 100 == 0:
                print('Current learning rate: %6f' %
                      sess.run(training_learning_rate))

            for b in range(batch_size):
                label = randint(0, 1)
                if label == 1:
                    data = sess.run(reader.dequeue(1))
                    while len(data[0]) < ffnn.INPUT_SIZE:
                        data = sess.run(reader.dequeue(1))
                else:
                    data = sess.run(reader2.dequeue(1))
                    while len(data[0]) < ffnn.INPUT_SIZE:
                        data = sess.run(reader2.dequeue(1))
                data = np.array(data[0])

                # Truncate the clip to exactly INPUT_SIZE samples.
                data = data[:ffnn.INPUT_SIZE]

                # Preprocess (quantize) the clip.
                samples = process(data, quantization_channels, 1)

                batch_data.append(samples)
                label_data.append(label)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            total_loss = total_loss + loss_value

            print('Step %d: loss = %.7f (%.3f sec)' % (step, loss_value, duration))

            # Checkpointing was disabled in this variant:
            # if step % 100 == 0 or (step + 1) == max_training_steps:
            #     average = total_loss / (step + 1)
            #     print('Cumulative average loss: %6f' % average)
            #     # TODO: Update train script to add data to new directory.
            #     checkpoint_file = os.path.join('./logdir/init-train/', 'model.ckpt')
            #     print("Generating checkpoint file...")
            #     saver.save(sess, checkpoint_file, global_step=step)

        # Lambda for white noise sampler.
        gi_sampler = get_generator_input_sampler()

        # Initialize generator WaveNet.
        G = WaveNetModel(
            batch_size=1,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            initial_filter_width=wavenet_params["initial_filter_width"])

        # White noise generator params.
        white_mean = 0
        white_sigma = 1
        white_length = ffnn.INPUT_SIZE

        white_noise = gi_sampler(white_mean, white_sigma, white_length)
        white_noise = process(white_noise, quantization_channels, 1)
        white_noise_t = tf.convert_to_tensor(white_noise)

        # Build the generator loss.
        w_loss, w_prediction = G.loss(input_batch=white_noise_t, name='generator')

        G_variables = tf.trainable_variables(scope='wavenet')
        optimizer = optimizer_factory[args.optimizer](learning_rate=3e-2,
                                                      momentum=args.momentum)
        optim = optimizer.minimize(w_loss, var_list=G_variables)

        # NOTE: this re-initializes all variables, including the
        # discriminator's weights trained above.
        init = tf.global_variables_initializer()
        sess.run(init)

        print(sess.run(tf.shape(w_prediction)))

        # Main GAN training loop.
        for step in range(NUM_EPOCHS):
            batch_data = []
            label_data = []

            # Train D on real.
            for d_index in range(batch_size):
                data = sess.run(reader.dequeue(1))
                data = data[0]
                d_real_data = process(data, quantization_channels, 1)
                batch_data.append(d_real_data)
                label_data.append(1)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)
            _, d_real_loss = sess.run([train_op, loss], feed_dict=feed_dict)
            print("Real loss")
            print(d_real_loss)

            batch_data = []
            label_data = []

            # Train D on fake.
            for d_index in range(batch_size):
                # NOTE: this builds new graph ops on every pass; consider
                # hoisting the placeholder and prediction ops out of the loop.
                samples = tf.placeholder(tf.int32)
                if args.fast_generation:
                    next_sample = G.predict_proba_incremental(samples, args.gc_id)
                else:
                    next_sample = G.predict_proba(samples, args.gc_id)

                if args.fast_generation:
                    sess.run(tf.global_variables_initializer())
                    sess.run(G.init_ops)

                waveform = [0]
                # The inner loop variable was also named `step` in the original,
                # shadowing the epoch counter; renamed to `gen_step` here.
                for gen_step in range(ffnn.INPUT_SIZE):
                    if args.fast_generation:
                        outputs = [next_sample]
                        outputs.extend(G.push_ops)
                        window = waveform[-1]
                    else:
                        if len(waveform) > G.receptive_field:
                            window = waveform[-G.receptive_field:]
                        else:
                            window = waveform
                        outputs = [next_sample]

                    # Run the WaveNet to predict the next sample.
                    prediction = sess.run(outputs, feed_dict={samples: window})[0]

                    # Scale prediction distribution using temperature.
                    np.seterr(divide='ignore')
                    scaled_prediction = np.log(prediction) / 1
                    scaled_prediction = (scaled_prediction -
                                         np.logaddexp.reduce(scaled_prediction))
                    scaled_prediction = np.exp(scaled_prediction)
                    np.seterr(divide='warn')

                    sample = np.random.choice(np.arange(quantization_channels),
                                              p=scaled_prediction)
                    waveform.append(sample)

                del waveform[0]
                d_fake_data = process(waveform, quantization_channels, 0)
                batch_data.append(d_fake_data)
                label_data.append(0)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)
            _, d_fake_loss = sess.run([train_op, loss], feed_dict=feed_dict)
            print("Fake loss")
            print(d_fake_loss)

            batch_data = []
            label_data = []

            # Train G, but don't train D.
            for g_index in range(batch_size):
                # Same generation procedure as for the fake batch above, but
                # the result is labeled 1 and only G's optimizer is run.
                samples = tf.placeholder(tf.int32)
                if args.fast_generation:
                    next_sample = G.predict_proba_incremental(samples, args.gc_id)
                else:
                    next_sample = G.predict_proba(samples, args.gc_id)

                if args.fast_generation:
                    sess.run(tf.global_variables_initializer())
                    sess.run(G.init_ops)

                waveform = [0]
                for gen_step in range(ffnn.INPUT_SIZE):
                    if args.fast_generation:
                        outputs = [next_sample]
                        outputs.extend(G.push_ops)
                        window = waveform[-1]
                    else:
                        if len(waveform) > G.receptive_field:
                            window = waveform[-G.receptive_field:]
                        else:
                            window = waveform
                        outputs = [next_sample]

                    # Run the WaveNet to predict the next sample.
                    prediction = sess.run(outputs, feed_dict={samples: window})[0]

                    # Scale prediction distribution using temperature.
                    np.seterr(divide='ignore')
                    scaled_prediction = np.log(prediction) / 1
                    scaled_prediction = (scaled_prediction -
                                         np.logaddexp.reduce(scaled_prediction))
                    scaled_prediction = np.exp(scaled_prediction)
                    np.seterr(divide='warn')

                    sample = np.random.choice(np.arange(quantization_channels),
                                              p=scaled_prediction)
                    waveform.append(sample)

                del waveform[0]
                g_data = process(waveform, quantization_channels, 0)
                batch_data.append(g_data)
                label_data.append(1)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)
            _, g_loss = sess.run([optim, loss], feed_dict=feed_dict)
            print("Generator loss")
            print(g_loss)
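# The two generation loops above repeat the same temperature-scaled sampling
# step inline. A minimal refactoring sketch (the helper name `sample_from` is
# ours, not part of the original code): it computes a numerically stable
# log-softmax via logaddexp.reduce, then samples one quantization level. With
# temperature=1.0, as used above, it simply renormalizes the prediction.

def sample_from(prediction, quantization_channels, temperature=1.0):
    np.seterr(divide='ignore')
    scaled = np.log(prediction) / temperature
    scaled = scaled - np.logaddexp.reduce(scaled)  # stable log-softmax
    scaled = np.exp(scaled)
    np.seterr(divide='warn')
    return np.random.choice(np.arange(quantization_channels), p=scaled)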
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError('Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = 'D:\\hccho\\multi-speaker-tacotron-tensorflow-master\\datasets\\son\\audio'  # './VCTK-Corpus'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    # LOGDIR = './/logdir//train//2018-11-25T14-10-48'  # son
    # LOGDIR = './/logdir//train//2018-11-30T22-22-58'  # test
    parser.add_argument('--logdir', type=str, default=LOGDIR,
                        help='Directory in which to store the logging information for TensorBoard. '
                             'If the model already exists, it will restore the state and will continue training. '
                             'Cannot use with --logdir_root and --restore_from.')
    parser.add_argument('--logdir_root', type=str, default=None,
                        help='Root directory to place the logging output and generated model. '
                             'These are stored under the dated subdirectory of --logdir_root. '
                             'Cannot use with --logdir.')
    parser.add_argument('--restore_from', type=str, default=None,
                        help='Directory in which to restore the model from. '
                             'This creates the new model under the dated directory in --logdir_root. '
                             'Cannot use with --logdir.')

    CHECKPOINT_EVERY = 20  # how often (in steps) to save a checkpoint
    parser.add_argument('--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
                        help='How many steps to save each checkpoint after. Default: ' + str(CHECKPOINT_EVERY) + '.')

    config = parser.parse_args()  # options that can be overridden from the command line

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enabled = hparams.gc_channels is not None

        # AudioReader slices the wav files into network inputs: the front is
        # padded with receptive_field samples (or taken from the end of the
        # previous piece), and each slice is (receptive_field + sample_size)
        # samples long.
        reader = AudioReader(config.data_dir,
                             coord,
                             sample_rate=hparams.sample_rate,
                             gc_enabled=gc_enabled,
                             receptive_field=WaveNetModel.calculate_receptive_field(
                                 hparams.filter_width, hparams.dilations,
                                 hparams.scalar_input, hparams.initial_filter_width),
                             sample_size=hparams.sample_size,
                             silence_threshold=silence_threshold)

        if gc_enabled:
            audio_batch, gc_id_batch = reader.dequeue(hparams.batch_size)  # (batch_size, ?, 1)
        else:
            audio_batch = reader.dequeue(hparams.batch_size)
            gc_id_batch = None  # used below; the original left this undefined

    # Create network.
    net = WaveNetModel(
        batch_size=hparams.batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,  # True
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        histograms=hparams.histograms,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        train_mode=True)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=hparams.l2_regularization_strength)
    optimizer = optimizer_factory[hparams.optimizer](
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    run_metadata = tf.RunMetadata()

    # Set up session.
    # log_device_placement=False --> automatic CPU/GPU placement.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    # max_to_keep limits how many checkpoints are kept on disk.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)  # load checkpoint
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, hparams.num_steps + 1):
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                loss_value, _ = sess.run([loss, optim],
                                         options=run_options,
                                         run_metadata=run_metadata)
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                loss_value, _ = sess.run([loss, optim])

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Guard against step being None if the loop never ran.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
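# WaveNetModel.calculate_receptive_field is called above but not defined in
# this file. For reference, a sketch matching the call signature used here,
# based on the upstream tensorflow-wavenet implementation (this variant's
# version may differ): each dilated layer widens the field by
# (filter_width - 1) * dilation, and the input layer adds its own filter
# width on top.

def calculate_receptive_field(filter_width, dilations, scalar_input,
                              initial_filter_width):
    receptive_field = (filter_width - 1) * sum(dilations) + 1
    if scalar_input:
        receptive_field += initial_filter_width - 1
    else:
        receptive_field += filter_width - 1
    return receptive_field

# Example: filter_width=2 with dilations [1, 2, 4, ..., 512] repeated 5 times
# gives 5117 samples, which matches the hard-coded receptive_field=5117 used
# by the classifier training scripts above.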