def train():
    category = pd.read_pickle('category.pkl')
    dilations = [2**i for i in range(10)] * 5
    receptive_field = calculate_receptive_field(dilations, 2, 32)
    wavenet = WaveNet(dilations=dilations, use_glob_cond=True,
                      glob_cls_num=len(category), glob_embed_dim=5)
    optimizer = tf.keras.optimizers.Adam(1e-3)
    # load dataset
    trainset = (tf.data.TFRecordDataset(join('dataset', 'trainset.tfrecord'))
                .repeat(-1)
                .map(parse_function_generator())
                .batch(batch_size)
                .prefetch(tf.data.experimental.AUTOTUNE))
    # restore from existing checkpoint
    if not exists('checkpoints'):
        mkdir('checkpoints')
    checkpoint = tf.train.Checkpoint(model=wavenet, optimizer=optimizer)
    checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
    # create log
    log = tf.summary.create_file_writer('checkpoints')
    # train model
    avg_loss = tf.keras.metrics.Mean(name='loss', dtype=tf.float32)
    for audios, person_id in trainset:
        # drop the last sample so the model predicts one step ahead
        inputs = audios[:, :-1, :]   # inputs.shape = (batch, receptive_field + audio_length - 1, 1)
        target = audios[:, receptive_field:, :]  # target.shape = (batch, audio_length, 1)
        with tf.GradientTape() as tape:
            # outputs.shape = (batch, audio_length, num_classes): a categorical
            # distribution over quantization levels, as SparseCategoricalCrossentropy expects
            outputs = wavenet([inputs, person_id])
            loss = tf.keras.losses.SparseCategoricalCrossentropy()(target, outputs)
        avg_loss.update_state(loss)
        # write log every 100 steps
        if tf.equal(optimizer.iterations % 100, 0):
            with log.as_default():
                tf.summary.scalar('loss', avg_loss.result(), step=optimizer.iterations)
            print('Step #%d Loss: %.6f' % (optimizer.iterations.numpy(), avg_loss.result().numpy()))
            if avg_loss.result() < 0.01:
                break
            avg_loss.reset_states()
        grads = tape.gradient(loss, wavenet.trainable_variables)
        optimizer.apply_gradients(zip(grads, wavenet.trainable_variables))
    # save the network structure with weights
    if not exists('model'):
        mkdir('model')
    wavenet.save(join('model', 'wavenet.h5'))
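# The input/target slicing above depends on the receptive field of the stacked
# dilated convolutions. calculate_receptive_field is defined elsewhere in this
# project; below is a minimal sketch of what it presumably computes, assuming
# the usual WaveNet convention (each dilated layer of kernel size filter_width
# and dilation d adds (filter_width - 1) * d samples, plus the width of the
# initial causal convolution) and a hypothetical signature matching the call
# calculate_receptive_field(dilations, 2, 32) above:

def calculate_receptive_field(dilations, filter_width, initial_filter_width):
    # contribution of the dilated stack, plus the current sample itself
    receptive_field = (filter_width - 1) * sum(dilations) + 1
    # the initial causal convolution widens the field by its own width
    receptive_field += initial_filter_width - 1
    return receptive_field

# For dilations = [1, 2, ..., 512] * 5, filter_width = 2 and
# initial_filter_width = 32 this gives 5115 + 1 + 31 = 5147 samples.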
def train(num_gpus, rank, group_name, output_directory, tensorboard_directory,
          ckpt_iter, n_iters, iters_per_ckpt, iters_per_logging,
          learning_rate, batch_size_per_gpu):
    """
    Train the WaveNet model on the LJSpeech dataset

    Parameters:
    num_gpus, rank, group_name:     parameters for distributed training
    output_directory (str):         save model checkpoints to this path
    tensorboard_directory (str):    save tensorboard events to this path
    ckpt_iter (int or 'max'):       the pretrained checkpoint to be loaded;
                                    automatically selects the maximum iteration if 'max' is selected
    n_iters (int):                  number of iterations to train, default is 1M
    iters_per_ckpt (int):           number of iterations between checkpoints, default is 10k;
                                    for models with residual_channel=64 this number can be larger
    iters_per_logging (int):        number of iterations between training logs, default is 100
    learning_rate (float):          learning rate
    batch_size_per_gpu (int):       batch size per gpu, default is 2, so total batch size is 16 with 8 gpus
    """
    # generate experiment (local) path
    local_path = "ch{}_T{}_betaT{}".format(wavenet_config["res_channels"],
                                           diffusion_config["T"],
                                           diffusion_config["beta_T"])

    # create tensorboard logger (only on the first gpu)
    if rank == 0:
        tb = SummaryWriter(os.path.join('exp', local_path, tensorboard_directory))

    # distributed running initialization
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)

    # get shared output_directory ready
    output_directory = os.path.join('exp', local_path, output_directory)
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory, flush=True)

    # map diffusion hyperparameters to gpu
    for key in diffusion_hyperparams:
        if key != "T":   # "T" is a scalar step count, not a tensor
            diffusion_hyperparams[key] = diffusion_hyperparams[key].cuda()

    # load training data
    trainloader = load_LJSpeech(trainset_config=trainset_config,
                                batch_size=batch_size_per_gpu,
                                num_gpus=num_gpus)
    print('Data loaded')

    # predefine model
    net = WaveNet(**wavenet_config).cuda()
    print_size(net)

    # apply gradient all reduce
    if num_gpus > 1:
        net = apply_gradient_allreduce(net)

    # define optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

    # load checkpoint
    if ckpt_iter == 'max':
        ckpt_iter = find_max_epoch(output_directory)
    if ckpt_iter >= 0:
        try:
            # load checkpoint file
            model_path = os.path.join(output_directory, '{}.pkl'.format(ckpt_iter))
            checkpoint = torch.load(model_path, map_location='cpu')
            # feed model dict and optimizer state
            net.load_state_dict(checkpoint['model_state_dict'])
            if 'optimizer_state_dict' in checkpoint:
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            print('Successfully loaded model at iteration {}'.format(ckpt_iter))
        except Exception:
            ckpt_iter = -1
            print('No valid checkpoint model found, start training from initialization.')
    else:
        ckpt_iter = -1
        print('No valid checkpoint model found, start training from initialization.')

    # training
    n_iter = ckpt_iter + 1
    while n_iter < n_iters + 1:
        for mel_spectrogram, audio in trainloader:
            # load audio and mel spectrogram
            mel_spectrogram = mel_spectrogram.cuda()
            audio = audio.unsqueeze(1).cuda()

            # back-propagation
            optimizer.zero_grad()
            X = (mel_spectrogram, audio)
            loss = training_loss(net, nn.MSELoss(), X, diffusion_hyperparams)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            # output to log (only on the first gpu)
            if n_iter % iters_per_logging == 0 and rank == 0:
                # save training loss to tensorboard
                print("iteration: {} \treduced loss: {} \tloss: {}".format(n_iter, reduced_loss, loss.item()))
                tb.add_scalar("Log-Train-Loss", torch.log(loss).item(), n_iter)
                tb.add_scalar("Log-Train-Reduced-Loss", np.log(reduced_loss), n_iter)

            # save checkpoint
            if n_iter > 0 and n_iter % iters_per_ckpt == 0 and rank == 0:
                checkpoint_name = '{}.pkl'.format(n_iter)
                torch.save({'model_state_dict': net.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()},
                           os.path.join(output_directory, checkpoint_name))
                print('model at iteration %s is saved' % n_iter)

            n_iter += 1

    # close TensorBoard
    if rank == 0:
        tb.close()
# make directory of results and snapshot the code used for this run
result = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
os.mkdir(result)
shutil.copy(__file__, os.path.join(result, __file__))
shutil.copy('utils.py', os.path.join(result, 'utils.py'))
shutil.copy('params.py', os.path.join(result, 'params.py'))
shutil.copy('generate.py', os.path.join(result, 'generate.py'))
shutil.copy('net.py', os.path.join(result, 'net.py'))
shutil.copytree('WaveNet', os.path.join(result, 'WaveNet'))

# Model
encoder = UpsampleNet(params.upsample_factors)
decoder = WaveNet(params.n_loop, params.n_layer, params.filter_size,
                  params.residual_channels, params.dilated_channels,
                  params.skip_channels, params.output_dim, params.quantize,
                  params.log_scale_min, params.condition_dim,
                  params.dropout_zero_rate)

# choose the loss to match the output distribution
if params.distribution_type == 'gaussian':
    loss_fun = decoder.calculate_gaussian_loss
    acc_fun = None
elif params.distribution_type == 'logistic':
    loss_fun = decoder.calculate_logistic_loss
    acc_fun = None
elif params.distribution_type == 'softmax':
    loss_fun = chainer.functions.softmax_cross_entropy
    acc_fun = chainer.functions.accuracy

model = EncoderDecoderModel(encoder, decoder, loss_fun, acc_fun)

# Optimizer
maes = []
mases = []
hitss = []
for r in random_seeds:
    train_a = np.expand_dims(train[:, 0], 1)
    test_a = np.expand_dims(test[:, 0], 1)
    MIMO = (s == 'MIMO')
    wavenet = WaveNet(forecast_horizon=1,
                      log_difference=True,
                      initial_filter_width=48,
                      filter_width=2,
                      residual_channels=64,
                      dilation_channels=64,
                      skip_channels=64,
                      use_biases=True,
                      use_batch_norm=False,
                      dilations=[1, 2, 4, 8, 16, 32],
                      random_seed=r,
                      MIMO=MIMO)
    if s == 'auto_regressive':
        mae, mase, hits = wavenet.train_and_predict(
            train_a, test_a, batch_size=128, max_epochs=10,
            plot=False, train_fraction=0.8)
    else:
        mae, mase, hits = wavenet.train_and_predict(
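# train_and_predict returns MAE, MASE and a directional hit rate. MASE (mean
# absolute scaled error) is the test MAE divided by the in-sample MAE of the
# naive forecast "predict the previous observation", so values below 1 beat
# the naive baseline. A minimal sketch of that metric under the standard
# definition (the helper name and signature are illustrative, not from this
# codebase):

import numpy as np

def mase(y_true, y_pred, y_train):
    mae_model = np.mean(np.abs(y_true - y_pred))   # model error on the test set
    mae_naive = np.mean(np.abs(np.diff(y_train)))  # naive one-step error in-sample
    return mae_model / mae_naive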
inputs = Preprocess(params.sr, params.n_fft, params.hop_length, params.n_mels,
                    params.top_db, None, params.categorical_output_dim)(path)
_, condition, _ = inputs
if params.categorical_output_dim is False or params.categorical_output_dim is None:
    input_dim = 1
else:
    input_dim = params.categorical_output_dim
x = numpy.zeros([n, input_dim, 1, 1], dtype=numpy.float32)
condition = numpy.expand_dims(condition, axis=0)

# make model
encoder = UpsampleNet(params.upsample_factors)
decoder = WaveNet(params.n_loop, params.n_layer, params.filter_size,
                  params.residual_channels, params.dilated_channels,
                  params.skip_channels, params.output_dim, params.quantize,
                  params.log_scale_min, params.condition_dim,
                  params.dropout_zero_rate)

# load trained parameters from the training snapshot
chainer.serializers.load_npz(args.model, encoder, 'updater/model:main/encoder/')
chainer.serializers.load_npz(args.model, decoder, 'updater/model:main/decoder/')

if args.gpu >= 0:
    use_gpu = True
    chainer.cuda.get_device_from_id(args.gpu).use()
else:
    use_gpu = False
def generate(output_directory, tensorboard_directory, num_samples, ckpt_path, ckpt_iter):
    """
    Generate audio based on ground truth mel spectrogram

    Parameters:
    output_directory (str):         save generated speeches to this path
    tensorboard_directory (str):    save tensorboard events to this path
    num_samples (int):              number of samples to generate, default is 4
    ckpt_path (str):                checkpoint path
    ckpt_iter (int or 'max'):       the pretrained checkpoint to be loaded;
                                    automatically selects the maximum iteration if 'max' is selected
    """
    # generate experiment (local) path
    local_path = "ch{}_T{}_betaT{}".format(wavenet_config["res_channels"],
                                           diffusion_config["T"],
                                           diffusion_config["beta_T"])

    # get shared output_directory ready
    output_directory = os.path.join('exp', local_path, output_directory)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)
    print("output directory", output_directory, flush=True)

    # map diffusion hyperparameters to gpu
    for key in diffusion_hyperparams:
        if key != "T":   # "T" is a scalar step count, not a tensor
            diffusion_hyperparams[key] = diffusion_hyperparams[key].cuda()

    # predefine model
    net = WaveNet(**wavenet_config).cuda()
    print_size(net)

    # load checkpoint
    ckpt_path = os.path.join('exp', local_path, ckpt_path)
    if ckpt_iter == 'max':
        ckpt_iter = find_max_epoch(ckpt_path)
    model_path = os.path.join(ckpt_path, '{}.pkl'.format(ckpt_iter))
    try:
        checkpoint = torch.load(model_path, map_location='cpu')
        net.load_state_dict(checkpoint['model_state_dict'])
        print('Successfully loaded model at iteration {}'.format(ckpt_iter))
    except Exception:
        raise Exception('No valid model found')

    # predefine audio shape
    audio_length = trainset_config["segment_length"]  # 16000
    print('begin generating audio of length %s' % audio_length)

    # inference, timed with CUDA events
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    generated_audio = sampling(net, (num_samples, 1, audio_length), diffusion_hyperparams)
    end.record()
    torch.cuda.synchronize()
    print('generated {} utterances of random_digit at iteration {} in {} seconds'.format(
        num_samples, ckpt_iter, int(start.elapsed_time(end) / 1000)))

    # save audio to .wav and to tensorboard
    tb = SummaryWriter(os.path.join('exp', local_path, tensorboard_directory))
    for i in range(num_samples):
        outfile = '{}_{}_{}k_{}.wav'.format(wavenet_config["res_channels"],
                                            diffusion_config["T"],
                                            ckpt_iter // 1000, i)
        wavwrite(os.path.join(output_directory, outfile),
                 trainset_config["sampling_rate"],
                 generated_audio[i].squeeze().cpu().numpy())
        tb.add_audio(tag=outfile, snd_tensor=generated_audio[i],
                     sample_rate=trainset_config["sampling_rate"])
    tb.close()

    print('saved generated samples at iteration %s' % ckpt_iter)
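# sampling implements the reverse diffusion process: start from Gaussian noise
# and iteratively denoise for T steps using the network's noise prediction.
# Below is a minimal sketch of the ancestral-sampling loop it presumably
# follows; the "Alpha", "Alpha_bar" and "Sigma" keys are assumed entries of
# diffusion_hyperparams in the usual DDPM parameterization, and any mel
# conditioning is omitted here for brevity:

def sampling(net, size, diffusion_hyperparams):
    T = diffusion_hyperparams["T"]
    Alpha = diffusion_hyperparams["Alpha"]
    Alpha_bar = diffusion_hyperparams["Alpha_bar"]
    Sigma = diffusion_hyperparams["Sigma"]
    x = torch.normal(0, 1, size=size).cuda()   # start from pure noise x_T
    with torch.no_grad():
        for t in range(T - 1, -1, -1):
            ts = (t * torch.ones((size[0], 1))).cuda()  # current step, one per sample
            eps = net((x, ts))                          # predicted noise
            # posterior mean: remove the predicted noise component
            x = (x - (1 - Alpha[t]) / torch.sqrt(1 - Alpha_bar[t]) * eps) / torch.sqrt(Alpha[t])
            if t > 0:
                # add the posterior variance term except at the final step
                x = x + Sigma[t] * torch.normal(0, 1, size=size).cuda()
    return x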
# make directory of results and snapshot the code used for this run
result = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
os.mkdir(result)
shutil.copy(__file__, os.path.join(result, __file__))
shutil.copy('utils.py', os.path.join(result, 'utils.py'))
shutil.copy('params.py', os.path.join(result, 'params.py'))
shutil.copy('generate.py', os.path.join(result, 'generate.py'))
shutil.copy('net.py', os.path.join(result, 'net.py'))
shutil.copytree('WaveNet', os.path.join(result, 'WaveNet'))

# Model
encoder = UpsampleNet(params.channels, params.upsample_factors)
wavenet = WaveNet(params.n_loop, params.n_layer, params.filter_size,
                  params.input_dim, params.residual_channels,
                  params.dilated_channels, params.skip_channels,
                  params.quantize, params.use_logistic, params.n_mixture,
                  params.log_scale_min, params.condition_dim,
                  params.dropout_zero_rate)

# wrap the decoder in an exponential moving average of its weights if enabled
if params.ema_mu < 1:
    decoder = ExponentialMovingAverage(wavenet, params.ema_mu)
else:
    decoder = wavenet

if params.use_logistic:
    loss_fun = wavenet.calculate_logistic_loss
    acc_fun = None
else:
    loss_fun = chainer.functions.softmax_cross_entropy
    acc_fun = chainer.functions.accuracy

model = EncoderDecoderModel(encoder, decoder, loss_fun, acc_fun)
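# With ema_mu < 1, the decoder used for generation tracks an exponential
# moving average of the training weights, which usually samples more smoothly
# than the raw weights. A framework-agnostic sketch of the update rule such a
# wrapper applies after each optimizer step (the dict-of-arrays interface is
# illustrative, not this repo's ExponentialMovingAverage class):

import numpy as np

class EMASketch:
    def __init__(self, params, mu=0.9999):
        self.mu = mu
        # keep a decoupled shadow copy of every parameter array
        self.shadow = {name: np.array(p, copy=True) for name, p in params.items()}

    def update(self, params):
        # shadow <- mu * shadow + (1 - mu) * param, called after each step
        for name, p in params.items():
            self.shadow[name] = self.mu * self.shadow[name] + (1 - self.mu) * np.asarray(p)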
# tensorboard --host 127.0.0.1 --logdir=D:\Projects\AI\Summary\Wavenet\

gen = MarketDataGenerator(train_ratio, seq_length, output_count, batch_size,
                          ["EURGBP", "EURUSD", "GBPUSD"],
                          datetime(2019, 4, 20), 90000)
# gen = FXTMEncoded(train_ratio, seq_length, output_count, batch_size)
trainX = gen.trainX
trainY = gen.trainY
validX = gen.validX
validY = gen.validY
testX = gen.testX
testY = gen.testY

# test = WaveNetMK0(n_filter, n_fc, n_layer)
test = WaveNet(n_filter, n_pp, n_fc, n_layer)
test.compile(gen.input_dim, gen.output_dim,
             optimizer=Adam(lr=0.001, decay=0.01),
             mode=0, default_loss='mse')
test.model_train.fit([trainX, trainY], trainY,
                     batch_size=batch_size, epochs=epoch,
                     callbacks=[model_saver],
                     validation_data=([validX, validY], validY))
test.model_train.load_weights(mfile)

# input_dim = [16, 3, 1]
# encoder_layers = [400]
Remember the "p" in ARIMA(p, d, q) is how far back in the past the algorithm looks to predict the future. The "p" for WaveNet is equal to filter_width*sum(dilation_factors) + initial_filter_width We want to make this on the order of 200 to capture the weekly dependencies. Also, make sure dilation_factors start from 1 and go up in powers of 2. random_seed: random seed from which weights are drawn, ensures a consistent method of getting the same results each time MIMO: (multi-input multi-output) If you pass multiple time series into the network and MIMO is False, the network will adjust it's topoplogy to condition on the time series to predict the first time series in the list If you pass multiple time series into the network and MIMO is True, the network will adjust it's topoplogy to predict all time_series simultaneously ''' wavenet = WaveNet(forecast_horizon=1, log_difference=False, initial_filter_width=2, filter_width=2, residual_channels=32, dilation_channels=32, skip_channels=256, use_biases=True, use_batch_norm=True, dilations=[1, 2, 4, 8, 16, 32, 64, 128], random_seed=1234, MIMO=False) ''' The first two elements are your train and test set batch_size: how many sequences are passed to the GPU at once. The bigger batch_size is, the faster training will go. However, too big and you will run out of memory max_epochs: maximum number of epochs to train, 10-15 should do plot: leave False train_fraction: what portion of the training set to use for training, the rest will be used for validation ''' wavenet.train_and_predict(center_node_train, center_node_test, batch_size=128,