def test_attention():
    from utils import create_ipu_config
    from fastspeech2 import Attention
    from fastspeech2 import FastSpeech2Config as IPUFastSpeech2Config
    from tests.test_utils import check_tensor
    from tests.tf2_fastspeech2 import TFFastSpeechAttention, FastSpeech2Config

    setup_random_seed()
    test_dir = Path(__file__).parent
    with open(Path(test_dir, "test_configs", "test.yaml"), "r") as f:
        conf1 = yaml.load(f, Loader=yaml.Loader)
    with open(Path(test_dir, "test_configs", "test.json"), "r") as f:
        conf2 = json.load(f)

    batch_size = conf2["batch_size"]
    seq_len = conf2["max_seq_length"]
    hidden_size = conf2["encoder_hidden_size"]
    inp = np.random.random((batch_size, seq_len, hidden_size))
    inputs = tf.convert_to_tensor(inp, tf.float32)
    attention_mask = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
                                          tf.float32)
    gconf = FastSpeech2Config(**conf1["fastspeech2_params"])
    iconf = IPUFastSpeech2Config(**conf2)
    cfg = create_ipu_config(
        available_memory_proportion=conf2["available_memory_proportion"],
        num_required_ipus=1,
        partials_type=conf2["partials_type"],
        fp_exceptions=conf2["fp_exceptions"],
        xla_recompute=conf2["xla_recompute"],
        enable_stochastic_rounding=conf2["stochastic_rounding"])

    # Run the reference attention layer on GPU/CPU.
    model_gpu = TFFastSpeechAttention(gconf.encoder_self_attention_params)
    with tf.GradientTape() as tape:
        out_gpu = model_gpu([inputs, attention_mask])
        loss1 = tf.reduce_mean(tf.math.abs(out_gpu))

    # Run the IPU attention layer with the same weights.
    strategy = ipu.ipu_strategy.IPUStrategy()
    with strategy.scope():
        model_ipu = Attention(iconf.encoder_self_attention_params)
        # First call builds the layer so that get_weights()/set_weights() work.
        dummy_output = model_ipu([inputs, attention_mask])
        gw = model_gpu.get_weights()
        model_ipu.set_weights(gw)
        iw = model_ipu.get_weights()
        with tf.GradientTape() as tape2:
            out_ipu = model_ipu([inputs, attention_mask])

    # The restored weights and the outputs of both implementations should match.
    for w1, w2 in zip(gw, iw):
        check_tensor(w1, w2)
    check_tensor(out_gpu[0].numpy(), out_ipu[0].numpy())
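# `check_tensor` comes from tests/test_utils.py and is not shown in this
# listing. A minimal sketch of what such a helper is assumed to do (an
# elementwise closeness assertion); the name and tolerance below are
# hypothetical, not the repo's actual implementation:
def _check_tensor_sketch(actual, expected, eps=1e-6):
    import numpy as np
    np.testing.assert_allclose(np.asarray(actual), np.asarray(expected),
                               rtol=0, atol=eps)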
def test_lr():
    from fastspeech2 import LengthRegulator, FastSpeech2Config
    from utils import create_ipu_config
    from tests.test_utils import check_tensor, check_tensor_relative

    setup_random_seed()
    config = FastSpeech2Config()
    batch_size = 1
    seq_len = config.max_seq_length
    wave_len = config.max_wave_length
    hidden_size = config.encoder_self_attention_params.hidden_size

    # create input data
    encoder_hidden_state = np.random.random((batch_size, hidden_size, seq_len))
    duration_gt = np.random.randint(0, 7, size=(batch_size, seq_len))
    while duration_gt.sum() > wave_len:
        duration_gt = np.random.randint(0, 7, size=(batch_size, seq_len))

    # expand each hidden state according to duration_gt
    out_gt = [
        np.array([
            np.repeat(encoder_hidden_state[i, :, :], duration_gt[i], axis=-1)
        ]) for i in range(batch_size)
    ]
    # pad to wave_len
    out_gt = np.concatenate([
        np.pad(out_gt[i],
               [(0, 0), (0, 0), (0, wave_len - out_gt[i].shape[-1])])
        for i in range(len(out_gt))
    ], axis=0)

    encoder_hidden_state = tf.convert_to_tensor(encoder_hidden_state,
                                                tf.float32)
    duration_gt = tf.convert_to_tensor(duration_gt, tf.float32)

    cfg = create_ipu_config(available_memory_proportion=0.4,
                            num_required_ipus=1)
    strategy = ipu.ipu_strategy.IPUStrategy()
    with strategy.scope():
        lr = LengthRegulator(config)
        out_pd, mask = lr([encoder_hidden_state, duration_gt])
    check_tensor_relative(out_pd.numpy(), out_gt, margin=1e-8)
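# The ground-truth expansion in test_lr() mirrors what a length regulator is
# expected to do: repeat every encoder frame along the time axis by its
# duration, then pad to a fixed length. A minimal, self-contained sketch with
# hypothetical values (independent of the FastSpeech2 classes used above):
def _length_regulator_sketch():
    import numpy as np
    hidden = np.arange(6.0).reshape(2, 3)      # (hidden_size, seq_len)
    durations = np.array([2, 0, 3])            # one integer duration per frame
    expanded = np.repeat(hidden, durations, axis=-1)   # shape (2, 5)
    # pad the time axis up to a fixed wave length of 8
    padded = np.pad(expanded, [(0, 0), (0, 8 - expanded.shape[-1])])
    # frame 0 appears twice, frame 1 is dropped, frame 2 appears three times
    assert padded.shape == (2, 8)
    return padded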
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(labels,
                                                        logits,
                                                        from_logits=True))
    preds = tf.argmax(input=logits, axis=-1)
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
    # Wrap the optimiser so that gradients are reduced across replicas
    optimiser = ipu.cross_replica_optimizer.CrossReplicaOptimizer(optimizer)
    train_op = optimiser.minimize(loss)
    eval_accuracy, eval_op = tf.compat.v1.metrics.accuracy(labels, preds)
    metric_ops = {"accuracy": (eval_accuracy, eval_op)}
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=preds,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metric_ops)


if __name__ == '__main__':
    opts = parse_params(enable_multi_ipu=True)
    print("Loading the data...")
    data = CIFAR10_Data()

    print("Initialize the model")
    test_steps = len(data.y_test) // opts.batch_size
    training_steps = 5 * test_steps
    config = create_ipu_config(training_steps,
                               test_steps,
                               num_replicas=opts.ipus)
    ipu_estimator = ipu.ipu_estimator.IPUEstimator(config=config,
                                                   model_fn=estimator_model)

    print("Training...")
    ipu_estimator.train(partial(data.get_train_datagenerator, opts.batch_size),
                        steps=training_steps * opts.epochs)

    print("Check the result...")
    result = ipu_estimator.evaluate(partial(data.get_test_datagenerator,
                                            opts.batch_size),
                                    steps=test_steps)
    print("Validation accuracy: {}%".format(100.0 * result['accuracy']))
    print("Validation loss: {}".format(result['loss']))
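# `data.get_train_datagenerator` / `data.get_test_datagenerator` are defined in
# the CIFAR10_Data helper, which is not shown here. IPUEstimator.train() and
# .evaluate() expect an input_fn returning a tf.data.Dataset of
# (features, labels) batches; a minimal sketch under that assumption, using
# random stand-in data rather than real CIFAR-10:
def _example_input_fn(batch_size=32):
    import numpy as np
    import tensorflow as tf
    images = np.random.rand(256, 32, 32, 3).astype(np.float32)
    labels = np.random.randint(0, 10, size=(256,)).astype(np.int32)
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    # Repeat indefinitely and drop the remainder so every batch has a static
    # shape, which IPU compilation requires.
    return dataset.repeat().batch(batch_size, drop_remainder=True)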
        pipeline_schedule=None)
    # (end of the pipelined estimator_model definition)


if __name__ == '__main__':
    opts = parse_params(enable_multi_ipu=True, enable_pipelining=True)
    print("Loading the data...")
    data = CIFAR10_Data()

    print("Initialize the model")
    test_steps = len(
        data.y_test) // (opts.batch_size * opts.gradient_accumulation_count)
    training_steps = 5 * test_steps
    config = create_ipu_config(training_steps,
                               test_steps,
                               num_shards=opts.ipus)
    ipu_estimator = ipu.ipu_pipeline_estimator.IPUPipelineEstimator(
        config=config, model_fn=partial(estimator_model, opts))

    ipu_estimator.train(partial(data.get_train_datagenerator, opts.batch_size),
                        steps=training_steps * opts.epochs)

    print("Check the result...")
    result = ipu_estimator.evaluate(partial(data.get_test_datagenerator,
                                            opts.batch_size),
                                    steps=test_steps)
    print("Validation accuracy: {}%".format(100.0 * result['accuracy']))
    print("Validation loss: {}".format(result['loss']))
def test_fastspeech2():
    tf.keras.backend.clear_session()
    setup_random_seed()
    input_ids = tf.convert_to_tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]],
                                     tf.int32)
    speaker_ids = tf.convert_to_tensor([0], tf.int32)
    duration_gts = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
                                        tf.int32)
    f0_gts = tf.convert_to_tensor([[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]],
                                  tf.float32)
    energy_gts = tf.convert_to_tensor(
        [[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]], tf.float32)
    mel_gts = tf.convert_to_tensor(np.random.random(size=(1, 10, 80)),
                                   tf.float32)
    inputs = [input_ids, speaker_ids, duration_gts, f0_gts, energy_gts, mel_gts]

    test_dir = Path(__file__).parent
    with open(Path(test_dir, "test_configs", "test.yaml"), "r") as f:
        gconf = yaml.load(f, Loader=yaml.Loader)
    with open(Path(test_dir, "test_configs", "test.json"), "r") as f:
        iconf = json.load(f)
    cfg = create_ipu_config(
        available_memory_proportion=iconf["available_memory_proportion"],
        num_required_ipus=1,
        partials_type=iconf["partials_type"],
        fp_exceptions=iconf["fp_exceptions"],
        xla_recompute=iconf["xla_recompute"],
        enable_stochastic_rounding=iconf["stochastic_rounding"])

    base_lr = 0.001
    optimizer1 = tf.keras.optimizers.SGD(base_lr)
    optimizer2 = tf.keras.optimizers.SGD(base_lr)

    # run fwd of gpu model to get weights/outputs/loss
    model_gpu = create_gpu_model(gconf["fastspeech2_params"])
    with tf.GradientTape() as tape:
        out_gpu = model_gpu(input_ids=input_ids,
                            speaker_ids=speaker_ids,
                            duration_gts=duration_gts,
                            f0_gts=f0_gts,
                            energy_gts=energy_gts)
        loss_gpu = calculate_gpu_loss(out_gpu, inputs)
    gnames = [w.name for w in model_gpu.trainable_weights]
    model_gpu.save_weights(Path(test_dir, "full_model_weights.h5"))
    grad_gpu = tape.gradient(loss_gpu, model_gpu.trainable_weights)
    grad_gpu_dict = dict(zip(gnames, grad_gpu))
    weight_gpu_dict = dict(zip(gnames, model_gpu.trainable_weights))
    reload_weights = parse_h5(Path(test_dir, "full_model_weights.h5"))
    i2g_mapper = get_h5_mapper(iconf)

    strategy = ipu.ipu_strategy.IPUStrategy()
    with strategy.scope():
        model_ipu = create_ipu_model(iconf)
        # Build the IPU model once so its weights exist before restoring.
        _ = strategy.run(inference_step,
                         args=[(input_ids, duration_gts, f0_gts, energy_gts),
                               model_ipu])
        wi_names = [w.name for w in model_ipu.weights]
        inames = [w.name for w in model_ipu.trainable_weights]
        weights_to_restore = []
        for w in model_ipu.weights:
            weights_to_restore.append(reload_weights[i2g_mapper[w.name]])
        assert len(weights_to_restore) == len(reload_weights) == len(wi_names), \
            f"Weights loading failed. Loaded {len(weights_to_restore)}/{len(wi_names)}."
        model_ipu.set_weights(weights_to_restore)
        out_ipu, loss_ipu, grad_ipu = strategy.run(
            training_step,
            args=[(input_ids, duration_gts, f0_gts, energy_gts, mel_gts),
                  model_ipu])
        grad_ipu_dict = dict(zip(inames, grad_ipu))
        weight_ipu_dict = dict(zip(inames, model_ipu.trainable_weights))
        optimizer1.apply_gradients(zip(grad_gpu, model_gpu.trainable_weights))
        optimizer2.apply_gradients(zip(grad_ipu, model_ipu.trainable_weights))

    # Compare the outputs
    for og, oi in zip(out_gpu, out_ipu):
        gt = og.numpy()
        it = oi.numpy()
        print(f"Before: {gt.shape}, {it.shape}")
        if len(it.shape) == 3:
            # continue
            if it.shape[1] != gt.shape[1]:
                it = it[:, :gt.shape[1], :]
        print(f"After: {gt.shape}, {it.shape}")
        print(f"Err: {getTensorRelativError(gt, it)}")
        check_tensor_relative(gt, it, margin=5e-5)

    # compare loss
    check_tensor_relative(loss_gpu, loss_ipu, margin=5e-6)

    # compare gradients
    for k, v in i2g_mapper.items():
        # skip non-trainable weights
        if 'position_embeddings' in k:
            continue
        grad_i = grad_ipu_dict[k]
        grad_g = grad_gpu_dict[v]
        if "mel_before/bias" in k:
            print(f"{grad_g}, {grad_i}")
            continue
        print(f"[Gradients]{k}({grad_g.shape}) <--> {v}({grad_i.shape})")
        if isinstance(grad_g, tf.IndexedSlices) and isinstance(grad_i, tf.IndexedSlices):
            check_tensor_relative(grad_g.values, grad_i.values, margin=5e-5)
            print(f"Err: {getTensorRelativError(grad_g.values, grad_i.values)}")
        else:
            check_tensor_relative(grad_g.numpy(), grad_i.numpy(), margin=5e-5)
            print(
                f"Err: {getTensorRelativError(grad_g.numpy(), grad_i.numpy())}")

    # Compare the weights after gradient update
    for k, v in i2g_mapper.items():
        # skip non-trainable weights
        if 'position_embeddings' in k:
            continue
        wi = weight_ipu_dict[k]
        wg = weight_gpu_dict[v]
        if "mel_before/bias" in k:
            print(f"{wg.numpy()}, {wi.numpy()}")
            continue
        print(f"[Weights]{wg.name}({wg.shape}) <--> {wi.name}({wi.shape})")
        check_tensor_relative(wg.numpy(), wi.numpy(), margin=1e-5)
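# `check_tensor_relative` and `getTensorRelativError` come from
# tests/test_utils.py and are not shown in this listing. A common way to
# compute the relative error they report is sketched below (hypothetical
# helper, for illustration only; the repo's real helpers may differ):
def _relative_error_sketch(a, b, eps=1e-12):
    import numpy as np
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    return np.max(np.abs(a - b) / (np.abs(b) + eps))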
    eval_accuracy, eval_op = tf.compat.v1.metrics.accuracy(labels, preds)
    metric_ops = {"accuracy": (eval_accuracy, eval_op)}
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=metric_ops,
                                      train_op=train_op)


if __name__ == '__main__':
    opts = parse_params()
    print("Loading the data...")
    data = CIFAR10_Data()

    print("Initialize the model")
    test_steps = len(data.y_test) // opts.batch_size
    training_steps = 5 * test_steps
    config = create_ipu_config(training_steps, test_steps)
    ipu_estimator = ipu.ipu_estimator.IPUEstimator(config=config,
                                                   model_fn=estimator_model)

    print("Training...")
    ipu_estimator.train(partial(data.get_train_datagenerator, opts.batch_size),
                        steps=training_steps * opts.epochs)

    print("Check the result...")
    result = ipu_estimator.evaluate(partial(data.get_test_datagenerator,
                                            opts.batch_size),
                                    steps=test_steps)
    print("Validation accuracy: {}%".format(100.0 * result['accuracy']))
    print("Validation loss: {}".format(result['loss']))
def run_model(opts, use_pipeline_model=True):
    wandb = init_wandb(opts)
    set_randome_seed(int(opts["seed"]))
    logger = setup_logger()
    data_type = tf.float16 if opts["precision"] == "16" else tf.float32
    num_ipus_per_replica = 2
    num_ipus = num_ipus_per_replica * int(opts["replicas"])
    assert num_ipus & (num_ipus - 1) == 0, \
        f"You're trying to use {num_ipus} IPUs, but the number of IPUs must be a power of 2."
    logger.info(f"Options: {opts}")

    # Set up the IPU system.
    cfg = create_ipu_config(
        available_memory_proportion=opts["available_memory_proportion"],
        num_required_ipus=num_ipus,
        partials_type=opts["partials_type"],
        fp_exceptions=opts["fp_exceptions"],
        xla_recompute=opts["xla_recompute"],
        enable_stochastic_rounding=opts["stochastic_rounding"])

    train_datasets = LJSpeechCharLevelDataset(opts, is_train=True)
    val_datasets = LJSpeechCharLevelDataset(opts, is_train=False)

    pipeline_schedule = ipu.pipelining_ops.PipelineSchedule.Grouped
    if opts["pipeline_schedule"] == "Interleaved":
        pipeline_schedule = ipu.pipelining_ops.PipelineSchedule.Interleaved

    optim = get_optimizer(opts)
    loss_dict = setup_loss_dict()

    strategy = ipu.ipu_strategy.IPUStrategy()
    with strategy.scope():
        if num_ipus == 1:
            fastspeech2 = tf.keras.Model(*build_model(opts, training=True))
        else:
            # For TF2.4
            fastspeech2 = keras.Model(
                *build_pipeline_model(opts, training=True))
            fastspeech2.set_pipelining_options(
                gradient_accumulation_steps_per_replica=int(
                    opts["gradient_accumulation_count"]),
                recomputation_mode=ipu.ops.pipelining_ops.RecomputationMode.Auto,
                pipeline_schedule=pipeline_schedule,
                offload_weight_update_variables=opts["variable_offloading"],
                device_mapping=[0, 1],
            )
            fastspeech2.print_pipeline_stage_assignment_summary()

        fastspeech2.compile(optimizer=optim,
                            loss=loss_dict,
                            steps_per_execution=opts["steps_per_epoch"])
        fastspeech2.summary()

        if opts["train"]:
            train_start_time = time.time()
            history = train(fastspeech2,
                            train_datasets=train_datasets,
                            opts=opts,
                            wandb=wandb)
            training_time = time.time() - train_start_time
            logger.info(f"[Duration: {training_time:.2f}s] Training finished.")

        if opts["eval"]:
            logger.info("Start to evaluate...")
            eval_res = evaluation(fastspeech2,
                                  valid_datasets=val_datasets,
                                  opts=opts,
                                  ckpt_path=opts["init_checkpoint"],
                                  wandb=wandb)
def test_intermediate_layer():
    from utils import create_ipu_config
    from fastspeech2 import Intermediate
    from fastspeech2 import FastSpeech2Config as IPUFastSpeech2Config
    from tests.test_utils import check_tensor
    from tests.tf2_fastspeech2 import TFFastSpeechIntermediate, FastSpeech2Config

    setup_random_seed()
    test_dir = Path(__file__).parent
    with open(Path(test_dir, "test_configs", "test.yaml"), "r") as f:
        conf1 = yaml.load(f, Loader=yaml.Loader)
    with open(Path(test_dir, "test_configs", "test.json"), "r") as f:
        conf2 = json.load(f)

    batch_size = conf2["batch_size"]
    seq_len = conf2["max_seq_length"]
    hidden_size = conf2["encoder_hidden_size"]
    inp = np.random.random((batch_size, seq_len, hidden_size))
    inputs = tf.convert_to_tensor(inp, tf.float32)
    attention_mask = tf.convert_to_tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
                                          tf.float32)
    gconf = FastSpeech2Config(**conf1["fastspeech2_params"])
    iconf = IPUFastSpeech2Config(**conf2)
    cfg = create_ipu_config(
        available_memory_proportion=conf2["available_memory_proportion"],
        num_required_ipus=1,
        partials_type=conf2["partials_type"],
        fp_exceptions=conf2["fp_exceptions"],
        xla_recompute=conf2["xla_recompute"],
        enable_stochastic_rounding=conf2["stochastic_rounding"])

    base_lr = 0.01
    optimizer1 = tf.keras.optimizers.SGD(base_lr)
    optimizer2 = tf.keras.optimizers.SGD(base_lr)

    model_gpu = TFFastSpeechIntermediate(gconf.encoder_self_attention_params)
    with tf.GradientTape() as tape:
        out_gpu = model_gpu([inputs, attention_mask])
        loss1 = tf.reduce_mean(tf.math.abs(out_gpu))

    strategy = ipu.ipu_strategy.IPUStrategy()
    with strategy.scope():
        model_ipu = Intermediate(iconf.encoder_self_attention_params)
        # first run to make model_ipu.get_weights() work
        dummy_output = model_ipu([inputs, attention_mask])
        model_ipu.set_weights(model_gpu.get_weights())
        with tf.GradientTape() as tape2:
            out_ipu = model_ipu([inputs, attention_mask])
            loss2 = tf.reduce_mean(tf.math.abs(out_ipu))
        grad2 = tape2.gradient(loss2, model_ipu.trainable_weights)

    grad1 = tape.gradient(loss1, model_gpu.trainable_weights)
    optimizer1.apply_gradients(zip(grad1, model_gpu.trainable_weights))
    optimizer2.apply_gradients(zip(grad2, model_ipu.trainable_weights))

    # Check the weights
    for w1, w2 in zip(model_gpu.weights, model_ipu.weights):
        check_tensor(w1.numpy(), w2.numpy())
    # Check the outputs
    check_tensor(out_gpu[0].numpy(), out_ipu[0].numpy())
    # Check the gradients
    for g1, g2 in zip(grad1, grad2):
        check_tensor(g1.numpy(), g2.numpy())