import math
import os
from pathlib import Path

import numpy as np
import onnx
import popart
import wandb

# NOTE: repo-internal names used below (Bert, BertConfig,
# load_initializers_from_tf, create_session, timed_run_steps, the bert_*
# helpers, packed_bert_utils, logger, ...) are assumed to be imported from
# the surrounding application code.


def load_model_from_tf(
    file_path,
    is_checkpoint,
    config,
    indices,
    positions,
    segments,
    task,
    builder=None,
):
    """
    Loads weights, etc. from TensorFlow files into the Graphcore IPU BERT
    implementation.

    Can read either checkpoint files or frozen graphs, according to the
    `is_checkpoint` flag, passed in as the second argument.

    Requires input tensors to be provided to initialise the graph build.

    The user can optionally pass in a builder object (e.g. for compatibility
    with an older ONNX version). If not provided, a default builder is
    created.
    """
    # Avoid a mutable default argument: `builder=popart.Builder()` in the
    # signature would be evaluated once at import time and shared between
    # calls, so the default is created here instead.
    if builder is None:
        builder = popart.Builder()

    initializers = load_initializers_from_tf(file_path, is_checkpoint, config,
                                             task)
    popart_model = Bert(config, builder=builder, initializers=initializers)
    output_tensor = popart_model.build_graph(indices, positions, segments)
    proto = builder.getModelProto()
    return popart_model, proto, output_tensor
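# Usage sketch (an illustrative addition, not from the original source):
# shows how the input tensors expected by `load_model_from_tf` might be
# constructed. The checkpoint path, the `task` value and the use of
# `config.batch_size` for sizing are assumptions; real values come from the
# calling application.
def _example_load_from_tf_checkpoint(config, ckpt_path="bert_model.ckpt"):
    builder = popart.Builder()
    sequence_info = popart.TensorInfo(
        "UINT32", [config.batch_size * config.sequence_length])
    indices = builder.addInputTensor(sequence_info)
    positions = builder.addInputTensor(sequence_info)
    segments = builder.addInputTensor(sequence_info)
    # `is_checkpoint=True` selects the checkpoint-reading path rather than
    # the frozen-graph path.
    return load_model_from_tf(ckpt_path, True, config, indices, positions,
                              segments, task="PRETRAINING", builder=builder)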
def get_model_proto(config, initializers=None):
    model = Bert(config, pipeline=True, initializers=initializers)

    sequence_info = popart.TensorInfo(
        "UINT32", [config.micro_batch_size * config.sequence_length])
    indices = model.builder.addInputTensor(sequence_info)
    positions = model.builder.addInputTensor(sequence_info)
    segments = model.builder.addInputTensor(sequence_info)

    # Build the graph; the returned output tensors are not needed here.
    model.build_graph(indices, positions, segments)

    return onnx.load_model_from_string(model.builder.getModelProto())
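# Illustrative follow-on (an assumption, not original code): once the ONNX
# ModelProto has been materialised it can be saved with the standard `onnx`
# API, e.g. for offline inspection. Note that graphs containing Graphcore
# custom ops (the "ai.graphcore" domain) may not pass the stock ONNX checker.
def _example_save_proto(config, path="bert_pipeline.onnx"):
    proto = get_model_proto(config)
    onnx.save_model(proto, path)
    return path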
def test_warmup(custom_ops, num_steps=100000):
    builder = popart.Builder(opsets={
        "ai.onnx": 9,
        "ai.onnx.ml": 1,
        "ai.graphcore": 1
    })
    config = BertConfig(vocab_length=9728,
                        num_layers=1,
                        batch_size=1,
                        hidden_size=768,
                        sequence_length=128,
                        popart_dtype="FLOAT",
                        no_dropout=True,
                        custom_ops=['gather', 'attention'])
    popart_model = Bert(config, builder=builder)

    sequence_info = popart.TensorInfo(
        "UINT32", [config.batch_size * config.sequence_length])
    indices = builder.addInputTensor(sequence_info)
    positions = builder.addInputTensor(sequence_info)
    data = {
        indices:
        np.random.randint(
            0, config.vocab_length,
            (config.batch_size * config.sequence_length)).astype(np.uint32),
        positions:
        np.random.randint(
            0, config.sequence_length,
            (config.batch_size * config.sequence_length)).astype(np.uint32)
    }

    output = popart_model.build_graph(indices, positions)[0]
    losses = [popart.L1Loss(output, "l1LossVal", 0.1)]
    for loss in losses:
        loss.virtualGraph(popart_model.ipu)

    proto = popart_model.builder.getModelProto()
    optimizer = popart.SGD(0.00001)

    ipus = math.ceil(config.num_layers / config.layers_per_ipu) \
        + popart_model.layer_offset

    # Analogous to run_py, but only the setup stages
    print("Creating session and compiling graph")
    session, anchors, device = create_session(proto, data, output, optimizer,
                                              losses, ipus=ipus)

    print("Running with optimiser updates")
    times_with_optimiser = timed_run_steps(session, anchors, data, 0.1,
                                           num_steps=num_steps)

    print("Running without optimiser updates")
    times_no_optimiser = timed_run_steps(session, anchors, data, None,
                                         num_steps=num_steps)

    device.detach()

    # Convert seconds to milliseconds.
    opt_np = 1000 * times_with_optimiser
    noopt_np = 1000 * times_no_optimiser

    print("W/ Optimiser Update")
    print(f"\tMean: {opt_np.mean():.5f}")
    print(f"\tSum: {opt_np.sum():.5f}")
    print(f"\tRng: {opt_np.min():.5f} -> {opt_np.max():.5f}")

    print("W/o Optimiser Update")
    print(f"\tMean: {noopt_np.mean():.5f}")
    print(f"\tSum: {noopt_np.sum():.5f}")
    print(f"\tRng: {noopt_np.min():.5f} -> {noopt_np.max():.5f}")

    mean_diff = opt_np.mean() - noopt_np.mean()
    percentage_difference = 100 * mean_diff / noopt_np.mean()
    print(
        f"Mean difference, {mean_diff:.5f}ms (~{percentage_difference:.1f}%)")

    # The optimiser update should add less than 5% to the mean step time.
    assert percentage_difference < 5
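# Minimal sketch (assumed behaviour, not the repository's implementation) of
# a per-step timing helper in the spirit of `timed_run_steps`: run a number
# of PyStepIO executions and collect wall-clock durations in seconds. The
# real helper also takes a learning-rate argument to toggle optimiser
# updates, which this sketch omits.
import time


def _example_timed_steps(session, anchors, data, num_steps=100):
    stepio = popart.PyStepIO(data, anchors)
    durations = np.empty(num_steps)
    for i in range(num_steps):
        start = time.perf_counter()
        session.run(stepio)
        durations[i] = time.perf_counter() - start
    return durations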
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)
    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    model = Bert(config, pipeline=args.pipeline, initializers=initializers)

    if not config.use_packed_sequence_format:
        # If config.host_embedding is enabled, indices and positions will
        # contain the embedding matrices instead of the index vectors.
        indices, positions, segments, masks, labels = bert_add_inputs(
            args, model)
        logits = model.build_graph(indices, positions, segments, masks)
        outputs, accuracies, losses, final_loss, writer = bert_add_outputs(
            args, model, logits, labels)
        dataset = get_bert_dataset(
            model, args, [indices, positions, segments, masks, labels])
    else:  # use_packed_sequence_format
        if args.task != "PRETRAINING":
            raise RuntimeError(
                "Packed sequence format currently only supported for pretraining."
            )
        input_tensor_shapes = packed_bert_utils.add_inputs(model)
        logits = packed_bert_utils.logits_graph(model)
        losses, accuracies, final_loss, outputs = \
            packed_bert_utils.pretraining_loss_and_accuracy(model, logits)
        writer = bert_writer(args) if not args.inference else None
        dataset = get_pretraining_dataset(args, input_tensor_shapes)

    device = acquire_device(args, bert_required_ipus(args, model))

    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(args.batches_per_step, outputs)

    iteration = bert_iteration(args, dataset, writer)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow,
                                                  device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks, *labels]
        bert_infer_loop(args, session, dataset, inputs, logits, anchors,
                        accuracies, losses, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = bert_optimizer_factory(args, model, iteration)
            if args.save_initializers_externally:
                save_dir = Path(args.checkpoint_dir,
                                f'model_{args.continue_training_from_epoch}')
                save_dir.mkdir(parents=True, exist_ok=True)
                weight_tensors = [
                    item for sublist in model.tensors.values()
                    for item in sublist
                ]
                vars_path = f'vars_{args.continue_training_from_epoch}.onnx'
                vars_path = os.path.join(save_dir, vars_path)
                model.builder.saveInitializersExternally(
                    weight_tensors, vars_path)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     final_loss, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset, accuracies,
                            losses, anchors, iteration, optimizer_factory)

            save_model(args, session, iteration.count)
            if args.wandb_save_checkpoints:
                artifact = wandb.Artifact(name=args.wandb_save_checkpoints,
                                          type="model")
                artifact.add_dir(args.checkpoint_dir)
                wandb.log_artifact(artifact)

            device.detach()
            logger.info("Training Finished")

    return session, iteration
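# Typical entry point (a sketch: `parse_bert_args` is a hypothetical name
# standing in for the script's real argument parser, which lives in its own
# module).
if __name__ == "__main__":
    args = parse_bert_args()
    session, iteration = main(args)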