def build_data_silo(params: Params, splits, processor):
    """
    Build a FARM DataSilo and feed it pre-built data dicts (one list of dicts
    per split) instead of letting it load data from files.
    """
    data_silo = DataSilo(
        processor=processor,
        batch_size=params.batch_size,
        automatic_loading=False,
        max_processes=4,
    )
    farm_data = build_farm_data_dicts(splits)
    data_silo._load_data(
        **{"%s_dicts" % split_name: d for split_name, d in farm_data.items()}
    )
    return data_silo, farm_data
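
# A minimal usage sketch, not part of the original code: `Params` is only assumed to
# carry a `batch_size` attribute, and the `splits`/`processor` arguments are illustrative
# stand-ins (e.g. a SquadProcessor and the splits expected by build_farm_data_dicts).
def _example_build_data_silo(splits, processor):
    # `Params(batch_size=32)` is a hypothetical constructor, shown for illustration only.
    params = Params(batch_size=32)
    data_silo, farm_data = build_data_silo(params, splits, processor)
    # The silo now serves a PyTorch DataLoader per split, e.g. for training:
    train_loader = data_silo.get_data_loader("train")
    return next(iter(train_loader))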
def convert_to_onnx(self, output_path, opset_version=11, optimize_for=None):
    """
    Convert a PyTorch AdaptiveModel to ONNX. The conversion is trace-based: it performs
    a forward pass on the model with a sample input batch and records the executed
    operations as the exported graph.

    :param output_path: model dir to write the model and config files
    :type output_path: Path
    :param opset_version: ONNX opset version
    :type opset_version: int
    :param optimize_for: optimize the exported model for a target device. Available options
                         are "gpu_tensor_core" (GPUs with Tensor Cores, like V100 or T4),
                         "gpu_without_tensor_core" (most other GPUs), and "cpu".
    :type optimize_for: str
    :return: None
    """
    if type(self.prediction_heads[0]) is not QuestionAnsweringHead:
        raise NotImplementedError(
            "Currently, only models with a QuestionAnsweringHead can be converted to ONNX."
        )

    tokenizer = Tokenizer.load(pretrained_model_name_or_path="deepset/bert-base-cased-squad2")

    label_list = ["start_token", "end_token"]
    metric = "squad"
    max_seq_len = 384
    batch_size = 1
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        label_list=label_list,
        metric=metric,
        train_filename="stub-file",  # the data is loaded from dicts instead of a file
        dev_filename=None,
        test_filename=None,
        data_dir="stub-dir",
    )

    data_silo = DataSilo(processor=processor, batch_size=1, distributed=False, automatic_loading=False)
    sample_dict = [
        {
            "context": 'The Normans were the people who in the 10th and 11th centuries gave their name to Normandy, '
                       'a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders '
                       'and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear '
                       'fealty to King Charles III of West Francia.',
            "qas": [
                {
                    "question": "In what country is Normandy located?",
                    "id": "56ddde6b9a695914005b9628",
                    "answers": [{"text": "France", "answer_start": 159}],
                    "is_impossible": False,
                }
            ],
        }
    ]

    data_silo._load_data(train_dicts=sample_dict)
    data_loader = data_silo.get_data_loader("train")
    data = next(iter(data_loader))
    data = list(data.values())

    inputs = {
        'input_ids': data[0].to(self.device).reshape(batch_size, max_seq_len),
        'padding_mask': data[1].to(self.device).reshape(batch_size, max_seq_len),
        'segment_ids': data[2].to(self.device).reshape(batch_size, max_seq_len),
    }

    # torch.onnx.export passes arguments to the model differently than AdaptiveModel's
    # forward() expects. To resolve that, an ONNXWrapper instance is used.
    model = ONNXWrapper.load_from_adaptive_model(self)

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    with torch.no_grad():
        symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
        torch.onnx.export(
            model,
            args=tuple(inputs.values()),
            f=output_path / 'model.onnx',
            opset_version=opset_version,
            do_constant_folding=True,
            input_names=['input_ids', 'padding_mask', 'segment_ids'],
            output_names=['logits'],
            dynamic_axes={
                'input_ids': symbolic_names,
                'padding_mask': symbolic_names,
                'segment_ids': symbolic_names,
                'logits': symbolic_names,
            },
        )

    if optimize_for:
        optimize_args = Namespace(
            disable_attention=False,
            disable_bias_gelu=False,
            disable_embed_layer_norm=False,
            opt_level=99,
            disable_skip_layer_norm=False,
            disable_bias_skip_layer_norm=False,
            hidden_size=768,
            verbose=False,
            input=str(output_path / 'model.onnx'),
            model_type='bert',
            num_heads=12,
            output=str(output_path / 'model.onnx'),
        )

        if optimize_for == "gpu_tensor_core":
            optimize_args.float16 = True
            optimize_args.input_int32 = True
        elif optimize_for == "gpu_without_tensor_core":
            optimize_args.float16 = False
            optimize_args.input_int32 = True
        elif optimize_for == "cpu":
            optimize_args.float16 = False
            optimize_args.input_int32 = False
        else:
            raise NotImplementedError(
                f"ONNXRuntime model optimization is not available for {optimize_for}. Choose "
                f"one of 'gpu_tensor_core' (V100 or T4), 'gpu_without_tensor_core' or 'cpu'."
            )
        optimize_onnx_model(optimize_args)
    else:
        logger.info(
            "Exporting unoptimized ONNX model. To enable optimization, supply the "
            "'optimize_for' parameter with the target device."
        )

    # PredictionHead contains functionality like logits_to_preds() that is still needed
    # for inference with ONNX models. Only the config of each PredictionHead is stored.
    for i, ph in enumerate(self.prediction_heads):
        ph.save_config(output_path, i)

    processor.save(output_path)

    onnx_model_config = {
        "onnx_opset_version": opset_version,
        "language": self.get_language(),
    }
    with open(output_path / "model_config.json", "w") as f:
        json.dump(onnx_model_config, f)

    logger.info(f"Model exported at path {output_path}")
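
# A minimal export sketch, not part of the original code: it assumes this method lives on
# FARM's AdaptiveModel and that AdaptiveModel.convert_from_transformers() is available for
# loading a QA model, as in FARM. The model name and output directory are illustrative.
def _example_convert_to_onnx():
    from pathlib import Path
    model = AdaptiveModel.convert_from_transformers(
        model_name_or_path="deepset/bert-base-cased-squad2",
        device="cpu",
        task_type="question_answering",
    )
    # Writes model.onnx plus the processor and prediction-head configs into onnx-export/.
    model.convert_to_onnx(output_path=Path("onnx-export"), optimize_for="cpu")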