def __init__(self, name=DEFAULT_MP_NAME, root='~/.insightface', allowed_modules=None):
    onnxruntime.set_default_logger_severity(3)
    self.models = {}
    self.model_dir = ensure_available('models', name, root=root)
    onnx_files = glob.glob(osp.join(self.model_dir, '*.onnx'))
    onnx_files = sorted(onnx_files)
    for onnx_file in onnx_files:
        if onnx_file.find('_selfgen_') > 0:
            #print('ignore:', onnx_file)
            continue
        model = model_zoo.get_model(onnx_file)
        if model is None:
            print('model not recognized:', onnx_file)
        elif allowed_modules is not None and model.taskname not in allowed_modules:
            print('model ignore:', onnx_file, model.taskname)
            del model
        elif model.taskname not in self.models and (allowed_modules is None or model.taskname in allowed_modules):
            print('find model:', onnx_file, model.taskname, model.input_shape, model.input_mean, model.input_std)
            self.models[model.taskname] = model
        else:
            print('duplicated model task type, ignore:', onnx_file, model.taskname)
            del model
    assert 'detection' in self.models
    self.det_model = self.models['detection']
def run_onnx_model(model: onnx.ModelProto, output_name: List[str]) -> Dict[str, np.ndarray]:
    """Run an ONNX model on onnxruntime and return values for the given output names."""
    # Log severity level 3 (Error)
    ort.set_default_logger_severity(3)
    # InferenceSession expects a file path or serialized bytes, not a ModelProto.
    sess = ort.InferenceSession(model.SerializeToString())
    feed_dict = dict()
    for attr in sess.get_inputs():
        name = attr.name
        shape = attr.shape
        type = attr.type
        if type == 'tensor(float)':
            dtype = np.float32
        elif type == 'tensor(int64)':
            dtype = np.int64
        else:
            raise Exception('Unknown dtype: %s' % type)
        feed_dict[name] = np.ones(shape).astype(dtype)
    values = sess.run(output_name, feed_dict)
    return dict(zip(output_name, values))
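# Usage sketch (not from the original source): exercise run_onnx_model on a
# model loaded from disk. "model.onnx" and the output name "logits" are
# placeholders; note the helper feeds all-ones dummy tensors, so it only works
# for models whose input shapes are fully static.
import onnx

model = onnx.load("model.onnx")
outputs = run_onnx_model(model, ["logits"])
print({name: value.shape for name, value in outputs.items()})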
def get_input_shape_from_onnx(onnx_path) -> dict:
    # Returns input_info of the form:
    # {
    #     "input name": {"shape": [n, c, ...], "data_type": 0},
    #     "input name": {"shape": [n, c, ...], "data_type": 0},
    # }
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    input_info: dict = {}
    for ip in session.get_inputs():
        name = ip.name
        name = name.replace(":", "_")
        shape = ip.shape
        data_type = 0
        if ip.type == 'tensor(float)':
            data_type = 0
        elif ip.type == 'tensor(int64)':
            data_type = 3
        else:
            logging.error("Unsupported input data type: %s", ip.type)
        if type(shape[0]) is not int:
            # Replace a symbolic batch dimension with 1.
            shape[0] = 1
        shape_information = {'shape': shape, 'data_type': data_type}
        input_info.update({name: shape_information})
    return input_info
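# Usage sketch (illustrative, not from the original source): inspect the inputs
# of an exported model. "model.onnx" is a placeholder path; data_type follows
# the snippet's own convention (0 = float32, 3 = int64).
info = get_input_shape_from_onnx("model.onnx")
for name, meta in info.items():
    print(name, meta['shape'], meta['data_type'])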
def load_model(self):
    self.model = self.get_current_model()
    log("Loading ONNX - {}".format(self.model))
    self.dimensions = self.get_dimensions(self.model)
    ort.set_default_logger_severity(4)
    self.detector = ort.InferenceSession(self.model)
    self.input_name = self.detector.get_inputs()[0].name
def __init__(self, onnx_bytes, sess_options=None, log_severity_level=4, device=None):
    if InferenceSession is None:
        raise ImportError(  # pragma: no cover
            "onnxruntime is not available.")
    self.log_severity_level = log_severity_level
    if device is None:
        self.device = get_ort_device('cpu')
    else:
        self.device = get_ort_device(device)
    self.providers = device_to_providers(self.device)
    # Honor the requested severity instead of hardcoding it.
    set_default_logger_severity(log_severity_level)
    if sess_options is None:
        self.so = SessionOptions()
        self.so.log_severity_level = log_severity_level
        self.sess = OrtInferenceSession(onnx_bytes, sess_options=self.so,
                                        providers=self.providers)
    else:
        self.so = sess_options
        self.sess = OrtInferenceSession(onnx_bytes, sess_options=sess_options,
                                        providers=self.providers)
    self.ro = RunOptions()
    self.ro.log_severity_level = log_severity_level
    self.ro.log_verbosity_level = log_severity_level
    self.output_names = [o.name for o in self.get_outputs()]
def __init__(self, model_dir: str, model_name: str, model_vocab: str, labels: List[str]):
    self.model_path = os.path.join(model_dir, model_name)
    self.vocab_path = os.path.join(model_dir, model_vocab)
    self.labels = labels
    self.tokenizer = BertTokenizer.from_pretrained(self.vocab_path)
    # Raise the logger severity before creating the session so that
    # session-creation warnings are suppressed as well.
    onnxruntime.set_default_logger_severity(3)
    self.session = self.create_session()
def get_input_shape_from_onnx(onnx_path) -> dict:
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    input_info: dict = {}
    for ip in session.get_inputs():
        name = ip.name
        shape = ip.shape
        if type(shape[0]) is not int:
            shape[0] = 1
        input_info.update({name: shape})
    return input_info
def __init__(self, onnx_model_path,
             providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
             severity=None):
    super().__init__()
    self.dict = dict
    self.bpe_only = False
    # Set the logger severity before creating the session so that it also
    # applies to session-creation messages.
    if severity is not None:
        onnxruntime.set_default_logger_severity(severity)
    self.onnxruntime_session = onnxruntime.InferenceSession(onnx_model_path)
    if providers:
        self.onnxruntime_session.set_providers(providers)
def check_onnx_dim(onnx_path: str):
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    current_shape = []
    status = 0
    for ip in session.get_inputs():
        current_shape.append((ip.name, ip.shape))
        for dim in ip.shape:
            if type(dim) is not int or dim < 1:
                status = -1
    if status == -1:
        return False, current_shape
    return True, None
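# Usage sketch (illustrative): reject models whose inputs still carry symbolic
# or invalid dimensions before handing them to a tool that needs static shapes.
# "model.onnx" is a placeholder path.
ok, bad_shapes = check_onnx_dim("model.onnx")
if not ok:
    print("model has non-static input dims:", bad_shapes)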
def __init__(self, model_filename, labels):
    super(ONNXRuntimeObjectDetection, self).__init__(labels)
    model = onnx.load(model_filename)
    with tempfile.TemporaryDirectory() as dirpath:
        temp = os.path.join(dirpath, os.path.basename(MODEL_FILENAME))
        model.graph.input[0].type.tensor_type.shape.dim[-1].dim_param = 'dim1'
        model.graph.input[0].type.tensor_type.shape.dim[-2].dim_param = 'dim2'
        onnx.save(model, temp)
        onnxruntime.set_default_logger_severity(0)
        self.session = onnxruntime.InferenceSession(temp)
    self.input_name = self.session.get_inputs()[0].name
    self.is_fp16 = self.session.get_inputs()[0].type == 'tensor(float16)'
def main():
    tokenizer = t10n.FullTokenizer(vocab_file="biobert_vocab.txt", do_lower_case=True)
    onnxruntime.set_default_logger_severity(3)
    session = create_session("biobert_ner.onnx")
    label_names = ["[PAD]", "B", "I", "O", "X", "[CLS]", "[SEP]"]
    print("Writing results to ./predicted_labels.txt ...")
    with open("predicted_labels.txt", "w") as out:
        i = 0
        for sequence in open("data_proc.txt", "r"):
            if i > 50:
                break
            sys.stdout.write("\rHandled %d sequences ... " % i)
            sys.stdout.flush()
            i += 1
            r = parse_sequence(tokenizer, sequence)
            _, out_2, _ = session.run(
                [], {
                    "segment_ids_1:0": np.array([r.token_type_ids], dtype=np.int32),
                    "input_mask_1_raw_output___9:0": np.array([r.attention_mask], dtype=np.int32),
                    "input_ids_1:0": np.array([r.input_ids], dtype=np.int32),
                    "label_ids_1:0": np.array([0], dtype=np.int32)
                })
            labels = []
            for index in out_2[0]:
                labels.append(label_names[index])
            for token, label in zip(r.tokens, labels):
                out.write("{} {}\n".format(token, label))
    print("Done.")
def get_src_model_input_information(self) -> dict:
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(self.src_model_path)
    input_info: dict = {}
    for ip in session.get_inputs():
        name = ip.name
        shape = ip.shape
        data_type = 0
        if ip.type == 'tensor(float)':
            data_type = 0
        elif ip.type == 'tensor(int64)':
            data_type = 3
        else:
            logging.error("Unsupported input data type: %s", ip.type)
        if type(shape[0]) is not int:
            shape[0] = 1
        shape_information = {'shape': shape, 'data_type': data_type}
        input_info.update({name: shape_information})
    return input_info
def check_model(model: onnx.ModelProto, check_runnable: bool = True) -> None:
    """Check whether the model is well-defined and executable on onnxruntime."""
    # TODO: After collecting possible errors, pass through only if all error
    # messages are "No opset import for domain 'com.microsoft'".
    # The code below only inspects the first error encountered.
    acceptable_error_msg = [
        "No opset import for domain 'com.microsoft'",
        "No Op registered for LayerNormalization with domain_version of 12",
    ]
    try:
        checker.check_model(model)
    except checker.ValidationError as e:
        if str(e).split("==>")[0].rstrip() not in acceptable_error_msg:
            raise
    if check_runnable:
        ort.set_default_logger_severity(3)
        ort.InferenceSession(model.SerializeToString())
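# Usage sketch (illustrative): validate a freshly exported model and confirm
# onnxruntime can at least build a session for it. The path is a placeholder.
model = onnx.load("model.onnx")
check_model(model, check_runnable=True)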
def __init__(self, onnx_path=None, in_shape=None, backend='auto'):
    self.in_shape = in_shape
    self.onnx_input_name = 'input.1'
    self.onnx_output_names = ['537', '538', '539', '540']
    if onnx_path is None:
        onnx_path = default_onnx_path
    if backend == 'auto':
        try:
            import onnx
            import onnxruntime
            backend = 'onnxrt'
        except ImportError:
            # TODO: Warn when using a --verbose flag
            # print('Failed to import onnx or onnxruntime. Falling back to slower OpenCV backend.')
            backend = 'opencv'
    self.backend = backend
    if self.backend == 'opencv':
        self.net = cv2.dnn.readNetFromONNX(onnx_path)
    elif self.backend == 'onnxrt':
        import onnx
        import onnx.utils
        import onnxruntime
        # Silence warnings about unnecessary bn initializers
        onnxruntime.set_default_logger_severity(3)
        static_model = onnx.load(onnx_path)
        dyn_model = self.dynamicize_shapes(static_model)
        dyn_model = onnx.utils.polish_model(dyn_model)
        self.sess = onnxruntime.InferenceSession(dyn_model.SerializeToString())
        preferred_provider = self.sess.get_providers()[0]
        preferred_device = 'GPU' if preferred_provider.startswith('CUDA') else 'CPU'
def __init__(self, onnx_path=None, in_shape=None, backend="auto"): self.in_shape = in_shape self.onnx_input_name = "input.1" self.onnx_output_names = ["537", "538", "539", "540"] if onnx_path is None: onnx_path = default_onnx_path if backend == "auto": backend = "onnxrt" print( "attempting to use onnxrt, if its not compatible with your system, change 'backend' to opencv" ) self.backend = backend if self.backend == "opencv": self.net = cv2.dnn.readNetFromONNX(onnx_path) elif self.backend == "onnxrt": import onnx import onnx.utils import onnxruntime print(Fore.BLUE + "self.backend is onnxrt") # Silence warnings about unnecessary bn initializers onnxruntime.set_default_logger_severity(3) static_model = onnx.load(onnx_path) dyn_model = self.dynamicize_shapes(static_model) dyn_model = onnx.utils.polish_model(dyn_model) self.sess = onnxruntime.InferenceSession( dyn_model.SerializeToString()) preferred_provider = self.sess.get_providers()[0] print(Fore.BLUE + "preferred_provider ", preferred_provider) preferred_device = "GPU" if preferred_provider.startswith( "CUDA") else "CPU"
def __init__(self): """Load the MNIST test dataset (10000 images). Load onnx model and start inference session.""" start = time.time() mnist = tf.keras.datasets.mnist (_, _), (x_test, self.y_test) = mnist.load_data() self.x_test = x_test / 255.0 self.image_count = x_test.shape[0] end = time.time() print("Loading time: {0:f} secs".format(end - start)) # Load the ONNX model and check the model is well formed if not os.path.exists("model.onnx"): sys.exit( "There needs to be a model located at 'model.onnx'. Tests will fail if this is not the case." ) self.model = onnx.load("model.onnx") onnx.checker.check_model(self.model) # Start inference session rt.set_default_logger_severity(0) self.sess = rt.InferenceSession("model.onnx") self.input_name = self.sess.get_inputs()[0].name
def get_input_tensors(model: onnx.ModelProto) -> List[str]:
    ort.set_default_logger_severity(3)
    sess = ort.InferenceSession(model.SerializeToString())
    input_tensors = [inp.name for inp in sess.get_inputs()]
    return input_tensors
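# Usage sketch (illustrative): list the graph input names, e.g. to build a feed
# dict. The path and the printed names are placeholders.
model = onnx.load("model.onnx")
print(get_input_tensors(model))  # e.g. ['input_ids', 'attention_mask']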
def main(args):
    checkpoints = [
        torch.load(checkpoint_path, map_location='cpu')
        for checkpoint_path in args.checkpoint
    ]
    checkpoint = (checkpoints + [{}])[0]
    if len(checkpoints) > 1:
        checkpoint['model_state_dict'] = {
            k: sum(c['model_state_dict'][k] for c in checkpoints) / len(checkpoints)
            for k in checkpoint['model_state_dict']
        }

    if args.frontend_checkpoint:
        frontend_checkpoint = torch.load(args.frontend_checkpoint, map_location='cpu')
        frontend_extra_args = frontend_checkpoint['args']
        frontend_checkpoint = frontend_checkpoint['model']
    else:
        frontend_extra_args = None
        frontend_checkpoint = None

    args.experiment_id = args.experiment_id.format(
        model=args.model,
        frontend=args.frontend,
        train_batch_size=args.train_batch_size,
        optimizer=args.optimizer,
        lr=args.lr,
        weight_decay=args.weight_decay,
        time=time.strftime('%Y-%m-%d_%H-%M-%S'),
        experiment_name=args.experiment_name,
        bpe='bpe' if args.bpe else '',
        train_waveform_transform=f'aug{args.train_waveform_transform[0]}{args.train_waveform_transform_prob or ""}'
        if args.train_waveform_transform else '',
        train_feature_transform=f'aug{args.train_feature_transform[0]}{args.train_feature_transform_prob or ""}'
        if args.train_feature_transform else '').replace('e-0', 'e-').rstrip('_')
    if checkpoint and 'experiment_id' in checkpoint['args'] and not args.experiment_name:
        args.experiment_id = checkpoint['args']['experiment_id']
    args.experiment_dir = args.experiment_dir.format(
        experiments_dir=args.experiments_dir, experiment_id=args.experiment_id)
    os.makedirs(args.experiment_dir, exist_ok=True)

    if args.log_json:
        args.log_json = os.path.join(args.experiment_dir, 'log.json')

    if checkpoint:
        args.lang, args.model, args.num_input_features, args.sample_rate, args.window, args.window_size, args.window_stride = map(
            checkpoint['args'].get, [
                'lang', 'model', 'num_input_features', 'sample_rate',
                'window', 'window_size', 'window_stride'
            ])
        utils.set_up_root_logger(os.path.join(args.experiment_dir, 'log.txt'), mode='a')
        logfile_sink = JsonlistSink(args.log_json, mode='a')
    else:
        utils.set_up_root_logger(os.path.join(args.experiment_dir, 'log.txt'), mode='w')
        logfile_sink = JsonlistSink(args.log_json, mode='w')

    _print = utils.get_root_logger_print()
    _print('\n', 'Arguments:', args)
    _print(f'"CUDA_VISIBLE_DEVICES={os.environ.get("CUDA_VISIBLE_DEVICES", default="")}"')
    _print(f'"CUDA_LAUNCH_BLOCKING={os.environ.get("CUDA_LAUNCH_BLOCKING", default="")}"')
    _print('Experiment id:', args.experiment_id, '\n')
    if args.dry:
        return

    utils.set_random_seed(args.seed)
    if args.cudnn == 'benchmark':
        torch.backends.cudnn.benchmark = True

    lang = datasets.Language(args.lang)  #TODO: , candidate_sep = datasets.Labels.candidate_sep
    normalize_text_config = json.load(open(args.normalize_text_config)) if os.path.exists(args.normalize_text_config) else {}
    labels = [
        datasets.Labels(lang, name='char', normalize_text_config=normalize_text_config)
    ] + [
        datasets.Labels(lang, bpe=bpe, name=f'bpe{i}', normalize_text_config=normalize_text_config)
        for i, bpe in enumerate(args.bpe)
    ]

    frontend = getattr(models, args.frontend)(
        out_channels=args.num_input_features,
        sample_rate=args.sample_rate,
        window_size=args.window_size,
        window_stride=args.window_stride,
        window=args.window,
        dither=args.dither,
        dither0=args.dither0,
        stft_mode='conv' if args.onnx else None,
        extra_args=frontend_extra_args)
    model = getattr(models, args.model)(
        num_input_features=args.num_input_features,
        num_classes=list(map(len, labels)),
        dropout=args.dropout,
        decoder_type='bpe' if args.bpe else None,
        frontend=frontend if args.onnx or args.frontend_in_model else None,
        **(dict(inplace=False, dict=lambda logits, log_probs, olen, **kwargs: logits[0]) if args.onnx else {}))
    _print('Model capacity:', int(models.compute_capacity(model, scale=1e6)), 'million parameters\n')

    if checkpoint:
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    if frontend_checkpoint:
        frontend_checkpoint = {
            'model.' + name: weight for name, weight in frontend_checkpoint.items()
        }  ##TODO remove after save checkpoint naming fix
        frontend.load_state_dict(frontend_checkpoint)

    if args.onnx:
        torch.set_grad_enabled(False)
        model.eval()
        model.to(args.device)
        model.fuse_conv_bn_eval()
        if args.fp16:
            model = models.InputOutputTypeCast(model.to(torch.float16), dtype=torch.float16)
        waveform_input = torch.rand(args.onnx_sample_batch_size, args.onnx_sample_time, device=args.device)
        logits = model(waveform_input)
        torch.onnx.export(
            model, (waveform_input, ),
            args.onnx,
            opset_version=args.onnx_opset,
            export_params=args.onnx_export_params,
            do_constant_folding=True,
            input_names=['x'],
            output_names=['logits'],
            dynamic_axes=dict(x={0: 'B', 1: 'T'}, logits={0: 'B', 2: 't'}))
        onnxruntime_session = onnxruntime.InferenceSession(args.onnx)
        if args.verbose:
            onnxruntime.set_default_logger_severity(0)
        (logits_, ) = onnxruntime_session.run(None, dict(x=waveform_input.cpu().numpy()))
        assert torch.allclose(logits.cpu(), torch.from_numpy(logits_), rtol=1e-02, atol=1e-03)
        #model_def = onnx.load(args.onnx)
        #import onnx.tools.net_drawer  # import GetPydotGraph, GetOpNodeProducer
        #pydot_graph = GetPydotGraph(model_def.graph, name=model_def.graph.name, rankdir="TB", node_producer=GetOpNodeProducer("docstring", color="yellow", fillcolor="yellow", style="filled"))
        #pydot_graph.write_dot("pipeline_transpose2x.dot")
        #os.system('dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot')
        # add metadata to model
        return

    perf.init_default(
        loss=dict(K=50, max=1000),
        memory_cuda_allocated=dict(K=50),
        entropy=dict(K=4),
        time_ms_iteration=dict(K=50, max=10_000),
        lr=dict(K=50, max=1))

    val_config = json.load(open(args.val_config)) if os.path.exists(args.val_config) else {}
    word_tags = json.load(open(args.word_tags)) if os.path.exists(args.word_tags) else {}
    for word_tag, words in val_config.get('word_tags', {}).items():
        word_tags[word_tag] = word_tags.get(word_tag, []) + words
    vocab = set(map(str.strip, open(args.vocab))) if os.path.exists(args.vocab) else set()
    error_analyzer = metrics.ErrorAnalyzer(
        metrics.WordTagger(lang, vocab=vocab, word_tags=word_tags),
        metrics.ErrorTagger(), val_config.get('error_analyzer', {}))

    make_transform = lambda name_args, prob: None if not name_args \
        else getattr(transforms, name_args[0])(*name_args[1:]) if prob is None \
        else getattr(transforms, name_args[0])(prob, *name_args[1:]) if prob > 0 \
        else None
    val_frontend = models.AugmentationFrontend(
        frontend,
        waveform_transform=make_transform(args.val_waveform_transform, args.val_waveform_transform_prob),
        feature_transform=make_transform(args.val_feature_transform, args.val_feature_transform_prob))

    if args.val_waveform_transform_debug_dir:
        args.val_waveform_transform_debug_dir = os.path.join(
            args.val_waveform_transform_debug_dir,
            str(val_frontend.waveform_transform) if isinstance(val_frontend.waveform_transform, transforms.RandomCompose)
            else val_frontend.waveform_transform.__class__.__name__)
        os.makedirs(args.val_waveform_transform_debug_dir, exist_ok=True)

    val_data_loaders = {
        os.path.basename(val_data_path): torch.utils.data.DataLoader(
            val_dataset,
            num_workers=args.num_workers,
            collate_fn=val_dataset.collate_fn,
            pin_memory=True,
            shuffle=False,
            batch_size=args.val_batch_size,
            worker_init_fn=datasets.worker_init_fn,
            timeout=args.timeout if args.num_workers > 0 else 0)
        for val_data_path in args.val_data_path
        for val_dataset in [
            datasets.AudioTextDataset(
                val_data_path,
                labels,
                args.sample_rate,
                frontend=val_frontend if not args.frontend_in_model else None,
                waveform_transform_debug_dir=args.val_waveform_transform_debug_dir,
                min_duration=args.min_duration,
                time_padding_multiple=args.batch_time_padding_multiple,
                pop_meta=True,
                _print=_print)
        ]
    }
    decoder = [
        decoders.GreedyDecoder() if args.decoder == 'GreedyDecoder' else decoders.BeamSearchDecoder(
            labels[0],
            lm_path=args.lm,
            beam_width=args.beam_width,
            beam_alpha=args.beam_alpha,
            beam_beta=args.beam_beta,
            num_workers=args.num_workers,
            topk=args.decoder_topk)
    ] + [decoders.GreedyDecoder() for bpe in args.bpe]

    model.to(args.device)

    if not args.train_data_path:
        model.eval()
        if not args.adapt_bn:
            model.fuse_conv_bn_eval()
        if args.device != 'cpu':
            model, *_ = models.data_parallel_and_autocast(
                model, opt_level=args.fp16, keep_batchnorm_fp32=args.fp16_keep_batchnorm_fp32)
        evaluate_model(args, val_data_loaders, model, labels, decoder, error_analyzer)
        return

    model.freeze(backbone=args.freeze_backbone, decoder0=args.freeze_decoder, frontend=args.freeze_frontend)

    train_frontend = models.AugmentationFrontend(
        frontend,
        waveform_transform=make_transform(args.train_waveform_transform, args.train_waveform_transform_prob),
        feature_transform=make_transform(args.train_feature_transform, args.train_feature_transform_prob))
    tic = time.time()
    train_dataset = datasets.AudioTextDataset(
        args.train_data_path,
        labels,
        args.sample_rate,
        frontend=train_frontend if not args.frontend_in_model else None,
        min_duration=args.min_duration,
        max_duration=args.max_duration,
        time_padding_multiple=args.batch_time_padding_multiple,
        bucket=lambda example: int(
            math.ceil(((example[0]['end'] - example[0]['begin']) / args.window_stride + 1) / args.batch_time_padding_multiple)),
        pop_meta=True,
        _print=_print)
    _print('Time train dataset created:', time.time() - tic, 'sec')
    train_dataset_name = '_'.join(map(os.path.basename, args.train_data_path))
    tic = time.time()
    sampler = datasets.BucketingBatchSampler(train_dataset, batch_size=args.train_batch_size)
    _print('Time train sampler created:', time.time() - tic, 'sec')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=args.num_workers,
        collate_fn=train_dataset.collate_fn,
        pin_memory=True,
        batch_sampler=sampler,
        worker_init_fn=datasets.worker_init_fn,
        timeout=args.timeout if args.num_workers > 0 else 0)

    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum,
        weight_decay=args.weight_decay, nesterov=args.nesterov
    ) if args.optimizer == 'SGD' else torch.optim.AdamW(
        model.parameters(), lr=args.lr, betas=args.betas, weight_decay=args.weight_decay
    ) if args.optimizer == 'AdamW' else optimizers.NovoGrad(
        model.parameters(), lr=args.lr, betas=args.betas, weight_decay=args.weight_decay
    ) if args.optimizer == 'NovoGrad' else apex.optimizers.FusedNovoGrad(
        model.parameters(), lr=args.lr, betas=args.betas, weight_decay=args.weight_decay
    ) if args.optimizer == 'FusedNovoGrad' else None

    if checkpoint and checkpoint['optimizer_state_dict'] is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if not args.skip_optimizer_reset:
            optimizers.reset_options(optimizer)

    scheduler = optimizers.MultiStepLR(
        optimizer, gamma=args.decay_gamma, milestones=args.decay_milestones
    ) if args.scheduler == 'MultiStepLR' else optimizers.PolynomialDecayLR(
        optimizer, power=args.decay_power, decay_steps=len(train_data_loader) * args.decay_epochs, end_lr=args.decay_lr
    ) if args.scheduler == 'PolynomialDecayLR' else optimizers.NoopLR(optimizer)

    epoch, iteration = 0, 0
    if checkpoint:
        epoch, iteration = checkpoint['epoch'], checkpoint['iteration']
        if args.train_data_path == checkpoint['args']['train_data_path']:
            sampler.load_state_dict(checkpoint['sampler_state_dict'])
            if args.iterations_per_epoch and iteration and iteration % args.iterations_per_epoch == 0:
                sampler.batch_idx = 0
                epoch += 1
        else:
            epoch += 1

    if args.iterations_per_epoch:
        epoch_skip_fraction = 1 - args.iterations_per_epoch / len(train_data_loader)
        assert epoch_skip_fraction < args.max_epoch_skip_fraction, \
            f'args.iterations_per_epoch must not skip more than {args.max_epoch_skip_fraction:.1%} of each epoch'

    if args.device != 'cpu':
        model, optimizer = models.data_parallel_and_autocast(
            model, optimizer, opt_level=args.fp16, keep_batchnorm_fp32=args.fp16_keep_batchnorm_fp32)
    if checkpoint and args.fp16 and checkpoint['amp_state_dict'] is not None:
        apex.amp.load_state_dict(checkpoint['amp_state_dict'])

    model.train()

    tensorboard_dir = os.path.join(args.experiment_dir, 'tensorboard')
    if checkpoint and args.experiment_name:
        tensorboard_dir_checkpoint = os.path.join(os.path.dirname(args.checkpoint[0]), 'tensorboard')
        if os.path.exists(tensorboard_dir_checkpoint) and not os.path.exists(tensorboard_dir):
            shutil.copytree(tensorboard_dir_checkpoint, tensorboard_dir)
    tensorboard = torch.utils.tensorboard.SummaryWriter(tensorboard_dir)
    tensorboard_sink = TensorboardSink(tensorboard)

    with open(os.path.join(args.experiment_dir, args.args), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, ensure_ascii=False, indent=2)
    with open(os.path.join(args.experiment_dir, args.dump_model_config), 'w') as f:
        model_config = dict(
            init_params=models.master_module(model).init_params,
            model=repr(models.master_module(model)))
        json.dump(model_config, f, sort_keys=True, ensure_ascii=False, indent=2)

    tic, toc_fwd, toc_bwd = time.time(), time.time(), time.time()

    oom_handler = utils.OomHandler(max_retries=args.oom_retries)
    for epoch in range(epoch, args.epochs):
        sampler.shuffle(epoch + args.seed_sampler)
        time_epoch_start = time.time()
        for batch_idx, (meta, s, x, xlen, y, ylen) in enumerate(train_data_loader, start=sampler.batch_idx):
            toc_data = time.time()
            if batch_idx == 0:
                time_ms_launch_data_loader = (toc_data - tic) * 1000
                _print('Time data loader launch @ ', epoch, ':', time_ms_launch_data_loader / 1000, 'sec')

            lr = optimizer.param_groups[0]['lr']
            perf.update(dict(lr=lr))

            x, xlen, y, ylen = (x.to(args.device, non_blocking=True),
                                xlen.to(args.device, non_blocking=True),
                                y.to(args.device, non_blocking=True),
                                ylen.to(args.device, non_blocking=True))
            try:
                #TODO check nan values in tensors, they can break running_stats in bn
                log_probs, olen, loss = map(model(x, xlen, y=y, ylen=ylen).get, ['log_probs', 'olen', 'loss'])
                oom_handler.reset()
            except:
                if oom_handler.try_recover(model.parameters(), _print=_print):
                    continue
                else:
                    raise

            example_weights = ylen[:, 0]
            loss, loss_cur = (loss * example_weights).mean() / args.train_batch_accumulate_iterations, float(loss.mean())
            perf.update(dict(loss_BT_normalized=loss_cur))
            entropy = float(models.entropy(log_probs[0], olen[0], dim=1).mean())
            toc_fwd = time.time()

            #TODO: inf/nan still corrupts BN stats
            if not (torch.isinf(loss) or torch.isnan(loss)):
                if args.fp16:
                    with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                if iteration % args.train_batch_accumulate_iterations == 0:
                    torch.nn.utils.clip_grad_norm_(
                        apex.amp.master_params(optimizer) if args.fp16 else model.parameters(), args.max_norm)
                    optimizer.step()
                    if iteration > 0 and iteration % args.log_iteration_interval == 0:
                        perf.update(utils.compute_memory_stats(), prefix='performance')
                        tensorboard_sink.perf(perf.default(), iteration, train_dataset_name)
                        tensorboard_sink.weight_stats(iteration, model, args.log_weight_distribution)
                        logfile_sink.perf(perf.default(), iteration, train_dataset_name)
                    optimizer.zero_grad()
                    scheduler.step(iteration)
                perf.update(dict(entropy=entropy))
            toc_bwd = time.time()

            time_ms_data, time_ms_fwd, time_ms_bwd, time_ms_model = map(
                lambda sec: sec * 1000,
                [toc_data - tic, toc_fwd - toc_data, toc_bwd - toc_fwd, toc_bwd - toc_data])
            perf.update(
                dict(time_ms_data=time_ms_data, time_ms_fwd=time_ms_fwd,
                     time_ms_bwd=time_ms_bwd, time_ms_iteration=time_ms_data + time_ms_model),
                prefix='performance')
            perf.update(dict(input_B=x.shape[0], input_T=x.shape[-1]), prefix='performance')
            print_left = f'{args.experiment_id} | epoch: {epoch:02d} iter: [{batch_idx: >6d} / {len(train_data_loader)} {iteration: >6d}] {"x".join(map(str, x.shape))}'
            print_right = 'ent: <{avg_entropy:.2f}> loss: {cur_loss_BT_normalized:.2f} <{avg_loss_BT_normalized:.2f}> time: {performance_cur_time_ms_data:.2f}+{performance_cur_time_ms_fwd:4.0f}+{performance_cur_time_ms_bwd:4.0f} <{performance_avg_time_ms_iteration:.0f}> | lr: {cur_lr:.5f}'.format(**perf.default())
            _print(print_left, print_right)
            iteration += 1
            sampler.batch_idx += 1

            if iteration > 0 and (iteration % args.val_iteration_interval == 0 or iteration == args.iterations):
                evaluate_model(args, val_data_loaders, model, labels, decoder, error_analyzer,
                               optimizer, sampler, tensorboard_sink, logfile_sink, epoch, iteration)

            if iteration and args.iterations and iteration >= args.iterations:
                return

            if args.iterations_per_epoch and iteration > 0 and iteration % args.iterations_per_epoch == 0:
                break

            tic = time.time()

        sampler.batch_idx = 0
        _print('Epoch time', (time.time() - time_epoch_start) / 60, 'minutes')
        if not args.skip_on_epoch_end_evaluation:
            evaluate_model(args, val_data_loaders, model, labels, decoder, error_analyzer,
                           optimizer, sampler, tensorboard_sink, logfile_sink, epoch + 1, iteration)
import logging
import os
from shutil import copy

import onnxruntime as ort
from onnxruntime import quantization

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

ort.set_default_logger_severity(4)


def quantization_optimize(optimization_config):
    logger.info("ONNX model quantization started")
    base_dir = os.path.dirname(optimization_config.model_path)
    unquantized_model = os.path.join(base_dir, "unquantized_model.onnx")
    copy(optimization_config.model_path, unquantized_model)
    try:
        quantization.quantize_dynamic(unquantized_model, optimization_config.model_path)
        default_ep = ("CUDAExecutionProvider"
                      if "CUDAExecutionProvider" in ort.get_available_providers()
                      else "CPUExecutionProvider")
        ort.InferenceSession(optimization_config.model_path, providers=[default_ep])
        logger.info("ONNX model quantized successfully")
    except Exception as e:
        logger.info(
            "Quantization optimization failed with error {}. "
            "Original model will be used for optimization.".format(e))
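# Usage sketch (illustrative): quantization_optimize only reads
# optimization_config.model_path, so a simple namespace is enough to try it
# out. "model.onnx" is a placeholder path.
from types import SimpleNamespace

quantization_optimize(SimpleNamespace(model_path="model.onnx"))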
def calibrate_with_random(self, num_data: Optional[int] = None) -> Dict[str, Tuple[float, float]]:
    '''
    Gather intermediate model outputs after running inference on random data.

    parameter num_data: number of random calibration samples to generate (defaults to 10)
    return: dictionary mapping added node names to (ReduceMin, ReduceMax) pairs
    '''
    # Log severity level 3 (Error) in order not to print warnings (level 2)
    ort.set_default_logger_severity(3)
    sess = ort.InferenceSession(self.model.SerializeToString(), None)
    input_names = [attr.name for attr in sess.get_inputs()]
    input_shapes = [attr.shape for attr in sess.get_inputs()]
    input_types = [attr.type for attr in sess.get_inputs()]
    calibration_dataset = []
    for _ in range(num_data or 10):
        # Build a fresh feed dict per sample; reusing one dict would make
        # every entry in calibration_dataset reference the same object.
        feed_dict = dict()
        for (name, shape, type) in zip(input_names, input_shapes, input_types):
            if type == 'tensor(float)':
                dtype = np.float32
            elif type == 'tensor(int64)':
                dtype = np.int64
            else:
                raise Exception('Unknown dtype: %s' % type)
            batch_size = 1
            feed_dict[name] = np.random.random((batch_size, *shape[1:])).astype(dtype)
        calibration_dataset.append(feed_dict)
    observers = [
        output.name for output in sess.get_outputs()
        if 'ReduceMin' in output.name or 'ReduceMax' in output.name
    ]
    disabled = True if os.environ.get('TQDM_DISABLE') else False
    observed_vals = [
        sess.run(observers, feed_dict)
        for feed_dict in tqdm.tqdm(calibration_dataset, desc='Calibration', disable=disabled)
    ]
    val_dicts = dict(zip(observers, zip(*observed_vals)))
    node_names = [key.rpartition('_')[0] for key in val_dicts.keys() if 'ReduceMax' in key]
    min_dicts = [float(np.min(value)) for key, value in val_dicts.items() if 'ReduceMin' in key]
    max_dicts = [float(np.max(value)) for key, value in val_dicts.items() if 'ReduceMax' in key]
    return dict(zip(node_names, zip(min_dicts, max_dicts)))
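# Usage sketch (illustrative): calibrate_with_random is a method, so this
# assumes an instance ("calibrator" is a hypothetical name) whose self.model is
# an augmented ONNX ModelProto exposing ReduceMin/ReduceMax observer outputs.
ranges = calibrator.calibrate_with_random(num_data=20)
for node_name, (min_val, max_val) in ranges.items():
    print(node_name, min_val, max_val)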
import os

import numpy as np
import tokenization
import tensorflow as tf
import onnxruntime
from onnxruntime import ExecutionMode, InferenceSession, SessionOptions

# import BertTokenizer.from_pretrained("bert-base-uncased")
# Return to this if everything goes to shit
tokenizer = tokenization.FullTokenizer(
    vocab_file="biobert_vocab.txt",
    do_lower_case=True)  # Might want to re-check this if issues arise

# Logger severity levels: 4 (FATAL), 3 (ERROR), 2 (WARNING)
onnxruntime.set_default_logger_severity(3)

sequence = ("The adverse events during combined therapy with cyclosporin A and "
            + "nifedipine included an increase in blood urea nitrogen levels "
            + "in 9 of the 13 patients and development of gingival hyperplasia "
            + "in 2 of the 13 patients.")
tokenized_sequence = tokenizer.tokenize(sequence)
tokenized_sequence.insert(0, '[CLS]')
tokenized_sequence.append('[SEP]')

# Could be 0 or 1; not sure which index is *supposed* to represent a first segment
token_type_ids = [0] * len(tokenized_sequence)
input_ids = tokenizer.convert_tokens_to_ids(tokenized_sequence)
# Not sure if label_ids should be padded to sequence length or not
label_ids = ["[PAD]", "B", "I", "O", "X", "[CLS]", "[SEP]"]
    elif 'bool' in onnx_dtype:
        return np.bool_
    else:
        raise NotImplementedError(onnx_dtype + " is not supported in this script yet.")
    return np.float32  # unreachable fallback kept from the original


def get_numpy(tensor):
    # (the def line above is reconstructed: the snippet begins mid-file and
    # this helper is called as get_numpy(tensor) below)
    dtype = get_numpy_dtype(tensor.type)
    shape = tensor.shape
    return np.ones(shape, dtype=dtype)


# print("Execution Device:", ort.get_device())
print("Importing ONNX model into ONNX Runtime...")
ort.set_default_logger_severity(args.logger_severity)
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
ort_session = ort.InferenceSession(args.file, sess_options)
if args.provider != '':
    ort_session.set_providers([args.provider])
print("Execution Providers:", ort_session.get_providers())

inputs = ort_session.get_inputs()
inputs_name = [item.name for item in inputs]
ort_inputs = {}
for tensor in inputs:
    ort_inputs.update({tensor.name: get_numpy(tensor)})
def __init__(self, model_file, image_size=(112, 112)):
    import onnxruntime as ort
    ort.set_default_logger_severity(3)
    self.ort_session = ort.InferenceSession(model_file)
    self.output_names = [self.ort_session.get_outputs()[0].name]
    self.input_name = self.ort_session.get_inputs()[0].name
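# Usage sketch (illustrative): assuming the wrapper above is a class named
# FaceEmbedder (hypothetical name) and that the model takes a normalized NCHW
# float32 blob matching the (112, 112) default; the file name is a placeholder.
import numpy as np

embedder = FaceEmbedder("face_recognition.onnx")
blob = np.zeros((1, 3, 112, 112), dtype=np.float32)
embedding = embedder.ort_session.run(embedder.output_names, {embedder.input_name: blob})[0]
print(embedding.shape)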