def __init__(self,
              name=DEFAULT_MP_NAME,
              root='~/.insightface',
              allowed_modules=None):
     onnxruntime.set_default_logger_severity(3)
     self.models = {}
     self.model_dir = ensure_available('models', name, root=root)
     onnx_files = glob.glob(osp.join(self.model_dir, '*.onnx'))
     onnx_files = sorted(onnx_files)
     for onnx_file in onnx_files:
         if onnx_file.find('_selfgen_') > 0:
             #print('ignore:', onnx_file)
             continue
         model = model_zoo.get_model(onnx_file)
         if model is None:
             print('model not recognized:', onnx_file)
         elif allowed_modules is not None and model.taskname not in allowed_modules:
             print('model ignore:', onnx_file, model.taskname)
             del model
         elif model.taskname not in self.models and (allowed_modules is None
                                                     or model.taskname
                                                     in allowed_modules):
             print('find model:', onnx_file, model.taskname,
                   model.input_shape, model.input_mean, model.input_std)
             self.models[model.taskname] = model
         else:
             print('duplicated model task type, ignore:', onnx_file,
                   model.taskname)
             del model
     assert 'detection' in self.models
     self.det_model = self.models['detection']
Example #2
    def run_onnx_model(model: onnx.ModelProto, output_name: List[str]) -> Dict[str, np.ndarray]:
        """
            This function run onnx model on onnxruntime and get values for given output names.
        """

        # Log severity level 3 (ERROR) so that warnings are not printed
        ort.set_default_logger_severity(3)
        sess = ort.InferenceSession(model.SerializeToString())

        feed_dict = dict()

        for attr in sess.get_inputs():
            name = attr.name
            shape = attr.shape
            type = attr.type
            if type == 'tensor(float)':
                dtype = np.float32
            elif type == 'tensor(int64)':
                dtype = np.int64
            else:
                raise Exception('Unknown dtype: %s' % type)

            feed_dict[name] = np.ones(shape).astype(dtype)

        values = sess.run(output_name, feed_dict)

        return dict(zip(output_name, values))
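A minimal usage sketch for the helper above; the model path is hypothetical and not taken from the original source.

import onnx

model = onnx.load("model.onnx")  # hypothetical path
outputs = run_onnx_model(model, [model.graph.output[0].name])
print({name: value.shape for name, value in outputs.items()})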
Example #3
def get_input_shape_from_onnx(onnx_path) -> dict:
    # Returned structure:
    # {
    #     "input name": {"shape": [n, c, ...], "data_type": 0},
    #     "input name": {"shape": [n, c, ...], "data_type": 0},
    # }
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    input_info: dict = {}
    for ip in session.get_inputs():
        name = ip.name
        name = name.replace(":", "_")
        shape = ip.shape
        data_type = 0
        if ip.type == 'tensor(float)':
            data_type = 0
        elif ip.type == 'tensor(int64)':
            data_type = 3
        else:
            logging.error("Unsupported input data type")
        if type(shape[0]) is not int:
            shape[0] = 1
        shape_information = {'shape': shape, 'data_type': data_type}
        input_info.update({name: shape_information})
    return input_info
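For illustration, a hedged sketch of calling this helper; the file name and the printed structure are assumptions, not output from a real model.

info = get_input_shape_from_onnx("model.onnx")  # hypothetical path
# e.g. {"input_1": {"shape": [1, 3, 224, 224], "data_type": 0}}
print(info)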
Example #4
 def load_model(self):
     self.model = self.get_current_model()
     log("Loading ONNX - {}".format(self.model))
     self.dimensions = self.get_dimensions(self.model)
     ort.set_default_logger_severity(4)
     self.detector = ort.InferenceSession(self.model)
     self.input_name = self.detector.get_inputs()[0].name
Example #5
 def __init__(self,
              onnx_bytes,
              sess_options=None,
              log_severity_level=4,
              device=None):
     if InferenceSession is None:
         raise ImportError(  # pragma: no cover
             "onnxruntime is not available.")
     self.log_severity_level = log_severity_level
     if device is None:
         self.device = get_ort_device('cpu')
     else:
         self.device = get_ort_device(device)
     self.providers = device_to_providers(self.device)
     set_default_logger_severity(3)
     if sess_options is None:
         self.so = SessionOptions()
         self.so.log_severity_level = log_severity_level
         self.sess = OrtInferenceSession(onnx_bytes,
                                         sess_options=self.so,
                                         providers=self.providers)
     else:
         self.so = sess_options
         self.sess = OrtInferenceSession(onnx_bytes,
                                         sess_options=sess_options,
                                         providers=self.providers)
     self.ro = RunOptions()
     self.ro.log_severity_level = log_severity_level
     self.ro.log_verbosity_level = log_severity_level
     self.output_names = [o.name for o in self.get_outputs()]
Example #6
 def __init__(self, model_dir: str, model_name: str, model_vocab: str,
              labels: List[str]):
     self.model_path = os.path.join(model_dir, model_name)
     self.vocab_path = os.path.join(model_dir, model_vocab)
     self.labels = labels
     self.tokenizer = BertTokenizer.from_pretrained(self.vocab_path)
     onnxruntime.set_default_logger_severity(3)
     self.session = self.create_session()
Example #7
def get_input_shape_from_onnx(onnx_path) -> dict:
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    input_info: dict = {}
    for ip in session.get_inputs():
        name = ip.name
        shape = ip.shape
        if type(shape[0]) is not int:
            shape[0] = 1
        input_info.update({name: shape})
    return input_info
Example #8
	def __init__(self, onnx_model_path, providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'], severity=None):
		super().__init__()

		self.dict = dict
		self.bpe_only = False
		self.onnxruntime_session = onnxruntime.InferenceSession(onnx_model_path)

		if severity is not None:
			onnxruntime.set_default_logger_severity(severity)

		if providers:
			self.onnxruntime_session.set_providers(providers)
Example #9
def check_onnx_dim(onnx_path: str):
    onnxruntime.set_default_logger_severity(3)
    session = onnxruntime.InferenceSession(onnx_path)
    current_shape = []
    status = 0
    for ip in session.get_inputs():
        current_shape.append((ip.name, ip.shape))
        for dim in ip.shape:
            if type(dim) is not int or dim < 1:
                status = -1

    if status == -1:
        return False, current_shape
    return True, None
Example #10
 def __init__(self, model_filename, labels):
     super(ONNXRuntimeObjectDetection, self).__init__(labels)
     model = onnx.load(model_filename)
     with tempfile.TemporaryDirectory() as dirpath:
         temp = os.path.join(dirpath, os.path.basename(MODEL_FILENAME))
         model.graph.input[0].type.tensor_type.shape.dim[
             -1].dim_param = 'dim1'
         model.graph.input[0].type.tensor_type.shape.dim[
             -2].dim_param = 'dim2'
         onnx.save(model, temp)
         onnxruntime.set_default_logger_severity(0)
         self.session = onnxruntime.InferenceSession(temp)
     self.input_name = self.session.get_inputs()[0].name
     self.is_fp16 = self.session.get_inputs()[0].type == 'tensor(float16)'
Example #11
def main():
    tokenizer = t10n.FullTokenizer(vocab_file="biobert_vocab.txt",
                                   do_lower_case=True)

    onnxruntime.set_default_logger_severity(3)
    session = create_session("biobert_ner.onnx")

    label_names = ["[PAD]", "B", "I", "O", "X", "[CLS]", "[SEP]"]

    print("Writing results to ./predicted_labels.txt ...")
    with open("predicted_labels.txt", "w") as out:
        i = 0
        for sequence in open("data_proc.txt", "r"):
            if i > 50:
                break

            sys.stdout.write("\rHandled %d sequences ... " % i)
            sys.stdout.flush()
            i += 1

            r = parse_sequence(tokenizer, sequence)

            _, out_2, _ = session.run(
                [], {
                    "segment_ids_1:0":
                    np.array([r.token_type_ids], dtype=np.int32),
                    "input_mask_1_raw_output___9:0":
                    np.array([r.attention_mask], dtype=np.int32),
                    "input_ids_1:0":
                    np.array([r.input_ids], dtype=np.int32),
                    "label_ids_1:0":
                    np.array([0], dtype=np.int32)
                })

            labels = []

            for index in out_2[0]:
                labels.append(label_names[index])

            for token, label in zip(r.tokens, labels):
                out.write("{} {}\n".format(token, label))

    print("Done.")
Example #12
    def get_src_model_input_information(self) -> dict:
        onnxruntime.set_default_logger_severity(3)
        session = onnxruntime.InferenceSession(self.src_model_path)
        input_info: dict = {}
        for ip in session.get_inputs():
            name = ip.name
            shape = ip.shape
            data_type = 0
            if ip.type == 'tensor(float)':
                data_type = 0
            elif ip.type == 'tensor(int64)':
                data_type = 3
            else:
                logging.error("Unsupported input data type")
            if type(shape[0]) is not int:
                shape[0] = 1
            shape_information = {'shape': shape,
                                 'data_type': data_type}
            input_info.update({name: shape_information})

        return input_info
Example #13
def check_model(model: onnx.ModelProto, check_runnable: bool = True) -> None:
    """
    Check whether the model is well-defined and executable on onnxruntime.
    """
    # TODO After collecting possible errors,
    #  pass through only if all error messages are "No opset import for domain 'com.microsoft'".
    #  The code below is only to see the first error encountered.
    acceptable_error_msg = [
        "No opset import for domain 'com.microsoft'",
        'No Op registered for LayerNormalization with domain_version of 12'
    ]
    try:
        checker.check_model(model)
    except checker.ValidationError as e:
        if str(e).split("==>")[0].rstrip() in acceptable_error_msg:
            pass
        else:
            checker.check_model(model)

    if check_runnable:
        ort.set_default_logger_severity(3)
        ort.InferenceSession(model.SerializeToString())
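A short, hedged usage sketch for check_model; the model path is hypothetical.

import onnx

model = onnx.load("model.onnx")  # hypothetical path
check_model(model, check_runnable=True)  # raises if validation fails with a non-acceptable error or the model cannot be loaded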
Example #14
    def __init__(self, onnx_path=None, in_shape=None, backend='auto'):
        self.in_shape = in_shape
        self.onnx_input_name = 'input.1'
        self.onnx_output_names = ['537', '538', '539', '540']

        if onnx_path is None:
            onnx_path = default_onnx_path

        if backend == 'auto':
            try:
                import onnx
                import onnxruntime
                backend = 'onnxrt'
            except ImportError:
                # TODO: Warn when using a --verbose flag
                # print('Failed to import onnx or onnxruntime. Falling back to slower OpenCV backend.')
                backend = 'opencv'
        self.backend = backend

        if self.backend == 'opencv':
            self.net = cv2.dnn.readNetFromONNX(onnx_path)
        elif self.backend == 'onnxrt':
            import onnx
            import onnx.utils
            import onnxruntime

            # Silence warnings about unnecessary bn initializers
            onnxruntime.set_default_logger_severity(3)

            static_model = onnx.load(onnx_path)
            dyn_model = self.dynamicize_shapes(static_model)
            dyn_model = onnx.utils.polish_model(dyn_model)
            self.sess = onnxruntime.InferenceSession(
                dyn_model.SerializeToString())

            preferred_provider = self.sess.get_providers()[0]
            preferred_device = 'GPU' if preferred_provider.startswith(
                'CUDA') else 'CPU'
Example #15
    def __init__(self, onnx_path=None, in_shape=None, backend="auto"):
        self.in_shape = in_shape
        self.onnx_input_name = "input.1"
        self.onnx_output_names = ["537", "538", "539", "540"]

        if onnx_path is None:
            onnx_path = default_onnx_path

        if backend == "auto":

            backend = "onnxrt"
            print(
                "attempting to use onnxrt, if its not compatible with your system, change 'backend' to opencv"
            )

        self.backend = backend

        if self.backend == "opencv":
            self.net = cv2.dnn.readNetFromONNX(onnx_path)
        elif self.backend == "onnxrt":
            import onnx
            import onnx.utils
            import onnxruntime

            print(Fore.BLUE + "self.backend is onnxrt")
            # Silence warnings about unnecessary bn initializers
            onnxruntime.set_default_logger_severity(3)

            static_model = onnx.load(onnx_path)
            dyn_model = self.dynamicize_shapes(static_model)
            dyn_model = onnx.utils.polish_model(dyn_model)
            self.sess = onnxruntime.InferenceSession(
                dyn_model.SerializeToString())

            preferred_provider = self.sess.get_providers()[0]
            print(Fore.BLUE + "preferred_provider ", preferred_provider)
            preferred_device = "GPU" if preferred_provider.startswith(
                "CUDA") else "CPU"
Example #16
    def __init__(self):
        """Load the MNIST test dataset (10000 images). Load onnx model and start inference session."""

        start = time.time()
        mnist = tf.keras.datasets.mnist
        (_, _), (x_test, self.y_test) = mnist.load_data()
        self.x_test = x_test / 255.0
        self.image_count = x_test.shape[0]
        end = time.time()
        print("Loading time: {0:f} secs".format(end - start))

        # Load the ONNX model and check the model is well formed
        if not os.path.exists("model.onnx"):
            sys.exit(
                "There needs to be a model located at 'model.onnx'. Tests will fail if this is not the case."
            )
        self.model = onnx.load("model.onnx")
        onnx.checker.check_model(self.model)

        # Start inference session
        rt.set_default_logger_severity(0)
        self.sess = rt.InferenceSession("model.onnx")
        self.input_name = self.sess.get_inputs()[0].name
Example #17
def get_input_tensors(model: onnx.ModelProto) -> List[str]:
    ort.set_default_logger_severity(3)
    sess = ort.InferenceSession(model.SerializeToString())
    input_tensors = [inp.name for inp in sess.get_inputs()]

    return input_tensors
Example #18
def main(args):
    checkpoints = [
        torch.load(checkpoint_path, map_location='cpu')
        for checkpoint_path in args.checkpoint
    ]
    checkpoint = (checkpoints + [{}])[0]
    if len(checkpoints) > 1:
        checkpoint['model_state_dict'] = {
            k: sum(c['model_state_dict'][k]
                   for c in checkpoints) / len(checkpoints)
            for k in checkpoint['model_state_dict']
        }

    if args.frontend_checkpoint:
        frontend_checkpoint = torch.load(args.frontend_checkpoint,
                                         map_location='cpu')
        frontend_extra_args = frontend_checkpoint['args']
        frontend_checkpoint = frontend_checkpoint['model']
    else:
        frontend_extra_args = None
        frontend_checkpoint = None

    args.experiment_id = args.experiment_id.format(
        model=args.model,
        frontend=args.frontend,
        train_batch_size=args.train_batch_size,
        optimizer=args.optimizer,
        lr=args.lr,
        weight_decay=args.weight_decay,
        time=time.strftime('%Y-%m-%d_%H-%M-%S'),
        experiment_name=args.experiment_name,
        bpe='bpe' if args.bpe else '',
        train_waveform_transform=
        f'aug{args.train_waveform_transform[0]}{args.train_waveform_transform_prob or ""}'
        if args.train_waveform_transform else '',
        train_feature_transform=
        f'aug{args.train_feature_transform[0]}{args.train_feature_transform_prob or ""}'
        if args.train_feature_transform else '').replace('e-0',
                                                         'e-').rstrip('_')
    if checkpoint and 'experiment_id' in checkpoint[
            'args'] and not args.experiment_name:
        args.experiment_id = checkpoint['args']['experiment_id']
    args.experiment_dir = args.experiment_dir.format(
        experiments_dir=args.experiments_dir, experiment_id=args.experiment_id)

    os.makedirs(args.experiment_dir, exist_ok=True)

    if args.log_json:
        args.log_json = os.path.join(args.experiment_dir, 'log.json')

    if checkpoint:
        args.lang, args.model, args.num_input_features, args.sample_rate, args.window, args.window_size, args.window_stride = map(
            checkpoint['args'].get, [
                'lang', 'model', 'num_input_features', 'sample_rate', 'window',
                'window_size', 'window_stride'
            ])
        utils.set_up_root_logger(os.path.join(args.experiment_dir, 'log.txt'),
                                 mode='a')
        logfile_sink = JsonlistSink(args.log_json, mode='a')
    else:
        utils.set_up_root_logger(os.path.join(args.experiment_dir, 'log.txt'),
                                 mode='w')
        logfile_sink = JsonlistSink(args.log_json, mode='w')

    _print = utils.get_root_logger_print()
    _print('\n', 'Arguments:', args)
    _print(
        f'"CUDA_VISIBLE_DEVICES={os.environ.get("CUDA_VISIBLE_DEVICES", default = "")}"'
    )
    _print(
        f'"CUDA_LAUNCH_BLOCKING={os.environ.get("CUDA_LAUNCH_BLOCKING", default="")}"'
    )
    _print('Experiment id:', args.experiment_id, '\n')
    if args.dry:
        return
    utils.set_random_seed(args.seed)
    if args.cudnn == 'benchmark':
        torch.backends.cudnn.benchmark = True

    lang = datasets.Language(args.lang)
    #TODO: , candidate_sep = datasets.Labels.candidate_sep
    normalize_text_config = json.load(open(
        args.normalize_text_config)) if os.path.exists(
            args.normalize_text_config) else {}
    labels = [
        datasets.Labels(
            lang, name='char', normalize_text_config=normalize_text_config)
    ] + [
        datasets.Labels(lang,
                        bpe=bpe,
                        name=f'bpe{i}',
                        normalize_text_config=normalize_text_config)
        for i, bpe in enumerate(args.bpe)
    ]
    frontend = getattr(models,
                       args.frontend)(out_channels=args.num_input_features,
                                      sample_rate=args.sample_rate,
                                      window_size=args.window_size,
                                      window_stride=args.window_stride,
                                      window=args.window,
                                      dither=args.dither,
                                      dither0=args.dither0,
                                      stft_mode='conv' if args.onnx else None,
                                      extra_args=frontend_extra_args)
    model = getattr(models, args.model)(
        num_input_features=args.num_input_features,
        num_classes=list(map(len, labels)),
        dropout=args.dropout,
        decoder_type='bpe' if args.bpe else None,
        frontend=frontend if args.onnx or args.frontend_in_model else None,
        **(dict(inplace=False,
                dict=lambda logits, log_probs, olen, **kwargs: logits[0])
           if args.onnx else {}))

    _print('Model capacity:', int(models.compute_capacity(model, scale=1e6)),
           'million parameters\n')

    if checkpoint:
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)

    if frontend_checkpoint:
        frontend_checkpoint = {
            'model.' + name: weight
            for name, weight in frontend_checkpoint.items()
        }  ##TODO remove after save checkpoint naming fix
        frontend.load_state_dict(frontend_checkpoint)

    if args.onnx:
        torch.set_grad_enabled(False)
        model.eval()
        model.to(args.device)
        model.fuse_conv_bn_eval()

        if args.fp16:
            model = models.InputOutputTypeCast(model.to(torch.float16),
                                               dtype=torch.float16)

        waveform_input = torch.rand(args.onnx_sample_batch_size,
                                    args.onnx_sample_time,
                                    device=args.device)
        logits = model(waveform_input)

        torch.onnx.export(model, (waveform_input, ),
                          args.onnx,
                          opset_version=args.onnx_opset,
                          export_params=args.onnx_export_params,
                          do_constant_folding=True,
                          input_names=['x'],
                          output_names=['logits'],
                          dynamic_axes=dict(x={
                              0: 'B',
                              1: 'T'
                          },
                                            logits={
                                                0: 'B',
                                                2: 't'
                                            }))
        if args.verbose:
            onnxruntime.set_default_logger_severity(0)
        onnxruntime_session = onnxruntime.InferenceSession(args.onnx)
        (logits_, ) = onnxruntime_session.run(
            None, dict(x=waveform_input.cpu().numpy()))
        assert torch.allclose(logits.cpu(),
                              torch.from_numpy(logits_),
                              rtol=1e-02,
                              atol=1e-03)

        #model_def = onnx.load(args.onnx)
        #import onnx.tools.net_drawer # import GetPydotGraph, GetOpNodeProducer
        #pydot_graph = GetPydotGraph(model_def.graph, name=model_def.graph.name, rankdir="TB", node_producer=GetOpNodeProducer("docstring", color="yellow", fillcolor="yellow", style="filled"))
        #pydot_graph.write_dot("pipeline_transpose2x.dot")
        #os.system('dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot')
        # add metadata to model
        return

    perf.init_default(loss=dict(K=50, max=1000),
                      memory_cuda_allocated=dict(K=50),
                      entropy=dict(K=4),
                      time_ms_iteration=dict(K=50, max=10_000),
                      lr=dict(K=50, max=1))

    val_config = json.load(open(args.val_config)) if os.path.exists(
        args.val_config) else {}
    word_tags = json.load(open(args.word_tags)) if os.path.exists(
        args.word_tags) else {}
    for word_tag, words in val_config.get('word_tags', {}).items():
        word_tags[word_tag] = word_tags.get(word_tag, []) + words
    vocab = set(map(str.strip, open(args.vocab))) if os.path.exists(
        args.vocab) else set()
    error_analyzer = metrics.ErrorAnalyzer(
        metrics.WordTagger(lang, vocab=vocab, word_tags=word_tags),
        metrics.ErrorTagger(), val_config.get('error_analyzer', {}))

    make_transform = lambda name_args, prob: None if not name_args else getattr(
        transforms, name_args[0])(*name_args[1:]) if prob is None else getattr(
            transforms, name_args[0])(prob, *name_args[1:]
                                      ) if prob > 0 else None
    val_frontend = models.AugmentationFrontend(
        frontend,
        waveform_transform=make_transform(args.val_waveform_transform,
                                          args.val_waveform_transform_prob),
        feature_transform=make_transform(args.val_feature_transform,
                                         args.val_feature_transform_prob))

    if args.val_waveform_transform_debug_dir:
        args.val_waveform_transform_debug_dir = os.path.join(
            args.val_waveform_transform_debug_dir,
            str(val_frontend.waveform_transform) if isinstance(
                val_frontend.waveform_transform, transforms.RandomCompose) else
            val_frontend.waveform_transform.__class__.__name__)
        os.makedirs(args.val_waveform_transform_debug_dir, exist_ok=True)

    val_data_loaders = {
        os.path.basename(val_data_path): torch.utils.data.DataLoader(
            val_dataset,
            num_workers=args.num_workers,
            collate_fn=val_dataset.collate_fn,
            pin_memory=True,
            shuffle=False,
            batch_size=args.val_batch_size,
            worker_init_fn=datasets.worker_init_fn,
            timeout=args.timeout if args.num_workers > 0 else 0)
        for val_data_path in args.val_data_path for val_dataset in [
            datasets.AudioTextDataset(
                val_data_path,
                labels,
                args.sample_rate,
                frontend=val_frontend if not args.frontend_in_model else None,
                waveform_transform_debug_dir=args.
                val_waveform_transform_debug_dir,
                min_duration=args.min_duration,
                time_padding_multiple=args.batch_time_padding_multiple,
                pop_meta=True,
                _print=_print)
        ]
    }
    decoder = [
        decoders.GreedyDecoder() if args.decoder == 'GreedyDecoder' else
        decoders.BeamSearchDecoder(labels[0],
                                   lm_path=args.lm,
                                   beam_width=args.beam_width,
                                   beam_alpha=args.beam_alpha,
                                   beam_beta=args.beam_beta,
                                   num_workers=args.num_workers,
                                   topk=args.decoder_topk)
    ] + [decoders.GreedyDecoder() for bpe in args.bpe]

    model.to(args.device)

    if not args.train_data_path:
        model.eval()
        if not args.adapt_bn:
            model.fuse_conv_bn_eval()
        if args.device != 'cpu':
            model, *_ = models.data_parallel_and_autocast(
                model,
                opt_level=args.fp16,
                keep_batchnorm_fp32=args.fp16_keep_batchnorm_fp32)
        evaluate_model(args, val_data_loaders, model, labels, decoder,
                       error_analyzer)
        return

    model.freeze(backbone=args.freeze_backbone,
                 decoder0=args.freeze_decoder,
                 frontend=args.freeze_frontend)

    train_frontend = models.AugmentationFrontend(
        frontend,
        waveform_transform=make_transform(args.train_waveform_transform,
                                          args.train_waveform_transform_prob),
        feature_transform=make_transform(args.train_feature_transform,
                                         args.train_feature_transform_prob))
    tic = time.time()
    train_dataset = datasets.AudioTextDataset(
        args.train_data_path,
        labels,
        args.sample_rate,
        frontend=train_frontend if not args.frontend_in_model else None,
        min_duration=args.min_duration,
        max_duration=args.max_duration,
        time_padding_multiple=args.batch_time_padding_multiple,
        bucket=lambda example: int(
            math.ceil(((example[0]['end'] - example[0]['begin']) / args.
                       window_stride + 1) / args.batch_time_padding_multiple)),
        pop_meta=True,
        _print=_print)

    _print('Time train dataset created:', time.time() - tic, 'sec')
    train_dataset_name = '_'.join(map(os.path.basename, args.train_data_path))
    tic = time.time()
    sampler = datasets.BucketingBatchSampler(
        train_dataset,
        batch_size=args.train_batch_size,
    )
    _print('Time train sampler created:', time.time() - tic, 'sec')

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=args.num_workers,
        collate_fn=train_dataset.collate_fn,
        pin_memory=True,
        batch_sampler=sampler,
        worker_init_fn=datasets.worker_init_fn,
        timeout=args.timeout if args.num_workers > 0 else 0)
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=args.nesterov
    ) if args.optimizer == 'SGD' else torch.optim.AdamW(
        model.parameters(),
        lr=args.lr,
        betas=args.betas,
        weight_decay=args.weight_decay
    ) if args.optimizer == 'AdamW' else optimizers.NovoGrad(
        model.parameters(),
        lr=args.lr,
        betas=args.betas,
        weight_decay=args.weight_decay
    ) if args.optimizer == 'NovoGrad' else apex.optimizers.FusedNovoGrad(
        model.parameters(),
        lr=args.lr,
        betas=args.betas,
        weight_decay=args.weight_decay
    ) if args.optimizer == 'FusedNovoGrad' else None

    if checkpoint and checkpoint['optimizer_state_dict'] is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if not args.skip_optimizer_reset:
            optimizers.reset_options(optimizer)

    scheduler = optimizers.MultiStepLR(
        optimizer, gamma=args.decay_gamma, milestones=args.decay_milestones
    ) if args.scheduler == 'MultiStepLR' else optimizers.PolynomialDecayLR(
        optimizer,
        power=args.decay_power,
        decay_steps=len(train_data_loader) * args.decay_epochs,
        end_lr=args.decay_lr
    ) if args.scheduler == 'PolynomialDecayLR' else optimizers.NoopLR(
        optimizer)
    epoch, iteration = 0, 0
    if checkpoint:
        epoch, iteration = checkpoint['epoch'], checkpoint['iteration']
        if args.train_data_path == checkpoint['args']['train_data_path']:
            sampler.load_state_dict(checkpoint['sampler_state_dict'])
            if args.iterations_per_epoch and iteration and iteration % args.iterations_per_epoch == 0:
                sampler.batch_idx = 0
                epoch += 1
        else:
            epoch += 1
    if args.iterations_per_epoch:
        epoch_skip_fraction = 1 - args.iterations_per_epoch / len(
            train_data_loader)
        assert epoch_skip_fraction < args.max_epoch_skip_fraction, \
         f'args.iterations_per_epoch must not skip more than {args.max_epoch_skip_fraction:.1%} of each epoch'

    if args.device != 'cpu':
        model, optimizer = models.data_parallel_and_autocast(
            model,
            optimizer,
            opt_level=args.fp16,
            keep_batchnorm_fp32=args.fp16_keep_batchnorm_fp32)
    if checkpoint and args.fp16 and checkpoint['amp_state_dict'] is not None:
        apex.amp.load_state_dict(checkpoint['amp_state_dict'])

    model.train()

    tensorboard_dir = os.path.join(args.experiment_dir, 'tensorboard')
    if checkpoint and args.experiment_name:
        tensorboard_dir_checkpoint = os.path.join(
            os.path.dirname(args.checkpoint[0]), 'tensorboard')
        if os.path.exists(tensorboard_dir_checkpoint
                          ) and not os.path.exists(tensorboard_dir):
            shutil.copytree(tensorboard_dir_checkpoint, tensorboard_dir)
    tensorboard = torch.utils.tensorboard.SummaryWriter(tensorboard_dir)
    tensorboard_sink = TensorboardSink(tensorboard)

    with open(os.path.join(args.experiment_dir, args.args), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, ensure_ascii=False, indent=2)

    with open(os.path.join(args.experiment_dir, args.dump_model_config),
              'w') as f:
        model_config = dict(
            init_params=models.master_module(model).init_params,
            model=repr(models.master_module(model)))
        json.dump(model_config,
                  f,
                  sort_keys=True,
                  ensure_ascii=False,
                  indent=2)

    tic, toc_fwd, toc_bwd = time.time(), time.time(), time.time()

    oom_handler = utils.OomHandler(max_retries=args.oom_retries)
    for epoch in range(epoch, args.epochs):
        sampler.shuffle(epoch + args.seed_sampler)
        time_epoch_start = time.time()
        for batch_idx, (meta, s, x, xlen, y,
                        ylen) in enumerate(train_data_loader,
                                           start=sampler.batch_idx):
            toc_data = time.time()
            if batch_idx == 0:
                time_ms_launch_data_loader = (toc_data - tic) * 1000
                _print('Time data loader launch @ ', epoch, ':',
                       time_ms_launch_data_loader / 1000, 'sec')

            lr = optimizer.param_groups[0]['lr']
            perf.update(dict(lr=lr))

            x, xlen, y, ylen = x.to(args.device, non_blocking=True), xlen.to(
                args.device, non_blocking=True), y.to(
                    args.device, non_blocking=True), ylen.to(args.device,
                                                             non_blocking=True)
            try:
                #TODO check nan values in tensors, they can break running_stats in bn
                log_probs, olen, loss = map(
                    model(x, xlen, y=y, ylen=ylen).get,
                    ['log_probs', 'olen', 'loss'])
                oom_handler.reset()
            except:
                if oom_handler.try_recover(model.parameters(), _print=_print):
                    continue
                else:
                    raise
            example_weights = ylen[:, 0]
            loss, loss_cur = (loss * example_weights).mean(
            ) / args.train_batch_accumulate_iterations, float(loss.mean())

            perf.update(dict(loss_BT_normalized=loss_cur))

            entropy = float(
                models.entropy(log_probs[0], olen[0], dim=1).mean())
            toc_fwd = time.time()
            #TODO: inf/nan still corrupts BN stats
            if not (torch.isinf(loss) or torch.isnan(loss)):
                if args.fp16:
                    with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                if iteration % args.train_batch_accumulate_iterations == 0:
                    torch.nn.utils.clip_grad_norm_(
                        apex.amp.master_params(optimizer)
                        if args.fp16 else model.parameters(), args.max_norm)
                    optimizer.step()

                    if iteration > 0 and iteration % args.log_iteration_interval == 0:
                        perf.update(utils.compute_memory_stats(),
                                    prefix='performance')
                        tensorboard_sink.perf(perf.default(), iteration,
                                              train_dataset_name)
                        tensorboard_sink.weight_stats(
                            iteration, model, args.log_weight_distribution)
                        logfile_sink.perf(perf.default(), iteration,
                                          train_dataset_name)

                    optimizer.zero_grad()
                    scheduler.step(iteration)
                perf.update(dict(entropy=entropy))
            toc_bwd = time.time()

            time_ms_data, time_ms_fwd, time_ms_bwd, time_ms_model = map(
                lambda sec: sec * 1000, [
                    toc_data - tic, toc_fwd - toc_data, toc_bwd - toc_fwd,
                    toc_bwd - toc_data
                ])
            perf.update(dict(time_ms_data=time_ms_data,
                             time_ms_fwd=time_ms_fwd,
                             time_ms_bwd=time_ms_bwd,
                             time_ms_iteration=time_ms_data + time_ms_model),
                        prefix='performance')
            perf.update(dict(input_B=x.shape[0], input_T=x.shape[-1]),
                        prefix='performance')
            print_left = f'{args.experiment_id} | epoch: {epoch:02d} iter: [{batch_idx: >6d} / {len(train_data_loader)} {iteration: >6d}] {"x".join(map(str, x.shape))}'
            print_right = 'ent: <{avg_entropy:.2f}> loss: {cur_loss_BT_normalized:.2f} <{avg_loss_BT_normalized:.2f}> time: {performance_cur_time_ms_data:.2f}+{performance_cur_time_ms_fwd:4.0f}+{performance_cur_time_ms_bwd:4.0f} <{performance_avg_time_ms_iteration:.0f}> | lr: {cur_lr:.5f}'.format(
                **perf.default())
            _print(print_left, print_right)
            iteration += 1
            sampler.batch_idx += 1

            if iteration > 0 and (iteration % args.val_iteration_interval == 0
                                  or iteration == args.iterations):
                evaluate_model(args, val_data_loaders, model, labels, decoder,
                               error_analyzer, optimizer, sampler,
                               tensorboard_sink, logfile_sink, epoch,
                               iteration)

            if iteration and args.iterations and iteration >= args.iterations:
                return

            if args.iterations_per_epoch and iteration > 0 and iteration % args.iterations_per_epoch == 0:
                break

            tic = time.time()

        sampler.batch_idx = 0
        _print('Epoch time', (time.time() - time_epoch_start) / 60, 'minutes')
        if not args.skip_on_epoch_end_evaluation:
            evaluate_model(args, val_data_loaders, model, labels, decoder,
                           error_analyzer, optimizer, sampler,
                           tensorboard_sink, logfile_sink, epoch + 1,
                           iteration)
Example #19
import logging
from shutil import copy

import onnxruntime as ort
from onnxruntime import quantization
import os

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

ort.set_default_logger_severity(4)


def quantization_optimize(optimization_config):
    logger.info("ONNX model quantization started")
    base_dir = os.path.dirname(optimization_config.model_path)
    unquantized_model = os.path.join(base_dir, "unquantized_model.onnx")
    copy(optimization_config.model_path, unquantized_model)
    try:
        quantization.quantize_dynamic(unquantized_model,
                                      optimization_config.model_path)
        default_ep = "CUDAExecutionProvider" if "CUDAExecutionProvider" in ort.get_available_providers(
        ) else "CPUExecutionProvider"
        ort.InferenceSession(optimization_config.model_path,
                             providers=[default_ep])
        logger.info("ONNX model quantized successfully")
    except Exception as e:
        logger.info(
            "Quantization optimization failed with error {}. Original model "
            "will be used for optimization.".format(e))
Example #20
    def calibrate_with_random(
            self,
            num_data: Optional[int] = None) -> Dict[str, Tuple[float, float]]:
        '''
        Gather intermediate outputs of the augmented FP32 ONNX model by running
        inference on randomly generated inputs.
            parameter num_data: number of random input samples to generate (defaults to 10)
            return: dictionary mapping added node names to (ReduceMin, ReduceMax) pairs
        '''
        # Log severity level 3 (ERROR) in order not to print warnings (level 2)
        ort.set_default_logger_severity(3)
        sess = ort.InferenceSession(self.model.SerializeToString(), None)

        input_names = [attr.name for attr in sess.get_inputs()]
        input_shapes = [attr.shape for attr in sess.get_inputs()]
        input_types = [attr.type for attr in sess.get_inputs()]

        calibration_dataset = []
        for _ in range(num_data or 10):
            # Build a fresh feed dict per sample so each entry keeps its own random data
            feed_dict = dict()
            for (name, shape, type) in zip(input_names, input_shapes,
                                           input_types):
                if type == 'tensor(float)':
                    dtype = np.float32
                elif type == 'tensor(int64)':
                    dtype = np.int64
                else:
                    raise Exception('Unknown dtype: %s' % type)
                batch_size = 1
                feed_dict[name] = np.random.random(
                    (batch_size, *shape[1:])).astype(dtype)
            calibration_dataset.append(feed_dict)

        observers = [
            output.name for output in sess.get_outputs()
            if 'ReduceMin' in output.name or 'ReduceMax' in output.name
        ]

        disabled = True if os.environ.get('TQDM_DISABLE') else False
        observed_vals = [
            sess.run(observers, feed_dict) for feed_dict in tqdm.tqdm(
                calibration_dataset, desc='Calibration', disable=disabled)
        ]

        val_dicts = dict(zip(observers, zip(*observed_vals)))

        node_names = [
            key.rpartition('_')[0] for key in val_dicts.keys()
            if 'ReduceMax' in key
        ]
        min_dicts = [
            float(np.min(value)) for key, value in val_dicts.items()
            if 'ReduceMin' in key
        ]
        max_dicts = [
            float(np.max(value)) for key, value in val_dicts.items()
            if 'ReduceMax' in key
        ]

        return dict(zip(node_names, zip(min_dicts, max_dicts)))
Example #21
import os
import numpy as np
import tokenization
import tensorflow as tf
import onnxruntime
from onnxruntime import ExecutionMode, InferenceSession, SessionOptions

# Alternative: BertTokenizer.from_pretrained("bert-base-uncased"); return to this if the current tokenizer causes problems
tokenizer = tokenization.FullTokenizer(
    vocab_file="biobert_vocab.txt", do_lower_case=True)

# Might want to re-check this if issues arise
# 4 (FATAL)
# 3 (ERROR)
# 2 (WARNING)
onnxruntime.set_default_logger_severity(3)
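# Full severity scale for reference (levels 0 and 1 added here for
# completeness, following the onnxruntime API): 0 (VERBOSE), 1 (INFO),
# 2 (WARNING), 3 (ERROR), 4 (FATAL). Level 3 keeps errors and fatals
# visible while silencing warnings emitted during later session creation.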

sequence = ("The adverse events during combined therapy with cyclosporin A and "
            + "nifedipine included an increase in blood urea nitrogen levels "
            + "in 9 of the 13 patients and development of gingival hyperplasia "
            + "in 2 of the 13 patients.")

tokenized_sequence = tokenizer.tokenize(sequence)
tokenized_sequence.insert(0, '[CLS]')
tokenized_sequence.append('[SEP]')

# Could be 0 or 1, not sure which index is *supposed* to represent a first segment
token_type_ids = [0]*len(tokenized_sequence)
input_ids = tokenizer.convert_tokens_to_ids(tokenized_sequence)
# Not sure if label_ids should be padded to sequence length or not
label_ids = ["[PAD]", "B", "I", "O", "X", "[CLS]", "[SEP]"]
Example #22
def get_numpy(tensor):
    # Reconstructed head of a helper whose top is truncated in this snippet:
    # build an all-ones dummy array matching an ONNX Runtime input tensor.
    # The dtype branches before 'bool' are assumptions, not the original code.
    def get_numpy_dtype(onnx_dtype):
        if 'float' in onnx_dtype:
            return np.float32
        elif 'int64' in onnx_dtype:
            return np.int64
        elif 'bool' in onnx_dtype:
            return np.bool_
        else:
            raise NotImplementedError(onnx_dtype +
                                      " is not supported in this script yet.")

    dtype = get_numpy_dtype(tensor.type)
    shape = tensor.shape
    return np.ones(shape, dtype=dtype)


# print("Execution Device:", ort.get_device())

print("Importing ONNX model into ONNX Runtime...")
ort.set_default_logger_severity(args.logger_severity)
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
ort_session = ort.InferenceSession(args.file, sess_options)

if args.provider != '':
    ort_session.set_providers([args.provider])

print("Execution Providers:", ort_session.get_providers())

inputs = ort_session.get_inputs()
inputs_name = [item.name for item in inputs]
ort_inputs = {}
for tensor in inputs:
    ort_inputs.update({tensor.name: get_numpy(tensor)})
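A plausible follow-up step, not shown in the truncated snippet above: run the session once with the generated dummy inputs and inspect the output shapes.

ort_outputs = ort_session.run(None, ort_inputs)
print("Output shapes:", [getattr(o, "shape", None) for o in ort_outputs])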
Example #23
 def __init__(self, model_file, image_size=(112, 112)):
     import onnxruntime as ort
     ort.set_default_logger_severity(3)
     self.ort_session = ort.InferenceSession(model_file)
     self.output_names = [self.ort_session.get_outputs()[0].name]
     self.input_name = self.ort_session.get_inputs()[0].name