Example #1
0
    def initialize(self, args):
        """Load-time hook: cache the parsed config and both output dtypes.

        Called only once, when the model is being loaded. Implementing it is
        optional; it exists so the model can set up any state it needs.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. Available entries:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        # The configuration arrives as raw JSON text and must be decoded here.
        parsed = json.loads(args['model_config'])
        self.model_config = parsed

        # Look up the configuration entry of each declared output.
        out0 = pb_utils.get_output_config_by_name(parsed, "OUTPUT0")
        out1 = pb_utils.get_output_config_by_name(parsed, "OUTPUT1")

        # Translate the Triton type names into numpy dtypes.
        self.output0_dtype = pb_utils.triton_string_to_numpy(out0['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1['data_type'])
Example #2
0
    def initialize(self, args):
        """Parse the model configuration and record the output dtypes.

        Stores the decoded configuration on `self.model_config` and the numpy
        dtypes of the `ANOMALY_SCORE0` and `ANOMALY0` outputs on
        `self.output0_dtype` and `self.output1_dtype` respectively.

        Parameters
        ----------
        args : dict
          Must contain `model_config`, a JSON string with the model
          configuration.
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg

        # Both lookups happen before any dtype conversion.
        score_cfg, flag_cfg = (
            pb_utils.get_output_config_by_name(cfg, output_name)
            for output_name in ("ANOMALY_SCORE0", "ANOMALY0")
        )

        to_numpy = pb_utils.triton_string_to_numpy
        self.output0_dtype = to_numpy(score_cfg['data_type'])
        self.output1_dtype = to_numpy(flag_cfg['data_type'])
    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
        the model to initialize any state associated with this model.

        Here it parses the model configuration, records the numpy dtype of
        the `DETECTED_OBJECTS_JSON` output, and loads the class names from
        the `coco.names` file located next to this source file (one name per
        line, surrounding whitespace stripped).

        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name

        Raises
        ------
        OSError
          If `coco.names` cannot be opened.
        """

        # You must parse model_config. JSON string is not parsed here
        self.model_config = model_config = json.loads(args['model_config'])

        # Get yolov4_parser output configuration
        detected_objects_config = pb_utils.get_output_config_by_name(
            model_config, "DETECTED_OBJECTS_JSON")

        # Convert Triton types to numpy types
        self.detected_objects_dtype = pb_utils.triton_string_to_numpy(
            detected_objects_config['data_type'])

        logger.info(f"detected_objects_dtype={self.detected_objects_dtype}")

        dir_path = os.path.dirname(os.path.realpath(__file__))
        logger.info(f"dir_path={dir_path}")

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(os.path.join(dir_path, 'coco.names')) as names_file:
            self.class_names = [line.strip() for line in names_file]
        logger.info(self.class_names)
Example #4
0
    def initialize(self, args):
        """Validate the transaction policy and record both output dtypes.

        Parameters
        ----------
        args : dict
          Reads `model_config` (a JSON string with the model configuration)
          and, when building the error message, `model_name`.

        Raises
        ------
        pb_utils.TritonModelException
          If the model configuration does not enable the decoupled
          transaction policy.
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg

        if not pb_utils.using_decoupled_model_transaction_policy(cfg):
            # The embedded newlines and indentation are part of the message.
            message_template = (
                "the model `{}` can generate any number of responses per request,\n"
                "                enable decoupled transaction policy in model configuration to \n"
                "                serve this model"
            )
            raise pb_utils.TritonModelException(
                message_template.format(args['model_name']))

        out0 = pb_utils.get_output_config_by_name(cfg, "OUTPUT0")
        out1 = pb_utils.get_output_config_by_name(cfg, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1['data_type'])
Example #5
0
    def initialize(self, args):
        """Load the saved workflow and collect per-column output dtypes.

        The workflow is loaded from `<model_repository>/<model_version>/workflow`.
        For every input column name of the workflow, the same-named output is
        looked up in the model configuration and its numpy dtype is stored in
        `self.output_dtypes`.

        Parameters
        ----------
        args : dict
          Reads `model_repository`, `model_version` and `model_config`
          (a JSON string with the model configuration).
        """
        version_dir = os.path.join(
            args["model_repository"], str(args["model_version"]))
        self.workflow = nvtabular.Workflow.load(
            os.path.join(version_dir, "workflow"))
        self.model_config = json.loads(args["model_config"])

        self.output_dtypes = {}
        for column in self.workflow.column_group.input_column_names:
            column_conf = get_output_config_by_name(self.model_config, column)
            self.output_dtypes[column] = triton_string_to_numpy(
                column_conf["data_type"])
    def initialize(self, args):
        """Record the output dtype and stage per-channel constants on the GPU.

        Stores the numpy dtype of the `PYTHON_OUTPUT_0` output on
        `self.output_dtype`, and two 3-element tensors reshaped to
        (1, 3, 1, 1) and moved to CUDA on `self.mean` and `self.std`.

        Parameters
        ----------
        args : dict
          Must contain `model_config`, a JSON string with the model
          configuration.
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg
        out_cfg = pb_utils.get_output_config_by_name(cfg, "PYTHON_OUTPUT_0")
        self.output_dtype = pb_utils.triton_string_to_numpy(
            out_cfg['data_type'])

        # NOTE(review): these values look like the usual ImageNet
        # normalization statistics - confirm against the preprocessing code.
        channel_shape = (1, 3, 1, 1)
        with torch.no_grad():
            self.mean = torch.tensor(
                [0.485, 0.456, 0.406]).view(*channel_shape).cuda()
            self.std = torch.tensor(
                [0.229, 0.224, 0.225]).view(*channel_shape).cuda()
Example #7
0
    def initialize(self, args):
        """Record both output dtypes and build a numpy-to-torch dtype table.

        Parameters
        ----------
        args : dict
          Must contain `model_config`, a JSON string with the model
          configuration.
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg

        out0 = pb_utils.get_output_config_by_name(cfg, "OUTPUT0")
        out1 = pb_utils.get_output_config_by_name(cfg, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1['data_type'])

        # Lookup table from numpy scalar types to their torch counterparts.
        dtype_pairs = (
            (np.bool_, torch.bool),
            (np.uint8, torch.uint8),
            (np.int8, torch.int8),
            (np.int16, torch.int16),
            (np.int32, torch.int32),
            (np.int64, torch.int64),
            (np.float16, torch.float16),
            (np.float32, torch.float32),
            (np.float64, torch.float64),
        )
        self.numpy_to_pytorch_dtype = dict(dtype_pairs)
Example #8
0
    def initialize(self, args):
        """Load-time hook: read tensor metadata and set up CTC rescoring.

        Called only once, when the model is being loaded. Implementing it is
        optional; it exists so the model can set up any state it needs.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. Available entries:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg
        # Never let the batch size drop below 1.
        self.max_batch_size = max(cfg["max_batch_size"], 1)

        # Output side: numpy dtype of OUTPUT0.
        out0_cfg = pb_utils.get_output_config_by_name(cfg, "OUTPUT0")
        self.out0_dtype = pb_utils.triton_string_to_numpy(
            out0_cfg['data_type'])

        # Input side: the last dim of batch_log_probs is used as the beam size.
        log_probs_cfg = pb_utils.get_input_config_by_name(
            cfg, "batch_log_probs")
        self.beam_size = log_probs_cfg['dims'][-1]

        # Input side: dtype and last dim of encoder_out.
        encoder_cfg = pb_utils.get_input_config_by_name(cfg, "encoder_out")
        self.data_type = pb_utils.triton_string_to_numpy(
            encoder_cfg['data_type'])
        self.feature_size = encoder_cfg['dims'][-1]

        # self.lm is reset before the rescoring setup runs.
        self.lm = None
        self.init_ctc_rescore(cfg['parameters'])
        print('Initialized Rescoring!')
Example #9
0
    def initialize(self, args):
        """Validate the transaction policy and prepare thread bookkeeping.

        Stores the parsed configuration, the numpy dtype of the `OUT` output,
        an in-flight thread counter starting at zero, and a lock object
        stored alongside it.

        Parameters
        ----------
        args : dict
          Reads `model_config` (a JSON string with the model configuration)
          and, when building the error message, `model_name`.

        Raises
        ------
        pb_utils.TritonModelException
          If the model configuration does not enable the decoupled
          transaction policy.
        """
        # The configuration arrives as raw JSON text and must be decoded here.
        cfg = json.loads(args['model_config'])
        self.model_config = cfg

        decoupled = pb_utils.using_decoupled_model_transaction_policy(cfg)
        if not decoupled:
            # The leading indentation of lines 2-3 is part of the message.
            message_lines = (
                "the model `{}` can generate any number of responses per request,",
                "                enable decoupled transaction policy in model configuration to",
                "                serve this model",
            )
            raise pb_utils.TritonModelException(
                "\n".join(message_lines).format(args['model_name']))

        # Translate the Triton type name of OUT into a numpy dtype.
        out_cfg = pb_utils.get_output_config_by_name(cfg, "OUT")
        self.out_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])

        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()
Example #10
0
    def initialize(self, args):
        """Load-time hook: pick the device, build the model, read OUTPUT0.

        Called only once, when the model is being loaded. Implementing it is
        optional; it exists so the model can set up any state it needs.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. Available entries:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        # The configuration arrives as raw JSON text and must be decoded here.
        cfg = json.loads(args['model_config'])
        self.model_config = cfg

        # Only a "GPU" instance kind selects CUDA; anything else runs on CPU.
        self.device = 'cuda' if args["model_instance_kind"] == "GPU" else 'cpu'

        # The model is built from the configured parameters and the device.
        self.model = WenetModel(cfg["parameters"], self.device)

        # Translate the Triton type name of OUTPUT0 into a numpy dtype.
        out0_cfg = pb_utils.get_output_config_by_name(cfg, "OUTPUT0")
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            out0_cfg['data_type'])

        # Per-sequence state, initially empty.
        self.seq_states = {}
        print("Finish Init")
Example #11
0
    def initialize(self, args):
        """Parse the configuration, then load the tokenizer and BART model.

        Stores the parsed configuration and the numpy dtype of `OUTPUT0`,
        calls `self.download_cddd_models()`, selects `'cuda'` when
        `torch.cuda.is_available()` and `'cpu'` otherwise, loads the
        tokenizer and model through `self.load_tokenizer` /
        `self.load_model`, and moves the model to the selected device.

        Parameters
        ----------
        args : dict
          Must contain `model_config`, a JSON string with the model
          configuration.
        """
        # You must parse model_config. JSON string is not parsed here
        self.model_config = model_config = json.loads(args['model_config'])

        # Get OUTPUT0 configuration
        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")

        # Convert Triton types to numpy types
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])

        max_seq_len = 64
        self.download_cddd_models()
        tokenizer_path = '/models/molbart/mol_opt_tokeniser.pickle'
        model_chk_path = '/models/molbart/az_molbart_pretrain.ckpt'
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.tokenizer = self.load_tokenizer(tokenizer_path)
        self.bart_model = self.load_model(
            model_chk_path, self.tokenizer, max_seq_len)
        # Honor the device chosen above; a hard-coded 'cuda' here would fail
        # on hosts without CUDA even though the CPU fallback was selected.
        self.bart_model.to(self.device)
 def initialize(self, args):
     """Record the output dtype and build the filterbank feature processor.

     Stores the numpy dtype of the `PYTHON_OUTPUT_0` output on
     `self.output_dtype` and a `features.FilterbankFeatures` instance,
     constructed with the fixed options below, on `self.feat_proc`.

     Parameters
     ----------
     args : dict
       Must contain `model_config`, a JSON string with the model
       configuration.
     """
     cfg = json.loads(args['model_config'])
     self.model_config = cfg
     out_cfg = pb_utils.get_output_config_by_name(cfg, "PYTHON_OUTPUT_0")
     self.output_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])

     # Keyword arguments for the feature processor, kept in one place.
     filterbank_options = {
         "spec_augment": None,
         "cutout_augment": None,
         "sample_rate": 16000,
         "window_size": 0.02,
         "window_stride": 0.01,
         "window": "hann",
         "normalize": "per_feature",
         "n_fft": 512,
         "preemph": 0.97,
         "n_filt": 64,
         "lowfreq": 0,
         "highfreq": None,
         "log": True,
         "dither": 1e-5,
         "pad_align": 16,
         "pad_to_max_duration": False,
         "max_duration": float('inf'),
         "frame_splicing": 1,
     }
     self.feat_proc = features.FilterbankFeatures(**filterbank_options)
 def _set_output_dtype(self, name):
     """Record the numpy dtype configured for the output called `name`.

     Looks the output up in `self.model_config` and stores the converted
     dtype in `self.output_dtypes[name]`.
     """
     triton_type = get_output_config_by_name(
         self.model_config, name)["data_type"]
     self.output_dtypes[name] = triton_string_to_numpy(triton_type)
Example #14
0
    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
        the model to initialize any state associated with this model.

        Here it parses the model configuration, selects the torch device,
        records dtypes and dims of the `speech` / `speech_lengths` outputs,
        builds a kaldifeat Fbank extractor, and derives the chunk sizes that
        are stored on the instance.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        self.model_config = model_config = json.loads(args['model_config'])
        # Never let the batch size drop below 1.
        self.max_batch_size = max(model_config["max_batch_size"], 1)

        # The device is taken from the first instance group in the config,
        # not from args["model_instance_kind"].
        if "GPU" in model_config["instance_group"][0]["kind"]:
            self.device = "cuda"
        else:
            self.device = "cpu"

        # Get OUTPUT0 configuration
        output0_config = pb_utils.get_output_config_by_name(
            model_config, "speech")
        # Convert Triton types to numpy types
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])

        # Any output dtype other than float32 maps to torch.float16.
        if self.output0_dtype == np.float32:
            self.dtype = torch.float32
        else:
            self.dtype = torch.float16

        # The last two dims of the "speech" output give the feature size and
        # the decoding window (in frames, as compared against num_frames below).
        self.feature_size = output0_config['dims'][-1]
        self.decoding_window = output0_config['dims'][-2]
        # Get OUTPUT1 configuration
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "speech_lengths")
        # Convert Triton types to numpy types
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

        # parse_model_params is defined elsewhere; the keys read from its
        # result are frame_length_ms, frame_shift_ms, sample_rate and
        # chunk_size_s.
        feat_opt = self.parse_model_params(model_config["parameters"])

        # Configure the Fbank options: dithering off, one mel bin per
        # output feature, framing and sample rate from the parsed parameters.
        opts = kaldifeat.FbankOptions()
        opts.frame_opts.dither = 0
        opts.mel_opts.num_bins = self.feature_size
        frame_length_ms = feat_opt["frame_length_ms"]
        frame_shift_ms = feat_opt["frame_shift_ms"]
        opts.frame_opts.frame_length_ms = frame_length_ms
        opts.frame_opts.frame_shift_ms = frame_shift_ms
        opts.frame_opts.samp_freq = feat_opt["sample_rate"]
        opts.device = torch.device(self.device)
        self.opts = opts
        self.feature_extractor = Fbank(self.opts)
        # Starts empty; presumably keyed by sequence - verify against the
        # code that fills it.
        self.seq_feat = {}
        chunk_size_s = feat_opt["chunk_size_s"]
        sample_rate = feat_opt["sample_rate"]
        # Chunk length converted to an integer number of samples.
        self.chunk_size = int(chunk_size_s * sample_rate)
        # NOTE(review): floor division yields a float if either operand is a
        # float - confirm the types returned by parse_model_params.
        self.frame_stride = (chunk_size_s * 1000) // frame_shift_ms

        # Grow the first chunk by one frame shift's worth of samples at a
        # time until it yields at least `decoding_window` frames.
        first_chunk_size = int(self.chunk_size)
        cur_frames = _kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
        while cur_frames < self.decoding_window:
            first_chunk_size += frame_shift_ms * sample_rate // 1000
            cur_frames = _kaldifeat.num_frames(first_chunk_size,
                                               opts.frame_opts)
        #  self.pad_silence = first_chunk_size - self.chunk_size
        self.first_chunk_size = first_chunk_size
        # get_offset is defined elsewhere; its result is stored unchanged.
        self.offset_ms = self.get_offset(frame_length_ms, frame_shift_ms)
        self.sample_rate = sample_rate
        # Number of samples spanned by one frame length.
        self.min_seg = frame_length_ms * sample_rate // 1000
        print("MIN SEG IS", self.min_seg)
Example #15
0
    def initialize(self, args):
        """Load-time hook: read output metadata and build the Fbank extractor.

        Called only once, when the model is being loaded. Implementing it is
        optional; it exists so the model can set up any state it needs.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. Available entries:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        cfg = json.loads(args['model_config'])
        self.model_config = cfg
        # Never let the batch size drop below 1.
        self.max_batch_size = max(cfg["max_batch_size"], 1)

        # The device follows the kind of the first configured instance group.
        runs_on_gpu = "GPU" in cfg["instance_group"][0]["kind"]
        self.device = "cuda" if runs_on_gpu else "cpu"

        # "speech" output: anything other than float32 maps to torch.float16.
        speech_cfg = pb_utils.get_output_config_by_name(cfg, "speech")
        speech_np_dtype = pb_utils.triton_string_to_numpy(
            speech_cfg['data_type'])
        self.output0_dtype = (
            torch.float32 if speech_np_dtype == np.float32 else torch.float16
        )

        # "speech_lengths" output keeps its numpy dtype.
        lengths_cfg = pb_utils.get_output_config_by_name(
            cfg, "speech_lengths")
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            lengths_cfg['data_type'])

        parameters = cfg['parameters']
        fbank_opts = kaldifeat.FbankOptions()
        fbank_opts.frame_opts.dither = 0

        # Every parameter's string_value is read; only the recognized names
        # are applied to the options.
        for param_name, param in parameters.items():
            raw = param["string_value"]
            if param_name == "num_mel_bins":
                fbank_opts.mel_opts.num_bins = int(raw)
            elif param_name == "frame_shift_in_ms":
                fbank_opts.frame_opts.frame_shift_ms = float(raw)
            elif param_name == "frame_length_in_ms":
                fbank_opts.frame_opts.frame_length_ms = float(raw)
            elif param_name == "sample_rate":
                fbank_opts.frame_opts.samp_freq = int(raw)

        fbank_opts.device = torch.device(self.device)
        self.opts = fbank_opts
        self.feature_extractor = Fbank(self.opts)
        self.feature_size = fbank_opts.mel_opts.num_bins