def _preprocess_and_verify_arguments(self):
    """
    Enforces some rules on the config.

    Export settings are only examined when ``self.export`` is set: a missing
    export path produces a warning and resets ``self.export_path`` to None,
    while an export path that is not an existing directory is an error.
    In 'remote' launch mode the endpoint that matches
    ``self.client_protocol`` must be provided.

    Raises
    ------
    TritonModelAnalyzerException
        If there is a problem with arguments or config.
    """

    if self.export:
        if not self.export_path:
            # Exporting was requested but there is nowhere to write to.
            logger.warning(
                "--export flag specified without --export-path: skipping exporting metrics."
            )
            self.export_path = None
        elif not os.path.isdir(self.export_path):
            raise TritonModelAnalyzerException(
                f"Export path {self.export_path} is not a directory.")

    if self.triton_launch_mode == 'remote':
        if self.client_protocol == 'http' and not self.triton_http_endpoint:
            raise TritonModelAnalyzerException(
                "client-protocol is 'http'. Must specify triton-http-endpoint "
                "if connecting to already running server or change protocol using "
                "--client-protocol.")
        if self.client_protocol == 'grpc' and not self.triton_grpc_endpoint:
            raise TritonModelAnalyzerException(
                "client-protocol is 'grpc'. Must specify triton-grpc-endpoint "
                "if connecting to already running server or change protocol using "
                "--client-protocol.")
def wait_for_server_ready(self, num_retries):
    """
    Polls the client until the server reports ready.

    After every unsuccessful attempt (not ready, or the client raised) the
    method pauses briefly and consumes one retry.

    Parameters
    ----------
    num_retries : int
        number of times to send a ready status request to
        the server before raising an exception

    Raises
    ------
    TritonModelAnalyzerException
        If server readiness could not be determined in given num_retries
    """

    attempts_left = num_retries
    while attempts_left > 0:
        try:
            server_ready = self._client.is_server_ready()
        except Exception as e:
            time.sleep(0.05)
            attempts_left -= 1
            # The last attempt failed with an error: surface that error.
            if attempts_left == 0:
                raise TritonModelAnalyzerException(e)
            continue

        if server_ready:
            return

        time.sleep(0.05)
        attempts_left -= 1

    raise TritonModelAnalyzerException(
        "Could not determine server readiness. "
        "Number of retries exceeded.")
def add_measurement(self, run_config, measurement):
    """
    Stores a measurement under the result entry for its model and
    model config, creating that entry on first use.

    Parameters
    ----------
    run_config : RunConfig
        Supplies the model name and the model config (whose 'name'
        field identifies the entry) the measurement belongs to
    measurement: Measurement
        the measurement to be added

    Raises
    ------
    TritonModelAnalyzerException
        If no result tables exist or no result comparator has been set
    """

    if len(self._result_tables) == 0:
        raise TritonModelAnalyzerException(
            "Cannot add measurements without tables")
    if not self._result_comparator:
        raise TritonModelAnalyzerException(
            "Cannot add measurements without setting result comparator")

    model_name = run_config.model_name()
    model_config_name = run_config.model_config().get_field('name')

    results_for_model = self._results[model_name]
    if model_config_name not in results_for_model:
        results_for_model[model_config_name] = ModelResult(
            model_name=run_config.model_name(),
            model_config=run_config.model_config(),
            comparator=self._result_comparator,
            constraints=self._constraints)

    # The measurement needs the comparator before it is handed to the result.
    measurement.set_result_comparator(comparator=self._result_comparator)
    results_for_model[model_config_name].add_measurement(measurement)
def create_from_file(model_path):
    """
    Constructs a ModelConfig from the config.pbtxt inside a model directory.

    Parameters
    -------
    model_path : str
        The full path to this model directory

    Returns
    -------
    ModelConfig

    Raises
    ------
    TritonModelAnalyzerException
        If model_path does not exist or is a file rather than a directory
    """

    if not os.path.exists(model_path):
        raise TritonModelAnalyzerException(
            'Model path specified does not exist.')

    if os.path.isfile(model_path):
        raise TritonModelAnalyzerException(
            'Model output path must be a directory.')

    # The file is only parsed, never modified, so open it read-only; this
    # also works for model repositories without write permission.
    with open(os.path.join(model_path, "config.pbtxt"), 'r') as f:
        config_str = f.read()

    protobuf_message = text_format.Parse(config_str,
                                         model_config_pb2.ModelConfig())

    return ModelConfig(protobuf_message)
def filter_records(self, record_types=None, filters=None):
    """
    Get records that satisfy the given list of criteria.

    Parameters
    ----------

    record_types : list of types of Records
        the types of the records we are
        imposing the filter criteria on.

    filters : list of callables
        conditions that determine whether
        a given record should be returned.
        If no filters specified, all records
        of types specified by record_types will be
        returned.
        Note : This must be of the same length
               as the list of record_types, or omitted.

    Returns
    -------
    RecordAggregator
        Returns a new RecordAggregator containing the filtered
        records

    Raises
    ------
    TritonModelAnalyzerException
        If a requested record type is not present (when no filters are
        given), if filters are given without record types, or if the two
        lists differ in length
    """

    filtered_records = RecordAggregator()

    # No criteria at all: copy every record type across.
    if not record_types and not filters:
        for record_type, records in self._records.items():
            filtered_records.add_key(record_type, records)
        return filtered_records

    # Only record types given: copy those types wholesale.
    if record_types and not filters:
        try:
            for record_type in record_types:
                filtered_records.add_key(record_type,
                                         self._records[record_type])
            return filtered_records
        except KeyError as k:
            # The KeyError itself has no header(); the record type that
            # failed is the current loop variable.
            raise TritonModelAnalyzerException(
                f"Record type '{record_type.header()}' not found in this RecordAggregator"
            ) from k

    if filters and not record_types:
        raise TritonModelAnalyzerException(
            "Must specify the record types corresponding to each filter criterion."
        )

    if len(record_types) != len(filters):
        raise TritonModelAnalyzerException(
            "Must specify the same number of record types as filter criteria."
        )

    # Keep only the records that satisfy the criterion paired with their type
    for record_type, criterion in zip(record_types, filters):
        for record in self._records[record_type]:
            if criterion(record):
                filtered_records.insert(record)

    return filtered_records
def _preprocess_and_verify_arguments(self):
    """
    Enforces some rules on the config.

    The checks run in this order:
      1. 'remote' launch mode: the endpoint matching the client protocol
         must be set.
      2. Otherwise, when docker mounts or labels are configured: validate
         the mount strings in 'docker' launch mode, or warn that they are
         ignored in any other launch mode.
      3. 'docker' launch mode: the docker image must be non-empty.
      4. 'c_api' launch mode: warn about options that have no effect.
      5. When run config search is disabled and no concurrency is given,
         default the concurrency to [1].

    Raises
    ------
    TritonModelAnalyzerException
        If there is a problem with arguments or config.
    """

    if self.triton_launch_mode == 'remote':
        # A remote server needs an endpoint for the selected protocol.
        if self.client_protocol == 'http' and not self.triton_http_endpoint:
            raise TritonModelAnalyzerException(
                "client-protocol is 'http'. Must specify triton-http-endpoint "
                "if connecting to already running server or change protocol using "
                "--client-protocol.")
        if self.client_protocol == 'grpc' and not self.triton_grpc_endpoint:
            raise TritonModelAnalyzerException(
                "client-protocol is 'grpc'. Must specify triton-grpc-endpoint "
                "if connecting to already running server or change protocol using "
                "--client-protocol.")
    elif self.triton_docker_mounts or self.triton_docker_labels:
        if self.triton_launch_mode == 'docker':
            # Verify format: every mount string must contain exactly two
            # ':' separators.
            if self.triton_docker_mounts:
                for volume_str in self.triton_docker_mounts:
                    if volume_str.count(':') != 2:
                        raise TritonModelAnalyzerException(
                            "triton_docker_mounts needs to be a list of strings. Each string "
                            " should be of the format <host path>:<container dest>:<access mode>"
                        )
        else:
            # Docker-specific options were given for a non-docker launch mode.
            logger.warning(
                f"Triton launch mode is set to {self.triton_launch_mode}. "
                "Ignoring triton_docker_mounts and triton_docker_labels.")

    if self.triton_launch_mode == 'docker':
        # Reject a missing, empty or whitespace-only image name.
        if not self.triton_docker_image or self.triton_docker_image.isspace():
            raise TritonModelAnalyzerException(
                "triton_docker_image provided but is empty.")

    if self.triton_launch_mode == 'c_api':
        # These options are only reported, not rejected.
        if self.triton_server_flags:
            logger.warning(
                "Triton launch mode is set to C_API. Model Analyzer cannot set "
                "triton_server_flags.")
        if self.triton_output_path:
            logger.warning(
                "Triton launch mode is set to C_API, triton logs are not supported. "
                "Triton server error output can be obtained by setting perf_output_path."
            )

    # If run config search is disabled and no concurrency value is provided,
    # set the default value.
    if self.run_config_search_disable:
        if len(self.concurrency) == 0:
            self.concurrency = [1]
def run(self, metrics):
    """
    Runs the perf analyzer with the intialized configuration

    perf_analyzer is invoked up to MAX_INTERVAL_CHANGES times. Whenever it
    fails because the measurement window is too small, the configured
    'measurement-interval' is increased by INTERVAL_DELTA and the run is
    repeated. On success the captured output is stored in ``self._output``
    and handed to ``self._parse_output()``. Nothing is run when ``metrics``
    is empty.

    Parameters
    ----------
    metrics : List of Record types
        The list of record types to parse from
        Perf Analyzer

    Returns
    -------
    None
        This method returns no value.

    Raises
    ------
    TritonModelAnalyzerException
        If subprocess throws CalledProcessError for any other reason, or
        if no run succeeded within MAX_INTERVAL_CHANGES attempts
    """

    if not metrics:
        return

    # Synchronously start and finish run
    for _ in range(MAX_INTERVAL_CHANGES):
        cmd = [self.bin_path]
        cmd += self._config.to_cli_string().replace('=', ' ').split()

        try:
            self._output = check_output(cmd,
                                        start_new_session=True,
                                        stderr=STDOUT,
                                        encoding='utf-8')
        except CalledProcessError as e:
            # Substring test rather than find() > 0, so a message located at
            # the very start of the output is recognised as well.
            if "Please use a larger time window." in e.output:
                self._config['measurement-interval'] += INTERVAL_DELTA
                logger.info(
                    "perf_analyzer's measurement window is too small, "
                    f"increased to {self._config['measurement-interval']} ms."
                )
            else:
                raise TritonModelAnalyzerException(
                    f"Running perf_analyzer with {e.cmd} failed with"
                    f" exit status {e.returncode} : {e.output}") from e
        else:
            self._parse_output()
            return

    raise TritonModelAnalyzerException(
        f"Ran perf_analyzer {MAX_INTERVAL_CHANGES} times, "
        "but no valid requests recorded in max time interval"
        f" of {self._config['measurement-interval']} ")
def write_config_to_file(self, model_path, src_model_path,
                         first_variant_model_path):
    """
    Populates a model directory and writes the serialized model config
    into it as config.pbtxt.

    Parameters
    ----------
    model_path : str
        Path to write the model config.

    src_model_path : str
        Path to the source model in the Triton Model Repository;
        copied into model_path when there is no first variant

    first_variant_model_path : str
        Indicates the path to the first model variant. When given, its
        entries (except config.pbtxt) are symlinked into model_path.

    Raises
    ------
    TritonModelAnalyzerException
        If the path doesn't exist or the path is a file
    """

    if not os.path.exists(model_path):
        raise TritonModelAnalyzerException(
            'Output path specified does not exist.')

    if os.path.isfile(model_path):
        raise TritonModelAnalyzerException(
            'Model output path must be a directory.')

    model_config_bytes = text_format.MessageToBytes(self._model_config)

    if first_variant_model_path is None:
        # Create first variant model as copy of source model
        copy_tree(src_model_path, model_path)
    else:
        # Create current variant model as symlinks to first variant model
        for entry in os.listdir(first_variant_model_path):
            # The config is written separately below, so it is not linked.
            if entry != 'config.pbtxt':
                link_target = os.path.join(
                    os.path.relpath(first_variant_model_path, model_path),
                    entry)
                link_name = os.path.join(model_path, entry)
                os.symlink(link_target, link_name)

    config_file_path = os.path.join(model_path, "config.pbtxt")
    with open(config_file_path, 'wb') as f:
        f.write(model_config_bytes)
def write(self, out, append=False):
    """
    Writes the output to the configured file, or to stdout when no
    filename is set

    Parameters
    ----------
    out : str
        The string to be written to the
        file or stdout
    append : bool
        When writing to a file, append to it instead of
        overwriting its contents

    Raises
    ------
    TritonModelAnalyzerException
        If there is an error or exception while writing the output.
    """

    if not self._filename:
        # No file configured: emit the text as is, without a newline.
        print(out, end='')
        return

    if append:
        write_mode = 'a+'
    else:
        write_mode = 'w+'

    try:
        with open(self._filename, write_mode) as f:
            f.write(out)
    except OSError as e:
        raise TritonModelAnalyzerException(e)
def get_device_by_uuid(self, uuid, dcgmPath=None):
    """
    Look up a GPU device by its UUID.

    Parameters
    ----------
    uuid : str
        UUID of the device to look up.
    dcgmPath : str
        Not used by this method.

    Returns
    -------
    Device
        The device associated with the uuid.

    Raises
    ------
    TritonModelAnalyzerException
        If the uuid does not exist this exception will be raised.
    """

    if uuid not in self._devices_by_uuid:
        raise TritonModelAnalyzerException(
            f'GPU UUID {uuid} was not found.')

    return self._devices_by_uuid[uuid]
def insert_row_by_index(self, row, index=None):
    """
    Inserts a copy of a row into the table and widens the columns
    where the new values need more room.

    Parameters
    ----------
    row : list
        A row of data to add to the ResultTable
    index : int
        Position at which the row is inserted; the row is
        appended when omitted

    Raises
    ------
    TritonModelAnalyzerException
        if there is a mismatch between the table headers
        and the row to be inserted.
    """

    if len(row) != len(self._headers):
        raise TritonModelAnalyzerException(
            "Must provide a value for each existing"
            " column when adding a new row.")

    position = len(self._rows) if index is None else index
    # Store a shallow copy so later changes to the caller's list do not
    # alter the table.
    self._rows.insert(position, row[:])

    for column_index, value in enumerate(row):
        required_width = len(str(value)) + self.column_padding
        self._column_widths[column_index] = max(
            required_width, self._column_widths[column_index])
def __init__(self, config, client, server, metrics_manager, result_manager):
    """
    Stores the collaborators and creates the output model repository
    directory.

    Parameters
    ----------
    config: AnalyzerConfig
        The config for the model analyzer
    client
        Stored as-is; also passed to the RunConfigGenerator
    server
        Stored as-is
    metrics_manager
        Stored as-is
    result_manager
        Stored as-is

    Raises
    ------
    TritonModelAnalyzerException
        If the output model repository path already exists and overriding
        is not enabled, or if the directory cannot be created
    """

    self._config = config
    self._client = client
    self._server = server
    self._metrics_manager = metrics_manager
    self._result_manager = result_manager
    self._run_search = RunSearch(config=config)
    self._run_config_generator = RunConfigGenerator(config=config,
                                                    client=self._client)

    # Generate the output model repository path folder.
    self._output_model_repo_path = config.output_model_repository_path
    try:
        os.mkdir(self._output_model_repo_path)
    except FileExistsError:
        if not config.override_output_model_repository:
            raise TritonModelAnalyzerException(
                f'Path "{self._output_model_repo_path}" already exists. '
                'Please set or modify "--output-model-repository-path" flag or remove this directory.'
                ' You can also allow overriding of the output directory using'
                ' the "--override-output-model-repository" flag.')
        # Overriding is allowed: start again from an empty directory.
        shutil.rmtree(self._output_model_repo_path)
        logging.warning(
            f'Overriding the output model repo path "{self._output_model_repo_path}"...'
        )
        os.mkdir(self._output_model_repo_path)
    except OSError as e:
        # Any other failure (missing parent, permissions, ...) is not an
        # "already exists" situation and is reported with its real cause.
        raise TritonModelAnalyzerException(
            f'Failed to create output model repository path '
            f'"{self._output_model_repo_path}": {e}') from e
def _profile(self, perf_analyzer, dcgm_monitor):
    """
    Runs one profiling pass: records DCGM metrics while perf_analyzer
    runs (or, without a perf_analyzer, while sleeping for the configured
    duration) and returns the aggregated records.

    Raises
    ------
    TritonModelAnalyzerException
        if path to perf_analyzer binary could not be found.
    """

    # Start monitors and run perf_analyzer
    dcgm_monitor.start_recording_metrics()

    if not perf_analyzer:
        # Nothing to run: just let the monitor record for the duration.
        perf_records = []
        time.sleep(self._duration_seconds)
    else:
        try:
            perf_records = perf_analyzer.run(self.perf_tags)
        except FileNotFoundError as e:
            raise TritonModelAnalyzerException(
                f"perf_analyzer binary not found : {e}")

    dcgm_records = dcgm_monitor.stop_recording_metrics()

    # Insert all records into aggregator and get aggregated DCGM records
    aggregator = RecordAggregator()
    all_records = perf_records + dcgm_records
    for record in all_records:
        aggregator.insert(record)
    return aggregator.aggregate()
def wait_for_model_ready(self, model, num_retries=WAIT_FOR_READY_NUM_RETRIES):
    """
    Returns when model is ready.

    The client is polled with ``model.name()``; after every unsuccessful
    attempt (not ready, or the client raised) the method pauses briefly
    and consumes one retry.

    Parameters
    ----------
    model : object
        the model to wait for; its ``name()`` method supplies the
        name sent to the client
    num_retries : int
        number of times to send a ready status
        request to the server before raising
        an exception

    Raises
    ------
    TritonModelAnalyzerException
        If could not determine model readiness
        in given num_retries. The most recent client error, if any,
        is attached as the cause.
    """

    last_error = None
    while num_retries > 0:
        try:
            if self._client.is_model_ready(model.name()):
                return
        except Exception as e:
            # Keep polling, but remember the failure so it is not lost if
            # the retries run out.
            last_error = e
        time.sleep(0.05)
        num_retries -= 1

    raise TritonModelAnalyzerException(
        "Could not determine model readiness. "
        "Number of retries exceeded.") from last_error
def _parse_output(self):
    """
    Builds ``self._perf_records`` from the throughput and p99 latency
    lines of the stored perf_analyzer output. The last three lines of
    the output are not examined.

    Raises
    ------
    TritonModelAnalyzerException
        If no record could be extracted from the output
    """

    records = []
    self._perf_records = records

    for line in self._output.split('\n')[:-3]:
        if 'Throughput:' in line:
            # Get first word after Throughput
            records.append(PerfThroughput(value=float(line.split()[1])))
        elif 'p99 latency:' in line:
            # Get first word after 'latency:'
            raw_latency = line.split(' latency: ')[1].split()[0]
            # Convert value to ms from us
            records.append(PerfLatency(value=float(raw_latency) / 1e3))

    if not records:
        raise TritonModelAnalyzerException(
            'perf_analyzer output was not as expected.')
def __setitem__(self, key, value):
    """
    Sets a server argument. The key is stripped of surrounding
    whitespace and its underscores are turned into dashes before lookup.

    Parameters
    ----------
    key : str
        The name of the argument to the tritonserver
    value : (any)
        The value to which the argument is being set

    Raises
    ------
    TritonModelAnalyzerException
        If key is unsupported or undefined in the
        config class
    """

    normalized_key = key.strip().replace('_', '-')
    if normalized_key not in self._server_args:
        raise TritonModelAnalyzerException(
            f"The argument '{key}' to the Triton Inference "
            "Server is not supported by the model analyzer.")

    self._server_args[normalized_key] = value
def __getitem__(self, key):
    """
    Looks up an argument value, first among the plain arguments, then
    among the option aliases and finally among the verbose aliases.

    Parameters
    ----------
    key : str
        The name of the argument to the perf config

    Returns
    -------
    object
        The value that the argument is set to in this config

    Raises
    ------
    TritonModelAnalyzerException
        If argument not found in the config
    """

    if key in self._args:
        return self._args[key]

    if key in self._input_to_options:
        option_name = self._input_to_options[key]
        return self._options[option_name]

    if key in self._input_to_verbose:
        verbose_name = self._input_to_verbose[key]
        return self._verbose[verbose_name]

    raise TritonModelAnalyzerException(
        f'Key {key} does not exist in perf_analyzer_flags.')
def __setitem__(self, key, value):
    """
    Sets an argument value, matching the key first against the plain
    arguments, then the option aliases and finally the verbose aliases.

    Parameters
    ----------
    key : str
        The name of the argument to the perf_analyzer
    value : (any)
        The value to which the argument is being set

    Raises
    ------
    TritonModelAnalyzerException
        If key is unsupported or undefined in the
        config class
    """

    if key in self._args:
        self._args[key] = value
        return

    if key in self._input_to_options:
        self._options[self._input_to_options[key]] = value
        return

    if key in self._input_to_verbose:
        self._verbose[self._input_to_verbose[key]] = value
        return

    raise TritonModelAnalyzerException(
        f"The argument '{key}' to the perf_analyzer "
        "is not supported by the model analyzer.")
def add_column(self, column, index=None):
    """
    Inserts a column into the table: its header, its display width and
    one value per existing row.
    **Note : column[0] is assumed to be the column header.

    Parameters
    ----------
    column : list of vals
        Content of the column to be added
    index : int
        The index at which to add a column; the column is
        appended when omitted

    Raises
    ------
    TritonModelAnalyzerException
        If the column does not hold a header plus exactly one value
        per existing row
    """

    if len(column) != len(self._rows) + 1:
        raise TritonModelAnalyzerException(
            "Must provide a value for each existing row when adding a new column."
        )

    if index is None:
        index = len(self._headers)

    header, *values = column
    self._headers.insert(index, header)

    # The width accounts for the header as well as every value.
    widest = max(len(str(val)) + self.column_padding for val in column)
    self._column_widths.insert(index, widest)

    for row, value in zip(self._rows, values):
        row.insert(index, value)
def __getitem__(self, key):
    """
    Returns the value stored for an argument, searching the plain
    arguments first, then the option aliases, then the verbose aliases.

    Parameters
    ----------
    key : str
        The name of the argument to look up

    Returns
    -------
        The value that the argument is set to in this config

    Raises
    ------
    TritonModelAnalyzerException
        If argument not found in the config
    """

    if key in self._args:
        return self._args[key]

    if key in self._input_to_options:
        mapped_option = self._input_to_options[key]
        return self._options[mapped_option]

    if key in self._input_to_verbose:
        mapped_verbose = self._input_to_verbose[key]
        return self._verbose[mapped_verbose]

    raise TritonModelAnalyzerException(
        f"'{key}' Key not found in config")
def test_all_options(self, mocked_load_config_file_profile,
                     mocked_verify_args_analyze,
                     mocked_load_config_file_analyze,
                     mocked_verify_args_report,
                     mocked_load_config_file_report):
    """
    Runs the check matching each option's type for every option returned
    by get_test_options(), then verifies that all options were covered.
    """

    exercised_flags = set()

    for option in get_test_options():
        exercised_flags.add(
            self._convert_flag_to_use_underscores(option.long_flag))

        option_type = option.type
        if option_type == "bool":
            self._test_boolean_option(option)
        elif option_type in ("int", "float"):
            self._test_numeric_option(option)
        elif option_type == "string":
            self._test_string_option(option)
        elif option_type in ("intlist", "stringlist"):
            self._test_list_option(option)
        elif option_type == "noop":
            # Counted as covered, but there is nothing to execute.
            continue
        else:
            raise TritonModelAnalyzerException("Invalid option type")

    self._verify_all_options_tested(exercised_flags)
def total(self, record_type=None):
    """
    Count the records held by the RecordAggregator

    Parameters
    ----------
    record_type : a class name of type Record
        The type of records to count,
        if None, count all types

    Returns
    -------
    int
        number of records in
        the RecordAggregator

    Raises
    ------
    TritonModelAnalyzerException
        If record_type is given but not present in the aggregator
    """

    if not record_type:
        return sum(len(records) for records in self._records.values())

    if record_type not in self._records:
        raise TritonModelAnalyzerException(
            f"Record type '{record_type.header()}' not found in this RecordAggregator"
        )
    return len(self._records[record_type])
def dcgmGroupAddDevice(dcgm_handle, group_id, gpu_device_id):
    """
    Mock dcgmGroupAddDevice

    Appends the device id to the MockDCGMAgent device group found at
    position ``group_id.value`` among the registered groups.

    Parameters
    ----------
    dcgm_handle : ctypes.c_void_p
        A DCGM Handle object
    group_id : int
        Group type
    gpu_device_id : int
        GPU device id

    Raises
    ------
    KeyError
        If the group does not exist
    TritonModelAnalyzerException
        If the device is already part of the group
    """

    group_index = group_id.value
    group_names = list(MockDCGMAgent.device_groups)
    if group_index >= len(group_names):
        raise KeyError

    # Groups are addressed by their position in the mapping.
    group_members = MockDCGMAgent.device_groups[group_names[group_index]]
    if gpu_device_id in group_members:
        raise TritonModelAnalyzerException(
            f'GPU device {gpu_device_id} already exists in the device group'
        )

    group_members.append(gpu_device_id)
def run(self):
    """
    Runs the perf analyzer with the intialized configuration

    The command line is the binary path followed by the config's CLI
    string, with every '=' replaced by a space and split on whitespace.

    Returns
    -------
    List of Records for metrics
    that are generated by the perf analyzer
        (whatever ``self._parse_perf_output`` returns for the output)

    Raises
    ------
    TritonModelAnalyzerException
        If subprocess throws CalledProcessError
    """

    cmd = [self.bin_path]
    cmd += self._config.to_cli_string().replace('=', ' ').split()

    # Synchronously start and finish run
    try:
        out = check_output(cmd, stderr=STDOUT, encoding='utf-8')
    except CalledProcessError as e:
        # Both pieces must be f-strings: only then are the exit status and
        # the captured output actually interpolated into the message.
        raise TritonModelAnalyzerException(
            f"perf analyzer returned with exit"
            f" status {e.returncode} : {e.output}") from e

    return self._parse_perf_output(out)
def _get_perf_analyzer_metrics(self, perf_config, perf_output_writer=None):
    """
    Runs perf_analyzer with the given config and returns the
    aggregated records it produced

    Parameters
    ----------
    perf_config : dict
        The keys are arguments to perf_analyzer
        The values are their values
    perf_output_writer : OutputWriter
        Writer that writes the output from perf_analyzer to the output
        stream/file. If None, the output is not written

    Raises
    ------
    TritonModelAnalyzerException
        If the perf_analyzer binary could not be found
    """

    try:
        analyzer = PerfAnalyzer(path=self._perf_analyzer_path,
                                config=perf_config)
        analyzer.run(self._perf_metrics)
    except FileNotFoundError as e:
        raise TritonModelAnalyzerException(
            f"perf_analyzer binary not found : {e}")

    if perf_output_writer:
        raw_output = analyzer.output()
        perf_output_writer.write(raw_output + '\n')

    aggregator = RecordAggregator()
    aggregator.insert_all(analyzer.get_records())
    return aggregator.aggregate()
def start(self, env=None):
    """
    Starts the tritonserver binary as a local subprocess.

    Nothing happens when no server path is configured. The process
    inherits the current environment, updated with ``env`` and with
    CUDA_VISIBLE_DEVICES set from the configured GPUs. Its output goes to
    the configured log file, or is discarded when there is none.

    Parameters
    ----------
    env : dict
        Extra environment variables for the server process. Values that
        contain '$' are passed through os.path.expandvars first.

    Raises
    ------
    TritonModelAnalyzerException
        If the log file cannot be opened or the process cannot be started
    """

    if not self._server_path:
        return

    # Create command list and run subprocess
    cmd = [self._server_path]
    cmd += self._server_config.to_args_list()

    # Set environment, update with user config env
    triton_env = os.environ.copy()

    if env:
        for variable, value in env.items():
            if '$' in value:
                # Resolve references to other environment variables
                triton_env[variable] = os.path.expandvars(value)
            else:
                triton_env[variable] = value

    # List GPUs to be used by tritonserver
    triton_env['CUDA_VISIBLE_DEVICES'] = ','.join(
        [gpu.device_uuid() for gpu in self._gpus])

    if self._log_path:
        try:
            self._log_file = open(self._log_path, 'a+')
        except OSError as e:
            raise TritonModelAnalyzerException(e) from e
    else:
        self._log_file = DEVNULL

    # Construct Popen command
    try:
        self._tritonserver_process = Popen(cmd,
                                           stdout=self._log_file,
                                           stderr=STDOUT,
                                           start_new_session=True,
                                           universal_newlines=True,
                                           env=triton_env)
        logger.info('Triton Server started.')
    except Exception as e:
        # Do not leave the log file open when the server failed to start.
        if self._log_path:
            self._log_file.close()
        raise TritonModelAnalyzerException(e) from e
def remove_row_by_index(self, index):
    """
    Deletes the row at the given position from the table

    Parameters
    ----------
    index : int
        The index of the row to be removed

    Raises
    ------
    TritonModelAnalyzerException
        If the table has no rows, or index is negative or not
        smaller than the number of rows
    """

    row_count = len(self._rows)
    if row_count == 0:
        raise TritonModelAnalyzerException(
            "Attempting to remove result from an empty ResultTable!")

    if index >= row_count or index < 0:
        raise TritonModelAnalyzerException(
            f"Index {index} out of range for remove_row_by_index")

    del self._rows[index]
def _get_table(self, key):
    """
    Returns the ResultTable stored under the given key

    Raises
    ------
    TritonModelAnalyzerException
        If no table is stored under key
    """

    if key in self._result_tables:
        return self._result_tables[key]

    raise TritonModelAnalyzerException(
        f"Table with key '{key}' not found in ResultManager")
def wait_for_ready(self, num_retries=WAIT_FOR_READY_NUM_RETRIES):
    """
    Polls the server's HTTP health endpoint on localhost until it
    answers with status 200.

    Parameters
    ----------
    num_retries : int
        number of times to send a ready status
        request to the server before raising
        an exception

    Returns
    -------
    bool
        True once the server reports ready

    Raises
    ------
    TritonModelAnalyzerException
        1)  If config doesn't allow http
            requests
        2)  If server readiness could not be
            determined in given num_retries.
    """

    if self._server_config['allow-http'] is False:
        # TODO to use GRPC to check for ready also
        raise TritonModelAnalyzerException(
            'allow-http must be True in order to use wait_for_server_ready'
        )

    http_port = self._server_config['http-port'] or SERVER_HTTP_PORT
    url = f"http://localhost:{http_port}/v2/health/ready"

    # Upper bound, in seconds, for a single health request. Without it one
    # unresponsive connection would block forever and the retry limit
    # would never take effect.
    request_timeout_seconds = 5

    retries = num_retries

    # poll ready endpoint for number of retries
    while retries > 0:
        try:
            r = requests.get(url, timeout=request_timeout_seconds)
            if r.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            # Connection errors and timeouts simply count as "not ready yet".
            pass
        time.sleep(0.1)
        retries -= 1

    # If num_retries is exceeded return an exception
    raise TritonModelAnalyzerException(
        f"Server not ready : num_retries : {num_retries}")
def __init__(self, gpus, frequency, tags):
    """
    Resolves the GPUs to monitor and prepares the background
    collection state.

    Parameters
    ----------
    gpus : list
        A list of strings containing GPU UUIDs. The single-element
        list ['all'] selects every device listed by numba.cuda.
    frequency : float
        How often the metrics should be monitored.
    tags : list
        A list of Record objects that will be monitored.

    Raises
    ------
    TritonModelAnalyzerException
        If 'all' is requested but no GPU is visible by CUDA.
    """

    self._frequency = frequency
    self._gpus = []

    if len(gpus) == 1 and gpus[0] == 'all':
        cuda_devices = numba.cuda.list_devices()
        if len(cuda_devices) == 0:
            raise TritonModelAnalyzerException(
                "No GPUs are visible by CUDA. Make sure that 'nvidia-smi'"
                " output shows available GPUs. If you are using Model"
                " Analyzer inside a container, ensure that you are"
                " launching the container with the"
                " appropriate '--gpus' flag")
        self._gpus.extend(
            GPUDeviceFactory.create_device_by_cuda_index(cuda_device.id)
            for cuda_device in cuda_devices)
    else:
        self._gpus.extend(
            GPUDeviceFactory.create_device_by_uuid(gpu_uuid)
            for gpu_uuid in gpus)

    # device_uuid() values are decoded as ASCII for logging
    gpu_uuids_str = ','.join(
        str(gpu.device_uuid(), encoding='ascii') for gpu in self._gpus)
    logger.info(
        f'Using GPU(s) with UUID(s) = {{ {gpu_uuids_str} }} for the analysis.'
    )

    # Is the background thread active
    self._thread_active = False

    # Background thread collecting results
    self._thread = None

    # Thread pool
    self._thread_pool = ThreadPool(processes=1)
    self._tags = tags