def new_from(cls, project_root, entry_point):
    path_to_entry_point = os.path.join(project_root, entry_point)
    # Note: This is not necessarily the same as project_root because the
    # entry_point could be in a subdirectory.
    path_to_entry_point_dir = os.path.dirname(path_to_entry_point)

    # 1. Run the entry point file to "load" the model
    entry_point_code, entry_point_ast, scope = _run_entry_point(
        path_to_entry_point,
        path_to_entry_point_dir,
        project_root,
    )

    # 2. Check that the model provider and input provider functions exist
    if MODEL_PROVIDER_NAME not in scope:
        raise AnalysisError(
            "The project entry point file is missing a model provider "
            "function. Please add a model provider function named "
            "\"{}\".".format(MODEL_PROVIDER_NAME)
        ).with_file_context(entry_point)

    if INPUT_PROVIDER_NAME not in scope:
        raise AnalysisError(
            "The project entry point file is missing an input provider "
            "function. Please add an input provider function named "
            "\"{}\".".format(INPUT_PROVIDER_NAME)
        ).with_file_context(entry_point)

    if ITERATION_PROVIDER_NAME not in scope:
        raise AnalysisError(
            "The project entry point file is missing an iteration "
            "provider function. Please add an iteration provider function "
            "named \"{}\".".format(ITERATION_PROVIDER_NAME)
        ).with_file_context(entry_point)

    batch_size = _validate_providers_signatures(
        scope[MODEL_PROVIDER_NAME],
        scope[INPUT_PROVIDER_NAME],
        scope[ITERATION_PROVIDER_NAME],
        entry_point,
    )

    model_provider, input_provider, iteration_provider = (
        _wrap_providers_with_validators(
            scope[MODEL_PROVIDER_NAME],
            scope[INPUT_PROVIDER_NAME],
            scope[ITERATION_PROVIDER_NAME],
            entry_point,
        ))

    return cls(
        project_root,
        entry_point,
        path_to_entry_point_dir,
        model_provider,
        input_provider,
        iteration_provider,
        batch_size,
        StaticAnalyzer(entry_point_code, entry_point_ast),
    )
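# A minimal sketch of the entry point file that new_from() expects to find,
# assuming the provider constants resolve to the names used below (the real
# strings come from MODEL_PROVIDER_NAME, INPUT_PROVIDER_NAME, and
# ITERATION_PROVIDER_NAME). The model and input shapes are hypothetical and
# for illustration only.
import torch

def skyline_model_provider():
    # Return a callable model, moved to the GPU.
    return torch.nn.Linear(1024, 10).cuda()

def skyline_input_provider(batch_size=32):
    # Return an iterable of model inputs. The batch_size parameter must
    # have an integral default so the starting batch size can be recovered.
    return (torch.randn(batch_size, 1024).cuda(),)

def skyline_iteration_provider(model):
    # Return a callable that runs one training iteration.
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

    def iteration(inputs):
        optimizer.zero_grad()
        out = model(inputs)
        out.sum().backward()
        optimizer.step()

    return iteration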
def _validate_providers_signatures(
    model_provider,
    input_provider,
    iteration_provider,
    entry_point,
):
    # The model provider must be callable with no arguments
    model_sig = inspect.signature(model_provider)
    if len(model_sig.parameters) != 0:
        raise AnalysisError(
            "The model provider function cannot have any parameters."
        ).with_file_context(entry_point)

    # The input provider must take exactly one batch size parameter with
    # an integral default value; that default is the starting batch size
    input_sig = inspect.signature(input_provider)
    if (len(input_sig.parameters) != 1 or
            BATCH_SIZE_ARG not in input_sig.parameters or
            type(input_sig.parameters[BATCH_SIZE_ARG].default) is not int):
        raise AnalysisError(
            "The input provider function must have exactly one '{}' "
            "parameter with an integral default "
            "value.".format(BATCH_SIZE_ARG)).with_file_context(entry_point)
    batch_size = input_sig.parameters[BATCH_SIZE_ARG].default

    # The iteration provider must take exactly one parameter (the model)
    iteration_sig = inspect.signature(iteration_provider)
    if len(iteration_sig.parameters) != 1:
        raise AnalysisError(
            "The iteration provider function must have exactly one "
            "parameter (the model being profiled).").with_file_context(
                entry_point)

    return batch_size
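# A small illustration of the inspect-based checks above, using a
# hypothetical input provider. inspect.signature() exposes each parameter
# and its default value, which is how the starting batch size is recovered.
import inspect

def _example_input_provider(batch_size=16):  # hypothetical provider
    return [batch_size]

_sig = inspect.signature(_example_input_provider)
assert len(_sig.parameters) == 1
assert "batch_size" in _sig.parameters
assert type(_sig.parameters["batch_size"].default) is int
assert _sig.parameters["batch_size"].default == 16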
def iteration_provider_wrapped(model):
    # Ensure the iteration provider returns something callable
    iteration = iteration_provider(model)
    if not callable(iteration):
        raise AnalysisError(
            "The iteration provider function must return a callable "
            "(i.e. return something that can be called like a "
            "function).").with_file_context(entry_point)
    return iteration
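# A common mistake the wrapper above catches (hypothetical example): an
# iteration provider that runs the iteration and implicitly returns None,
# instead of returning the iteration function itself.
def _bad_iteration_provider(model):
    model()  # runs one step and falls through, returning None

assert not callable(_bad_iteration_provider(lambda: None))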
def model_provider_wrapped():
    # Ensure the model provider returns something callable
    model = model_provider()
    if not callable(model):
        raise AnalysisError(
            "The model provider function must return a callable (i.e. "
            "return something that can be called like a PyTorch "
            "module or function).").with_file_context(entry_point)
    return model
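# Why callable() is the right check above: nn.Module instances define
# __call__, and plain functions are callable too, while a bare tensor
# is not (illustrative assertions only).
import torch

assert callable(torch.nn.Linear(4, 4))  # a module is callable
assert callable(lambda x: x * 2)        # so is a plain function
assert not callable(torch.ones(1))      # a tensor is not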
def to_trainable_model(parse_tree, class_name):
    try:
        # Compile and execute the parsed model source to bring the model
        # class into scope, then instantiate it on the GPU in training mode
        executable = compile(parse_tree, '<string>', 'exec')
        scope = {}
        exec(executable, scope, scope)
        model = scope[class_name]().to(torch.device('cuda'))
        model.train()
        return model
    except Exception as ex:
        raise AnalysisError(str(ex), type(ex))
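# A self-contained sketch of the compile/exec pattern used above, with a
# hypothetical model source string. compile() accepts an AST directly, and
# exec() populates `scope` with the class definition, which can then be
# looked up by name and instantiated.
import ast
import torch

_source = (
    "import torch\n"
    "class TinyNet(torch.nn.Module):\n"
    "    def __init__(self):\n"
    "        super().__init__()\n"
    "        self.fc = torch.nn.Linear(8, 2)\n"
    "    def forward(self, x):\n"
    "        return self.fc(x)\n"
)
_tree = ast.parse(_source)
_executable = compile(_tree, '<string>', 'exec')
_scope = {}
exec(_executable, _scope, _scope)
_model = _scope['TinyNet']()  # instantiated on the CPU in this sketch
_model.train()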
def input_provider_wrapped(batch_size=None):
    if batch_size is None:
        inputs = input_provider()
    else:
        inputs = input_provider(batch_size=batch_size)
    # Reject a bare tensor explicitly: it is technically iterable, but
    # iterating it would split it along its first dimension rather than
    # treating it as a single model input
    if isinstance(inputs, torch.Tensor):
        raise AnalysisError(
            "The input provider function must return an iterable that "
            "contains the inputs for the model. If your model only takes "
            "a single tensor as input, return a single element tuple or "
            "list in your input provider (e.g., 'return [the_input]')."
        ).with_file_context(entry_point)
    try:
        # iter() raises a TypeError if the inputs are not iterable
        iter(inputs)
        return inputs
    except TypeError:
        raise AnalysisError(
            "The input provider function must return an iterable that "
            "contains the inputs for the model.").with_file_context(
                entry_point)
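# Examples of what the wrapper above accepts and rejects (hypothetical
# shapes): a tuple of tensors passes both checks, while a bare tensor is
# rejected before the iterability check ever runs.
import torch

_ok = (torch.randn(4, 8),)      # single-input model: wrap in a tuple
assert not isinstance(_ok, torch.Tensor)
iter(_ok)                       # iterable, so it is accepted

_bad = torch.randn(4, 8)        # bare tensor: rejected by the first check
assert isinstance(_bad, torch.Tensor)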
def measure_throughput(self):
    if self._profiler is None:
        self._initialize_iteration_profiler()

    # 1. Measure the throughput at several spots to be able to build a
    #    prediction model
    num_samples = 3
    samples = self._profiler.sample_run_time_ms_by_batch_size(
        start_batch_size=self._batch_size,
        memory_usage_percentage=self._memory_usage_percentage,
        start_batch_size_run_time_ms=self._batch_size_iteration_run_time_ms,
        num_samples=num_samples,
    )
    if len(samples) == 0 or samples[0].batch_size != self._batch_size:
        raise AnalysisError(
            "Something went wrong with Skyline when measuring your "
            "model's throughput. Please file a bug.")

    # 2. Begin filling in the throughput response
    measured_throughput = (
        samples[0].batch_size / samples[0].run_time_ms * 1000)
    throughput = pm.ThroughputResponse()
    throughput.samples_per_second = measured_throughput
    throughput.predicted_max_samples_per_second = math.nan
    throughput.can_manipulate_batch_size = False

    # 3. Determine whether we have information about the batch size
    #    location in the code
    batch_info = self._entry_point_static_analyzer.batch_size_location()
    if batch_info is not None:
        throughput.batch_size_context.line_number = batch_info[0]
        throughput.can_manipulate_batch_size = batch_info[1]
        throughput.batch_size_context.file_path.components.extend(
            self._entry_point.split(os.sep))

    # 4. If we do not have enough throughput samples, we cannot build any
    #    prediction models so just return the message as-is
    if len(samples) != num_samples:
        return throughput

    # 5. Build and validate the prediction models for run time (throughput)
    #    and memory
    batches = list(map(lambda sample: sample.batch_size, samples))
    run_times = list(map(lambda sample: sample.run_time_ms, samples))
    usages = list(map(lambda sample: sample.peak_usage_bytes, samples))

    run_time_model = _fit_linear_model(batches, run_times)
    peak_usage_model = _fit_linear_model(batches, usages)
    logger.debug(
        "Run time model - Slope: %f, Bias: %f (ms)",
        *run_time_model,
    )
    logger.debug(
        "Peak usage model - Slope: %f, Bias: %f (bytes)",
        *peak_usage_model,
    )

    throughput.peak_usage_bytes.slope = peak_usage_model[0]
    throughput.peak_usage_bytes.bias = peak_usage_model[1]

    predicted_max_throughput = 1000.0 / run_time_model[0]

    # Our prediction can be inaccurate due to sampling error or incorrect
    # assumptions. In these cases, we ignore our prediction. At the very
    # minimum, a good linear model has a positive slope and bias.
    if (run_time_model[0] < 1e-3 or run_time_model[1] < 1e-3 or
            measured_throughput > predicted_max_throughput):
        return throughput

    throughput.predicted_max_samples_per_second = predicted_max_throughput
    throughput.run_time_ms.slope = run_time_model[0]
    throughput.run_time_ms.bias = run_time_model[1]
    return throughput
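# A minimal sketch of what _fit_linear_model might look like (the actual
# implementation is not shown in this section), assuming an ordinary
# least-squares fit of run_time_ms ~= slope * batch_size + bias. Under
# that model, throughput(b) = b / (slope * b + bias) * 1000 approaches
# 1000 / slope samples/sec as the batch size grows, which is where the
# predicted_max_throughput computation above comes from.
import numpy as np

def _fit_linear_model_sketch(x, y):
    # polyfit with deg=1 returns (slope, bias) for the least-squares line.
    slope, bias = np.polyfit(x, y, deg=1)
    return slope, bias

_slope, _bias = _fit_linear_model_sketch([16, 32, 48], [20.0, 36.0, 52.0])
assert abs(1000.0 / _slope - 1000.0) < 1e-6  # slope of 1 ms per sample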
def _handle_analysis_request(self, analysis_request, context):
    start_time = time.perf_counter()
    try:
        logger.debug(
            'Processing request %d from (%s:%d).',
            context.sequence_number,
            *(context.address),
        )
        analyzer = analyze_project(
            Config.project_root, Config.entry_point, self._nvml)

        # Abort early if the connection has been closed
        if not context.state.connected:
            logger.debug(
                'Aborting request %d from (%s:%d) early '
                'because the client has disconnected.',
                context.sequence_number,
                *(context.address),
            )
            return

        breakdown = next(analyzer)
        self._enqueue_response(
            self._send_breakdown_response,
            breakdown,
            context,
        )

        if not context.state.connected:
            logger.debug(
                'Aborting request %d from (%s:%d) early '
                'because the client has disconnected.',
                context.sequence_number,
                *(context.address),
            )
            return

        throughput = next(analyzer)
        self._enqueue_response(
            self._send_throughput_response,
            throughput,
            context,
        )

        elapsed_time = time.perf_counter() - start_time
        logger.debug(
            'Processed analysis request %d from (%s:%d) in %.4f seconds.',
            context.sequence_number,
            *(context.address),
            elapsed_time,
        )
    except AnalysisError as ex:
        self._enqueue_response(self._send_analysis_error, ex, context)
    except:
        logger.exception(
            'Exception occurred when handling analysis request.')
        self._enqueue_response(
            self._send_analysis_error,
            AnalysisError(
                'An unexpected error occurred when analyzing your model. '
                'Please file a bug report and then restart Skyline.'
            ),
            context,
        )
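# A sketch of the generator contract the handler above relies on:
# analyze_project() is consumed with next() twice, yielding the breakdown
# first and the throughput second, so each result can be streamed to the
# client as soon as it is ready. (Placeholder values below; only the
# two-step yield order is what the handler depends on.)
def _analyze_project_sketch():
    # Stage 1: produce the breakdown.
    yield "breakdown"
    # Stage 2: produce the throughput measurements.
    yield "throughput"

_gen = _analyze_project_sketch()
assert next(_gen) == "breakdown"   # consumed and sent first
assert next(_gen) == "throughput"  # consumed and sent second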