class FitCommand(cexc.BaseChunkHandler):
    """FitCommand uses ChunkedController & one of two processors to fit models.

    The FitCommand can use either the FitBatchProcessor or the
    FitPartialProcessor, which is chosen based on the presence of the
    partial_fit parameter.
    """

    @staticmethod
    def handle_arguments(getinfo):
        """Take the getinfo metadata and return controller_options.

        Args:
            getinfo (dict): getinfo metadata from first chunk

        Returns:
            controller_options (dict): options to be passed to controller
            partial_fit (bool): boolean flag to indicate partial fit

        Raises:
            RuntimeError: if the command was invoked without any argument,
                or if handle_raw_options rejects the options
        """
        if len(getinfo['searchinfo']['raw_args']) == 0:
            raise RuntimeError('First argument must be an "algorithm"')

        # The first argument names the algorithm; only the remaining raw
        # arguments are parsed as options.
        raw_options = parse_args(getinfo['searchinfo']['raw_args'][1:])
        controller_options, partial_fit = FitCommand.handle_raw_options(
            raw_options)
        controller_options['algo_name'] = getinfo['searchinfo']['args'][0]
        return controller_options, partial_fit

    @staticmethod
    def handle_raw_options(controller_options):
        """Load command specific options.

        The given dict is modified in place: a 'processor' entry is added,
        and the 'apply' / 'partial_fit' entries are removed from
        controller_options['params'] when present.

        Args:
            controller_options (dict): options from handle_arguments

        Returns:
            controller_options (dict): dict of controller options
            partial_fit (bool): boolean flag for partial fit

        Raises:
            RuntimeError: if parameter conversion fails, or if apply is
                false and no model_name was given
        """
        controller_options['processor'] = 'FitBatchProcessor'
        partial_fit = False

        if 'params' in controller_options:
            try:
                # presumably coerces 'apply' and 'partial_fit' to booleans
                # and leaves other params alone -- confirm in convert_params
                fit_params = convert_params(
                    params=controller_options['params'],
                    ignore_extra=True,
                    bools=['apply', 'partial_fit'])
            except ValueError as e:
                raise RuntimeError(str(e))

            if 'apply' in fit_params:
                controller_options['apply'] = fit_params['apply']
                del controller_options['params']['apply']

                # Fitting without applying and without saving would
                # produce nothing, so reject that combination.
                if ('model_name' not in controller_options
                        and not fit_params['apply']):
                    raise RuntimeError(
                        'You must save a model if you are not applying it.')

            if 'partial_fit' in fit_params:
                partial_fit = fit_params['partial_fit']
                del controller_options['params']['partial_fit']

        if partial_fit:
            controller_options['processor'] = 'FitPartialProcessor'

        return controller_options, partial_fit

    def setup(self):
        """Get options, create controller & watchdog, return command type.

        The watchdog is only obtained here; it is started by handler() on
        the first data chunk.

        Returns:
            (dict): get info response (command type) and required fields
        """
        self.controller_options, self.partial_fit = self.handle_arguments(
            self.getinfo)
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        self.watchdog = command_util.get_watchdog(
            self.controller.resource_limits['max_fit_time'],
            self.controller.resource_limits['max_memory_usage_mb'],
            os.path.join(self.getinfo['searchinfo']['dispatch_dir'],
                         'finalize'))

        required_fields = self.controller.get_required_fields()
        return {'type': 'events', 'required_fields': required_fields}

    def get_output_body(self):
        """Collect output body from controller.

        Returns:
            (str): body
        """
        return self.controller.output_results()

    def handler(self, metadata, body):
        """Main handler we override from BaseChunkHandler.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            Either a bare metadata dict (invalid chunk, getinfo exchange,
            preview run), or a tuple of:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC, or
                None when nothing was executed for this chunk
        """
        if command_util.is_invalid_chunk(metadata):
            logger.debug('Not running without session key.')
            return {'finished': True}

        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        if self.getinfo.get('preview', False):
            logger.debug('Not running in preview.')
            return {'finished': True}

        if not self.watchdog.started:
            self.watchdog.start()

        finished_flag = metadata.get('finished', False)

        self.controller.load_data(body)

        # Partial fit should *always* execute on every chunk.
        # Non partial fit will execute on the last chunk.
        if self.partial_fit or finished_flag:
            self.controller.execute()
            output_body = self.get_output_body()
        else:
            output_body = None

        if finished_flag:
            self.controller.finalize()
            # Gracefully terminate watchdog
            if self.watchdog.started:
                self.watchdog.join()

        # Our final farewell
        self.log_performance_timers()
        return ({'finished': finished_flag}, output_body)

    def log_performance_timers(self):
        """Log the handler and controller timers at debug level.

        The values are passed as logger arguments so the message is only
        formatted when debug logging is enabled; this runs on every chunk.
        The _read_time/_handle_time/_write_time attributes are presumably
        maintained by BaseChunkHandler -- confirm.
        """
        logger.debug(
            "command=fit, read_time=%f, handle_time=%f, write_time=%f, "
            "csv_parse_time=%f, csv_render_time=%f",
            self._read_time,
            self._handle_time,
            self._write_time,
            self.controller._csv_parse_time,
            self.controller._csv_render_time)
class ScoreCommand(cexc.BaseChunkHandler):
    """Score field(s) through a ChunkedController running the ScoreProcessor."""

    @staticmethod
    def handle_arguments(getinfo):
        """Build the controller options from the getinfo metadata.

        Args:
            getinfo (dict): getinfo metadata from first chunk

        Returns:
            controller_options (dict): options to be passed to controller

        Raises:
            RuntimeError: if the command was invoked without any argument
        """
        searchinfo = getinfo['searchinfo']
        if len(searchinfo['raw_args']) == 0:
            raise RuntimeError('First argument must be a scoring method')

        # Everything after the first raw argument is parsed as options;
        # the first argument itself names the scoring method.
        options = ScoreCommand.handle_raw_options(
            parse_args(searchinfo['raw_args'][1:]))
        options['scoring_name'] = searchinfo['args'][0]
        return options

    @staticmethod
    def handle_raw_options(controller_options):
        """Apply the score-specific adjustments to the parsed options.

        The dict is modified in place: the processor is set and any
        'feature_variables' entry is moved to 'variables' (empty list when
        absent).

        Args:
            controller_options (dict): options from handle_arguments

        Returns:
            controller_options (dict): dict of controller options
        """
        controller_options['processor'] = 'ScoreProcessor'
        feature_variables = controller_options.pop('feature_variables', [])
        controller_options['variables'] = feature_variables
        return controller_options

    def setup(self):
        """Parse search string, choose processor, build controller & watchdog.

        Returns:
            (dict): get info response (command type) and required fields.
                This is what handler() returns for the getinfo exchange
                (first chunk), establishing the execution type and the
                required fields.
        """
        options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo, options)

        limits = self.controller.resource_limits
        time_limit = limits['max_score_time']
        memory_limit = limits['max_memory_usage_mb']
        finalize_path = os.path.join(
            self.getinfo['searchinfo']['dispatch_dir'], 'finalize')
        self.watchdog = command_util.get_watchdog(
            time_limit, memory_limit, finalize_path)

        return {
            'type': 'events',
            'required_fields': self.controller.get_required_fields(),
        }

    def handler(self, metadata, body):
        """Main handler we override from BaseChunkHandler.

        Every data chunk is loaded into the controller; scoring itself only
        runs once the chunk flagged as finished has been loaded.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            Either a bare metadata dict (invalid chunk or getinfo
            exchange), or a tuple of:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC, None
                for every chunk but the last
        """
        if command_util.is_invalid_chunk(metadata):
            logger.debug('Not running without session key.')
            return {'finished': True}

        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        is_last_chunk = metadata.get('finished', False)

        if not self.watchdog.started:
            self.watchdog.start()

        # Load data
        self.controller.load_data(body)

        # Nothing to emit until the final chunk has arrived.
        if not is_last_chunk:
            return ({'finished': is_last_chunk}, None)

        # score will execute on the last chunk.
        self.controller.execute()
        results = self.controller.output_results()

        if self.watchdog.started:
            self.watchdog.join()

        # Our final farewell
        return ({'finished': is_last_chunk}, results)
class ApplyCommand(BaseChunkHandler):
    """ApplyCommand uses the ChunkedController & ApplyProcessor to make predictions."""

    @staticmethod
    def handle_arguments(getinfo):
        """Take the getinfo metadata and return controller_options.

        Args:
            getinfo (dict): getinfo metadata

        Returns:
            controller_options (dict): options to be sent to controller

        Raises:
            RuntimeError: if no model argument was given, or if
                handle_raw_options rejects the options
        """
        if len(getinfo['searchinfo']['args']) == 0:
            raise RuntimeError('First argument must be a saved model.')

        # The first argument names the saved model; only the remaining raw
        # arguments are parsed as options.
        raw_options = parse_args(getinfo['searchinfo']['raw_args'][1:])
        controller_options = ApplyCommand.handle_raw_options(raw_options)
        controller_options['namespace'], controller_options[
            'model_name'] = parse_namespace_model_name(
                getinfo['searchinfo']['args'][0])
        return controller_options

    @staticmethod
    def handle_raw_options(raw_options):
        """Load command specific options.

        The given dict is modified in place (a 'processor' entry is added).

        Args:
            raw_options (dict): raw options

        Raises:
            RuntimeError: if the parsed options contain an 'args' entry

        Returns:
            raw_options (dict): modified raw_options
        """
        raw_options['processor'] = 'ApplyProcessor'

        if 'args' in raw_options:
            raise RuntimeError('Apply does not accept positional arguments.')

        return raw_options

    def setup(self):
        """Parse search string, choose processor, initialize controller.

        Returns:
            (dict): get info response (command type) and required fields.
                This response will be sent back to the CEXC process on the
                getinfo exchange (first chunk) to establish our execution
                type and required fields.
        """
        self.controller_options = self.handle_arguments(self.getinfo)
        self.controller = ChunkedController(self.getinfo,
                                            self.controller_options)

        # presumably time_limit=-1 disables the time limit so that only
        # memory usage is watched -- confirm against get_watchdog
        self.watchdog = command_util.get_watchdog(
            time_limit=-1,
            memory_limit=self.controller.resource_limits['max_memory_usage_mb']
        )

        # The execution type is driven by the 'streaming_apply' mlspl
        # property, which defaults to a falsy value ('f').
        streaming_apply = is_truthy(
            conf.get_mlspl_prop('streaming_apply', default='f'))
        exec_type = 'streaming' if streaming_apply else 'stateful'

        required_fields = self.controller.get_required_fields()
        return {'type': exec_type, 'required_fields': required_fields}

    def handler(self, metadata, body):
        """Main handler we override from BaseChunkHandler.

        Handles the reading and writing of data to the CEXC process, and
        finishes negotiation of the termination of the process.

        Args:
            metadata (dict): metadata information
            body (str): data payload from CEXC

        Returns:
            Either a bare metadata dict (getinfo exchange, or an empty dict
            for an empty chunk), or a tuple of:
            (dict): metadata to be sent back to CEXC
            output_body (str): data payload to be sent back to CEXC
        """
        # NOTE(review): FitCommand and ScoreCommand first check
        # command_util.is_invalid_chunk(metadata); this handler does not.
        # Confirm whether that is intentional.

        # Get info exchange and initialize controller, processor, algorithm
        if command_util.is_getinfo_chunk(metadata):
            return self.setup()

        finished_flag = metadata.get('finished', False)

        if not self.watchdog.started:
            self.watchdog.start()

        # Skip to next chunk if this chunk is empty
        if len(body) == 0:
            # An empty chunk can still be the final one; the watchdog was
            # started above, so terminate it here too instead of leaving
            # it running after the last chunk.
            if finished_flag and self.watchdog.started:
                self.watchdog.join()
            return {}

        # Load data, execute and collect results.
        self.controller.load_data(body)
        self.controller.execute()
        output_body = self.controller.output_results()

        if finished_flag:
            # Gracefully terminate watchdog
            if self.watchdog.started:
                self.watchdog.join()

        # Our final farewell
        return ({'finished': finished_flag}, output_body)