class InferTaskTypeHandler(BaseHandler):
    """Handler that infers the task type for one feature of a dataset."""

    experiment_service = ExperimentService()
    dataset_service = DatasetService()

    @gen.coroutine
    def post(self, dataset_name, *args, **kwargs):
        """Respond with the task type inferred for the requested feature.

        The JSON request body must contain a string ``feature_name``. The
        response carries ``task_type`` and echoes ``feature_name``.

        Raises:
            ValueError: if the dataset has no feature with that name.
        """
        payload = self.get_request_as_dict_if_json()
        feature_name = util.require_in_dict(payload, 'feature_name', str)

        with db.open_session() as session:
            dataset = self.dataset_service.dataset_dao.require_by_name(
                session, dataset_name)
            candidates = Feature.load_dict_list(dataset.features)

            # First feature whose name matches, or None when there is no match.
            matched = next(
                (c for c in candidates if c.name == feature_name), None)
            if matched is None:
                raise ValueError(f"Feature name = {feature_name} not found. ")

            task_type = self.experiment_service._infer_task_type(matched)

        self.response_json({
            "task_type": task_type,
            "feature_name": feature_name,
        })
class ExperimentHandler(BaseHandler):
    """Handler for creating experiments on a dataset and listing them."""

    experiment_service = ExperimentService()

    @gen.coroutine
    def post(self, dataset_name, *args, **kwargs):
        """Start an experiment from the JSON body and respond with its config.

        ``dataset_name`` from the URL is written into the request dict under
        the ``dataset_name`` key before it is handed to the service.
        """
        payload = self.get_request_as_dict_if_json()
        payload['dataset_name'] = dataset_name
        self.response_json(self.experiment_service.experiment(payload))

    @gen.coroutine
    def get(self, dataset_name, *args, **kwargs):
        """Respond with one page of experiments for ``dataset_name``.

        Query arguments: ``page_num`` (default 1) and ``page_size``
        (default 20); both are converted with ``int``.
        """
        page_num = int(self.get_argument('page_num', 1))
        page_size = int(self.get_argument('page_size', 20))

        experiments, total = self.experiment_service.get_experiments(
            dataset_name, page_num, page_size)

        self.response_json({
            "experiments": experiments,
            "count": total,
            "notebook_portal": consts.NOTEBOOK_PORTAL,
        })
class ModelDetailHandler(BaseHandler):
    """Handler that returns the details of a single model."""

    experiment_service = ExperimentService()

    @gen.coroutine
    def get(self, dataset_name, model_name, *args, **kwargs):
        """Respond with the model retrieved by ``model_name``.

        ``dataset_name`` is accepted from the URL but not used for the lookup.
        """
        model = self.experiment_service.retrieve_model(model_name)
        self.response_json(model)
class RecommendTrainConfigurationHandler(BaseHandler):
    """Handler that returns a recommended training configuration."""

    experiment_service = ExperimentService()

    @gen.coroutine
    def post(self, dataset_name, *args, **kwargs):
        """Respond with ``{"conf": ...}`` built from the JSON request body.

        The configuration comes from
        ``ExperimentService.recommended_train_configuration``.
        """
        payload = self.get_request_as_dict_if_json()
        recommended = self.experiment_service.recommended_train_configuration(
            dataset_name, payload)
        self.response_json({"conf": recommended})
class ProcessMonitor(threading.Thread):
    """Background thread that checks the OS processes of running models.

    Fixes the case where a training process ends but never sends back its
    final event, for example after a server restart.
    """

    experiment_service = ExperimentService()

    def __init__(self):
        # daemon=True: stop if the parent thread finished.
        super().__init__(name="ProcessMonitorThread", daemon=True)
        # pid -> last process status observed for that pid.
        self.process_status_mapping = {}

    def run(self) -> None:
        """Poll the running models once per second, forever."""
        logger.info("[MonitorThread] loop running...")
        while True:
            time.sleep(1)
            try:
                # 1. select all running models
                models = self.experiment_service.find_running_model()
                # 2. check process of running model
                self.handle_models(models)
            except Exception as e:
                # One failed round must not kill the monitor thread;
                # report it and try again on the next tick.
                logger.warning(
                    f"[MonitorThread] polling round failed: {e!r}")

    def handle_models(self, models: list):
        """Check the training process of every model in ``models``.

        Each item is expected to expose ``pid`` and ``name``. Models without
        a pid are skipped. When the process status cannot be read (usually
        ``psutil.NoSuchProcess``), the model is reported to the service as
        terminated and its cached status is dropped. Otherwise the status is
        logged on first sight and whenever it changes.
        """
        for m in models:
            pid = m.pid
            if pid is None:
                continue

            # Keep the try body minimal so only a failed process lookup
            # is interpreted as "process is gone".
            try:
                status = psutil.Process(pid).status()
            except Exception:  # usually is NoSuchProcess
                logger.warning(
                    f"Model {m.name} , training process pid = {pid} not exists. ")
                # Forget the dead pid so the mapping does not grow forever
                # and a reused pid does not inherit a stale status.
                self.process_status_mapping.pop(pid, None)
                # update if process finished
                self.experiment_service.train_process_terminated(m.name)
                continue

            if pid not in self.process_status_mapping:
                self.process_status_mapping[pid] = status
                logger.info(
                    f"Model {m.name} , pid is {pid} process status is {status} ")
                continue

            previous = self.process_status_mapping[pid]
            if previous != status:
                logger.info(
                    f"Model {m.name} , pid is {pid} process status changed "
                    f"from {previous} to {status} ")
                self.process_status_mapping[pid] = status
class ModelTrainProcessHandler(BaseHandler):
    """Handler that records training-process steps for a train job."""

    experiment_service = ExperimentService()

    @gen.coroutine
    def post(self, dataset_name, train_job_name, *args, **kwargs):
        """Record the step in the JSON body and respond with an empty object."""
        # 1. read param
        step = self.get_request_as_dict_if_json()
        self.experiment_service.add_train_process_step(train_job_name, step)

        # 2. response
        self.response_json({})

    @gen.coroutine
    def get(self, temporary_dataset_name, analyze_job_name, *args, **kwargs):
        """Do nothing; this method has no implementation."""
        return None