def delete(request, project_id, component_id):
    """Remove every persisted configuration row belonging to a component.

    The component id encodes its type; the type selects which model
    tables hold the component's state.  Unknown types produce a failure
    response, everything else succeeds after the deletes run.
    """
    # component type -> model classes whose rows must be purged
    model_map = {
        COMPONENTS.CSV_READER: (CsvReaderInfo, CsvReaderInfotype),
        COMPONENTS.ATOM_ACT: (AtomActModel,),
        COMPONENTS.ATOM_LEARN: (AtomLearnModel, AtomLearnParam),
        COMPONENTS.ATOM_TEST: (AtomTestModel,),
        COMPONENTS.ATOM_EXPLORE: (AtomExploreModel, AtomExploreParam),
        COMPONENTS.ROBOTX: (Container, Relation),
    }
    input_type = extract_component_type(component_id)
    models = model_map.get(input_type)
    if models is None:
        return Response.fail(ERRORS.CSV_TYPE_ERROR, None)
    for model in models:
        model.objects.filter(project_id=project_id,
                             component_id=component_id).delete()
    return Response.success()
def getDir(project_id, component_id=None):
    """Return the working directory for a project, or for one component of it.

    Args:
        project_id: project identifier, used as a path segment.
        component_id: optional component identifier; when given, the
            component's type (with any "Atom" prefix stripped, matching the
            on-disk folder names) is appended to the project directory.

    Returns:
        The joined path under ``setting.WORKING_DIRECTORY``.
    """
    # Fixes: `!= None` identity comparisons replaced with `is not None`,
    # and the local no longer shadows the builtin `dir`.
    component_type = None
    if component_id is not None:
        # "AtomLearn" -> "Learn" etc.: report folders drop the Atom prefix
        component_type = re.sub('Atom', '', extract_component_type(component_id))
    if component_type is not None:
        return os.path.join(setting.WORKING_DIRECTORY, project_id, component_type)
    return os.path.join(setting.WORKING_DIRECTORY, project_id)
def robotx_execute(project_id, component_id, task_id):
    """Run the external RobotX command for a component as a subprocess.

    Builds the command line from the component's configuration, launches it
    with ``shell=False``, collects stdout/stderr into the task detail record,
    and returns ``apps.SUCCEEDED`` or ``apps.FAILED``.
    """
    # Initialise the RobotxSpark class (local import to avoid a cycle —
    # presumably; confirm against module layout)
    from comm_model.components.RobotX import RobotX
    component_type = extract_component_type(component_id)
    # NOTE(review): eval() on a name derived from the component id — prefer an
    # explicit class lookup table if the id can carry untrusted text.
    robotx_class = eval(component_type)
    robotx_obj = robotx_class(project_id, component_id)
    # robotx output locations
    output_path, output_dict = robotx_obj.output
    config_file_path = robotx_obj.get_config_path
    partial_command = setting.ROBOTX_PATH
    # NOTE(review): with shell=False each tuple element is ONE argv token, so
    # "--dict_only n", "--dbname testdb...", "--delete_db y" and "--label "
    # are each passed as a single argument containing a space — confirm the
    # RobotX CLI really parses "--flag value" as one token.
    command = partial_command ,\
              "--config_path", config_file_path,\
              "--output", output_dict,\
              "--dict_only n",\
              "--dbname testdb_%s_%s_%s"%(project_id,component_id,task_id),\
              "--delete_db y",\
              "--label ","robot_x"
    print(" ".join(command))
    status = apps.SUCCEEDED
    try:
        p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        submit_log = list()
        # while p.poll() is None:
        #     out = p.stdout.readline().decode('utf-8', 'ignore').strip()
        #     error = p.stderr.readline().decode('utf-8', 'ignore').strip()
        #     submit_log.append(out)
        #     if len(error) > 0 :
        #         status = apps.FAILED
        #         submit_log.append(error)
        # First stderr line decides success/failure.
        # NOTE(review): reading stderr before draining stdout can deadlock if
        # the child fills its stdout pipe first — verify with large outputs.
        error = p.stderr.readline().strip()
        if len(error) != 0:
            status = apps.FAILED
        # Drain remaining stdout, then stderr, into the submit log.
        for line in iter(p.stdout.readline, b''):
            # while p.poll() is None:
            submit_log.append(str(line))
        p.stdout.close()
        for line in iter(p.stderr.readline, b''):
            submit_log.append(str(line))
        p.stderr.close()
        # detail joins with a literal backslash-n sequence (escaped on purpose?)
        update_task_detail(project_id, component_id, task_id, error_code="", detail="\\n".join(submit_log))
    except Exception as e:
        # Any launch/IO failure is recorded on the task and reported as FAILED.
        update_task_detail(project_id, component_id, task_id, detail=str(e))
        return apps.FAILED
    return status
def __load_from_db__(self):
    """Build ``self.config`` for an AtomLearn run from its database rows.

    Reads the AtomLearnModel row for this component, resolves the input
    component (only AtomExplore is accepted), then loads and converts the
    stored algorithm parameters into the Config object.

    Raises:
        Exception: component not configured, unsupported input component,
            unknown algorithm, or a parameter-count mismatch.
    """
    project_id = self.project_id
    component_id = self.component_id
    atom_learn_model = AtomLearnModel.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(atom_learn_model) == 0:
        raise Exception("ATOM LEARN NOT CONFIGURED")
    atom_learn_model = atom_learn_model[0]
    assert isinstance(atom_learn_model, AtomLearnModel)
    input_comp_id = atom_learn_model.input_comp_id
    algorithm = atom_learn_model.algorithm
    output__dir = None
    # learn output path: taken from the upstream AtomExplore component
    input_comp_type = extract_component_type(input_comp_id)
    if input_comp_type == COMPONENTS.ATOM_EXPLORE:
        output__dir = AtomExplore.explore_fold_path(
            project_id, component_id)
    else:
        # runtime message (Chinese): "AtomLearn dependency component error"
        raise Exception("AtomLearn 依赖组件出错")
    self.config = Config(output__dir, algorithm)
    # data__filename, dictionary__filename, id__varname, target__varname, output__dir ,algorithm
    if algorithm not in ALGORITHM_PARAMS:
        raise Exception("ALGORITHM %s NOT SUPPORTED" % algorithm)
    algorithm_params = ALGORITHM_PARAMS[algorithm]
    atom_learn_param = AtomLearnParam.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(atom_learn_param) > 0:
        if len(algorithm_params) != len(atom_learn_param):
            # NOTE(review): message likely means "LACK OF PARAMETER" — left
            # as-is since callers/logs may match on the exact text.
            raise Exception("ALGORITHM %s LUCK OF PARAMETER" % algorithm)
        for param in atom_learn_param:
            assert isinstance(param, AtomLearnParam)
            param_name = param.param_name
            param_value = param.param_value
            # convert the stored value to its real typed parameter value
            param_description = algorithm_params[param_name]
            true_value = param_transform(param_description, param_value)
            if param_name in ALGORITHM_COMMON_PARAMS:
                # common (algorithm-independent) parameter
                self.config.add_common_param(param_name, true_value)
            else:
                self.config.add_hparms(param_name, true_value)
    # normalise "no hyper-parameters" to None for downstream consumers
    if len(self.config.hparms) == 0:
        self.config.hparms = None
def __load_from_db__(self):
    """Build ``self.config`` for an AtomAct (scoring) run from its DB rows.

    Resolves the scoring data file from the input component (csv reader,
    RobotX output table, or feature-combine output table) and assembles the
    Config with reason-code and core-count settings.

    Raises:
        Exception: when the AtomAct component or its csv-reader input row
            is missing.
    """
    project_id = self.project_id
    component_id = self.component_id
    atom_act_model = AtomActModel.objects.filter(project_id=project_id, component_id=component_id)
    if len(atom_act_model) == 0:
        raise Exception("ATOM ACT NOT CONFIGURED")
    atom_act_model = atom_act_model[0]
    assert isinstance(atom_act_model, AtomActModel)
    input_comp_id = atom_act_model.input_comp_id
    atom_learn_id = atom_act_model.atom_learn_id
    reason_code__nvars = atom_act_model.reason_code_nvars
    ncores = atom_act_model.ncores
    # scoring data path; stays None if the input type matches no branch below
    newdata__filename = None
    input_comp_type = extract_component_type(input_comp_id)
    if input_comp_type == COMPONENTS.CSV_READER:
        # hive reader
        csv_header = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=input_comp_id)
        if len(csv_header) == 0:
            # NOTE(review): message says "ATOM LEARN" inside AtomAct — looks
            # copy-pasted; kept verbatim since logs may match on it.
            raise Exception("ATOM LEARN INPUT HIVE READER NOT FOUND")
        csv_header = csv_header[0]
        assert isinstance(csv_header, CsvReaderInfo)
        input_table = csv_header.magic_name
        input_id = csv_header.component_id
        file_name = csv_header.file_name
        # newdata__filename = "%s.%s" %(setting.HIVE_INPUT_DB, input_table)
        newdata__filename = "%s/%s" % (Component.cluster_working_directory(
            project_id, input_id), file_name)
    elif input_comp_type == COMPONENTS.ROBOTX:
        newdata__filename = RobotX.output_table(project_id, input_comp_id)
    elif input_comp_type == COMPONENTS.FEATURE_COMBINE:
        newdata__filename = FeatureCombine.output_table(
            project_id, input_comp_id)
    # model path
    # NOTE(review): learn_fold_path is computed but never used afterwards —
    # confirm whether Config should also receive the model directory.
    learn_fold_path = AtomLearn.learn_fold_path(project_id, atom_learn_id)
    # act output path
    output__dir = self.act_fold_path(project_id, component_id)
    self.config = Config(newdata__filename, reason_code__nvars, ncores, output__dir)
def __load_from_db__(self):
    """Build ``self.config`` for an AtomTest run from its database rows.

    Resolves the test data file from the input component (csv reader or
    RobotX output table) and assembles the Config with the target variable
    and the test output folder.

    Raises:
        Exception: when the AtomTest component or its csv-reader input row
            is missing.
    """
    project_id = self.project_id
    component_id = self.component_id
    atom_test_model = AtomTestModel.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(atom_test_model) == 0:
        raise Exception("ATOM TEST NOT CONFIGURED")
    atom_test_model = atom_test_model[0]
    assert isinstance(atom_test_model, AtomTestModel)
    input_comp_id = atom_test_model.input_comp_id
    atom_act_id = atom_test_model.atom_act_id
    target_varname = atom_test_model.feature_target
    # test data path; stays None if the input type matches no branch below
    test_data_filename = None
    input_comp_type = extract_component_type(input_comp_id)
    if input_comp_type == COMPONENTS.CSV_READER:
        # hive reader
        csv_reader = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=input_comp_id)
        if len(csv_reader) == 0:
            # NOTE(review): message says "ATOM LEARN" inside AtomTest — looks
            # copy-pasted; kept verbatim since logs may match on it.
            raise Exception("ATOM LEARN INPUT HIVE READER NOT FOUND")
        csv_reader = csv_reader[0]
        assert isinstance(csv_reader, CsvReaderInfo)
        input_table = csv_reader.magic_name
        file_name = csv_reader.file_name
        csv_reader_id = csv_reader.component_id
        test_data_filename = "%s/%s" % (
            Component.cluster_working_directory(project_id, csv_reader_id),
            file_name)
    elif input_comp_type == COMPONENTS.ROBOTX:
        test_data_filename = RobotX.output_table(project_id, input_comp_id)
    # model path
    # act_fold_path = AtomAct.act_fold_path(project_id, atom_act_id)
    # test output path
    test_fold_path = self.test_fold_path(project_id, component_id)
    self.config = Config(test_data_filename, target_varname, test_fold_path)
def task_detect():
    """Scan for runnable tasks and hand each to its component executor.

    A task is runnable when its execution is RUNNING, the task itself is
    PENDING, and it has no unresolved dependencies (``relies == 0``).
    Each runnable task is dispatched to the executor class named after
    its component type.
    """
    running_ids = [
        execution.task_id
        for execution in Execution.objects.filter(status=ExecutionStatus.RUNNING)
    ]
    runnable = Task.objects.filter(task_status=PENDING, relies=0,
                                   task_id__in=running_ids)
    for pending_task in runnable:
        proj = pending_task.project_id
        comp = pending_task.component_id
        tid = pending_task.task_id
        # the component type string doubles as the executor class name
        executor_class = eval(extract_component_type(comp))
        executor_class(proj, comp).execute(tid)
        logger.info("%s-%s-%s submitted to task queue" % (proj, comp, tid))
        print(
            "-----------------------------------------------------------------------"
        )
def load(request, project_id, component_id, atom_act_id, input_comp_id):
    """Return the stored AtomTest configuration for a component.

    Only csv-reader inputs are accepted.  If the saved configuration points
    at a different input component, the stale rows are deleted and a
    "changed" response is returned; if nothing is stored yet, an empty
    success is returned.
    """
    if extract_component_type(input_comp_id) != COMPONENTS.CSV_READER:
        return Response.fail(ERRORS.CSV_TYPE_ERROR, None)
    fields = CsvReaderInfotype.objects.filter(project_id=project_id,
                                              component_id=input_comp_id)
    atom_test_db = AtomTest.objects.filter(project_id=project_id,
                                           component_id=component_id)
    if len(atom_test_db) == 0:
        # nothing saved yet: succeed with no payload
        return Response.success()
        # return Response.fail(ERRORS.ATOM_TEST_NOT_CONFIGURED, None)
    data_changed = Response.success("changed")
    atom_test = atom_test_db[0]
    params = [
        FieldType(row.field, row.selected, row.field_type,
                  row.date_format, row.sample_data)
        for row in fields
    ]
    # saved config refers to a different input component: drop stale rows
    if atom_test.input_comp_id != input_comp_id:
        atom_test_db.delete()
        return data_changed
    # todo check that id/target are still present; robotx and custom feature
    # combinations are not covered yet
    return Response.success(dict(
        id=atom_test.feature_id,
        target=atom_test.feature_target,
        # max_value=atom_test.max_value,
        fields=params))
def report(request):
    """Serve the text report file produced by a component run.

    Query params: ``project_id`` and ``component_id`` (both required),
    ``fileName`` (optional; defaults to the component's standard report
    file).  Returns the file content, or a friendly message when the file
    is missing or unreadable.
    """
    project_id = request.GET.get('project_id')
    component_id = request.GET.get('component_id')
    fileName = request.GET.get('fileName')
    error_msg = "参数缺失"
    if not project_id:
        return Response.fail(error_msg)
    if not component_id:
        return Response.fail(error_msg)
    component_type = re.sub('Atom', '', extract_component_type(component_id))
    if not fileName:
        # BUG FIX: was `component_type in apps.COMPONENTS.ROBOTX`, a substring
        # test; every other call site compares component types with `==`.
        if component_type == apps.COMPONENTS.ROBOTX:
            fileName = "tmp/full.sql"
        else:
            fileName = component_type + ".txt"
    # `file_path` no longer shadows the builtin `file`
    file_path = os.path.join(setting.WORKING_DIRECTORY, project_id,
                             component_type, fileName)
    try:
        # 'r' instead of 'r+': reading must not require write permission;
        # explicit utf-8 avoids locale-dependent decoding.
        with codecs.open(file_path, 'r', encoding='utf-8') as get:
            content = get.read()
    except (FileNotFoundError, PermissionError):
        # PermissionError added to match the message shown to the user
        content = "File is not found. or You don't have permission to access this file."
    return Response.success({"data": content})
def __load_from_db__(self):
    """Build ``self.config`` for an AtomExplore run from its database rows.

    Resolves the training data file and data dictionary from the input
    component (csv reader or RobotX), writes a generated dictionary file
    for csv inputs, then loads and converts the stored exploration
    parameters into the Config object.

    Raises:
        Exception: component not configured, missing input rows, or a
            parameter-count mismatch.
    """
    project_id = self.project_id
    component_id = self.component_id
    atom_explore_model = AtomExploreModel.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(atom_explore_model) == 0:
        raise Exception("ATOM EXPLORE NOT CONFIGURED")
    atom_explore_model = atom_explore_model[0]
    assert isinstance(atom_explore_model, AtomExploreModel)
    input_comp_id = atom_explore_model.input_comp_id
    feature_id = atom_explore_model.feature_id
    feature_target = atom_explore_model.feature_target
    # data.filename: training data file name
    data__filename = None
    # dictionary.filename: data dictionary file name
    dictionary__filename = None
    # training data path
    input_comp_type = extract_component_type(input_comp_id)
    if input_comp_type == COMPONENTS.CSV_READER:
        # csv_reader
        csv_reader = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=input_comp_id)
        if len(csv_reader) == 0:
            raise Exception("ATOM EXPLORE INPUT CSVREADER NOT FOUND")
        csv_reader = csv_reader[0]
        assert isinstance(csv_reader, CsvReaderInfo)
        input_file = csv_reader.file_name
        data__filename = "%s/%s" % (mk_working_directory(
            project_id, input_comp_id), input_file)
        # generate the data dictionary (only factor/numeric fields are kept)
        io_field_types = CsvReaderInfotype.objects.filter(
            project_id=project_id, component_id=input_comp_id, selected=True)
        with open(AtomExplore.csv_reader_dict_path(project_id, component_id),
                  'w', encoding='utf-8') as f:
            lines = list()
            lines.append("variable,type\n")
            for io_f_type_ in io_field_types:
                assert isinstance(io_f_type_, CsvReaderInfotype)
                if io_f_type_.field_type not in ["factor", "numeric"]:
                    continue
                lines.append('"%s",%s\n' % (io_f_type_.field,
                                            io_f_type_.field_type))
            f.writelines(lines)
        dictionary__filename = AtomExplore.csv_reader_dict_path(
            project_id, component_id)
    elif input_comp_type == COMPONENTS.ROBOTX:
        # robotx
        # relations = Relation.objects.filter(project_id=project_id,component_id=input_comp_type)
        containers = Container.objects.filter(project_id=project_id,
                                              component_id=input_comp_id)
        # if len(relations)==0:
        #     raise Exception("ATOM EXPLORE INPUT ROBOTX-RELATION NOT FOUND")
        if len(containers) == 0:
            raise Exception(
                "ATOM EXPLORE INPUT ROBOTX-CONTAINER NOT FOUND")
        # relation = relations[0]
        container = containers[0]
        csvReaders = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=container.container_id)
        # csvReader1 = CsvReaderInfo.objects.filter(project_id=project_id, component_id=relation.target)
        if len(csvReaders) == 0:
            raise Exception(
                "ATOM EXPLORE INPUT ROBOTX-CSVREADER NOT FOUND")
        # NOTE(review): passes input_comp_type (a type string) where sibling
        # call sites pass input_comp_id — looks like a copy/paste bug; confirm
        # against RobotX.output_dict's signature.
        dictionary__filename = RobotX.output_dict(project_id, input_comp_type)
        data__filename = "%s/%s" % (Component.cluster_working_directory(
            project_id, csvReaders[0].component_id), csvReaders[0].file_name)
    # explore output path
    output__dir = self.explore_fold_path(project_id, component_id)
    self.config = Config(data__filename, dictionary__filename,
                         feature_id, feature_target, output__dir)
    # data__filename, dictionary__filename, id__varname, target__varname, output__dir
    algorithm_params = setting.EXPLORE_COMMON_PARAMS
    atom_explore_param = AtomExploreParam.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(algorithm_params) != len(atom_explore_param):
        # NOTE(review): message likely means "LACK OF PARAMETER" — left
        # verbatim since logs may match on the exact text.
        raise Exception("ALGORITHM %s LUCK OF PARAMETER" %
                        str(ALGORITHM_COMMON_PARAMS))
    for param in atom_explore_param:
        assert isinstance(param, AtomExploreParam)
        param_name = param.param_name
        param_value = param.param_value
        # convert the stored value to its real typed parameter value
        param_description = COMM_PARAMS[param_name]
        true_value = param_transform(param_description, param_value)
        if param_name in ALGORITHM_COMMON_PARAMS:
            # common (algorithm-independent) parameter
            self.config.add_common_param(param_name, true_value)