def prepare(self):
    """Write this component's configuration as JSON into its working directory."""
    # Model keys use "__" as a separator; the config file expects dotted keys.
    config_json = to_json(self.config, indent=4).replace("__", ".")
    config_path = mk_working_directory(self.project_id, self.component_id,
                                       RobotX.CONFIG_FILE_NAME)
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(config_json)
def need_execution(self, force=False):
    """Return True when the component must run again.

    When *force* is set or the component reports a change, the current
    state is pickled as the "previous" snapshot and the config file is
    (re)written via prepare().
    """
    changed = True if force else self.changed()
    if not changed:
        return changed
    if not self.loaded:
        self.load_from_db()
    pickle_path = mk_working_directory(self.project_id, self.component_id,
                                       Component.PREVIOUS)
    with open(pickle_path, 'wb') as f:
        pickle.dump(self, f)
    self.prepare()
    return changed
def check_target(project_id, input_comp_id, target):
    """Sanity-check a binary target column of the input CSV reader component.

    Returns True when the check cannot be performed (no CsvReaderInfo row,
    undecodable file) or the column is not strictly {0, 1}-labelled;
    otherwise returns whether class 0 is rarer than class 1.
    """
    csv_reader = CsvReaderInfo.objects.filter(project_id=project_id,
                                              component_id=input_comp_id)
    if len(csv_reader) == 0:
        return True
    csv_reader = csv_reader[0]
    try:
        assert isinstance(csv_reader, CsvReaderInfo)
        data_saving_path = mk_working_directory(project_id, input_comp_id,
                                                csv_reader.file_name)
        csv_reader = pandas.read_csv(data_saving_path, usecols=[target])
        df = pandas.DataFrame(csv_reader)
        detail = dict(df.groupby([target]).size())
        # Only a strictly binary {0, 1} target is checked; anything else passes.
        if not set([0, 1]) >= set(list(detail)):
            return True
        # BUGFIX: a column containing only 0s (or only 1s) used to compare
        # int < None and raise TypeError.  A missing class counts as 0.
        return detail.get(0, 0) < detail.get(1, 0)
    except UnicodeDecodeError:
        # The file is not valid UTF-8 text; treat as "cannot check".
        return True
def perview(request, project_id, component_id):
    """Preview up to the first rows of the component CSV as {name, value} columns."""
    self_defined_feature = CsvReaderInfo.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(self_defined_feature) == 0:
        return Response.fail(ERRORS.NOT_INITED)
    data_saving_path = mk_working_directory(project_id, component_id, 'data.csv')
    columns = list()
    with open(data_saving_path, 'r', encoding='utf-8') as f:
        for row_num, row in enumerate(csv.reader(f)):
            if row_num > 10:  # header + 10 sample rows
                break
            if not columns:
                # First non-empty row is the header.
                columns = [dict(name=col, value=list()) for col in row]
            else:
                for column, sample in zip(columns, row):
                    column['value'].append(sample)
    return Response.success(columns)
def perview(request, project_id, component_id):
    """Preview up to the first rows of the uploaded CSV as JSON over HTTP."""
    data_saving_path = mk_working_directory(project_id, component_id, 'data.csv')
    if not os.path.exists(data_saving_path):
        return HttpResponse(
            Response.fail(ERRORS.CSV_NOT_UPLOAD, None).to_json())
    columns = list()
    with open(data_saving_path, 'r', encoding='utf-8') as f:
        for row_num, row in enumerate(csv.reader(f)):
            if row_num > 10:  # header + 10 sample rows
                break
            if not columns:
                # First non-empty row is the header.
                columns = [dict(name=col, value=list()) for col in row]
            else:
                for column, sample in zip(columns, row):
                    column['value'].append(sample)
    return HttpResponse(Response.success(columns).to_json())
def upload(request, project_id, component_id, file):
    """Save an uploaded CSV, infer a type per column and persist the types.

    Returns a success Response carrying the created CsvReaderInfotype rows,
    or a failure Response describing the first validation problem found.
    """
    # Save the file.  NOTE(review): file.name is user-controlled and is
    # joined into the working directory -- confirm mk_working_directory
    # sanitizes it against path traversal.
    file_name = file.name
    data_saving_path = mk_working_directory(project_id, component_id, file_name)
    with open(data_saving_path, 'wb') as destination:
        if file.multiple_chunks():
            for chunk in file.chunks():
                destination.write(chunk)
        else:
            destination.write(file.read())

    # Check the file and collect sample data for type guessing.
    field_types = None  # type: dict[str,FieldType]
    try:
        header = None
        column_num = -1
        with open(data_saving_path, 'r', encoding='utf-8') as f:
            csv_reader = csv.reader(f)
            for row_num, row in enumerate(csv_reader):
                if row_num > 21:  # header + 21 sample rows is enough
                    break
                if header is None:
                    column_num = len(row)
                    if column_num < 2:
                        # Too few columns to be a usable CSV.
                        return Response.fail(ERRORS.CSV_COLUMN_SIZE_ERROR, None)
                    header = row
                    field_types = {column: FieldType(column) for column in row}
                else:
                    len_of_column = len(row)
                    if len_of_column != column_num:
                        return Response.fail(
                            ERRORS.CSV_COLUMN_NUM_ERROR,
                            dict(header_column_num=column_num,
                                 line=row_num + 1,
                                 row_column_num=len_of_column))
                    for column, sample in zip(header, row):
                        field_types[column].add_sample_data(sample)
        if header is None:
            return Response.fail(ERRORS.CSV_EMPTY, None)
        if len(field_types[header[0]].sample_data) < 20:
            return Response.fail(ERRORS.CSV_ROW_TOO_SMALL, None)

        # Guess each column's type in header order.
        # BUGFIX: the old code called sorted() on field_types.values() and
        # discarded the result -- a no-op that has been removed.
        db_field_types = []
        for head in header:
            field = field_types[head]
            field.guess_field_type()
            db_field_types.append(field.to_db_type(project_id, component_id))

        # Replace any previously stored types for this component.
        CsvReaderInfotype.objects.filter(project_id=project_id,
                                         component_id=component_id).delete()
        CsvReaderInfotype.objects.bulk_create(db_field_types)
        return Response.success(db_field_types)
    except UnicodeDecodeError:
        # The file is not valid UTF-8 text.
        return Response.fail(ERRORS.CSV_UTF8_ERROR, None)
def get_config_path(project_id, component_id):
    """Path of the FeatureCombine config file in the component's working dir."""
    file_name = FeatureCombine.CONFIG_FILE_NAME
    return mk_working_directory(project_id, component_id, file_name)
def csv_file_path(project_id, component_id):
    """Path of the self-defined-feature CSV in the component's working dir."""
    file_name = SelfDefinedFeature.CSV_NAME
    return mk_working_directory(project_id, component_id, file_name)
def get_test_metrics_local_path(project_id, component_id):
    """Local path of the AtomTest metrics file for this component."""
    return mk_working_directory(
        project_id, component_id, AtomTest.TEST_METRICS)
def get_robotx_dict_local_path(project_id, component_id):
    """Local path of the RobotX dictionary produced for AtomLearn."""
    return mk_working_directory(
        project_id, component_id, AtomLearn.ROBOTX_DICT)
def upload(request, project_id, component_id, file):
    """Save an uploaded CSV as data.csv, infer column types and persist them."""
    # Save the file under the fixed name 'data.csv'; remember the original name.
    file_name = file.name
    data_saving_path = mk_working_directory(project_id, component_id, 'data.csv')
    with open(data_saving_path, 'wb') as out:
        if file.multiple_chunks():
            for chunk in file.chunks():
                out.write(chunk)
        else:
            out.write(file.read())

    # Check the file and collect sample data for type guessing.
    field_types = None  # type: dict[str,FieldType]
    try:
        header = None
        column_num = -1
        with open(data_saving_path, 'r', encoding='utf-8') as f:
            for row_num, row in enumerate(csv.reader(f)):
                if row_num > 21:  # header + 21 sample rows is enough
                    break
                if header is None:
                    column_num = len(row)
                    if column_num < 2:
                        # Too few columns to be a usable CSV.
                        failure = Response.fail(ERRORS.CSV_COLUMN_SIZE_ERROR, None)
                        return HttpResponse(failure.to_json())
                    header = row
                    field_types = {name: FieldType(name) for name in row}
                    continue
                row_len = len(row)
                if row_len != column_num:
                    failure = Response.fail(
                        ERRORS.CSV_COLUMN_NUM_ERROR,
                        dict(header_column_num=column_num,
                             line=row_num + 1,
                             row_column_num=row_len))
                    return HttpResponse(failure.to_json())
                for name, sample in zip(header, row):
                    field_types[name].add_sample_data(sample)
        if header is None:
            failure = Response.fail(ERRORS.CSV_EMPTY, None)
            return HttpResponse(failure.to_json())
        if len(field_types[header[0]].sample_data) < 20:
            failure = Response.fail(ERRORS.CSV_ROW_TOO_SMALL, None)
            return HttpResponse(failure.to_json())

        # Guess each column's type and build the DB rows.
        db_field_types = []
        for field in field_types.values():
            field.guess_field_type()
            db_field_types.append(field.to_db_type(project_id, component_id))

        # Replace the stored component record.
        SelfDefinedFeature.objects.filter(project_id=project_id,
                                          component_id=component_id).delete()
        SelfDefinedFeature(project_id=project_id, component_id=component_id,
                           file_name=file_name).save()
        # Replace the stored column types.
        SelfDefinedFeatureType.objects.filter(
            project_id=project_id, component_id=component_id).delete()
        SelfDefinedFeatureType.objects.bulk_create(db_field_types)
        success = Response.success(list(field_types.values()))
        return HttpResponse(success.to_json())
    except UnicodeDecodeError:
        # The file is not valid UTF-8 text.
        return HttpResponse(Response.fail(ERRORS.CSV_UTF8_ERROR, None).to_json())
def csv_reader_dict_path(project_id, component_id):
    """Path of the explore data-dictionary file for this component."""
    return mk_working_directory(
        project_id, component_id, AtomExplore.EXPLORE_DICT_FILE)
def get_config_path(project_id, component_id):
    """Path of the AtomAct config file in the component's working dir."""
    file_name = AtomAct.CONFIG_FILE_NAME
    return mk_working_directory(project_id, component_id, file_name)
def get_export_model_local_path(project_id, component_id):
    """Local path of the exported MOJO model for this component."""
    return mk_working_directory(
        project_id, component_id, AtomLearn.EXPORT_MODEL_MOJO)
def prepare(self):
    """Write this component's configuration as JSON into its working directory."""
    config_path = mk_working_directory(self.project_id, self.component_id,
                                       FeatureCombine.CONFIG_FILE_NAME)
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(to_json(self.config, indent=4))
def get_zip_export_model_local_path(project_id, component_id, export_model_zipfile):
    """Local path of the zipped exported model in the component's working dir."""
    return mk_working_directory(
        project_id, component_id, export_model_zipfile)
def get_yarn_log_path(project_id, component_id):
    """Path of the YARN log file in the component's working dir."""
    file_name = Component.YARN_LOG_NAME
    return mk_working_directory(project_id, component_id, file_name)
def get_config_path(self):
    """Path of this RobotX component's config file in its working dir."""
    return mk_working_directory(
        self.project_id, self.component_id, RobotX.CONFIG_FILE_NAME)
def get_data_dir_path(project_id, component_id, file_name):
    """Path of *file_name* inside the component's working directory."""
    return mk_working_directory(
        project_id, component_id, file_name)
def get_model_properties_local_path(project_id, component_id):
    """Local path of the AtomLearn model-properties file for this component."""
    return mk_working_directory(
        project_id, component_id, AtomLearn.MODEL_PROPERTIES)
def __load_from_db__(self):
    """Build ``self.config`` for the explore run from DB-stored records.

    Resolves the input component (CSV reader or RobotX), derives the data
    file path and data-dictionary path, then applies the stored common
    algorithm parameters.

    Raises:
        Exception: when the explore component, any required input component
            record, or an expected parameter is missing.
    """
    project_id = self.project_id
    component_id = self.component_id
    atom_explore_model = AtomExploreModel.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(atom_explore_model) == 0:
        raise Exception("ATOM EXPLORE NOT CONFIGURED")
    atom_explore_model = atom_explore_model[0]
    assert isinstance(atom_explore_model, AtomExploreModel)
    input_comp_id = atom_explore_model.input_comp_id
    feature_id = atom_explore_model.feature_id
    feature_target = atom_explore_model.feature_target

    data__filename = None        # data.filename: path of the training data
    dictionary__filename = None  # dictionary.filename: path of the data dictionary

    input_comp_type = extract_component_type(input_comp_id)
    if input_comp_type == COMPONENTS.CSV_READER:
        # Input is a CSV reader component.
        csv_reader = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=input_comp_id)
        if len(csv_reader) == 0:
            raise Exception("ATOM EXPLORE INPUT CSVREADER NOT FOUND")
        csv_reader = csv_reader[0]
        assert isinstance(csv_reader, CsvReaderInfo)
        input_file = csv_reader.file_name
        data__filename = "%s/%s" % (mk_working_directory(
            project_id, input_comp_id), input_file)
        # Generate the data dictionary from the selected factor/numeric fields.
        io_field_types = CsvReaderInfotype.objects.filter(
            project_id=project_id, component_id=input_comp_id, selected=True)
        with open(AtomExplore.csv_reader_dict_path(project_id, component_id),
                  'w', encoding='utf-8') as f:
            lines = list()
            lines.append("variable,type\n")
            for io_f_type_ in io_field_types:
                assert isinstance(io_f_type_, CsvReaderInfotype)
                if io_f_type_.field_type not in ["factor", "numeric"]:
                    continue
                lines.append('"%s",%s\n' % (io_f_type_.field,
                                            io_f_type_.field_type))
            f.writelines(lines)
        dictionary__filename = AtomExplore.csv_reader_dict_path(
            project_id, component_id)
    elif input_comp_type == COMPONENTS.ROBOTX:
        # Input is a RobotX component; find the container's CSV reader.
        containers = Container.objects.filter(project_id=project_id,
                                              component_id=input_comp_id)
        if len(containers) == 0:
            raise Exception(
                "ATOM EXPLORE INPUT ROBOTX-CONTAINER NOT FOUND")
        container = containers[0]
        csvReaders = CsvReaderInfo.objects.filter(
            project_id=project_id, component_id=container.container_id)
        if len(csvReaders) == 0:
            raise Exception(
                "ATOM EXPLORE INPUT ROBOTX-CSVREADER NOT FOUND")
        # BUGFIX: the dictionary belongs to the RobotX component -- the old
        # code passed the component-type constant (input_comp_type) where a
        # component id is expected.
        dictionary__filename = RobotX.output_dict(project_id, input_comp_id)
        data__filename = "%s/%s" % (Component.cluster_working_directory(
            project_id, csvReaders[0].component_id), csvReaders[0].file_name)

    # Explore output directory.
    output__dir = self.explore_fold_path(project_id, component_id)
    self.config = Config(data__filename, dictionary__filename, feature_id,
                         feature_target, output__dir)

    algorithm_params = setting.EXPLORE_COMMON_PARAMS
    atom_explore_param = AtomExploreParam.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(algorithm_params) != len(atom_explore_param):
        # NOTE(review): the count compares EXPLORE_COMMON_PARAMS but the
        # message cites ALGORITHM_COMMON_PARAMS -- confirm which is intended.
        raise Exception("ALGORITHM %s LACK OF PARAMETER" %
                        str(ALGORITHM_COMMON_PARAMS))
    for param in atom_explore_param:
        assert isinstance(param, AtomExploreParam)
        param_name = param.param_name
        param_value = param.param_value
        # Convert the stored string value to its real (typed) value.
        param_description = COMM_PARAMS[param_name]
        true_value = param_transform(param_description, param_value)
        if param_name in ALGORITHM_COMMON_PARAMS:
            # Common parameter shared by all algorithms.
            self.config.add_common_param(param_name, true_value)
def get_model_metrics_local_path(project_id, component_id):
    """Local path of the AtomLearn model-metrics file for this component."""
    file_name = AtomLearn.MODEL_METRICS
    return mk_working_directory(project_id, component_id, file_name)
def get_prediction_csv_local_path(project_id, component_id):
    """Local path of the AtomAct prediction CSV for this component."""
    return mk_working_directory(
        project_id, component_id, AtomAct.PREDICTION_CSV)
def hive_reader_dict_path(project_id, component_id):
    """Path of the Hive-reader dictionary in the component's working dir."""
    file_name = AtomLearn.HIVE_READER_DICT_NAME
    return mk_working_directory(project_id, component_id, file_name)