def extract_input_data(self, datasets, model):
    """Split the component's input datasets into train/validate/test tables.

    Walks ``datasets`` ({component_name: {key: table-ref}}), materializes each
    table via ``model.obtain_data``, and routes the four reserved keys
    (train_data / eval_data / validate_data / test_data) into ``model_data``;
    every remaining key is collected into ``data`` under a
    "component.key[.index]" name.

    Side effects: updates ``self.has_test_data`` / ``self.has_validate_data``
    flags and the ``self.input_data_count`` / ``self.input_eval_data_count``
    counters, then runs ``self._abnormal_dsl_config_detect()``.

    :param datasets: mapping of component name -> {data key: data reference}
    :param model: object exposing ``obtain_data`` to resolve references
    :return: tuple (train_data, validate_data, test_data, data)
    """
    model_data = {}
    data = {}
    LOGGER.debug(f"Input data_sets: {datasets}")
    for cpn_name, data_dict in datasets.items():
        # Pull out the reserved data roles first; delete them so only
        # "normal" (non-reserved) inputs remain in data_dict afterwards.
        for data_type in ["train_data", "eval_data", "validate_data", "test_data"]:
            if data_type in data_dict:
                d_table = data_dict.get(data_type)
                model_data[data_type] = model.obtain_data(d_table)
                del data_dict[data_type]
        # Anything left is a normal input; flatten lists with an index suffix.
        if len(data_dict) > 0:
            LOGGER.debug(f"data_dict: {data_dict}")
            for k, v in data_dict.items():
                data_list = model.obtain_data(v)
                LOGGER.debug(f"data_list: {data_list}")
                if isinstance(data_list, list):
                    for i, data_i in enumerate(data_list):
                        data[".".join([cpn_name, k, str(i)])] = data_i
                else:
                    data[".".join([cpn_name, k])] = data_list
    train_data = model_data.get("train_data")
    validate_data = None
    # With train data present, eval_data takes precedence over validate_data
    # as the validation set.
    if self.has_train_data:
        if self.has_eval_data:
            validate_data = model_data.get("eval_data")
        elif self.has_validate_data:
            validate_data = model_data.get("validate_data")
    test_data = None
    if self.has_test_data:
        test_data = model_data.get("test_data")
        self.has_test_data = True
    elif self.has_eval_data and not self.has_train_data:
        # eval_data without train_data is treated as a test set.
        test_data = model_data.get("eval_data")
        self.has_test_data = True
    if validate_data or (self.has_train_data and self.has_eval_data):
        self.has_validate_data = True
    # Record row counts for downstream consistency checks; only tables
    # (is_table) support count().
    if self.has_train_data and is_table(train_data):
        self.input_data_count = train_data.count()
    elif self.has_normal_input_data:
        # NOTE(review): if several normal inputs are tables, the last one
        # wins — presumably intentional, single normal input expected.
        for data_key, data_table in data.items():
            if is_table(data_table):
                self.input_data_count = data_table.count()
    if self.has_validate_data and is_table(validate_data):
        self.input_eval_data_count = validate_data.count()
    self._abnormal_dsl_config_detect()
    LOGGER.debug(
        f"train_data: {train_data}, validate_data: {validate_data}, "
        f"test_data: {test_data}, data: {data}"
    )
    return train_data, validate_data, test_data, data
def check_consistency(self):
    """Verify output row count against the recorded input row counts.

    Applies only when ``self.data_output`` is a table. Raises ``ValueError``
    when the combined train+eval input count differs from the output count —
    except when the train and eval input counts are equal to each other.
    """
    output = self.data_output
    if not is_table(output):
        return
    props = self.component_properties
    combined_input = props.input_data_count + props.input_eval_data_count
    counts_equal = props.input_data_count == props.input_eval_data_count
    if combined_input != output.count() and not counts_equal:
        raise ValueError("Input data count does not match with output data count")
def predict(self, data):
    """Run prediction on ``data``, dispatching on the model version.

    Legacy (version-0) models delegate entirely to ``_version_0``; newer
    models build a torch predict dataset, run the trainer, and format the
    scores via ``predict_score_to_output``.
    """
    if self.is_version_0():
        # Legacy code path: fully handled by the v0 module.
        from federatedml.nn.homo_nn import _version_0
        return _version_0.client_predict(self=self, data_inst=data)

    from federatedml.nn.homo_nn._torch import make_predict_dataset
    dataset = make_predict_dataset(data=data, trainer=self._trainer)
    predict_tbl, classes = self._trainer.predict(
        dataset=dataset,
        batch_size=self.param.batch_size,
    )
    # If the caller handed us a table, score against it directly; otherwise
    # recover data instances from the dataset wrapper.
    data_instances = data if is_table(data) else dataset.as_data_instance()
    return self.predict_score_to_output(
        data_instances,
        predict_tbl,
        classes=classes,
        threshold=self.param.predict_param.threshold,
    )
def make_dataset(data, **kwargs):
    """Build the dataset wrapper appropriate for ``data``.

    :param data: a table (``is_table``) -> ``TableDataSet``, or a
        ``LocalData`` (filesystem path) -> ``VisionDataSet``.
    :param kwargs: forwarded to the dataset constructor.
    :raises TypeError: for any other input type.
    """
    if is_table(data):
        return TableDataSet(data_instances=data, **kwargs)
    if isinstance(data, LocalData):
        return VisionDataSet(data.path, **kwargs)
    # Fix: report the offending *type*, not the value — the old message
    # embedded the whole object repr, which is unhelpful (and potentially
    # huge) for a "type not supported" error.
    raise TypeError(f"data type {type(data)} not supported")
def _func(*args, **kwargs):
    """Wrapper asserting the wrapped call preserves table row count.

    Takes the first table-typed argument as the reference; if the wrapped
    function also returns a table, their counts must match.
    """
    candidates = list(args) + list(kwargs.values())
    input_count = None
    for candidate in candidates:
        if is_table(candidate):
            input_count = candidate.count()
            break
    result = func(*args, **kwargs)
    if input_count is not None and is_table(result):
        output_count = result.count()
        LOGGER.debug(
            f"num row of input: {input_count} -> num row of output: {output_count}"
        )
        if input_count != output_count:
            raise EnvironmentError(
                f"num row of input({input_count}) not equals to num row of output({output_count})"
            )
    return result
def _func(*args, **kwargs):
    """Wrapper asserting the wrapped call preserves the input table schema.

    Takes the schema of the first table-typed argument as the reference;
    checks it against the output table (or each table in a list/tuple of
    outputs) via ``check_schema``. Empty output tables are skipped.
    """
    input_schema = None
    all_args = []
    all_args.extend(args)
    all_args.extend(kwargs.values())
    # First table-typed argument supplies the reference schema.
    for arg in all_args:
        if is_table(arg):
            input_schema = arg.schema
            break
    result = func(*args, **kwargs)
    if input_schema is not None:
        # single data set
        if is_table(result) and result.count() > 0:
            check_schema(input_schema, result.schema)
        # multiple data sets
        # Fix: use isinstance instead of the fragile string comparison
        # type(result).__name__ in ["list", "tuple"] — same behavior, and
        # additionally covers list/tuple subclasses.
        elif isinstance(result, (list, tuple)):
            for output_data in result:
                if is_table(output_data) and output_data.count() > 0:
                    check_schema(input_schema, output_data.schema)
    return result
def _func(*args, **kwargs):
    """Wrapper asserting match-id presence survives the wrapped call.

    Reads the match-id flag from the first table-typed argument; if the
    wrapped function returns an instance table that *dropped* the match id,
    raises ``EnvironmentError``.
    """
    input_with_inst_id = None
    all_args = []
    all_args.extend(args)
    all_args.extend(kwargs.values())
    # First table-typed argument determines the expected match-id state.
    for arg in all_args:
        if is_table(arg):
            input_with_inst_id = check_with_inst_id(arg)
            break
    result = func(*args, **kwargs)
    if input_with_inst_id is not None and is_table(result):
        if check_is_instance(result):
            result_with_inst_id = check_with_inst_id(result)
            LOGGER.debug(
                f"Input with match id: {input_with_inst_id} -> output with match id: {result_with_inst_id}"
            )
            if input_with_inst_id and not result_with_inst_id:
                # Fix: the two adjacent f-string fragments concatenated
                # without a separator, producing ",func:" — add the space.
                raise EnvironmentError(
                    f"Input with match id: {input_with_inst_id} -> output with match id: {result_with_inst_id}, "
                    f"func: {func}")
    return result
def dot(self, other, target_name=None):
    """Dot product of this tensor with ``other``.

    ``other`` may be another fixed-point tensor (its raw value is used), a
    numpy array (result is boxed via ``self._boxed``), or a table (mapped
    row-wise into a ``PaillierFixedPointTensor``). Anything else raises
    ``ValueError``.
    """
    def _vec_dot(x, y):
        # np.dot of two 1-D vectors yields a scalar; normalize to ndarray.
        out = np.dot(x, y)
        return out if isinstance(out, np.ndarray) else np.array([out])

    if isinstance(other, (FixedPointTensor, fixedpoint_table.FixedPointTensor)):
        other = other.value

    if isinstance(other, np.ndarray):
        return self._boxed(_vec_dot(self.value, other), target_name)

    if is_table(other):
        dotted = other.mapValues(functools.partial(_vec_dot, self.value))
        return fixedpoint_table.PaillierFixedPointTensor(
            value=dotted,
            tensor_name=target_name,
            cipher=self.cipher)

    raise ValueError(f"type={type(other)}")