def fit(self, data_instances, validate_data=None):
    """Fit the local baseline model on the rows of ``data_instances``.

    The table is collected once and converted to numpy arrays, which are
    then passed to the model returned by ``self.get_model()``.

    Parameters
    ----------
    data_instances : table of instances
        Each value must expose ``features`` and ``label``; ``weight`` is
        read too when ``data_overview.with_weight`` reports weights.
    validate_data : optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    None
        Results are stored on ``self`` (``header``, ``model_fit``,
        ``need_one_vs_rest``) and published via ``self.set_summary``.
        Nothing is done when ``self.need_run`` is falsy.
    """
    if not self.need_run:
        return

    LOGGER.info("Enter Local Baseline fit")
    # Validate the input table before doing any work on it.
    abnormal_detection.empty_table_detection(data_instances)
    abnormal_detection.empty_feature_detection(data_instances)

    model = self.get_model()
    self.header = data_overview.get_header(data_instances)

    # Collect the table a single time and derive every array from the same
    # row list: features, labels and weights are then aligned row-by-row by
    # construction instead of depending on separate collect() calls
    # returning rows in the same order.
    instances = [kv[1] for kv in data_instances.collect()]
    X = np.array([instance.features for instance in instances])
    y = np.array([instance.label for instance in instances])

    w = None
    if data_overview.with_weight(data_instances):
        LOGGER.info("Input Data with Weight. Weight will be used to fit model.")
        w = np.array([instance.weight for instance in instances])

    self.model_fit = model.fit(X, y, w)
    # Flag set when the fitted model reports more than two classes.
    self.need_one_vs_rest = len(self.model_fit.classes_) > 2
    self.set_summary(self.get_model_summary())
def _abnormal_detection(self, data_instances):
    """Validate ``data_instances`` before it is used.

    Runs the empty-table and empty-feature detections in that order, then
    hands the table's schema to ``self.check_schema_content``.
    """
    abnormal_detection.empty_table_detection(data_instances)
    abnormal_detection.empty_feature_detection(data_instances)

    # Schema validation only runs once both emptiness checks have passed.
    self.check_schema_content(data_instances.schema)
def data_alignment(self, data_inst):
    """Validate ``data_inst`` and return it with every row re-formatted.

    The table is checked for emptiness (rows and features), then each value
    is passed through ``BoostingTree.data_format_transform``.

    NOTE(review): the input schema is not copied onto the returned table
    here -- confirm whether ``mapValues`` carries it over or callers need it.
    """
    abnormal_detection.empty_table_detection(data_inst)
    abnormal_detection.empty_feature_detection(data_inst)

    def _reformat(row):
        # Resolved at call time, exactly like the lambda it replaces.
        return BoostingTree.data_format_transform(row)

    return data_inst.mapValues(_reformat)
def data_alignment(data_inst):
    """Align the input table: validate it, then re-format every row.

    Runs the empty-table and empty-feature detections, converts each value
    with ``Boosting.data_format_transform`` (transform to sparse format),
    and re-attaches the input table's schema to the result.
    """
    abnormal_detection.empty_table_detection(data_inst)
    abnormal_detection.empty_feature_detection(data_inst)

    # Remember the schema so it can be restored on the transformed table.
    original_schema = data_inst.schema
    aligned = data_inst.mapValues(
        lambda instance: Boosting.data_format_transform(instance)
    )
    aligned.schema = original_schema
    return aligned
def fit(self, data_instances, validate_data=None):
    """Fit the local model on the rows of ``data_instances``.

    The table is collected once and converted to numpy arrays, which are
    then passed to the model returned by ``self.get_model()``.

    Parameters
    ----------
    data_instances : table of instances
        Each value must expose ``features`` and ``label``.
    validate_data : optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    None
        The header is stored in ``self.header`` and the fitted model in
        ``self.model_fit``. Nothing is done when ``self.need_run`` is falsy.
    """
    if not self.need_run:
        return

    # Validate the input table before doing any work on it.
    abnormal_detection.empty_table_detection(data_instances)
    abnormal_detection.empty_feature_detection(data_instances)

    model = self.get_model()
    self.header = data_overview.get_header(data_instances)

    # Collect once so features and labels come from the same row list and
    # are aligned by construction.
    instances = [kv[1] for kv in data_instances.collect()]
    X = np.array([instance.features for instance in instances])
    # Pull labels out element-wise. Stacking the (key, label) pairs into a
    # single array and slicing column 1 forces keys and labels to a common
    # dtype, so e.g. string keys would silently turn the labels into strings.
    y = np.array([instance.label for instance in instances])

    self.model_fit = model.fit(X, y)
def _abnormal_detection(self, data_instances):
    """Validate ``data_instances`` before it is used.

    Applies the empty-table detection followed by the empty-feature
    detection from ``abnormal_detection``.
    """
    # Table-level check first, then the feature-level check.
    abnormal_detection.empty_table_detection(data_instances)
    abnormal_detection.empty_feature_detection(data_instances)
def _abnormal_detection(self, data_instances):
    """Check whether the input data is valid.

    Runs the empty-table and empty-feature detections, then passes the
    table's schema to ``ModelBase.check_schema_content``.
    """
    abnormal_detection.empty_table_detection(data_instances)
    abnormal_detection.empty_feature_detection(data_instances)

    # Schema validation only runs once both emptiness checks have passed.
    ModelBase.check_schema_content(data_instances.schema)