Ejemplo n.º 1
0
    def fit(self, data_instances, validate_data=None):
        """
        Fit the local baseline model on ``data_instances``.

        Returns immediately when ``self.need_run`` is falsy. Otherwise the
        input table goes through the empty-table and empty-feature checks,
        the features and labels (plus weights, when
        ``data_overview.with_weight`` reports them) are collected into numpy
        arrays, and the model obtained from ``self.get_model()`` is fitted.

        Parameters
        ----------
        data_instances :
            Table whose values expose ``features``, ``label`` and ``weight``.
        validate_data :
            Not used by this method.

        Side effects
        ------------
        Sets ``self.header``, ``self.model_fit`` and ``self.need_one_vs_rest``
        and records ``self.get_model_summary()`` through ``self.set_summary``.
        """
        if not self.need_run:
            return

        def _collected_values(table):
            # Each collected item is indexed at position 1 (presumably a
            # (key, value) pair); only that element is kept.
            return np.array([pair[1] for pair in table.collect()])

        LOGGER.info("Enter Local Baseline fit")

        # Refuse to train on an empty table or on instances without features.
        abnormal_detection.empty_table_detection(data_instances)
        abnormal_detection.empty_feature_detection(data_instances)

        model = self.get_model()
        self.header = data_overview.get_header(data_instances)

        feature_table = data_instances.mapValues(lambda inst: inst.features)
        label_table = data_instances.mapValues(lambda inst: inst.label)
        features = _collected_values(feature_table)
        labels = _collected_values(label_table)

        if data_overview.with_weight(data_instances):
            LOGGER.info("Input Data with Weight. Weight will be used to fit model.")
            weight_table = data_instances.mapValues(lambda inst: inst.weight)
            sample_weight = _collected_values(weight_table)
        else:
            sample_weight = None

        self.model_fit = model.fit(features, labels, sample_weight)

        # More than two fitted classes means a multi-class problem.
        class_count = len(self.model_fit.classes_)
        self.need_one_vs_rest = class_count > 2

        self.set_summary(self.get_model_summary())
Ejemplo n.º 2
0
 def _abnormal_detection(self, data_instances):
     """
     Validate the input table before it is used.

     Runs the empty-table and empty-feature detections on
     ``data_instances`` and then hands its schema to
     ``self.check_schema_content``.
     """
     detectors = (
         abnormal_detection.empty_table_detection,
         abnormal_detection.empty_feature_detection,
     )
     for detect in detectors:
         detect(data_instances)

     schema = data_instances.schema
     self.check_schema_content(schema)
Ejemplo n.º 3
0
    def data_alignment(self, data_inst):
        """
        Check the input table and convert each of its rows.

        The table first goes through the empty-table and empty-feature
        detections; every value is then mapped through
        ``BoostingTree.data_format_transform`` and the mapped table is
        returned.

        NOTE(review): the schema of ``data_inst`` is not assigned to the
        returned table here -- confirm callers do not rely on it.
        """
        for detect in (
            abnormal_detection.empty_table_detection,
            abnormal_detection.empty_feature_detection,
        ):
            detect(data_inst)

        def _transform(row):
            return BoostingTree.data_format_transform(row)

        return data_inst.mapValues(_transform)
Ejemplo n.º 4
0
 def data_alignment(data_inst):
     """
     Align data: run abnormal detection, then transform every row to the
     sparse format via ``Boosting.data_format_transform``.

     The schema of the input table is copied onto the returned table.
     """
     for detect in (
         abnormal_detection.empty_table_detection,
         abnormal_detection.empty_feature_detection,
     ):
         detect(data_inst)

     # Read the schema up front so it can be re-attached to the mapped table.
     original_schema = data_inst.schema

     def _reformat(row):
         return Boosting.data_format_transform(row)

     aligned = data_inst.mapValues(_reformat)
     aligned.schema = original_schema
     return aligned
Ejemplo n.º 5
0
    def fit(self, data_instances, validate_data=None):
        """
        Fit the model returned by ``self.get_model()`` on ``data_instances``.

        Returns immediately when ``self.need_run`` is falsy. Otherwise the
        input table goes through the empty-table and empty-feature checks,
        features and labels are collected into numpy arrays, and the fitted
        model is stored on ``self.model_fit``. ``self.header`` is set from
        ``data_overview.get_header``.

        Parameters
        ----------
        data_instances :
            Table whose values expose ``features`` and ``label``.
        validate_data :
            Not used by this method.
        """
        if not self.need_run:
            return
        # check if empty table
        abnormal_detection.empty_table_detection(data_instances)
        abnormal_detection.empty_feature_detection(data_instances)
        # get model
        model = self.get_model()
        # get header
        self.header = data_overview.get_header(data_instances)

        X_table = data_instances.mapValues(lambda v: v.features)
        y_table = data_instances.mapValues(lambda v: v.label)

        X = np.array([v[1] for v in X_table.collect()])
        # Take the label out of each collected pair *before* building the
        # array. Building one 2-D array from the whole pairs and slicing
        # column 1 makes numpy choose a single dtype for element 0 and the
        # labels together, so a string in position 0 (presumably the key)
        # would silently turn numeric labels into strings.
        y = np.array([v[1] for v in y_table.collect()])

        self.model_fit = model.fit(X, y)
Ejemplo n.º 6
0
 def _abnormal_detection(self, data_instances):
     """
     Validate the input table.

     Applies the empty-table detection followed by the empty-feature
     detection to ``data_instances``.
     """
     for detect in (
         abnormal_detection.empty_table_detection,
         abnormal_detection.empty_feature_detection,
     ):
         detect(data_instances)
Ejemplo n.º 7
0
 def _abnormal_detection(self, data_instances):
     """
     Check whether the input data is valid.

     Runs the empty-table and empty-feature detections on
     ``data_instances`` and then passes its schema to
     ``ModelBase.check_schema_content``.
     """
     detectors = (
         abnormal_detection.empty_table_detection,
         abnormal_detection.empty_feature_detection,
     )
     for detect in detectors:
         detect(data_instances)

     schema = data_instances.schema
     ModelBase.check_schema_content(schema)