Exemple #1
0
    def check_label(self):
        """Validate the labels in ``self.data_bin`` and normalise class ids.

        For a classification task the labels are validated with
        ``ClassifyLabelChecker.validate_label``. If the reported classes are
        not all integers inside ``[0, len(classes_))``, ``self.y`` is replaced
        by a copy whose values are remapped so that the sorted classes become
        ``0 .. num_classes - 1``. For any other task type the labels are only
        validated with ``RegressionLabelChecker.validate_label``.

        Returns:
            A tuple ``(classes_, num_classes, booster_dim)``:
            ``classes_`` is the sorted class list (``[]`` when the task is not
            classification), ``num_classes`` is the class count (``1`` when
            the task is not classification) and ``booster_dim`` equals
            ``num_classes`` when there are more than two classes, else ``1``.
        """
        LOGGER.info("check label")
        classes_ = []
        num_classes, booster_dim = 1, 1
        if self.task_type == consts.CLASSIFICATION:
            num_classes, classes_ = ClassifyLabelChecker.validate_label(
                self.data_bin)
            if num_classes > 2:
                booster_dim = num_classes

            # The type check runs before the comparison, so labels that do
            # not support ordering against ints (e.g. strings) are rejected
            # without raising; no blanket exception handler is needed.
            range_from_zero = all(
                isinstance(_class, int) and 0 <= _class < len(classes_)
                for _class in classes_)

            classes_ = sorted(classes_)
            if not range_from_zero:
                # Map the i-th smallest class to i.
                class_mapping = dict(zip(classes_, range(num_classes)))
                self.y = self.y.mapValues(lambda _class: class_mapping[_class])

        else:
            RegressionLabelChecker.validate_label(self.data_bin)

        return classes_, num_classes, booster_dim
Exemple #2
0
    def check_label(self):
        """Validate the labels in ``self.data_bin`` and configure the loss.

        For a classification task the labels are validated with
        ``ClassifyLabelChecker.validate_label`` and the results are stored in
        ``self.num_classes`` and ``self.classes_`` (sorted). With more than
        two classes, ``self.classify_target`` is set to ``"multinomial"`` and
        ``self.tree_dim`` to the class count. If the classes are not all
        integers inside ``[0, num_classes)``, ``self.y`` is replaced by a copy
        whose values are remapped so that the sorted classes become
        ``0 .. num_classes - 1``. For any other task type the labels are only
        validated with ``RegressionLabelChecker.validate_label``.

        In both cases ``self.set_loss(self.objective_param)`` is called last.
        """
        LOGGER.info("check label")
        if self.task_type == consts.CLASSIFICATION:
            self.num_classes, self.classes_ = ClassifyLabelChecker.validate_label(
                self.data_bin)
            if self.num_classes > 2:
                self.classify_target = "multinomial"
                self.tree_dim = self.num_classes

            # The type check runs before the comparison, so labels that do
            # not support ordering against ints (e.g. strings) are rejected
            # without raising; no blanket exception handler is needed.
            range_from_zero = all(
                isinstance(_class, int) and 0 <= _class < self.num_classes
                for _class in self.classes_)

            self.classes_ = sorted(self.classes_)
            if not range_from_zero:
                # Map the i-th smallest class to i.
                class_mapping = dict(
                    zip(self.classes_, range(self.num_classes)))
                self.y = self.y.mapValues(lambda _class: class_mapping[_class])

        else:
            RegressionLabelChecker.validate_label(self.data_bin)

        self.set_loss(self.objective_param)
class TeskClassifyLabelChecker(unittest.TestCase):
    """Exercise ``validate_label`` on the classification and regression checkers."""

    def setUp(self):
        """Start the test session and build the label fixtures."""
        session.init("test_label_checker")

        # 100 instances whose labels cycle through 0..4.
        self.small_label_set = [Instance(label=i % 5) for i in range(100)]
        self.classify_inst = session.parallelize(self.small_label_set,
                                                 include_key=False,
                                                 partition=16)
        # 100 instances with random float labels in [0, 1).
        self.regression_label = [
            Instance(label=random.random()) for _ in range(100)
        ]
        # NOTE(review): unlike classify_inst, include_key is not passed here;
        # confirm that relying on the default is intended.
        self.regression_inst = session.parallelize(self.regression_label,
                                                   partition=16)
        self.classify_checker = ClassifyLabelChecker()
        self.regression_checker = RegressionLabelChecker()

    def test_classify_label_checkert(self):
        """The classify checker reports the five distinct labels 0..4."""
        num_class, classes = self.classify_checker.validate_label(
            self.classify_inst)
        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(num_class, 5)
        self.assertEqual(sorted(classes), [0, 1, 2, 3, 4])

    def test_regression_label_checker(self):
        """The regression checker accepts float labels without raising."""
        self.regression_checker.validate_label(self.regression_inst)

    def tearDown(self):
        """Stop the session started in ``setUp``."""
        session.stop()
    def setUp(self):
        """Start the "test_label_checker" session and build the label fixtures."""
        session.init("test_label_checker")

        # Classification fixture: 100 instances with labels cycling 0..4.
        cyclic_instances = [Instance(label=idx % 5) for idx in range(100)]
        self.small_label_set = cyclic_instances
        self.classify_inst = session.parallelize(
            cyclic_instances,
            include_key=False,
        )

        # Regression fixture: 100 instances with random float labels.
        random_instances = [
            Instance(label=random.random()) for _ in range(100)
        ]
        self.regression_label = random_instances
        self.regression_inst = session.parallelize(random_instances)

        # Checkers under test.
        self.classify_checker = ClassifyLabelChecker()
        self.regression_checker = RegressionLabelChecker()
    def check_labels(self, data_inst) -> List[int]:
        """Validate the labels of ``data_inst`` for the configured task type.

        Args:
            data_inst: the data passed through to the label checker.

        Returns:
            The classes reported by ``ClassifyLabelChecker.validate_label``
            when the task is classification. For any other task type the
            labels are validated with ``RegressionLabelChecker`` and ``None``
            is returned (despite the ``List[int]`` annotation).
        """
        LOGGER.debug('checking labels')

        if self.task_type == consts.CLASSIFICATION:
            # The class count is returned as well but is not needed here.
            _, found_classes = ClassifyLabelChecker.validate_label(data_inst)
            return found_classes

        RegressionLabelChecker.validate_label(data_inst)
        return None