Example #1
    def test_cross_validation_randomQuery_MaxIteration(self):

        ml_technique = LogisticRegression()
        # ml_technique = BernoulliNB()
        # ml_technique = svm.SVC(kernel='rbf', probability=True)
        # ml_technique = svm.NuSVC(gamma='auto', probability=True)
        # stopping_criteria = PercentOfUnlabel(70)
        stopping_criteria = MaxIteration(25)
        # stopping_criteria = TimeLimit(2)
        query_strategy = QueryInstanceRandom()

        performance_metrics = [
            Accuracy(),
            F1(average='weighted'),
            HammingLoss()
        ]

        # initialize the CrossValidationExperiment
        experiment = CrossValidationExperiment(
            client=self.__client,
            X=self.__X,
            Y=self.__y,
            scenario_type=PoolBasedSamplingScenario,
            ml_technique=ml_technique,
            performance_metrics=performance_metrics,
            query_strategy=query_strategy,
            oracle=SimulatedOracle(labels=self.__y),
            stopping_criteria=stopping_criteria,
            self_partition=True,
            kfolds=10,
            oracle_name='SimulatedOracle',
            test_ratio=0.3,
            initial_label_rate=0.05,
            all_class=True,
            rebalance=True)

        results = experiment.evaluate(verbose=True,
                                      multithread=True,
                                      max_threads=10,
                                      client=self.__client)

        for result in results:
            query_analyser = ExperimentAnalyserFactory.experiment_analyser(
                performance_metrics=[
                    metric.metric_name for metric in performance_metrics
                ],
                method_name=query_strategy.query_function_name,
                method_results=result,
                type="queries")

            # get a brief description of the experiment
            query_analyser.plot_learning_curves(
                title='Active Learning experiment results')
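The examples on this page are methods of a test class, so self.__X, self.__y and self.__client are fixtures prepared elsewhere. The scikit-learn imports they depend on are standard; the active-learning names (CrossValidationExperiment, PoolBasedSamplingScenario, QueryInstanceRandom, SimulatedOracle, the stopping criteria and metrics, ExperimentAnalyserFactory, split) belong to the framework under test. A minimal import sketch, with the framework paths left as placeholders because they depend on the installation:

    # Standard scikit-learn imports used throughout the examples.
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm

    # Framework-specific names; the module paths below are placeholders
    # (assumptions), not verified import locations.
    # from <framework> import CrossValidationExperiment, PoolBasedSamplingScenario
    # from <framework> import QueryInstanceRandom, SimulatedOracle
    # from <framework> import MaxIteration, TimeLimit, PercentOfUnlabel, UnlabelSetEmpty
    # from <framework> import Accuracy, F1, HammingLoss, Precision, Recall
    # from <framework> import ExperimentAnalyserFactory, split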
Example #2
    def test_cross_validation_without_self_partitioning_ok(self):

        split_count = 5
        instance_num = 100

        self.__X, self.__y = make_classification(n_samples=instance_num,
                                                 n_features=4,
                                                 n_informative=2,
                                                 n_redundant=2,
                                                 n_repeated=0,
                                                 n_classes=2,
                                                 n_clusters_per_class=2,
                                                 weights=None,
                                                 flip_y=0.01,
                                                 class_sep=1.0,
                                                 hypercube=True,
                                                 shift=0.0,
                                                 scale=1.0,
                                                 shuffle=True,
                                                 random_state=None)

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=split_count,
            all_class=True)

        # initialize the CrossValidationExperiment
        experiment = CrossValidationExperiment(
            self.__X,
            self.__y,
            self_partition=False,
            stopping_criteria=UnlabelSetEmpty(),
            train_idx=train_idx,
            test_idx=test_idx,
            label_idx=label_idx,
            unlabel_idx=unlabel_idx)

        assert len(experiment._train_idx) == split_count
        assert len(experiment._test_idx) == split_count
        assert len(experiment._label_idx) == split_count
        assert len(experiment._unlabel_idx) == split_count

        for i in range(split_count):
            train = set(experiment._train_idx[i])
            test = set(experiment._test_idx[i])
            labelled = set(experiment._label_idx[i])
            unlabelled = set(experiment._unlabel_idx[i])

            # the requested ratios must hold within every fold
            assert len(test) == round(0.3 * instance_num)
            assert len(labelled) == round(0.05 * len(train))

            # validity: train/test must partition the whole dataset, and
            # labelled/unlabelled must partition the training set
            assert train.union(test) == set(range(instance_num))
            assert labelled.union(unlabelled) == train
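As a quick sanity check of the numbers above: with instance_num = 100, test_ratio = 0.3 and initial_label_rate = 0.05, every fold should contain round(0.3 * 100) = 30 test instances and 70 training instances, of which round(0.05 * 70) = 4 start out labelled; the final two assertions then confirm that train/test partition the full dataset and that labelled/unlabelled partition the training set.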
Example #3
    def test_cross_validation_randomQuery_unlabelSetEmpty_singleThread(self):

        ml_technique = LogisticRegression(solver='liblinear')
        stopping_criteria = UnlabelSetEmpty()  # stop once the unlabelled pool is empty, as the test name says
        query_strategy = QueryInstanceRandom()
        performance_metrics = [
            Accuracy(),
            F1(average='macro'),
            HammingLoss(),
            Precision(average='macro'),
            Recall(average='macro')
        ]

        # initialize the CrossValidationExperiment
        experiment = CrossValidationExperiment(
            self.__X,
            self.__y,
            scenario_type=PoolBasedSamplingScenario,
            ml_technique=ml_technique,
            performance_metrics=performance_metrics,
            query_strategy=query_strategy,
            oracle=SimulatedOracle(labels=self.__y),
            stopping_criteria=stopping_criteria,
            self_partition=True,
            kfolds=10,
            test_ratio=0.3,
            initial_label_rate=0.05,
            all_class=True)

        results = experiment.evaluate(verbose=False)

        for result in results:
            query_analyser = ExperimentAnalyserFactory.experiment_analyser(
                performance_metrics=[
                    metric.metric_name for metric in performance_metrics
                ],
                method_name=query_strategy.query_function_name,
                method_results=result,
                type="queries")

            # get a brief description of the experiment
            query_analyser.plot_learning_curves(
                title='Active Learning experiment results')
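Unlike Example #1, which passes multithread=True and max_threads=10 to evaluate(), this test calls evaluate(verbose=False) with its defaults, so the ten folds are evaluated sequentially on a single thread, as the test name advertises.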
Example #4
    def test_cross_validation_without_self_partitioning_wrong_kfold_size(self):

        split_count = 5
        instance_num = 100

        self.__X, self.__y = make_classification(n_samples=instance_num,
                                                 n_features=4,
                                                 n_informative=2,
                                                 n_redundant=2,
                                                 n_repeated=0,
                                                 n_classes=2,
                                                 n_clusters_per_class=2,
                                                 weights=None,
                                                 flip_y=0.01,
                                                 class_sep=1.0,
                                                 hypercube=True,
                                                 shift=0.0,
                                                 scale=1.0,
                                                 shuffle=True,
                                                 random_state=None)

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=split_count,
            all_class=True)

        # drop one fold from every index list so their length (4) no longer
        # matches the declared kfolds=5
        train_idx.pop()
        test_idx.pop()
        label_idx.pop()
        unlabel_idx.pop()

        # the constructor must reject index lists whose fold count does not match kfolds
        try:
            CrossValidationExperiment(X=self.__X,
                                      Y=self.__y,
                                      self_partition=False,
                                      kfolds=5,
                                      stopping_criteria=UnlabelSetEmpty(),
                                      train_idx=train_idx,
                                      test_idx=test_idx,
                                      label_idx=label_idx,
                                      unlabel_idx=unlabel_idx)
        except ValueError as val_exc:
            assert "Number of folds for inputs" in str(val_exc)
        else:
            raise AssertionError("expected a ValueError for the mismatched fold count")
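If the suite runs under pytest (an assumption; the example above is written in plain try/except style), the same expectation can be phrased more compactly with pytest.raises, reusing the index lists prepared above; the match argument performs a regex search against the exception message:

        import pytest

        with pytest.raises(ValueError, match="Number of folds for inputs"):
            CrossValidationExperiment(X=self.__X,
                                      Y=self.__y,
                                      self_partition=False,
                                      kfolds=5,
                                      stopping_criteria=UnlabelSetEmpty(),
                                      train_idx=train_idx,
                                      test_idx=test_idx,
                                      label_idx=label_idx,
                                      unlabel_idx=unlabel_idx)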