def test_cross_validation_randomQuery_MaxIteration(self):
    """Cross-validated AL run: random sampling, stop after 25 iterations.

    Evaluates with multiple threads and plots one learning curve per
    fold result returned by the experiment.
    """
    learner = LogisticRegression()
    stop_after = MaxIteration(25)
    sampler = QueryInstanceRandom()
    metrics = [
        Accuracy(),
        F1(average='weighted'),
        HammingLoss(),
    ]

    # Build the cross-validated experiment: 10 folds, 30% held out for
    # test, 5% of the remainder initially labelled, classes rebalanced.
    experiment = CrossValidationExperiment(
        client=self.__client,
        X=self.__X,
        Y=self.__y,
        scenario_type=PoolBasedSamplingScenario,
        ml_technique=learner,
        performance_metrics=metrics,
        query_strategy=sampler,
        oracle=SimulatedOracle(labels=self.__y),
        stopping_criteria=stop_after,
        self_partition=True,
        kfolds=10,
        oracle_name='SimulatedOracle',
        test_ratio=0.3,
        initial_label_rate=0.05,
        all_class=True,
        rebalance=True)

    fold_results = experiment.evaluate(
        verbose=True, multithread=True, max_threads=10,
        client=self.__client)

    # One analyser (and one learning-curve plot) per fold result.
    for fold_result in fold_results:
        analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[m.metric_name for m in metrics],
            method_name=sampler.query_function_name,
            method_results=fold_result,
            type="queries")
        analyser.plot_learning_curves(
            title='Active Learning experiment results')
def test_cross_validation_without_self_partitioning_ok(self):
    """Externally-partitioned experiment accepts a pre-computed split.

    Builds a synthetic binary problem, splits it outside the experiment,
    and checks that every fold's index lists are kept verbatim and are
    internally consistent (train/test partition the data; label/unlabel
    partition train).
    """
    split_count = 5
    instance_num = 100

    # Synthetic 2-class problem with 100 samples and 4 features.
    self.__X, self.__y = make_classification(
        n_samples=instance_num, n_features=4, n_informative=2,
        n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2,
        weights=None, flip_y=0.01, class_sep=1.0, hypercube=True,
        shift=0.0, scale=1.0, shuffle=True, random_state=None)

    train_idx, test_idx, label_idx, unlabel_idx = split(
        X=self.__X, y=self.__y, test_ratio=0.3, initial_label_rate=0.05,
        split_count=split_count, all_class=True)

    # Hand the pre-computed partition to the experiment.
    experiment = CrossValidationExperiment(
        self.__X, self.__y,
        self_partition=False,
        stopping_criteria=UnlabelSetEmpty(),
        train_idx=train_idx,
        test_idx=test_idx,
        label_idx=label_idx,
        unlabel_idx=unlabel_idx)

    # Exactly one index list per fold must be retained.
    for fold_lists in (experiment._train_idx, experiment._test_idx,
                       experiment._label_idx, experiment._unlabel_idx):
        assert len(fold_lists) == split_count

    for fold in range(split_count):
        train = set(experiment._train_idx[fold])
        test = set(experiment._test_idx[fold])
        labelled = set(experiment._label_idx[fold])
        unlabelled = set(experiment._unlabel_idx[fold])

        assert len(test) == round(0.3 * instance_num)
        assert len(labelled) == round(0.05 * len(train))

        # validity
        assert train | test == set(range(instance_num))
        assert labelled | unlabelled == train
def test_cross_validation_randomQuery_unlabelSetEmpty_singleThread(self):
    """Single-threaded cross-validation run with random sampling.

    NOTE(review): despite the method name, the stopping criterion used
    here is MaxIteration(50), not UnlabelSetEmpty — confirm which one
    was intended.
    """
    learner = LogisticRegression(solver='liblinear')
    stop_after = MaxIteration(50)
    sampler = QueryInstanceRandom()
    metrics = [
        Accuracy(),
        F1(average='macro'),
        HammingLoss(),
        Precision(average='macro'),
        Recall(average='macro'),
    ]

    # Self-partitioned experiment: 10 folds, 30% test split, 5% of the
    # remaining pool labelled up front, all classes represented.
    experiment = CrossValidationExperiment(
        self.__X, self.__y,
        scenario_type=PoolBasedSamplingScenario,
        ml_technique=learner,
        performance_metrics=metrics,
        query_strategy=sampler,
        oracle=SimulatedOracle(labels=self.__y),
        stopping_criteria=stop_after,
        self_partition=True,
        kfolds=10,
        test_ratio=0.3,
        initial_label_rate=0.05,
        all_class=True)

    # Sequential evaluation (no multithread flag), quiet output; plot a
    # learning curve for every fold result.
    for fold_result in experiment.evaluate(verbose=False):
        analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[m.metric_name for m in metrics],
            method_name=sampler.query_function_name,
            method_results=fold_result,
            type="queries")
        analyser.plot_learning_curves(
            title='Active Learning experiment results')
def test_cross_validation_without_self_partitioning_wrong_kfold_size(self):
    """A kfolds/index-list size mismatch must raise ValueError.

    Generates a 5-way split, removes one fold from every index list, and
    verifies the experiment constructor rejects the inconsistent input.
    """
    split_count = 5
    instance_num = 100

    # Synthetic 2-class problem with 100 samples and 4 features.
    self.__X, self.__y = make_classification(
        n_samples=instance_num, n_features=4, n_informative=2,
        n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2,
        weights=None, flip_y=0.01, class_sep=1.0, hypercube=True,
        shift=0.0, scale=1.0, shuffle=True, random_state=None)

    train_idx, test_idx, label_idx, unlabel_idx = split(
        X=self.__X, y=self.__y, test_ratio=0.3, initial_label_rate=0.05,
        split_count=split_count, all_class=True)

    # Drop one fold from every list so they no longer match kfolds=5.
    for idx_list in (train_idx, test_idx, label_idx, unlabel_idx):
        idx_list.pop()

    try:
        CrossValidationExperiment(
            X=self.__X,
            Y=self.__y,
            self_partition=False,
            kfolds=5,
            stopping_criteria=UnlabelSetEmpty(),
            train_idx=train_idx,
            test_idx=test_idx,
            label_idx=label_idx,
            unlabel_idx=unlabel_idx)
    except ValueError as valExc:
        assert ("Number of folds for inputs" in "{0}".format(valExc))
    else:
        raise Exception("Expected ValueError exception")