def evaluate_individual(
    individual: Individual,
    evaluate_pipeline: Callable,
    timeout: float = 1e6,
    deadline: Optional[float] = None,
    add_length_to_score: bool = True,
    **kwargs,
) -> Evaluation:
    """Evaluate the pipeline specified by individual, and record the results.

    Parameters
    ----------
    individual: Individual
        Blueprint for the pipeline to evaluate.
    evaluate_pipeline: Callable
        Function which takes the pipeline and produces validation predictions,
        scores, estimators and errors.
    timeout: float (default=1e6)
        Maximum time in seconds that the evaluation is allowed to take.
        Don't depend on high accuracy.
        A shorter timeout is imposed if `deadline` is in less than `timeout` seconds.
    deadline: float, optional
        A time in seconds since epoch.
        Cut off evaluation at `deadline` even if `timeout` seconds have not yet elapsed.
    add_length_to_score: bool (default=True)
        Add the length of the individual to the score result of the evaluation.
    **kwargs
        Passed to the `evaluate_pipeline` function.

    Returns
    -------
    Evaluation
    """
    result = Evaluation(individual, pid=os.getpid())
    result.start_time = datetime.now()

    if deadline is not None:
        time_to_deadline = deadline - time.time()
        timeout = min(timeout, time_to_deadline)

    with Stopwatch() as wall_time, Stopwatch(time.process_time) as process_time:
        evaluation = evaluate_pipeline(individual.pipeline, timeout=timeout, **kwargs)
        result._predictions, result.score, result._estimators, result.error = evaluation
    result.duration = wall_time.elapsed_time

    if add_length_to_score:
        result.score = result.score + (-len(individual.primitives),)
    individual.fitness = Fitness(
        result.score,
        result.start_time,
        wall_time.elapsed_time,
        process_time.elapsed_time,
    )

    return result
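# For reference, `evaluate_pipeline` is expected to return a 4-tuple
# (predictions, score, estimators, error), where `score` is itself a tuple so
# that `add_length_to_score` can concatenate a length penalty onto it. Below
# is a minimal sketch of a conforming stub; the name `dummy_pipeline_evaluator`
# and every returned value are illustrative assumptions, not part of GAMA's API.
import numpy as np


def dummy_pipeline_evaluator(pipeline, timeout: float = 1e6, **kwargs):
    """Illustrative stand-in for the `evaluate_pipeline` callable."""
    predictions = np.zeros(10)  # placeholder validation predictions
    score = (0.5,)  # one entry per optimization objective; must be a tuple
    estimators = []  # fitted estimators, e.g. one per cross-validation fold
    error = None  # or the exception/message if the evaluation failed
    return predictions, score, estimators, error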
def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric):
    with Stopwatch() as sw:
        gama.fit(X_train, y_train)

    assert (
        TOTAL_TIME_S * FIT_TIME_MARGIN >= sw.elapsed_time
    ), "fit must stay within 110% of allotted time."

    predictions = gama.predict(X_test)
    assert isinstance(predictions, np.ndarray), "predictions should be numpy arrays."
    assert (
        data["test_size"],
    ) == predictions.shape, "predict should return an (N,) shaped array."

    # Predicting the mean is the baseline (data["base_mse"]); predictions
    # should do at least as well.
    mse = mean_squared_error(y_test, predictions)
    print(data["name"], metric, "mse:", mse)
    assert (
        data["base_mse"] >= mse
    ), "predictions should be at least as good as predicting the mean."
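# A hypothetical fixture for the regression helper above; the keys mirror
# exactly those read by `_test_gama_regressor` (`name`, `test_size`,
# `base_mse`). The dataset choice and the baseline value are illustrative
# assumptions, not fixtures from this repository.
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

diabetes_data = {
    "name": "diabetes",
    "test_size": 111,  # 25% hold-out of the 442 diabetes samples
    "base_mse": 6000.0,  # assumed baseline: MSE of always predicting the mean
}
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# _test_gama_regressor(gama, X_train, X_test, y_train, y_test,
#                      diabetes_data, "neg_mean_squared_error")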
def _test_dataset_problem(
    data,
    metric: str,
    arff: bool = False,
    y_type: Type = pd.DataFrame,
    search: BaseSearch = AsyncEA(),
    missing_values: bool = False,
    max_time: int = 60,
):
    """
    :param data: dict with dataset fixtures (loader, target, sizes and baseline scores).
    :param metric: scoring metric passed to GamaClassifier.
    :param arff: if True, fit and predict from ARFF files instead of in-memory arrays.
    :param y_type: pd.DataFrame, pd.Series, np.ndarray or str
    :param search: search method to use.
    :param missing_values: if True, introduce NaN values into the data.
    :param max_time: maximum total time for the fit, in seconds.
    :return: the fitted GamaClassifier.
    """
    gama = GamaClassifier(
        random_state=0,
        max_total_time=max_time,
        scoring=metric,
        search=search,
        n_jobs=1,
        post_processing=EnsemblePostProcessing(ensemble_size=5),
        store="nothing",
    )
    if arff:
        train_path = f"tests/data/{data['name']}_train.arff"
        test_path = f"tests/data/{data['name']}_test.arff"

        X, y = data["load"](return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, random_state=0
        )
        y_test = [str(val) for val in y_test]

        with Stopwatch() as sw:
            gama.fit_from_file(train_path, target_column=data["target"])
        class_predictions = gama.predict_from_file(
            test_path, target_column=data["target"]
        )
        class_probabilities = gama.predict_proba_from_file(
            test_path, target_column=data["target"]
        )
        gama_score = gama.score_from_file(test_path)
    else:
        X, y = data["load"](return_X_y=True)
        if y_type == str:
            databunch = data["load"]()
            y = np.asarray([databunch.target_names[c_i] for c_i in databunch.target])
        if y_type in [pd.Series, pd.DataFrame]:
            y = y_type(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, random_state=0
        )
        if missing_values:
            X_train[1:300:2, 0] = X_train[2:300:5, 1] = float("NaN")
            X_test[1:100:2, 0] = X_test[2:100:5, 1] = float("NaN")

        with Stopwatch() as sw:
            gama.fit(X_train, y_train)
        class_predictions = gama.predict(X_test)
        class_probabilities = gama.predict_proba(X_test)
        gama_score = gama.score(X_test, y_test)

    assert (
        max_time * FIT_TIME_MARGIN > sw.elapsed_time
    ), "fit must stay within 110% of allotted time."

    assert isinstance(
        class_predictions, np.ndarray
    ), "predictions should be numpy arrays."
    assert (
        data["test_size"],
    ) == class_predictions.shape, "predict should return an (N,) shaped array."

    accuracy = accuracy_score(y_test, class_predictions)
    # Majority classifier on this split achieves 0.6293706293706294
    print(data["name"], metric, "accuracy:", accuracy)
    assert (
        data["base_accuracy"] <= accuracy
    ), "predictions should be at least as good as the majority class."

    assert isinstance(
        class_probabilities, np.ndarray
    ), "probability predictions should be numpy arrays."
    assert (data["test_size"], data["n_classes"]) == class_probabilities.shape, (
        "predict_proba should return an (N,K) shaped array."
    )

    # Majority classifier on this split achieves 12.80138131184662
    logloss = log_loss(y_test, class_probabilities)
    print(data["name"], metric, "log-loss:", logloss)
    assert (
        data["base_log_loss"] >= logloss
    ), "predictions should be at least as good as the majority class."

    score_to_match = logloss if metric == "neg_log_loss" else accuracy
    assert score_to_match == pytest.approx(gama_score)

    gama.cleanup("all")
    return gama
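# A hypothetical classification fixture; the keys mirror exactly those read by
# `_test_dataset_problem`. The baseline values below are the ones quoted in
# the comments inside the helper for this split; the ARFF target column name
# "class" is an assumption.
from sklearn.datasets import load_breast_cancer

breast_cancer_data = {
    "name": "breast_cancer",
    "load": load_breast_cancer,
    "target": "class",  # assumed target column in tests/data/breast_cancer_*.arff
    "test_size": 143,  # 25% stratified hold-out of 569 samples
    "n_classes": 2,
    "base_accuracy": 0.6293706293706294,  # majority class on this split
    "base_log_loss": 12.80138131184662,  # majority class on this split
}
# _test_dataset_problem(breast_cancer_data, "accuracy")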
def test_stopwatch_initialization_zero():
    """Test that elapsed time is 0 if the stopwatch is not started yet."""
    sw = Stopwatch()
    assert pytest.approx(0, abs=ROUND_ERROR) == sw.elapsed_time
def test_stopwatch_elapsed_time_after_running():
    """Tests that time elapsed is stored after exiting the context."""
    with Stopwatch() as sw:
        time.sleep(1)
    time.sleep(1)
    assert pytest.approx(1, abs=ROUND_ERROR) == sw.elapsed_time
def test_stopwatch_elapsed_time_while_running():
    """Tests that elapsed_time increments as expected while running."""
    with Stopwatch() as sw:
        assert pytest.approx(0, abs=ROUND_ERROR) == sw.elapsed_time
        time.sleep(1)
        assert pytest.approx(1, abs=ROUND_ERROR) == sw.elapsed_time
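# The three tests above, together with `Stopwatch(time.process_time)` in
# `evaluate_individual`, pin down the expected contract: a context manager
# with a pluggable clock whose `elapsed_time` is 0 before entry, live while
# inside the block, and frozen after exit. A minimal sketch consistent with
# that contract (an assumption for illustration, not GAMA's implementation):
import time
from typing import Callable


class _StopwatchSketch:
    """Context manager measuring elapsed time with a pluggable clock."""

    def __init__(self, timing_function: Callable[[], float] = time.time):
        self._timing_function = timing_function
        self._start = 0.0
        self._end = 0.0
        self._running = False

    def __enter__(self):
        self._start = self._timing_function()
        self._running = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._end = self._timing_function()
        self._running = False
        return False  # never suppress exceptions from the block

    @property
    def elapsed_time(self) -> float:
        if self._running:
            return self._timing_function() - self._start
        return self._end - self._start  # 0.0 if the stopwatch never ran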