def check_methods_have_no_side_effects(Estimator):
    """Check that estimator methods leave their input arguments unmodified.

    An instance is built with ``_construct_instance`` and fitted; afterwards
    every method named in ``NON_STATE_CHANGING_METHODS`` that the instance
    exposes is called. Arguments are deep-copied before each call and compared
    against the originals with ``deep_equals`` after the call.

    Parameters
    ----------
    Estimator : class or object
        Estimator class to check. If a non-class object is passed, its type
        is used instead.

    Raises
    ------
    AssertionError
        If the arguments of ``fit`` or of any checked method differ from the
        deep copy taken before the call.
    """
    estimator_cls = Estimator if isclass(Estimator) else type(Estimator)
    estimator = _construct_instance(estimator_cls)
    set_random_state(estimator)

    def _call_and_compare(method):
        # snapshot the arguments, invoke the method, then compare to snapshot
        call_args = _make_args(estimator=estimator, method=method)
        snapshot = deepcopy(call_args)
        getattr(estimator, method)(*call_args)
        assert deep_equals(
            snapshot, call_args
        ), f"Estimator: {estimator} has side effects on arguments of {method}"

    # fit comes first, the remaining methods are called on the fitted instance
    _call_and_compare("fit")

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            _call_and_compare(method)
def test_reconstruct_identical(scitype, mtype, fixture_index, iterate_as):
    """Test that VectorizedDF.reconstruct recreates the original input X.

    Fixtures parameterized
    ----------------------
    scitype : str - scitype of fixture
    mtype : str - mtype of fixture
    fixture_index : int - index of fixture tuple with that scitype and mtype
    iterate_as : str - level on which to iterate over

    Raises
    ------
    AssertionError if reconstructing from the list of iterated elements does
        not equal ``X_multiindex`` (default call), or does not equal the
        original fixture (``convert_back=True``)
    error if the check itself raises an error
    """
    # Panel iterated as Panel is an invalid combination, nothing to test
    if scitype == "Panel" and iterate_as == "Panel":
        return None

    examples = get_examples(mtype=mtype, as_scitype=scitype)
    fixture = examples.get(fixture_index)

    # wrap the fixture and collect the elements it iterates over
    X_vect = VectorizedDF(X=fixture, iterate_as=iterate_as, is_scitype=None)
    X_list = list(X_vect)

    # without convert_back, the result is compared to the multiindex form
    rebuilt_multiindex = X_vect.reconstruct(X_list)
    assert deep_equals(rebuilt_multiindex, X_vect.X_multiindex)

    # with convert_back, the result is compared to the fixture passed in
    rebuilt_original = X_vect.reconstruct(X_list, convert_back=True)
    assert deep_equals(rebuilt_original, fixture)
def test_methods_have_no_side_effects(self, estimator_instance, scenario):
    """Check that calling methods has no side effects on args.

    The scenario is run for ``fit`` and then for every method in
    ``NON_STATE_CHANGING_METHODS`` the estimator is capable of. The arguments
    returned by the run are compared with the arguments held by the scenario.

    Raises
    ------
    AssertionError
        If, for ``fit`` or any checked method, the two sets of arguments
        are not ``deep_equals``.
    """
    estimator = estimator_instance
    set_random_state(estimator)

    # run fit and compare the args reported by the run with the scenario's own
    _, returned = scenario.run(
        estimator, method_sequence=["fit"], return_args=True
    )
    args_unchanged = deep_equals(scenario.args["fit"], returned[0])
    assert (
        args_unchanged
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if not _has_capability(estimator, method):
            continue

        # run the method itself, then fetch the scenario's args for it
        _, returned = scenario.run(
            estimator, method_sequence=[method], return_args=True
        )
        args_from_run = returned[0]
        args_from_scenario = scenario.get_args(method, estimator)
        assert deep_equals(
            args_from_run, args_from_scenario
        ), f"Estimator: {estimator} has side effects on arguments of {method}"
def test_iteration(scitype, mtype, fixture_index, iterate_as):
    """Test that the different ways of iterating a VectorizedDF agree.

    Elements are collected by integer indexing, by iterating the object, and
    via ``as_list``. Each result must be a list and all three must be equal.

    Fixtures parameterized
    ----------------------
    scitype : str - scitype of fixture
    mtype : str - mtype of fixture
    fixture_index : int - index of fixture tuple with that scitype and mtype
    iterate_as : str - level on which to iterate over
    """
    # Panel iterated as Panel is an invalid combination, nothing to test
    if scitype == "Panel" and iterate_as == "Panel":
        return None

    examples = get_examples(mtype=mtype, as_scitype=scitype)
    fixture = examples.get(fixture_index)
    X_vect = VectorizedDF(X=fixture, iterate_as=iterate_as, is_scitype=None)

    # route 1: __len__ plus __getitem__
    n_items = len(X_vect)
    via_index = [X_vect[i] for i in range(n_items)]
    assert isinstance(via_index, list)

    # route 2: the iterator protocol, deliberately spelled as a comprehension
    via_iter = [element for element in X_vect]
    assert isinstance(via_iter, list)

    # route 3: the explicit as_list method
    via_as_list = X_vect.as_list()
    assert isinstance(via_as_list, list)

    # all three routes must produce the same elements
    assert deep_equals(via_index, via_iter)
    assert deep_equals(via_iter, via_as_list)
def test_methods_do_not_change_state(self, estimator_instance, scenario):
    """Check that non-state-changing methods do not change state.

    For each method in ``NON_STATE_CHANGING_METHODS`` the estimator is capable
    of, the estimator is fitted through the scenario, its ``__dict__`` is
    copied, the method is run, and the ``__dict__`` afterwards is compared
    with the copy. This covers every attribute stored in ``__dict__``.

    Raises
    ------
    AssertionError
        If ``__dict__`` after the method call is not ``deep_equals`` to the
        copy taken after fit; the message includes the discrepancy reported
        by ``deep_equals``.
    """
    estimator = estimator_instance
    set_random_state(estimator)

    for method in NON_STATE_CHANGING_METHODS:
        if not _has_capability(estimator, method):
            continue

        # state after fit, before the method under test
        # NOTE(review): this is a shallow copy; in-place mutation of an
        # attribute value would presumably show up in both dicts - confirm
        scenario.run(estimator, method_sequence=["fit"])
        state_before = dict(estimator.__dict__)

        # state after the method under test has been run
        scenario.run(estimator, method_sequence=[method])
        state_after = estimator.__dict__

        unchanged, reason = deep_equals(
            state_after, state_before, return_msg=True
        )
        assert unchanged, (
            f"Estimator: {type(estimator).__name__} changes __dict__ "
            f"during {method}, "
            f"reason/location of discrepancy (x=after, y=before): {reason}"
        )
def test_mock_univariate_forecaster_log(y, X_train, X_pred, fh):
    """Test the log written by MockUnivariateForecasterLogger.

    Calls ``fit``, ``predict``, ``update`` and ``predict_quantiles`` in that
    order and checks that ``forecaster.log`` equals the expected list of
    ``(private method name, arguments dict)`` entries.

    Raises
    ------
    AssertionError
        If the recorded log is not ``deep_equals`` to the expected log.
    """
    quantile_alpha = [0.1, 0.9]

    forecaster = MockUnivariateForecasterLogger()
    forecaster.fit(y, X_train, fh)
    forecaster.predict(fh, X_pred)
    forecaster.update(y, X_train, fh)
    forecaster.predict_quantiles(fh=fh, X=X_pred, alpha=quantile_alpha)

    # expected exogeneous data: module-level frames, or None if none was passed
    logged_X_train = None if X_train is None else deepcopy(X_frame_train)
    logged_X_pred = None if X_pred is None else deepcopy(X_frame_pred)

    fit_entry = ("_fit", {"y": y_series, "X": logged_X_train, "fh": fh})
    predict_entry = ("_predict", {"fh": fh, "X": logged_X_pred})
    # the third positional argument of update is expected as update_params
    update_entry = (
        "_update",
        {"y": y_series, "X": logged_X_train, "update_params": fh},
    )
    quantiles_entry = (
        "_predict_quantiles",
        {"fh": fh, "X": logged_X_pred, "alpha": [0.1, 0.9]},
    )
    expected_log = [fit_entry, predict_entry, update_entry, quantiles_entry]

    assert deep_equals(forecaster.log, expected_log)
def test_featureunion_transform_cols():
    """Test FeatureUnion name and number of columns.

    Three ``ExponentTransformer`` instances are combined with ``+`` and the
    result is fit-transformed on a two-column DataFrame. The output columns
    must equal the six expected ``ExponentTransformer_<k>__<column>`` names.
    """
    X = pd.DataFrame({"test1": [1, 2], "test2": [3, 4]})

    first = ExponentTransformer(power=2)
    second = ExponentTransformer(power=5)
    third = ExponentTransformer(power=3)

    union = first + second + third
    Xt = union.fit_transform(X)

    expected_names = [
        "ExponentTransformer_1__test1",
        "ExponentTransformer_1__test2",
        "ExponentTransformer_2__test1",
        "ExponentTransformer_2__test2",
        "ExponentTransformer_3__test1",
        "ExponentTransformer_3__test2",
    ]
    expected_cols = pd.Index(expected_names)

    msg = (
        "FeatureUnion creates incorrect column names for DataFrame output. "
        f"Expected: {expected_cols}, found: {Xt.columns}"
    )
    columns_match = deep_equals(Xt.columns, expected_cols)
    assert columns_match, msg
def test_deep_equals_negative(fixture1, fixture2):
    """Tests that deep_equals correctly identifies unequal objects as unequal.

    Parameters
    ----------
    fixture1, fixture2 : object
        Two objects expected to be different from each other. Independent
        deep copies of them are compared.

    Raises
    ------
    AssertionError
        If ``deep_equals`` returns a truthy value for the two copies.
    """
    x = deepcopy(fixture1)
    y = deepcopy(fixture2)

    # the function under test is deep_equals, so the message names it
    msg = (
        f"deep_equals incorrectly returned True when comparing "
        f"the following, different objects: x={x}, y={y}"
    )
    assert not deep_equals(x, y), msg
def test_deep_equals_positive(fixture):
    """Tests that deep_equals correctly identifies equal objects as equal.

    Parameters
    ----------
    fixture : object
        Object of which two independent deep copies are compared.

    Raises
    ------
    AssertionError
        If ``deep_equals`` returns a falsy value for the two copies.
    """
    x = deepcopy(fixture)
    y = deepcopy(fixture)

    # the function under test is deep_equals, so the message names it
    msg = (
        f"deep_equals incorrectly returned False for two identical copies of "
        f"the following object: {x}"
    )
    assert deep_equals(x, y), msg
def test_convert(scitype, from_mtype, to_mtype, fixture_index):
    """Tests that conversions for scitype agree with from/to example fixtures.

    The conversion is only attempted when both fixtures exist, are not None,
    and the "from" fixture is flagged as not lossy.

    Parameters
    ----------
    scitype : str - scitype of the fixtures
    from_mtype : str - mtype of "from" conversion to test, belongs to scitype
    to_mtype : str - mtype of conversion target ("to") to test, belongs to scitype
    fixture_index : int - index of fixture tuple to use for conversion

    Raises
    ------
    AssertionError if a converted object does not match fixture
    error if conversion itself raises an error
    """

    def _lookup(mtype):
        # examples are retrieved with return_lossy=True; below, element 0 is
        # used as the object and element 1 as the lossy indicator
        examples = get_examples(
            mtype=mtype, as_scitype=scitype, return_lossy=True
        )
        return examples.get(fixture_index)

    from_fixture = _lookup(from_mtype)
    to_fixture = _lookup(to_mtype)

    # target fixture is in the example dict and is not None
    target_ok = to_fixture is not None and to_fixture[0] is not None
    # source fixture is in the example dict and is not None
    source_ok = from_fixture is not None and from_fixture[0] is not None
    # source fixture has a lossy flag, and the flag is falsy
    source_lossless = (
        source_ok and from_fixture[1] is not None and not from_fixture[1]
    )

    msg = (
        f"conversion {from_mtype} to {to_mtype} failed for fixture {fixture_index}, "
        "expected result (y) and converted result (x) are not equal because: "
    )

    if not (target_ok and source_ok and source_lossless):
        return

    converted = convert(
        obj=from_fixture[0],
        from_type=from_mtype,
        to_type=to_mtype,
        as_scitype=scitype,
    )
    equals, deep_equals_msg = deep_equals(
        converted, to_fixture[0], return_msg=True
    )
    assert equals, msg + deep_equals_msg
def _transform(self, X, X2=None):
    """Compute distance/kernel matrix.

    Core logic.

    Behaviour: returns a matrix whose (i, j)-th entry is the aggregate,
    via ``self.aggfunc`` (``np.mean`` if ``None``), of
    ``self.transformer.transform(X[i], X2[j])``.
    If X2 is not passed, it is taken to be equal to X.

    When the transformer carries the tag ``"symmetric"``, the aggregation
    function is symmetric, and X equals X2, only entries with ``j >= i`` are
    computed and the remaining entries are mirrored.

    NOTE(review): an earlier docstring stated that non-numeric DataFrame
    columns are removed before computation; nothing in this method does so,
    presumably this happens in the caller - confirm.

    Parameters
    ----------
    X : indexable collection of length n
        ``X[i]`` is passed as first argument to the inner transformer.
    X2 : indexable collection of length m, optional, default = X
        ``X2[j]`` is passed as second argument to the inner transformer.

    Returns
    -------
    distmat : np.array of shape [n, m], dtype float
        (i,j)-th entry contains distance/kernel between X[i] and X2[j]
    """
    if X2 is None:
        # documented default: compare X with itself; equality is then known
        # without a deep comparison
        X2 = X
        X_equals_X2 = True
    else:
        X_equals_X2 = deep_equals(X, X2)

    n = len(X)
    m = len(X2)

    aggfunc = self.aggfunc
    aggfunc_is_symm = self.aggfunc_is_symm
    if aggfunc is None:
        # default aggregation is the mean, which is symmetric
        aggfunc = np.mean
        aggfunc_is_symm = True

    transformer_symm = self.transformer.get_tag("symmetric", False)

    # whether we know that resulting matrix must be symmetric
    # a sufficient condition for this:
    # transformer is symmetric; X equals X2; and aggfunc is symmetric
    all_symm = aggfunc_is_symm and transformer_symm and X_equals_X2

    distmat = np.zeros((n, m), dtype="float")

    for i in range(n):
        for j in range(m):
            if all_symm and j < i:
                # entry (j, i) was filled while processing row j < i
                distmat[i, j] = distmat[j, i]
            else:
                distmat[i, j] = aggfunc(self.transformer.transform(X[i], X2[j]))

    return distmat
def test_convert_to_without_scitype():
    """Test that convert_to works when no scitype is passed.

    An example of mtype ``MTYPES_SERIES[1]`` is converted to
    ``MTYPES_SERIES[0]`` without ``as_scitype``; the result must equal the
    example stored for ``MTYPES_SERIES[0]``.
    """
    scitype = SCITYPES[0]
    source_mtype = MTYPES_SERIES[1]
    target_mtype = MTYPES_SERIES[0]

    from_fixt = get_examples(mtype=source_mtype, as_scitype=scitype).get(0)
    # expected result is the stored example of the target mtype
    exp_fixt = get_examples(mtype=target_mtype, as_scitype=scitype).get(0)

    # no as_scitype argument here, convert_to has to work without it
    converted = convert_to(from_fixt, to_type=MTYPES_SERIES[0])

    msg = "convert_to call without scitype does not seem to work."
    matches = deep_equals(converted, exp_fixt)
    assert matches, msg
def test_convert_to_simple():
    """Test that a basic convert_to call works.

    An example of mtype ``MTYPES_SERIES[1]`` is converted to
    ``MTYPES_SERIES[0]`` with the scitype passed explicitly; the result must
    equal the example stored for ``MTYPES_SERIES[0]``.
    """
    scitype = SCITYPES[0]
    source_mtype = MTYPES_SERIES[1]
    target_mtype = MTYPES_SERIES[0]

    from_fixt = get_examples(mtype=source_mtype, as_scitype=scitype).get(0)
    # expected result is the stored example of the target mtype
    exp_fixt = get_examples(mtype=target_mtype, as_scitype=scitype).get(0)

    converted = convert_to(
        from_fixt, to_type=MTYPES_SERIES[0], as_scitype=scitype
    )

    msg = "convert_to basic call does not seem to work."
    matches = deep_equals(converted, exp_fixt)
    assert matches, msg
def test_convert_to_mtype_list_different_scitype():
    """Test convert_to when to_type is a list spanning different scitypes.

    The target list holds the first two Series mtypes and the first two Panel
    mtypes. For each of the two scitypes, one input whose mtype is on the
    list and one whose mtype is not are converted. On-list inputs are
    expected back unchanged; off-list inputs are expected as the example of
    the first listed mtype of the same scitype.
    """
    target_list = MTYPES_SERIES[:2] + MTYPES_PANEL[:2]

    scitype0 = SCITYPES[0]
    scitype1 = SCITYPES[1]

    def _example(mtype, scitype):
        # first example stored for the given mtype / scitype combination
        return get_examples(mtype=mtype, as_scitype=scitype).get(0)

    # inputs, in order: on-list/scitype0, off-list/scitype0,
    # on-list/scitype1, off-list/scitype1
    inputs = [
        _example(MTYPES_SERIES[1], scitype0),
        _example(MTYPES_SERIES[2], scitype0),
        _example(MTYPES_PANEL[1], scitype1),
        _example(MTYPES_PANEL[2], scitype1),
    ]

    # expectations: same mtype if on the list, otherwise the mtype of the
    # same scitype that appears earliest on the list
    exp_fixt_on_0 = _example(MTYPES_SERIES[1], scitype0)
    exp_fixt_on_1 = _example(MTYPES_PANEL[1], scitype1)
    exp_fixt_off_0 = _example(MTYPES_SERIES[0], scitype0)
    exp_fixt_off_1 = _example(MTYPES_PANEL[0], scitype1)
    expected = [exp_fixt_on_0, exp_fixt_off_0, exp_fixt_on_1, exp_fixt_off_1]

    # all conversions are carried out before anything is compared
    converted = [convert_to(obj, to_type=target_list) for obj in inputs]

    msg = "convert_to call does not work with list for to_type of different scitypes."
    for actual, wanted in zip(converted, expected):
        assert deep_equals(actual, wanted), msg
def test_methods_have_no_side_effects(estimator_instance):
    """Check that calling methods has no side effects on args.

    The estimator is fitted, then each method in ``NON_STATE_CHANGING_METHODS``
    that it exposes is called. Arguments are deep-copied before every call and
    compared with ``deep_equals`` after it.

    Raises
    ------
    AssertionError
        If the arguments of ``fit`` or of any checked method differ from the
        deep copy taken before the call.
    """
    estimator = estimator_instance
    set_random_state(estimator)

    # fit first; the remaining methods are called on the fitted estimator
    fit_args = _make_args(estimator, "fit")
    fit_args_snapshot = deepcopy(fit_args)
    estimator.fit(*fit_args)
    fit_args_intact = deep_equals(fit_args_snapshot, fit_args)
    assert (
        fit_args_intact
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if not hasattr(estimator, method):
            continue

        call_args = _make_args(estimator, method)
        call_args_snapshot = deepcopy(call_args)
        bound_method = getattr(estimator, method)
        bound_method(*call_args)

        assert deep_equals(
            call_args_snapshot, call_args
        ), f"Estimator: {estimator} has side effects on arguments of {method}"
def test_convert_to_mtype_list():
    """Test convert_to when to_type is a list of mtypes of one scitype.

    The target list holds the first two Series mtypes. An input whose mtype
    is on the list is expected back unchanged; an input whose mtype is not on
    the list is expected as the example of the first mtype on the list.
    """
    target_list = MTYPES_SERIES[:2]
    scitype = SCITYPES[0]

    def _example(mtype):
        # first example stored for the given mtype
        return get_examples(mtype=mtype, as_scitype=scitype).get(0)

    # inputs: one with an mtype on the list, one with an mtype off the list
    from_fixt_on = _example(MTYPES_SERIES[1])
    from_fixt_off = _example(MTYPES_SERIES[2])

    # expectations: unchanged if on the list, first listed mtype otherwise
    exp_fixt_on = _example(MTYPES_SERIES[1])
    exp_fixt_off = _example(MTYPES_SERIES[0])

    # both conversions are carried out before anything is compared
    converted_on = convert_to(from_fixt_on, to_type=target_list)
    converted_off = convert_to(from_fixt_off, to_type=target_list)

    msg = "convert_to call does not work with list for to_type."
    comparisons = [(converted_on, exp_fixt_on), (converted_off, exp_fixt_off)]
    for actual, wanted in comparisons:
        assert deep_equals(actual, wanted), msg
def test_methods_do_not_change_state(estimator_instance, scenario):
    """Check that non-state-changing methods do not change state.

    For each method in ``NON_STATE_CHANGING_METHODS`` the estimator is capable
    of, the estimator is fitted through the scenario, its ``__dict__`` is
    copied, the method is run, and the ``__dict__`` afterwards is compared
    with the copy. The comparison is skipped for ``transform`` when the class
    tag ``"fit-in-transform"`` is set, and for ``predict`` when the class tag
    ``"fit-in-predict"`` is set.

    Raises
    ------
    AssertionError
        If ``__dict__`` after the method call is not ``deep_equals`` to the
        copy taken after fit; the message includes the discrepancy reported
        by ``deep_equals``.
    """
    # methods that are allowed to change state when the paired class tag is
    # set: such estimators fit on the data passed to transform / predict
    state_change_allowed_tag = {
        "transform": "fit-in-transform",
        "predict": "fit-in-predict",
    }

    estimator = estimator_instance
    set_random_state(estimator)

    for method in NON_STATE_CHANGING_METHODS:
        if not _has_capability(estimator, method):
            continue

        # state after fit, before the method under test
        scenario.run(estimator, method_sequence=["fit"])
        state_before = dict(estimator.__dict__)

        # state after the method under test has been run
        scenario.run(estimator, method_sequence=[method])
        state_after = estimator.__dict__

        # the method is still run above; only the comparison is skipped
        exempting_tag = state_change_allowed_tag.get(method)
        if exempting_tag is not None and estimator.get_class_tag(exempting_tag):
            continue

        unchanged, reason = deep_equals(
            state_after, state_before, return_msg=True
        )
        assert unchanged, (
            f"Estimator: {type(estimator).__name__} changes __dict__ "
            f"during {method}, "
            f"reason/location of discrepancy (x=after, y=before): {reason}"
        )
def test_convert(scitype):
    """Tests that conversions for scitype agree with from/to example fixtures.

    For every fixture index and every ordered pair of mtypes, the "from"
    fixture is converted and compared with the "to" fixture. A pair is only
    tested if both fixtures exist and are not None, the "from" fixture is
    flagged as not lossy, and the conversion matrix marks the conversion as
    defined.

    Parameters
    ----------
    scitype : str - name of scitype for which mtype conversions are tested

    Raises
    ------
    RuntimeError if no mtypes or no fixtures are found for scitype
    AssertionError if a converted object does not match fixture
    error if conversion itself raises an error
    """
    conv_mat = _conversions_defined(scitype)
    mtypes = conv_mat.index.values

    if len(mtypes) == 0:
        raise RuntimeError("no mtypes defined for scitype " + scitype)

    # one dict of examples per mtype, retrieved with return_lossy=True
    fixtures = {}
    for mtype in mtypes:
        # if we don't do this we get into a clash between linters
        mtype_long_variable_name_to_avoid_linter_clash = mtype
        fixtures[mtype] = get_examples(
            mtype=mtype_long_variable_name_to_avoid_linter_clash,
            as_scitype=scitype,
            return_lossy=True,
        )

    # by convention, all fixtures are mirrored across all mtypes
    # so the number of fixtures of the first mtype is used for all of them
    n_fixtures = len(fixtures[mtypes[0]])
    if n_fixtures == 0:
        raise RuntimeError("no fixtures defined for scitype " + scitype)

    for i in range(n_fixtures):
        for from_type in mtypes:
            for to_type in mtypes:
                to_fixture = fixtures[to_type].get(i)
                from_fixture = fixtures[from_type].get(i)

                # target fixture is in the example dict and is not None
                target_ok = to_fixture is not None and to_fixture[0] is not None
                # source fixture is in the example dict and is not None
                source_ok = (
                    from_fixture is not None and from_fixture[0] is not None
                )
                # source fixture has a lossy flag, and the flag is falsy
                source_lossless = (
                    source_ok
                    and from_fixture[1] is not None
                    and not from_fixture[1]
                )
                # conversion matrix entry for this from/to pair
                implemented = conv_mat[to_type][from_type]

                msg = f"conversion {from_type} to {to_type} failed for fixture {i}"

                testable = (
                    target_ok and source_ok and source_lossless and implemented
                )
                if not testable:
                    continue

                converted = convert(
                    obj=from_fixture[0],
                    from_type=from_type,
                    to_type=to_type,
                    as_scitype=scitype,
                )
                assert deep_equals(converted, to_fixture[0]), msg