def add_message(uuid):
    """Suggest subreddits for a draft submission sent by the Chrome extension.

    The JSON request body must provide ``title``, ``text``, ``threshold`` and
    ``max_per_model``.  Title and text are joined with a single space,
    vectorized, and scored by every model in ``clf``.  For each model the
    ``max_per_model`` highest-scoring classes are considered, and only those
    whose decision value is strictly greater than ``threshold`` are kept.

    ``uuid`` is accepted but not used by the body.

    Returns the JSON response holding the kept class labels, concatenated in
    model order (labels selected by several models appear several times).
    """
    payload = request.json
    title = payload["title"]
    text = payload["text"]
    threshold = payload["threshold"]
    max_predicted_classes = payload["max_per_model"]

    features = vectorizer.transform([title + " " + text])

    selected_predictions = []
    for model in clf:
        # assume_finite=True skips scikit-learn's finiteness validation.
        with sklearn.config_context(assume_finite=True):
            scores = model.decision_function(features)
        # Class indices ordered from highest to lowest decision value.
        ranked = scores.argsort()[0][::-1]
        candidates = ranked[:max_predicted_classes]
        above_threshold = scores[0][candidates] > threshold
        selected_predictions.extend(model.classes_[candidates[above_threshold]])
    return jsonify(selected_predictions)
def test_knn_imputer_distance_weighted_not_enough_neighbors(na, working_memory):
    """Distance-weighted imputation when fewer donors exist than n_neighbors.

    The expected values are distance-weighted averages over rows 3 and 4
    only, and the same result is required for ``n_neighbors`` 3 and 4.
    """
    X = np.array([[3, na], [2, na], [na, 4], [5, 6], [6, 8], [na, 5]])
    dist = pairwise_distances(
        X, metric="nan_euclidean", squared=False, missing_values=na
    )

    donors = slice(3, 5)

    def weighted(row, col):
        # Inverse-distance weighted mean of the donor values in ``col``.
        return np.average(X[donors, col], weights=1 / dist[row, donors])

    X_expected = np.array([
        [3, weighted(0, 1)],
        [2, weighted(1, 1)],
        [weighted(2, 0), 4],
        [5, 6],
        [6, 8],
        [weighted(5, 0), 5],
    ])

    with config_context(working_memory=working_memory):
        for n_neighbors in (3, 4):
            imputer = KNNImputer(
                missing_values=na, n_neighbors=n_neighbors, weights="distance"
            )
            assert_allclose(imputer.fit_transform(X), X_expected)
def test_get_chunk_n_rows_warns():
    """Check that warning is raised when working_memory is too low."""
    working_memory = 1
    expected = 1
    # One byte more than 1 MiB per row: a single row cannot fit in 1 MiB.
    common = dict(row_bytes=1024 * 1024 + 1, max_n_rows=None)
    warn_msg = (
        "Could not adhere to working_memory config. Currently 1MiB, 2MiB required."
    )

    def check(result):
        assert result == expected
        assert type(result) is type(expected)

    # working_memory passed explicitly.
    with pytest.warns(UserWarning, match=warn_msg):
        actual = get_chunk_n_rows(working_memory=working_memory, **common)
    check(actual)

    # working_memory taken from the active configuration.
    with config_context(working_memory=working_memory):
        with pytest.warns(UserWarning, match=warn_msg):
            actual = get_chunk_n_rows(**common)
        check(actual)
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function, y_is_x):
    """Sequential and parallel runs must agree for data-derived metric params.

    ``dist_function`` is run with ``n_jobs`` under a small working memory and
    its stacked output is compared to scipy's result, both with the metric
    parameter passed explicitly and with scipy's own default.
    """
    # to have more than 1 chunk
    with config_context(working_memory=1):
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            param_data = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
        else:
            Y = rng.random_sample((1000, 10))
            param_data = np.vstack([X, Y])
            expected_dist_default_params = cdist(X, Y, metric=metric)

        # The metric parameter is derived from the data being compared.
        if metric == "seuclidean":
            params = {'V': np.var(param_data, axis=0, ddof=1)}
        else:
            params = {'VI': np.linalg.inv(np.cov(param_data.T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        chunks = tuple(dist_function(X, Y, metric=metric, n_jobs=n_jobs))
        dist = np.vstack(chunks)

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
def run_bench(repeat=10, verbose=False):
    """Benchmark KNeighborsClassifier and return the results as a DataFrame.

    The grid is run with scikit-learn's ``assume_finite`` enabled; the total
    wall-clock time is printed before returning.
    """
    cdist_options = {KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}
    pbefore = {
        'n_neighbors': [2, 5],
        'leaf_size': [10],
        'dim': [2, 5],
        'onnx_options': [None, cdist_options],
        'metric': ["euclidean"],
    }
    pafter = {'N': [1, 10, 100]}

    def test(dim=None, **opts):
        return OnnxRuntimeBenchPerfTestBinaryClassification(
            KNeighborsClassifier, dim=dim, **opts)

    runner = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(runner.enumerate_run_benchs(
            repeat=repeat, verbose=verbose, stop_if_error=False))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
def run_bench(repeat=10, verbose=False):
    """Benchmark RandomForestClassifier and return the results as a DataFrame.

    The grid is run with scikit-learn's ``assume_finite`` enabled; the total
    wall-clock time is printed before returning.
    """
    pbefore = {
        'dim': [1, 5, 10, 100],
        'max_depth': [2, 10],
        'n_estimators': [1, 10, 100, 1000, 10000],
        'onnx_options': [{RandomForestClassifier: {'zipmap': False}}],
    }
    pafter = {'N': [1, 10, 100]}

    def test(dim=None, **opts):
        return OnnxRuntimeBenchPerfTestBinaryClassification(
            RandomForestClassifier, dim=dim, **opts)

    runner = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(runner.enumerate_run_benchs(
            repeat=repeat, verbose=verbose, stop_if_error=False))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
def already_posted(uuid):
    """Suggest other subreddits for a submission that already exists.

    The JSON request body must provide ``url``, ``threshold`` and
    ``max_per_model``.  The submission's title and self-text are fetched
    through the Reddit API, joined with a single space, vectorized, and
    scored by every model in ``clf``.  For each model the ``max_per_model``
    highest-scoring classes are considered, and only those whose decision
    value is strictly greater than ``threshold`` are kept.

    The subreddit the submission is already in is never suggested.

    ``uuid`` is accepted but not used by the body.

    Returns the JSON response holding the kept class labels in model order.
    """
    content = request.json
    submission = praw.models.Submission(reddit, url=content["url"])
    threshold = content["threshold"]
    max_predicted_classes = content["max_per_model"]
    title = submission.title
    text = submission.selftext
    subreddit = submission.subreddit

    X = vectorizer.transform([title + " " + text])

    selected_predictions = []
    for model in clf:
        # assume_finite=True skips scikit-learn's finiteness validation.
        with sklearn.config_context(assume_finite=True):
            my_dec = model.decision_function(X)
        # Class indices ordered from highest to lowest decision value.
        argsorted_dec = my_dec.argsort()[0][::-1]
        candidates = argsorted_dec[:max_predicted_classes]
        kept = candidates[my_dec[0][candidates] > threshold]
        selected_predictions += list(model.classes_[kept])

    # Remove prediction if it is the same subreddit you are in.  Several
    # models may each predict it, so drop every occurrence: list.remove()
    # would only delete the first one and leave the others in the response.
    selected_predictions = [
        label for label in selected_predictions if not (subreddit == label)
    ]
    return jsonify(selected_predictions)
def test_one_estimator_print_change_only(print_changed_only):
    """The HTML repr must embed the text repr for either print setting."""
    estimator = PCA(n_components=10)

    with config_context(print_changed_only=print_changed_only):
        text_repr = str(estimator)
        assert text_repr in estimator_html_repr(estimator)
def test_kwargs_in_init():
    """repr handles estimators that take extra parameters through **kwargs."""
    # Make sure the changed_only=True mode is OK when an argument is passed as
    # kwargs.
    # Non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/17206

    class WithKWargs(BaseEstimator):
        # Estimator with a kwargs argument. These need to hack around
        # set_params and get_params. Here we mimic what LightGBM does.
        def __init__(self, a='willchange', b='unchanged', **kwargs):
            self.a = a
            self.b = b
            self._other_params = {}
            self.set_params(**kwargs)

        def get_params(self, deep=True):
            # Declared parameters first, then the ones received as kwargs.
            return {**super().get_params(deep=deep), **self._other_params}

        def set_params(self, **params):
            for name, value in params.items():
                setattr(self, name, value)
                self._other_params[name] = value
            return self

    est = WithKWargs(a='something', c='abcd', d=None)

    changed_only = "WithKWargs(a='something', c='abcd', d=None)"
    assert changed_only == repr(est)

    with config_context(print_changed_only=False):
        everything = "WithKWargs(a='something', b='unchanged', c='abcd', d=None)"
        assert everything == repr(est)
def run_bench(repeat=10, verbose=False):
    """Benchmark GaussianProcessRegressor and return the results as a DataFrame.

    The grid is run with scikit-learn's ``assume_finite`` enabled; the total
    wall-clock time is printed before returning.
    """
    pbefore = {
        'dim': [1, 5, 10, 20],
        'alpha': [0.1, 1., 10.],
        'onnx_options': [None, {GaussianProcessRegressor: {'optim': 'cdist'}}],
        'dtype': [numpy.float32, numpy.float64],
    }
    pafter = {'N': [1, 10, 100, 1000]}

    def test(dim=None, **opts):
        return OnnxRuntimeBenchPerfTestRegression(
            GaussianProcessRegressor, dim=dim, N_fit=100, **opts)

    runner = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(runner.enumerate_run_benchs(
            repeat=repeat, verbose=verbose, stop_if_error=False))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
def main(m="LogisticRegression", e=100, n=10000, f=10, r=1000, a=True,
         o=True, j=2, opts=""):
    """
    Builds a model and benchmarks the model converted into ONNX.

    :param m: model name or experiment
    :param e: number of estimators or trees
    :param n: number of rows
    :param f: number of features
    :param r: number of repetitions
    :param a: assume finite or not
    :param o: compares to ONNX
    :param j: n_jobs
    :param opts: options
    """
    model_data = build_model(m, e, n, f, o, j, opts)

    def run():
        benchmark(model_data['model'], model_data.get('onnx', None),
                  model_data['data'], r)

    if not a:
        # Leave the scikit-learn configuration untouched.
        run()
        return
    with config_context(assume_finite=True):
        run()
def test_knn_imputer_with_simple_example(na, working_memory):
    """KNNImputer with default settings on a small hand-computed example."""
    X = np.array([
        [0, na, 0, na],
        [1, 1, 1, na],
        [2, 2, na, 2],
        [3, 3, 3, 3],
        [4, 4, 4, 4],
        [5, 5, 5, 5],
        [6, 6, 6, 6],
        [na, 7, 7, 7],
    ])

    # Expected fill values: means over the donor rows of each column.
    fill_r0c1 = np.mean(X[1:6, 1])
    fill_r0c3 = np.mean(X[2:-1, -1])
    fill_r1c3 = np.mean(X[2:-1, -1])
    fill_r2c2 = np.mean(X[[0, 1, 3, 4, 5], 2])
    fill_r7c0 = np.mean(X[2:-1, 0])

    X_imputed = np.array([
        [0, fill_r0c1, 0, fill_r0c3],
        [1, 1, 1, fill_r1c3],
        [2, 2, fill_r2c2, 2],
        [3, 3, 3, 3],
        [4, 4, 4, 4],
        [5, 5, 5, 5],
        [6, 6, 6, 6],
        [fill_r7c0, 7, 7, 7],
    ])

    with config_context(working_memory=working_memory):
        result = KNNImputer(missing_values=na).fit_transform(X)
        assert_allclose(result, X_imputed)
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function, y_is_x):
    """Chunked/parallel distances must match scipy for data-derived params.

    Covers metrics whose parameter is computed from the data: the stacked
    output of ``dist_function`` has to equal scipy's result both with the
    parameter given explicitly and with scipy's default.
    """
    # to have more than 1 chunk
    with config_context(working_memory=1):
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))
        Y = X if y_is_x else rng.random_sample((1000, 10))

        if y_is_x:
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            sample = X
        else:
            expected_dist_default_params = cdist(X, Y, metric=metric)
            sample = np.vstack([X, Y])

        # Parameter derived from the data taking part in the comparison.
        if metric == "seuclidean":
            params = {'V': np.var(sample, axis=0, ddof=1)}
        else:
            params = {'VI': np.linalg.inv(np.cov(sample.T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(
            tuple(dist_function(X, Y, metric=metric, n_jobs=n_jobs)))

        for expected in (expected_dist_explicit_params,
                         expected_dist_default_params):
            assert_allclose(dist, expected)
def run_bench(repeat=5, verbose=False):
    """Benchmark a set of named models on several datasets.

    The grid is run with scikit-learn's ``assume_finite`` enabled; the total
    wall-clock time is printed and the results are returned as a DataFrame.
    """
    model_names = [
        'XGB', 'LGB', 'SVR', 'NuSVR', 'RF', 'DT', 'ADA', 'MLP', 'LR', 'GBT',
        'KNN', 'KNN-cdist', 'HGB',
    ]
    pbefore = {
        'dim': [-1],
        'model': sorted(model_names),
        'norm': [False, True],
        'dataset': ["boston", "diabetes", "rndbin100"],
    }
    pafter = {'N': [
        1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000,
        50000,
    ]}

    def test(dim=None, **opts):
        # ``dim`` is accepted but not forwarded to the benchmark class.
        return DatasetsOrtBenchPerfTest(**opts)

    runner = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(runner.enumerate_run_benchs(
            repeat=repeat, verbose=verbose, stop_if_error=False))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
def benchmark(X, fct1, fct2, N, repeat=10, number=20):
    """Time ``fct1`` and ``fct2`` on ``X`` and return their time ratios.

    Both functions are first run once with fixed settings as a warm-up, then
    measured for every size in ``N`` with a repeat count that shrinks as the
    size grows.  Everything runs with scikit-learn's ``assume_finite``
    enabled.

    Returns a dict mapping each key of the first measurement, in sorted
    order, to ``ttime`` of ``fct2`` divided by ``ttime`` of ``fct1``.
    """

    def ti(r, n):
        # Scale the repeat count: small inputs are repeated more often.
        for upper, factor in ((1, 40), (10, 10), (100, 4)):
            if n <= upper:
                return factor * r
        if n <= 1000:
            return r
        return r // 2

    def measure(fct, time_kwargs):
        return benchmark_fct(
            fct, X, time_kwargs=time_kwargs, skip_long_test=False)

    with sklearn.config_context(assume_finite=True):
        # to warm up the engine
        warmup_kwargs = {n: dict(repeat=10, number=10) for n in N}
        measure(fct1, warmup_kwargs)
        measure(fct2, warmup_kwargs)

        # real measure
        real_kwargs = {n: dict(repeat=ti(repeat, n), number=number) for n in N}
        res1 = measure(fct1, real_kwargs)
        res2 = measure(fct2, real_kwargs)

    return {key: res2[key]['ttime'] / res1[key]['ttime']
            for key in sorted(res1)}
def fit(self, X, y=None, *, target_col=None):
    """Fit estimator.

    Exactly one of ``y`` and ``target_col`` must be given: either pass the
    target as a separate 1d array or Series ``y`` (scikit-learn fashion), or
    name the column of ``X`` that holds it.  When ``y`` is given, ``X`` is
    assumed not to contain the target.

    Parameters
    ----------
    X : DataFrame
        Input features. If target_col is specified, X also includes the
        target.
    y : Series or numpy array, optional.
        Target. You need to specify either y or target_col.
    target_col : string or int, optional
        Column name of target if included in X.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If neither or both of ``y`` and ``target_col`` are given.
    """
    # copy and paste from above?!
    # Both missing or both present: the two "is None" checks then agree.
    if (y is None) == (target_col is None):
        raise ValueError(
            "Need to specify either y or target_col.")

    X, y = _validate_Xyt(X, y, target_col, do_clean=False)
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)

    types = detect_types(X, type_hints=self.type_hints)
    self.feature_names_ = X.columns
    self.types_ = types

    y, self.scoring_ = self._preprocess_target(y)
    self.log_ = []

    # reimplement cross-validation so we only do preprocessing once
    preprocessor = EasyPreprocessor(verbose=self.verbose, types=types)
    pipe = Pipeline([('preprocessing', preprocessor),
                     ('classifier', DummyClassifier())])

    # One single-candidate grid per estimator.
    param_grid = [
        {'classifier': [candidate]} for candidate in self._get_estimators()
    ]
    search = GridSuccessiveHalving(
        ratio=3, estimator=pipe, param_grid=param_grid,
        force_exhaust_budget=self.force_exhaust_budget,
        verbose=self.verbose, cv=5, error_score='raise',
        scoring=self.scoring_, refit='recall_macro', n_jobs=self.n_jobs)
    self.search_ = search

    with sklearn.config_context(print_changed_only=True):
        search.fit(X, y)
    self.est_ = search.best_estimator_

    print("best classifier: ", search.best_params_['classifier'])
    print("best score: {:.3f}".format(search.best_score_))
    return self
def test_config_context_exception():
    """The previous setting is restored when the context body raises."""

    def assume_finite():
        return get_config()['assume_finite']

    assert assume_finite() is False
    try:
        with config_context(assume_finite=True):
            assert assume_finite() is True
            raise ValueError()
    except ValueError:
        # Expected: only the state after leaving the context matters.
        pass
    assert assume_finite() is False
def test_config_context_exception():
    """The previous configuration is restored when the context body raises."""

    def check_config(flag):
        assert_equal(get_config(), {'assume_finite': flag})

    check_config(False)
    try:
        with config_context(assume_finite=True):
            check_config(True)
            raise ValueError()
    except ValueError:
        # Expected: only the state after leaving the context matters.
        pass
    check_config(False)
def test_config_context():
    """config_context overrides settings only inside the ``with`` block."""

    def expected_config(assume_finite):
        return {
            'assume_finite': assume_finite,
            'working_memory': 1024,
            'print_changed_only': True,
            'display': 'text',
        }

    def assume_finite():
        return get_config()['assume_finite']

    assert get_config() == expected_config(False)

    # Not using as a context manager affects nothing
    config_context(assume_finite=True)
    assert assume_finite() is False

    with config_context(assume_finite=True):
        assert get_config() == expected_config(True)
    assert assume_finite() is False

    with config_context(assume_finite=True):
        with config_context(assume_finite=None):
            assert assume_finite() is True

        assert assume_finite() is True

        with config_context(assume_finite=False):
            assert assume_finite() is False

            with config_context(assume_finite=None):
                assert assume_finite() is False

                # global setting will not be retained outside of context that
                # did not modify this setting
                set_config(assume_finite=True)
                assert assume_finite() is True

            assert assume_finite() is False

        assert assume_finite() is True

    assert get_config() == expected_config(False)

    # No positional arguments
    assert_raises(TypeError, config_context, True)
    # No unknown arguments
    assert_raises(TypeError, config_context(do_something_else=True).__enter__)
def test_repr_html_wraps():
    """_repr_html_ is only usable when the display setting is 'diagram'."""
    # Checks the display configuration flag controls the html output
    estimator = DecisionTreeClassifier()

    expected_error = "_repr_html_ is only defined when"
    with pytest.raises(AttributeError, match=expected_error):
        estimator._repr_html_()

    with config_context(display='diagram'):
        html = estimator._repr_html_()
        assert "<style>" in html
def test_convert_arff_data_dataframe_warning_low_memory_pandas(monkeypatch):
    """fetch_openml warns when working_memory is too small for a dataframe."""
    pytest.importorskip('pandas')

    data_id = 1119
    _monkey_patch_webbased_functions(monkeypatch, data_id, True)

    expected_warning = 'Could not adhere to working_memory config.'
    with pytest.warns(UserWarning, match=expected_warning), \
            config_context(working_memory=1e-6):
        fetch_openml(data_id=data_id, as_frame=True, cache=False)
def test_repr_mimebundle_():
    """The mime bundle gains a text/html entry only in 'diagram' display."""
    # Checks the display configuration flag controls the json output
    estimator = DecisionTreeClassifier()

    bundle = estimator._repr_mimebundle_()
    assert "text/plain" in bundle
    assert "text/html" not in bundle

    with config_context(display='diagram'):
        bundle = estimator._repr_mimebundle_()
        assert "text/plain" in bundle
        assert "text/html" in bundle
def test_birch_duck_typing_meta():
    """HTML repr of Birch wrapping an estimator passed as ``n_clusters``."""
    # Test duck typing meta estimators with Birch
    inner = AgglomerativeClustering(n_clusters=3)
    birch = Birch(n_clusters=inner)
    html_output = estimator_html_repr(birch)

    # inner estimators do not show changes
    with config_context(print_changed_only=True):
        assert f"<pre>{birch.n_clusters!s}" in html_output
        assert "AgglomerativeClustering</label>" in html_output

    # outer estimator contains all changes
    assert f"<pre>{birch!s}" in html_output
def test_ovo_classifier_duck_typing_meta():
    """HTML repr of OneVsOneClassifier wrapping a LinearSVC."""
    # Test duck typing metaestimators with OVO
    ovo = OneVsOneClassifier(LinearSVC(penalty="l1"))
    html_output = estimator_html_repr(ovo)

    # inner estimators do not show changes
    with config_context(print_changed_only=True):
        assert f"<pre>{ovo.estimator!s}" in html_output
        assert "LinearSVC</label>" in html_output

    # outer estimator
    assert f"<pre>{ovo!s}" in html_output
def run_bench(repeat=100, verbose=False):
    """Run the PolyBenchPerfTest grid and return the results as a DataFrame.

    The grid is run with scikit-learn's ``assume_finite`` enabled; the total
    wall-clock time is printed before returning.
    """
    pbefore = {'dim': [5, 10, 50]}
    pafter = {'N': [10, 100, 1000]}
    runner = BenchPerf(pbefore, pafter, PolyBenchPerfTest)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(
            runner.enumerate_run_benchs(repeat=repeat, verbose=verbose))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
def preview(self, *, display: str = "text") -> str:
    """
    Create a text representation of the model.

    :param display: Value forwarded to ``config_context(display=...)`` while
        the model is converted to a string ('text' or 'diagram').
    :return: ``str(self.model)`` when ``self.model`` is truthy, otherwise an
        empty string.
    """
    if not self.model:
        return ""
    with config_context(display=display):
        return str(self.model)
def run_bench(repeat=100, verbose=False):
    """Run ``bench`` over fixed sizes, plot and return the results.

    The benchmark is run with scikit-learn's ``assume_finite`` enabled; the
    total wall-clock time is printed, the results are plotted and then
    returned as a DataFrame.
    """
    n_obs = [10, 100, 1000]
    n_features = [5, 10, 50]

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = bench(n_obs, n_features, repeat=repeat, verbose=verbose)
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))

    # plot the results
    plot_results(results_df, verbose=verbose)
    return results_df
def _run_skl_prediction(obs, check_runtime, assume_finite, inst, method_name, predict_kwargs, X_test, benchmark, debug, verbose, time_kwargs, skip_long_test, time_kwargs_fact, fLOG): if not check_runtime: return None # pragma: no cover if verbose >= 2 and fLOG is not None: fLOG("[enumerate_compatible_opset] check_runtime SKL {}-{}-{}-{}-{}". format(id(inst), method_name, predict_kwargs, time_kwargs, time_kwargs_fact)) with sklearn.config_context(assume_finite=assume_finite): # compute sklearn prediction obs['ort_version'] = ort_version try: meth = getattr(inst, method_name) except AttributeError as e: # pragma: no cover if debug: raise # pragma: no cover obs['_2skl_meth_exc'] = str(e) return e try: ypred, t4, ___ = _measure_time( lambda: meth(X_test, **predict_kwargs)) obs['lambda-skl'] = (lambda xo: meth(xo, **predict_kwargs), X_test) except ( ValueError, AttributeError, # pragma: no cover TypeError, MemoryError, IndexError) as e: if debug: raise # pragma: no cover obs['_3prediction_exc'] = str(e) return e obs['prediction_time'] = t4 obs['assume_finite'] = assume_finite if benchmark and 'lambda-skl' in obs: obs['bench-skl'] = benchmark_fct(*obs['lambda-skl'], obs=obs, time_kwargs=_multiply_time_kwargs( time_kwargs, time_kwargs_fact, inst), skip_long_test=skip_long_test) if verbose >= 3 and fLOG is not None: fLOG("[enumerate_compatible_opset] scikit-learn prediction") _dispsimple(ypred, fLOG) if verbose >= 2 and fLOG is not None: fLOG("[enumerate_compatible_opset] predictions stored") return ypred
def get_distance(x1, x2, triplet_similarity, mode='numpy'):
    """Return the row-wise (paired) distance between ``x1`` and ``x2``.

    Parameters
    ----------
    x1, x2 : numpy arrays or torch tensors
        Either a single vector each (1-D) or matching batches of vectors.
    triplet_similarity : str
        ``'cos'`` selects the cosine distance; any other value selects the
        euclidean distance.
    mode : str
        ``'numpy'`` keeps numpy inputs on the scikit-learn path.  Any other
        value is used as a torch device: numpy inputs are converted to float
        tensors on that device.

    Returns
    -------
    One distance per row pair, or a single element when the inputs were
    1-D.  The result is a numpy value on the scikit-learn path and a torch
    tensor otherwise.
    """
    # Promote single vectors to one-row batches and remember to unwrap.
    is_item = False
    if isinstance(x1, np.ndarray):
        if len(x1.shape) == 1:
            is_item = True
            x1 = np.expand_dims(x1, 0)
            x2 = np.expand_dims(x2, 0)
    elif len(x1.size()) == 1:
        is_item = True
        x1 = x1.unsqueeze(0)
        x2 = x2.unsqueeze(0)

    if mode != 'numpy' and isinstance(x1, np.ndarray):
        x1 = torch.FloatTensor(x1).to(mode)
        x2 = torch.FloatTensor(x2).to(mode)

    # latest needed: conda install -c anaconda scikit-learn
    if isinstance(x1, np.ndarray):
        # Decided after the 1-D expansion so a single pair is not parallelised.
        n_jobs = 8 if x1.shape[0] > 1 else 1
        metric = "cosine" if triplet_similarity == 'cos' else "euclidean"

        # Starts with an empty float64 array so the result dtype is float64.
        pieces = [np.zeros((0, ))]
        row_start = 0
        with sklearn.config_context(working_memory=1024):
            for chunk in sklearn.metrics.pairwise.pairwise_distances_chunked(
                    x1, x2, metric=metric, n_jobs=n_jobs):
                # Each chunk holds rows [row_start, row_start + len(chunk))
                # against every row of x2, so the paired distance of local
                # row j sits in column row_start + j, not on the main
                # diagonal of the chunk.
                pieces.append(np.diagonal(chunk, offset=row_start))
                row_start += chunk.shape[0]
        dist = np.concatenate(pieces, axis=0)
    elif triplet_similarity == 'cos':
        # -1 .. 1 => 0 .. 2
        dist = 1. - F.cosine_similarity(x1, x2, dim=1, eps=1e-20)
    else:
        dist = F.pairwise_distance(x1, x2, eps=1e-20)

    # NOTE(review): this branch cannot run. Whenever mode != 'numpy', numpy
    # inputs were already replaced by tensors above, so the result stays a
    # torch tensor. Kept as-is to avoid changing the return type for existing
    # callers; confirm whether converting back to numpy was intended.
    if mode != 'numpy' and isinstance(x1, np.ndarray):
        dist = dist.to('cpu').numpy()

    if is_item:
        dist = dist[0]
    return dist
def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected):
    """get_chunk_n_rows returns ``expected`` without emitting a UserWarning.

    Checked twice: with ``working_memory`` passed explicitly and with it
    taken from the active configuration.
    """

    def check(**extra):
        with warnings.catch_warnings():
            # Any UserWarning becomes an error and fails the test.
            warnings.simplefilter("error", UserWarning)
            actual = get_chunk_n_rows(
                row_bytes=row_bytes, max_n_rows=max_n_rows, **extra)
        assert actual == expected
        assert type(actual) is type(expected)

    check(working_memory=working_memory)

    with config_context(working_memory=working_memory):
        check()
def test_pairwise_distances_argmin_min(X_blobs):
    """dask-ml's pairwise_distances_argmin_min must match scikit-learn's."""
    centers = X_blobs[::100].compute()

    # X_blobs has 500 rows per block.
    # Ensure 500 rows in the scikit-learn version too.
    working_memory = float(80 * 500) / 2**20

    with sklearn.config_context(working_memory=working_memory):
        expected_idx, expected_dist = (
            sklearn.metrics.pairwise_distances_argmin_min(
                X_blobs.compute(), centers))
        lazy_idx, lazy_dist = dask_ml.metrics.pairwise_distances_argmin_min(
            X_blobs, centers)
        idx, dist = dask.compute(lazy_idx, lazy_dist)

    npt.assert_array_equal(idx, expected_idx)
    npt.assert_array_equal(dist, expected_dist)
def test_config_context():
    """config_context overrides settings only inside the ``with`` block."""

    def expected_config(assume_finite):
        return {'assume_finite': assume_finite, 'working_memory': 1024,
                'print_changed_only': False}

    def assume_finite():
        return get_config()['assume_finite']

    assert get_config() == expected_config(False)

    # Not using as a context manager affects nothing
    config_context(assume_finite=True)
    assert assume_finite() is False

    with config_context(assume_finite=True):
        assert get_config() == expected_config(True)
    assert assume_finite() is False

    with config_context(assume_finite=True):
        with config_context(assume_finite=None):
            assert assume_finite() is True

        assert assume_finite() is True

        with config_context(assume_finite=False):
            assert assume_finite() is False

            with config_context(assume_finite=None):
                assert assume_finite() is False

                # global setting will not be retained outside of context that
                # did not modify this setting
                set_config(assume_finite=True)
                assert assume_finite() is True

            assert assume_finite() is False

        assert assume_finite() is True

    assert get_config() == expected_config(False)

    # No positional arguments
    assert_raises(TypeError, config_context, True)
    # No unknown arguments
    assert_raises(TypeError, config_context(do_something_else=True).__enter__)
def test_config_context():
    """config_context overrides settings only inside the ``with`` block."""

    def check_config(flag):
        assert_equal(get_config(), {'assume_finite': flag})

    check_config(False)

    # Not using as a context manager affects nothing
    config_context(assume_finite=True)
    check_config(False)

    with config_context(assume_finite=True):
        check_config(True)
    check_config(False)

    with config_context(assume_finite=True):
        with config_context(assume_finite=None):
            check_config(True)

        check_config(True)

        with config_context(assume_finite=False):
            check_config(False)

            with config_context(assume_finite=None):
                check_config(False)

                # global setting will not be retained outside of context that
                # did not modify this setting
                set_config(assume_finite=True)
                check_config(True)

            check_config(False)

        check_config(True)

    check_config(False)

    # No positional arguments
    assert_raises(TypeError, config_context, True)
    # No unknown arguments
    assert_raises(TypeError, config_context(do_something_else=True).__enter__)
def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected,
                          warning):
    """get_chunk_n_rows returns ``expected`` and warns only when required.

    Checked twice: with ``working_memory`` passed explicitly and with it
    taken from the active configuration.
    """
    if warning is None:
        check_warning = assert_no_warnings
    else:
        def check_warning(*args, **kw):
            return assert_warns_message(UserWarning, warning, *args, **kw)

    def check_result(actual):
        assert actual == expected
        assert type(actual) is type(expected)

    check_result(check_warning(get_chunk_n_rows,
                               row_bytes=row_bytes,
                               max_n_rows=max_n_rows,
                               working_memory=working_memory))

    with config_context(working_memory=working_memory):
        check_result(check_warning(get_chunk_n_rows,
                                   row_bytes=row_bytes,
                                   max_n_rows=max_n_rows))