Python InputValidator.transform 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: autosklearn.data.validation

클래스/타입: InputValidator

메소드/함수: transform

hotexamples.com에서의 예제들: 4

Python InputValidator.transform - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python autosklearn.data.validation.InputValidator.transform의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

InputValidator(27)

validate_target(15)

validate_features(7)

validate(5)

fit(4)

transform(4)

decode_target(2)

join_and_check(2)

예제 #1

파일 보기

def test_validation_unsupported():
    """Check the errors reported for inconsistent or premature input.

    Three calls are made on the same validator, and each one must raise a
    ``ValueError`` whose message matches the expected pattern:

    1. ``fit`` with 2 training rows but 6 training targets.
    2. ``fit`` with consistent training data but 2 test rows and 6 test
       targets.
    3. ``transform`` after both ``fit`` attempts above have failed.
    """

    def two_rows():
        # Fresh array on every call, exactly as the inline literals would be.
        return np.array([[0, 1, 0], [0, 1, 1]])

    def two_targets():
        return np.array([0, 1])

    def six_targets():
        return np.array([0, 1, 0, 0, 0, 0])

    validator = InputValidator()

    train_mismatch = r"Inconsistent number of train datapoints.*"
    with pytest.raises(ValueError, match=train_mismatch):
        validator.fit(X_train=two_rows(), y_train=six_targets())

    test_mismatch = r"Inconsistent number of test datapoints.*"
    with pytest.raises(ValueError, match=test_mismatch):
        validator.fit(
            X_train=two_rows(),
            y_train=two_targets(),
            X_test=two_rows(),
            y_test=six_targets(),
        )

    not_fitted = r"Cannot call transform on a validator .*fitted"
    with pytest.raises(ValueError, match=not_fitted):
        validator.transform(X=two_rows(), y=two_targets())

예제 #2

파일 보기

파일: test_validation.py 프로젝트: tmielika/auto-sklearn

def test_data_validation_for_regression(openmlid, as_frame):
    """Fit and transform an OpenML dataset with a regression validator.

    Args:
        openmlid: Passed to ``fetch_openml`` as ``data_id``.
        as_frame: Passed to ``fetch_openml`` as ``as_frame``; when truthy the
            features are handled with the pandas DataFrame API below.

    The test checks that ``transform`` preserves the shape of the training
    features, that a column which is entirely NaN in the input is still
    entirely NaN in the output, and (for frame input) that the feature
    validator has recorded a ``feat_type``.
    """
    x, y = sklearn.datasets.fetch_openml(data_id=openmlid,
                                         return_X_y=True,
                                         as_frame=as_frame)
    validator = InputValidator(is_classification=False)

    if as_frame:
        # NaN is not supported in categories, so
        # drop columns with them.
        nan_cols = [i for i in x.columns if x[i].isnull().any()]
        cat_cols = [
            i for i in x.columns if x[i].dtype.name in ['category', 'bool']
        ]
        unsupported_columns = list(set(nan_cols) & set(cat_cols))
        if unsupported_columns:
            x.drop(unsupported_columns, axis=1, inplace=True)

    # Only the training part is used here; the test part is discarded.
    X_train, _, y_train, _ = sklearn.model_selection.train_test_split(
        x, y, test_size=0.33, random_state=0)

    validator.fit(X_train=X_train, y_train=y_train)

    X_train_t, y_train_t = validator.transform(X_train, y_train)
    assert np.shape(X_train) == np.shape(X_train_t)

    # Leave columns that are complete NaN
    # The sklearn pipeline will handle that
    if as_frame and np.any(pd.isnull(X_train).values.all(axis=0)):
        assert np.any(pd.isnull(X_train_t).values.all(axis=0))
    elif not as_frame and np.any(pd.isnull(X_train).all(axis=0)):
        assert np.any(pd.isnull(X_train_t).all(axis=0))

    # The original line was a bare comparison without ``assert`` and so
    # verified nothing.
    # NOTE(review): the check is limited to frame input to mirror the
    # classification test, which only inspects feat_type when as_frame is
    # set -- confirm whether feat_type is also populated for array input.
    if as_frame:
        assert validator.feature_validator.feat_type is not None

예제 #3

파일 보기

def test_sparse_data_validation_for_regression():
    """Fit a regression validator on COO-sparse features, then transform.

    The validator is fitted on a ``coo_matrix`` built from a generated
    regression problem. The dense features are then transformed and checked
    for unchanged shape and numeric dtypes, and finally a ``csr_matrix`` of
    the same data is transformed to confirm a different sparse format is
    accepted.
    """
    X, y = sklearn.datasets.make_regression(
        n_samples=100,
        n_features=50,
        random_state=0,
    )
    coo_features = sparse.coo_matrix(X)

    validator = InputValidator(is_classification=False)
    validator.fit(X_train=coo_features, y_train=y)

    X_out, y_out = validator.transform(X, y)
    assert np.shape(X_out) == np.shape(X)

    # Both outputs must have a numeric dtype after validation.
    for transformed in (X_out, y_out):
        assert np.issubdtype(transformed.dtype, np.number)

    # Transforming a CSR matrix must also work on the same fitted validator.
    csr_features = sparse.csr_matrix(X)
    X_out, y_out = validator.transform(csr_features, y)

예제 #4

파일 보기

def test_data_validation_for_classification(openmlid, as_frame):
    """Fit and transform an OpenML dataset with a classification validator.

    Args:
        openmlid: Passed to ``fetch_openml`` as ``data_id``.
        as_frame: Passed to ``fetch_openml`` as ``as_frame``; when truthy the
            features are handled with the pandas DataFrame API below.

    The test checks that ``transform`` preserves the shape of the training
    features, that a column which is entirely NaN in the input is still
    entirely NaN in the output, that features and targets come back with
    numeric dtypes, and (for frame input) that ``'categorical'`` is the first
    distinct entry of ``feat_type`` whenever more than one kind is present.
    """
    x, y = sklearn.datasets.fetch_openml(data_id=openmlid,
                                         return_X_y=True,
                                         as_frame=as_frame)
    validator = InputValidator(is_classification=True)

    if as_frame:
        # NaN is not supported in categories, so
        # drop columns with them.
        nan_cols = [i for i in x.columns if x[i].isnull().any()]
        cat_cols = [
            i for i in x.columns if x[i].dtype.name in ['category', 'bool']
        ]
        unsupported_columns = list(set(nan_cols) & set(cat_cols))
        if unsupported_columns:
            x.drop(unsupported_columns, axis=1, inplace=True)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        x, y, test_size=0.33, random_state=0)

    validator.fit(X_train=X_train,
                  y_train=y_train,
                  X_test=X_test,
                  y_test=y_test)

    X_train_t, y_train_t = validator.transform(X_train, y_train)
    assert np.shape(X_train) == np.shape(X_train_t)

    # Leave columns that are complete NaN
    # The sklearn pipeline will handle that
    if as_frame and np.any(pd.isnull(X_train).values.all(axis=0)):
        assert np.any(pd.isnull(X_train_t).values.all(axis=0))
    elif not as_frame and np.any(pd.isnull(X_train).all(axis=0)):
        assert np.any(pd.isnull(X_train_t).all(axis=0))

    # make sure everything was encoded to number
    assert np.issubdtype(X_train_t.dtype, np.number)
    assert np.issubdtype(y_train_t.dtype, np.number)

    # Categorical columns are sorted to the beginning
    if as_frame:
        # The original line was a bare comparison without ``assert``; with
        # the assert, a missing feat_type fails here with a clear message
        # instead of as a TypeError from dict.fromkeys below.
        assert validator.feature_validator.feat_type is not None
        # dict.fromkeys keeps first-seen order, giving the distinct feature
        # kinds in the order they appear.
        ordered_unique_elements = list(
            dict.fromkeys(validator.feature_validator.feat_type))
        if len(ordered_unique_elements) > 1:
            assert ordered_unique_elements[0] == 'categorical'