def test_prediction_quality_against_benchmark():
    """Prediction for the first training row should fall near its true price."""
    # Given
    train_data = load_dataset('train.csv')
    input_df = train_data.drop(config.TARGET, axis=1)
    output_df = train_data[config.TARGET]

    # Tolerance band around the first true target, rounded to the nearest
    # 10,000: 210000 - 50000 = 160000 and 210000 + 50000 = 260000.
    benchmark_flexibility = 50000
    rounded_truth = round(output_df.iloc[0], ndigits=-4)
    benchmark_lower_boundary = rounded_truth - benchmark_flexibility
    benchmark_upper_boundary = rounded_truth + benchmark_flexibility

    multiple_test_json = input_df.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], float)
    value = math.ceil(subject.get('predictions')[0])
    assert benchmark_lower_boundary < value < benchmark_upper_boundary
def test_pipeline_drops_unnecessary_features():
    """Fitting the pipeline should keep only the configured feature columns."""
    # Given
    test_data = load_dataset('train.csv')
    X_train, X_test, y_train, y_test = train_test_split(
        test_data,
        test_data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    # Sanity check: the raw frame carries more columns than the model needs.
    assert len(config.FEATURES) != len(X_train.columns)

    # When
    # NOTE(review): relies on the private `_fit` of the sklearn Pipeline;
    # kept as-is to preserve existing behavior.
    X_transformed, _ = price_pipe._fit(X_train, y_train)

    # Then
    assert len(X_transformed[0]) == len(config.FEATURES)
def test_make_multiple_predictions():
    """Predicting over the whole test set yields a non-trivial result set."""
    # Given
    test_data = load_dataset('test.csv')
    original_data_length = len(test_data)
    multiple_test_json = test_data.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    # The prediction count is expected to DIFFER from the raw row count.
    # NOTE(review): presumably input validation inside make_prediction drops
    # invalid rows — confirm against its implementation.
    assert len(subject.get('predictions')) != original_data_length
def test_transformer_drops_unnecessary_features():
    """KeepColumnsTransformer should retain only the configured features."""
    # Given
    test_data = load_dataset('train.csv')
    X_train, X_test, y_train, y_test = train_test_split(
        test_data,
        test_data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )
    transformer = pp.KeepColumnsTransformer(variables=config.FEATURES)

    # Sanity check: the raw frame has more columns than the feature list.
    assert len(config.FEATURES) != len(X_train.columns)

    # When
    X_transformed = transformer.transform(X_train)

    # Then
    assert len(X_transformed.columns) == len(config.FEATURES)
def test_pipeline_transform_min_max_features():
    """Every transformed feature value should be scaled into [0, 1]."""
    # Given
    test_data = load_dataset('train.csv')
    X_train, X_test, y_train, y_test = train_test_split(
        test_data,
        test_data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    # When
    # NOTE(review): relies on the private `_fit` of the sklearn Pipeline;
    # kept as-is to preserve existing behavior.
    X_transformed, _ = price_pipe._fit(X_train, y_train)

    # Then
    assert all(0.0 <= value <= 1.0 for row in X_transformed for value in row)
def test_prediction_endpoint_returns_prediction(flask_test_client):
    """POSTing five rows to /v1/predict should yield five predictions."""
    # Given
    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    post_json = test_data[0:5].to_json(orient='records')

    # When
    response = flask_test_client.post('/v1/predict', json=post_json)

    # Then
    assert response.status_code == 200
    payload = json.loads(response.data)
    assert len(payload['predictions']) == 5
def test_make_single_prediction():
    """A single test row should reproduce the pinned benchmark prediction.

    Fix: removed a leftover debug ``print`` — test output should come from
    assertion failures, not stdout. Also hoisted the repeated
    ``subject.get('predictions')[0]`` lookup into a local.
    """
    # Given
    test_data = load_dataset('test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    assert subject is not None
    prediction = subject.get('predictions')[0]
    assert isinstance(prediction, float)
    # Regression pin for the persisted model; update when the model is
    # retrained.
    assert math.ceil(prediction) == 112964
def run_training():
    """Train the price pipeline on the configured data and persist it.

    Fixes: the unused test split is discarded via throwaway names, and the
    redundant ``X_train[config.FEATURES]`` re-selection is dropped —
    ``X_train`` already contains exactly those columns because the split is
    performed on ``data[config.FEATURES]``.
    """
    print("training model")
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Hold out 10% of rows; only the training split is used here.
    X_train, _X_test, y_train, _y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    # The pipeline is fitted against the log of the target.
    # NOTE(review): presumably predictions are exponentiated downstream —
    # confirm in the prediction code.
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train, y_train)
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
    print("training finished")