예제 #1
0
파일: job_eval.py 프로젝트: neurale/datapot
from __future__ import print_function

import sys
import bz2
import time
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import cross_val_score
import datapot as dp
from datapot.datasets import load_job_salary

def _report_elapsed(label, started_at):
    """Print *label* followed by the seconds elapsed since *started_at*."""
    print(label, time.time() - started_at)


# Data comes from datapot's bundled load_job_salary() loader and is fed
# to a freshly constructed DataPot.
data = load_job_salary()
datapot = dp.DataPot()

# Each DataPot stage (detect, fit, transform) is timed on its own so the
# cost of the individual steps can be compared.
t0 = time.time()
datapot.detect(data)
_report_elapsed('detect time:', t0)

t0 = time.time()
datapot.fit(data, verbose=True)
_report_elapsed('fit time:', t0)

t0 = time.time()
df = datapot.transform(data)
_report_elapsed('transform time:', t0)

# Features: every transformed column except the target and the 'Id' column.
excluded_columns = ['SalaryNormalized', 'Id']
X = df.drop(excluded_columns, axis=1)

# Target: the salary is cut into two quantile-based bins labelled 0 and 1,
# turning the task into binary classification.
salary_values = df['SalaryNormalized'].values
salary_bins = pd.qcut(salary_values, q=2, labels=[0, 1])
y = salary_bins.ravel()

# Score an XGBoost classifier (default parameters) with cv=5 cross-validation.
model = xgb.XGBClassifier()
cv_score = cross_val_score(model, X, y, cv=5)
예제 #2
0
from sklearn.model_selection import cross_val_score
import xgboost as xgb

import datapot as dp

# Four toy records, each a JSON-encoded string with a "name" (string),
# "wins" (list of integers, possibly empty) and "rating" (integer) key.
dummy_data = [
    '{"name": "Gilbert", "wins": [3, 4, 12], "rating": 32}',
    '{"name": "Alexa", "wins": [1, 2, 5, 7], "rating": 24}',
    '{"name": "May", "wins": [], "rating": 1240}',
    '{"name": "Deloise", "wins": [6, 8, 9, 10, 11], "rating": 25}',
]

# Create a DataPot instance and print it before any data has been seen.
data = dp.DataPot()
print(data)

# Fit it on the raw JSON strings, then print it again so the state before
# and after fitting can be compared.
data.fit(dummy_data)
print(data)
# NOTE(review): fields() presumably lists the fields found during fit --
# confirm against the datapot documentation.
print(data.fields())

# Apply the fitted transformers to the same records.
# NOTE(review): drop_non_numerical=True presumably keeps only numeric output
# columns, and the result is used below like a pandas DataFrame -- confirm.
df = data.transform(dummy_data, drop_non_numerical=True)
print(df)

# 'rating' is the value to predict; all remaining columns act as features.
target_column = 'rating'
y = df[target_column]
X = df.drop(target_column, axis=1)

# XGBoost regressor (default parameters) used to evaluate the prediction score.
model = xgb.XGBRegressor()