Beispiel #1
0
from joblib import dump, load
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
# from sklearn.metrics import roc_auc_ovr
from sklearn.model_selection import GridSearchCV
#TODO move into preprocess

# Everything below goes through ``prp``; import it here so the first call
# does not fail with a NameError.
import preprocess as prp

# --- poker data ---
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.target = 'hand'
# Boolean mask over the column index: every column of ``pkr_data.all``
# except the target column.
pkr_data.features = pkr_data.all.columns[
    pkr_data.all.columns != pkr_data.target]

# NOTE(review): the arguments passed here (target as a one-element list and
# eight explicitly named columns) differ from the ``target`` / ``features``
# attributes assigned just above. Which of the two init_model_data actually
# uses is not visible from this script -- confirm in preprocess.
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# --- ab data ---
ab_data = prp.ab_data()
# ab_data.encode = ['room_type']
ab_data.clean()
ab_data.target = 'room_type'
# Same selection as for the poker data: all columns but the target.
ab_data.features = ab_data.all.columns[ab_data.all.columns != ab_data.target]

ab_data.init_model_data(target=ab_data.target, features=ab_data.features)
print("Models Initiated")
# validate classifier ab_data

# Empty result containers. Nothing in this part of the script fills them;
# presumably later code stores per-model scores / AUC values here -- confirm.
ab_train_scores = {}
ab_test_scores = {}
pkr_train_scores = {}
pkr_test_scores = {}
pkr_test_auc = {}
pkr_train_auc = {}
Beispiel #2
0
from joblib import dump, load

# Everything below goes through ``prp``; import it here so the first call
# does not fail with a NameError.
import preprocess as prp

# --- poker data ---
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.target = 'hand'
# Boolean mask over the column index: every column of ``pkr_data.all``
# except the target column.
pkr_data.features = pkr_data.all.columns[
    pkr_data.all.columns != pkr_data.target]

# NOTE(review): the arguments passed here (target as a one-element list and
# eight explicitly named columns) differ from the ``target`` / ``features``
# attributes assigned just above. Which of the two init_model_data actually
# uses is not visible from this script -- confirm in preprocess.
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# --- ab data ---
# NOTE(review): ``n=100`` looks like a row/sample count -- confirm in preprocess.
ab_data = prp.ab_data(n=100)
# ab_data.encode = ['room_type']
ab_data.clean()
ab_data.target = 'room_type'
# Same selection as for the poker data: all columns but the target.
ab_data.features = ab_data.all.columns[ab_data.all.columns != ab_data.target]

ab_data.init_model_data(target=ab_data.target, features=ab_data.features)
print("Models Initiated")

# Empty result containers. Nothing in this part of the script fills them;
# presumably later code stores per-model scores / AUC values here -- confirm.
ab_train_scores = {}
ab_test_scores = {}
ab_test_auc = {}
ab_train_auc = {}

pkr_train_scores = {}
pkr_test_scores = {}
import preprocess as prp
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score  #probably don't need this
from sklearn.ensemble import AdaBoostRegressor

#TODO get some kind of model that makes predictions
#write helper functions for error ROC etc.
#write helper for cross validation
#identify how to tune model

#get data
# Fit a decision-tree regressor on the ab data and print its R^2 on a
# held-out split.

# get data
ab_obj = prp.ab_data()
# clean data, select target/features
ab_obj.clean()  # TODO decide if this should be done automatically
# ab_obj.target = ['price']
# ab_obj.features = ['host_id','price']
# Called without arguments, so target/features are whatever ab_obj already
# holds at this point.
ab_obj.init_model_data()
X = ab_obj.X
Y = ab_obj.Y

# TODO rewrite this
# Hold out 10% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=.1, random_state=105)  # TODO move to helper function

# A float ``min_samples_leaf`` is read by scikit-learn as a fraction of the
# training samples, not an absolute count.
DTree = DecisionTreeRegressor(min_samples_leaf=.0001)
DTree.fit(x_train, y_train)
y_predict = DTree.predict(x_test)

# Score the held-out predictions once and report the result.
print(r2_score(y_test, y_predict))
import preprocess as prp
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import svm

#get data
# Train a linear-kernel SVM classifier on the ab data and print its accuracy
# on a held-out split.

# NOTE(review): ``n=5000`` looks like a row/sample count -- confirm in preprocess.
ab_obj = prp.ab_data(n=5000)
ab_obj.clean()  # TODO decide if this should be done automatically
# ab_obj.target = ['price']
# ab_obj.features = ['host_id','price']
# No arguments: target/features are whatever ab_obj already holds.
ab_obj.init_model_data()
X, Y = ab_obj.X, ab_obj.Y

# TODO move to helper function
# 10% of the rows are held out for testing; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=105,
)

# ``fit`` returns the fitted estimator itself, so construction and training
# can be chained.
SVM_model = svm.SVC(kernel='linear').fit(x_train, y_train)
y_predict = SVM_model.predict(x_test)
score = metrics.accuracy_score(y_test, y_predict)
print(score)
from sklearn.multiclass import OneVsRestClassifier

from sklearn.model_selection import GridSearchCV

#pkr_data
#TODO move into preprocess
# Poker data: load, clean, then build the model data with 'hand' as the
# target and the eight listed suit/card columns as features.
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# ab data: 'room_type' is the target; every other column of ``ab_data.all``
# is used as a feature.
ab_data = prp.ab_data()
ab_data.clean()
ab_data.target = 'room_type'
ab_data.features = ab_data.all.columns[
    ab_data.all.columns != ab_data.target
]
ab_data.init_model_data(
    target=ab_data.target,
    features=ab_data.features,
)

# x_vars=list(ab_data.features)
# x_vars.append(ab_data.target)
# ab_corell_data = ab_data.all

# ab_corell_plt_obj = sns.pairplot(ab_corell_data,x_vars=ab_data.features,y_vars=ab_data.target,hue='room_type',diag_kind='hist')
# plt.suptitle('Airbnb Feature vs Target Grouping')
# plt.subplots_adjust(top=0.9)
# ab_corell_plt_obj.savefig('plt_ab_correl.png')
# plt.legend('top')
# plt.close()
from sklearn.multiclass import OneVsRestClassifier

from sklearn.model_selection import GridSearchCV

#pkr_data
#TODO move into preprocess
# Poker data: load, clean, then build the model data with 'hand' as the
# target and the eight listed suit/card columns as features.
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# ab data: 'room_type' is the target; every other column of ``ab_data.all``
# is used as a feature.
ab_data = prp.ab_data()
ab_data.clean()
ab_data.target = 'room_type'
ab_data.features = ab_data.all.columns[
    ab_data.all.columns != ab_data.target
]
ab_data.init_model_data(
    target=ab_data.target,
    features=ab_data.features,
)

# x_vars=list(ab_data.features)
# x_vars.append(ab_data.target)
# ab_corell_data = ab_data.all

# ab_corell_plt_obj = sns.pairplot(ab_corell_data,x_vars=ab_data.features,y_vars=ab_data.target,hue='room_type',diag_kind='hist')
# plt.suptitle('Airbnb Feature vs Target Grouping')
# plt.subplots_adjust(top=0.9)
# ab_corell_plt_obj.savefig('plt_ab_correl.png')
# plt.legend('top')
# plt.close()