from joblib import dump, load
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
# from sklearn.metrics import roc_auc_ovr
from sklearn.model_selection import GridSearchCV

# ---------------------------------------------------------------------------
# pkr data
# TODO: move this setup into preprocess
# ---------------------------------------------------------------------------
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.target = 'hand'
# Every column of the cleaned frame except the target column.
pkr_data.features = pkr_data.all.columns[
    pkr_data.all.columns != pkr_data.target
]
# NOTE(review): the model data is initialised with an explicit target list and
# an explicit eight-column feature list, not with the `target` / `features`
# attributes assigned just above -- confirm which of the two is intended.
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# ---------------------------------------------------------------------------
# ab data
# ---------------------------------------------------------------------------
ab_data = prp.ab_data()
# ab_data.encode = ['room_type']
ab_data.clean()
ab_data.target = 'room_type'
# Every column of the cleaned frame except the target column.
ab_data.features = ab_data.all.columns[
    ab_data.all.columns != ab_data.target
]
ab_data.init_model_data(
    target=ab_data.target,
    features=ab_data.features,
)

print("Models Initiated")

# validate classifier ab_data
# Empty result containers; each name is bound to its own, distinct dict.
ab_train_scores, ab_test_scores = {}, {}
pkr_train_scores, pkr_test_scores = {}, {}
pkr_test_auc, pkr_train_auc = {}, {}
from joblib import dump, load

# ---------------------------------------------------------------------------
# pkr data
# ---------------------------------------------------------------------------
pkr_data = prp.pkr_data()
pkr_data.clean()
pkr_data.target = 'hand'
# Every column of the cleaned frame except the target column.
pkr_data.features = pkr_data.all.columns[
    pkr_data.all.columns != pkr_data.target
]
# NOTE(review): init_model_data receives an explicit target list and an
# explicit eight-column feature list rather than the attributes assigned
# above -- confirm which of the two is intended.
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# ---------------------------------------------------------------------------
# ab data
# ---------------------------------------------------------------------------
ab_data = prp.ab_data(n=100)
# ab_data.encode = ['room_type']
ab_data.clean()
ab_data.target = 'room_type'
# Every column of the cleaned frame except the target column.
ab_data.features = ab_data.all.columns[
    ab_data.all.columns != ab_data.target
]
ab_data.init_model_data(
    target=ab_data.target,
    features=ab_data.features,
)

print("Models Initiated")

# Empty result containers; each name is bound to its own, distinct dict.
ab_train_scores, ab_test_scores = {}, {}
ab_test_auc, ab_train_auc = {}, {}
pkr_train_scores, pkr_test_scores = {}, {}
import preprocess as prp
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# NOTE(review): original remark here was "probably don't need this";
# AdaBoostRegressor is not used in the visible code but the import is kept in
# case later code relies on it.
from sklearn.ensemble import AdaBoostRegressor

# TODO get some kind of model that makes predictions
# write helper functions for error ROC etc.
# write helper for cross validation
# identify how to tune model

# get data
ab_obj = prp.ab_data()

# clean data select target/features
ab_obj.clean()
# TODO decide if this should be done automatically
# ab_obj.target = ['price']
# ab_obj.features = ['host_id','price']
ab_obj.init_model_data()
X = ab_obj.X
Y = ab_obj.Y

# TODO rewrite this
# Hold out 10% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=.1, random_state=105)

# TODO move to helper function
# A float min_samples_leaf is interpreted by scikit-learn as a fraction of the
# training samples rather than an absolute count.
DTree = DecisionTreeRegressor(min_samples_leaf=.0001)
DTree.fit(x_train, y_train)
y_predict = DTree.predict(x_test)

# Score the held-out predictions once and report the result (the earlier
# version computed the same R^2 twice and discarded the first value).
test_r2 = r2_score(y_test, y_predict)
print(test_r2)
import preprocess as prp
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import svm

# get data
ab_obj = prp.ab_data(n=5000)

# clean data select target/features
ab_obj.clean()
# TODO decide if this should be done automatically
# ab_obj.target = ['price']
# ab_obj.features = ['host_id','price']
ab_obj.init_model_data()
X, Y = ab_obj.X, ab_obj.Y

# Hold out 10% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=105,
)

# TODO move to helper function
# Fit a linear-kernel support vector classifier and report its accuracy on
# the held-out rows.
SVM_model = svm.SVC(kernel='linear')
SVM_model.fit(x_train, y_train)
y_predict = SVM_model.predict(x_test)
score = metrics.accuracy_score(y_test, y_predict)
print(score)
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import GridSearchCV

# ---------------------------------------------------------------------------
# pkr_data
# TODO: move this setup into preprocess
# ---------------------------------------------------------------------------
pkr_data = prp.pkr_data()
pkr_data.clean()
# Target and feature columns are passed explicitly here.
pkr_data.init_model_data(
    target=['hand'],
    features=[
        'suit1', 'card1',
        'suit2', 'card2',
        'suit3', 'card3',
        'suit4', 'card4',
    ],
)

# ---------------------------------------------------------------------------
# ab data
# ---------------------------------------------------------------------------
ab_data = prp.ab_data()
ab_data.clean()
ab_data.target = 'room_type'
# Every column of the cleaned frame except the target column.
ab_data.features = ab_data.all.columns[
    ab_data.all.columns != ab_data.target
]
ab_data.init_model_data(
    target=ab_data.target,
    features=ab_data.features,
)

# Disabled feature-vs-target pair plot, kept for reference:
# x_vars=list(ab_data.features)
# x_vars.append(ab_data.target)
# ab_corell_data = ab_data.all
# ab_corell_plt_obj = sns.pairplot(ab_corell_data,x_vars=ab_data.features,y_vars=ab_data.target,hue='room_type',diag_kind='hist')
# plt.suptitle('Airbnb Feature vs Target Grouping')
# plt.subplots_adjust(top=0.9)
# ab_corell_plt_obj.savefig('plt_ab_correl.png')
# plt.legend('top')
# plt.close()