def class_fit_predict_print(data):
    """Fit a kNN classifier, score the held-out data, and report results.

    ``data`` is indexed as a 4-tuple — (data[0], data[1]) is passed to the
    fitter, data[2] is scored, and data[3] is compared against the
    predictions (presumably train_x, train_y, test_x, test_y — confirm
    against the caller).
    """
    # estimate the model on the training portion
    model = fit_kNN_classifier((data[0], data[1]))

    # score the unseen observations
    predictions = model.predict(data[2])

    # summarize performance against the true labels
    hlp.printModelSummary(data[3], predictions)
return logistic_classifier.fit() # the file name of the dataset r_filename = '../../Data/Chapter03/bank_contacts.csv' # read the data csv_read = pd.read_csv(r_filename) # split the data into training and testing train_x, train_y, \ test_x, test_y, \ labels = hlp.split_data( csv_read, y = 'credit_application' ) # train the model classifier = fitLogisticRegression((train_x, train_y)) # classify the unseen data predicted = classifier.predict(test_x) # assign the class predicted = [1 if elem > 0.5 else 0 for elem in predicted] # print out the results hlp.printModelSummary(test_y, predicted) # print out the parameters print(classifier.summary())
csv_read, y = 'credit_application', x = ['n_duration','n_nr_employed', 'prev_ctc_outcome_success','n_euribor3m', 'n_cons_conf_idx','n_age','month_oct', 'n_cons_price_idx','edu_university_degree','n_pdays', 'dow_mon','job_student','job_technician', 'job_housemaid','edu_basic_6y'] ) # train the model classifier = fitRandomForest((train_x, train_y)) # classify the unseen data predicted = classifier.predict(test_x) # print out the results hlp.printModelSummary(test_y, predicted) # print out the importance of features for counter, (nm, label) \ in enumerate( zip(labels, classifier.feature_importances_) ): print("{0}. {1}: {2}".format(counter, nm,label)) # and export the trees to .dot files for counter, tree in enumerate(classifier.estimators_): sk.export_graphviz(tree, out_file='../../Data/Chapter03/randomForest/tree_' \ + str(counter) + '.dot')
# split the original (full-feature) data into training and testing
train_x_orig, train_y_orig, \
test_x_orig, test_y_orig, \
labels_orig = hlp.split_data(
    csv_read,
    y='credit_application'
)

# reduce the dimensionality with LDA and store it as a single column
# NOTE(review): `x` and `y` are not defined anywhere in this chunk —
# presumably the feature matrix and target extracted from csv_read earlier
# in the file; confirm before running.
csv_read['reduced'] = reduce_LDA(x, y).transform(x)

# split the reduced (one-feature) data into training and testing
train_x_r, train_y_r, \
test_x_r, test_y_r, \
labels_r = hlp.split_data(
    csv_read,
    y='credit_application',
    x=['reduced']
)

# train one model on the reduced data and one on the original data
classifier_r = fitLinearSVM((train_x_r, train_y_r))
classifier_orig = fitLinearSVM((train_x_orig, train_y_orig))

# classify the unseen data
# FIX: was `.pred(...)` — sklearn-style estimators (and every other
# classifier in this file) expose `.predict(...)`
predicted_r = classifier_r.predict(test_x_r)
predicted_orig = classifier_orig.predict(test_x_orig)

# print out the results for both variants so they can be compared
hlp.printModelSummary(test_y_r, predicted_r)
hlp.printModelSummary(test_y_orig, predicted_orig)