# NOTE(review): this chunk lost its newlines; the fragment below is the tail
# of a random-forest fit function (its `def` lies above this chunk) -- reattach
# it there.  Kept commented out so the rest of this script stays syntactically
# valid:
#     # fit the data
#     return forest.fit(data[0], data[1])

# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing, keeping only the listed features
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read,
    y='credit_application',
    x=[
        'n_duration', 'n_nr_employed', 'prev_ctc_outcome_success',
        'n_euribor3m', 'n_cons_conf_idx', 'n_age', 'month_oct',
        'n_cons_price_idx', 'edu_university_degree', 'n_pdays',
        'dow_mon', 'job_student', 'job_technician', 'job_housemaid',
        'edu_basic_6y',
    ])

# train the model
classifier = fitRandomForest((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the importance of features
# NOTE(review): this chunk lost its newlines; the two statements below are the
# tail of fitLogisticRegression(data) (its `def` lies above this chunk) --
# reattach them there.  Kept commented out so the rest of this script stays
# syntactically valid:
#     logistic_classifier = sm.GLM(data[1], data[0],
#                                  family=sm.families.Binomial(link=fm.logit))
#     # fit the data
#     return logistic_classifier.fit()

# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data and store it in a DataFrame
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitLogisticRegression((train_x, train_y))

# classify the unseen data (GLM yields probabilities, not labels)
predicted = classifier.predict(test_x)

# assign the class by thresholding the predicted probability at 0.5
predicted = [1 if elem > 0.5 else 0 for elem in predicted]

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the parameters
print(classifier.summary())
import sklearn.naive_bayes as nb


@hlp.timeit
def fitNaiveBayes(data):
    """Build and fit a Gaussian Naive Bayes classifier.

    Parameters
    ----------
    data : tuple
        ``(features, target)`` training pair, forwarded to ``fit``.

    Returns
    -------
    The fitted ``GaussianNB`` estimator.
    """
    # create the classifier object
    naiveBayes_classifier = nb.GaussianNB()
    # fit the model
    return naiveBayes_classifier.fit(data[0], data[1])


# the file name of the dataset (names follow the r_filename/csv_read
# convention used by the sibling scripts)
r_filename = 'bank_contacts.csv'

# read the data and store it in a DataFrame
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitNaiveBayes((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)
print("Naive bayes Model fitted successfully")
# NOTE(review): this chunk lost its newlines; the fragment below is the tail
# of fitLogisticRegression(data) (its `def` lies above this chunk) -- reattach
# it there.  Kept commented out so the rest of this script stays syntactically
# valid:
#     # fit the data
#     return logistic_classifier.fit(data[0], data[1])

# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitLogisticRegression((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the parameters: pair each feature label with its coefficient
# (coef_ is transposed so each row holds one feature's coefficient)
coef = {nm: coeff[0] for (nm, coeff) in zip(labels, classifier.coef_.T)}
print(coef)
# NOTE(review): this chunk lost its newlines; the fragment below is the tail
# of an SVM fit function (its `def` lies above this chunk) -- reattach it
# there.  Kept commented out so the rest of this script stays syntactically
# valid:
#     # fit the data
#     return svm.fit(data[0], data[1])

# name of the file to read from (values are semicolon-separated)
r_filenameCSV = 'winequality-red.csv'

# read the data
csv_read = pd.read_csv(r_filenameCSV, sep=';')

# print the first 10 records
print(csv_read.head(10))

# print the last 10 records
print(csv_read.tail(10))

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='quality')

# train the model
classifier = fitSVM((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print the support vectors of the fitted model
print(classifier.support_vectors_)