Code example #1: random forest classifier
import pandas as pd
import sklearn.ensemble as en

import helper as hlp  # assumed name of the shared helper module used throughout


@hlp.timeit
def fitRandomForest(data):
    '''
        Build a random forest classifier
    '''
    # create the classifier object
    # (the original snippet is truncated above; the imports, the function
    #  header, and the constructor arguments here are reconstructed)
    forest = en.RandomForestClassifier(n_jobs=-1)

    # fit the data
    return forest.fit(data[0], data[1])


# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read,
    y='credit_application',
    x=[
        'n_duration', 'n_nr_employed', 'prev_ctc_outcome_success',
        'n_euribor3m', 'n_cons_conf_idx', 'n_age', 'month_oct',
        'n_cons_price_idx', 'edu_university_degree', 'n_pdays', 'dow_mon',
        'job_student', 'job_technician', 'job_housemaid', 'edu_basic_6y'
    ])

# train the model
classifier = fitRandomForest((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the importance of features
# (the original snippet ends here; the loop below is a reconstruction)
for feature, importance in zip(labels, classifier.feature_importances_):
    print('{0}: {1:.4f}'.format(feature, importance))
Code example #2: logistic regression (statsmodels GLM)
import pandas as pd
import statsmodels.api as sm
import statsmodels.genmod.families.links as fm  # assumed source of the fm alias

import helper as hlp  # assumed name of the shared helper module used throughout


@hlp.timeit
def fitLogisticRegression(data):
    '''
        Build the logistic regression classifier
    '''
    # create the classifier object
    # (the original snippet is truncated above; the imports and the
    #  function header here are reconstructed)
    logistic_classifier = sm.GLM(data[1],
                                 data[0],
                                 family=sm.families.Binomial(link=fm.logit))

    # fit the data
    return logistic_classifier.fit()


# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitLogisticRegression((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# assign the class
predicted = [1 if elem > 0.5 else 0 for elem in predicted]

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the parameters
print(classifier.summary())
Code example #3: Naive Bayes classifier
import pandas as pd
import sklearn.naive_bayes as nb

import helper as hlp  # assumed name of the shared helper module used throughout

@hlp.timeit
def fitNaiveBayes(data):

    '''
        Build the Naive Bayes classifier
    '''
    # create the classifier object
    naiveBayes_classifier = nb.GaussianNB()

    # fit the model
    return naiveBayes_classifier.fit(data[0], data[1])

# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitNaiveBayes((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

print("Naive bayes Model fitted successfully")
Code example #4: logistic regression (scikit-learn)
import pandas as pd
import sklearn.linear_model as lm

import helper as hlp  # assumed name of the shared helper module used throughout


@hlp.timeit
def fitLogisticRegression(data):
    '''
        Build the logistic regression classifier
    '''
    # create the classifier object
    # (the original snippet is truncated above; the imports, the function
    #  header, and the constructor arguments here are reconstructed)
    logistic_classifier = lm.LogisticRegression()

    # fit the data
    return logistic_classifier.fit(data[0], data[1])


# the file name of the dataset
r_filename = 'bank_contacts.csv'

# read the data
csv_read = pd.read_csv(r_filename)

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(
    csv_read, y='credit_application')

# train the model
classifier = fitLogisticRegression((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the parameters
coef = {nm: coeff[0] for (nm, coeff) in zip(labels, classifier.coef_.T)}
print(coef)
Code example #5: support vector machine
import pandas as pd
import sklearn.svm as sv

import helper as hlp  # assumed name of the shared helper module used throughout


@hlp.timeit
def fitSVM(data):
    '''
        Build the SVM classifier
    '''
    # create the classifier object
    # (the original snippet is truncated above; the imports, the function
    #  header, and the kernel choice here are reconstructed)
    svm = sv.SVC(kernel='linear')

    # fit the data
    return svm.fit(data[0], data[1])


# name of the file to read from
r_filenameCSV = 'winequality-red.csv'

# read the data
csv_read = pd.read_csv(r_filenameCSV, sep=';')

# print the first 10 records
print(csv_read.head(10))
# print the last 10 records
print(csv_read.tail(10))

# split the data into training and testing
train_x, train_y, test_x, test_y, labels = hlp.split_data(csv_read,
                                                          y='quality')

# train the model
classifier = fitSVM((train_x, train_y))

# classify the unseen data
predicted = classifier.predict(test_x)

# print out the results
hlp.printModelSummary(test_y, predicted)

# print out the support vectors
print(classifier.support_vectors_)
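
All five examples rely on a shared hlp helper module (split_data, printModelSummary, and the timeit decorator) that is not shown above. The sketch below is a minimal, hypothetical reconstruction of such a module, assuming split_data performs a random train/test split over the requested columns and printModelSummary reports standard scikit-learn classification metrics; the file name helper.py and every implementation detail are assumptions, not the original code.

# helper.py -- hypothetical sketch of the hlp module used above
import time

import numpy as np
import sklearn.metrics as mt


def timeit(method):
    '''
        Decorator that prints how long the wrapped function took
    '''
    def timed(*args, **kwargs):
        start = time.time()
        result = method(*args, **kwargs)
        print('{0}: {1:.2f} sec'.format(method.__name__, time.time() - start))
        return result
    return timed


def split_data(data, y, x='all', test_size=0.33):
    '''
        Split a DataFrame into training and testing feature/target arrays;
        returns (train_x, train_y, test_x, test_y, labels)
    '''
    if x == 'all':
        x = [col for col in data.columns if col != y]

    # randomly mark roughly test_size of the rows as the test set
    mask = np.random.rand(len(data)) < test_size

    train_x = data.loc[~mask, x].values
    train_y = data.loc[~mask, y].values
    test_x = data.loc[mask, x].values
    test_y = data.loc[mask, y].values

    return train_x, train_y, test_x, test_y, x


def printModelSummary(actual, predicted):
    '''
        Print basic classification metrics for the predictions
    '''
    print('Accuracy: {0:.4f}'.format(mt.accuracy_score(actual, predicted)))
    print(mt.classification_report(actual, predicted))
    print('Confusion matrix:')
    print(mt.confusion_matrix(actual, predicted))

In practice sklearn.model_selection.train_test_split would be the more idiomatic way to perform the split; the hand-rolled random mask above only keeps the sketch short and dependency-light.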