# Example #1
0
def class_fit_predict_print(data):
    '''
        Automating model estimation: fit a kNN classifier,
        classify the held-out samples and print a summary.

        data is indexed positionally: data[0] and data[1] are
        handed to fit_kNN_classifier, data[2] is handed to the
        fitted classifier's predict(), and data[3] is passed
        together with the predictions to hlp.printModelSummary.
        (Presumably train_x, train_y, test_x, test_y -- confirm
        against the caller.)

        Returns None; the outcome is reported only through
        hlp.printModelSummary.
    '''
    # train the model
    classifier = fit_kNN_classifier((data[0], data[1]))

    # classify the unseen data
    predicted = classifier.predict(data[2])

    # print out the results
    # (the former trailing `return logistic_classifier.fit()` referred
    # to a name that is never bound here and has been removed)
    hlp.printModelSummary(data[3], predicted)


# location of the dataset on disk
r_filename = '../../Data/Chapter03/bank_contacts.csv'

# load the CSV file with pandas
csv_read = pd.read_csv(r_filename)

# partition the data into training and testing subsets
(train_x, train_y,
 test_x, test_y,
 labels) = hlp.split_data(csv_read, y='credit_application')

# fit the logistic regression on the training subset
classifier = fitLogisticRegression((train_x, train_y))

# score the held-out observations ...
predicted = classifier.predict(test_x)

# ... and threshold every score: strictly above 0.5 becomes class 1,
# anything else becomes class 0
predicted = [int(score > 0.5) for score in predicted]

# report the predictions against the true test labels
hlp.printModelSummary(test_y, predicted)
# print out the parameters
print(classifier.summary())

# split the data into training and testing, keeping only the
# explicitly listed feature columns
# NOTE(review): the opening of this hlp.split_data call was missing
# (the argument list began mid-statement, which is an
# IndentationError). The assignment targets are restored to match the
# names used by the code that follows and the other split_data calls
# in this file -- confirm against the original example.
train_x, train_y, \
test_x,  test_y, \
labels = hlp.split_data(
    csv_read,
    y = 'credit_application',
    x = ['n_duration','n_nr_employed',
        'prev_ctc_outcome_success','n_euribor3m',
        'n_cons_conf_idx','n_age','month_oct',
        'n_cons_price_idx','edu_university_degree','n_pdays',
        'dow_mon','job_student','job_technician',
        'job_housemaid','edu_basic_6y']
)

# fit the random forest on the training subset
classifier = fitRandomForest((train_x, train_y))

# predict classes for the held-out observations
predicted = classifier.predict(test_x)

# report the predictions against the true test labels
hlp.printModelSummary(test_y, predicted)

# list each label next to the matching entry of
# classifier.feature_importances_ (numbering starts at 0)
for idx, (feature, importance) in enumerate(
        zip(labels, classifier.feature_importances_)):
    print("{0}. {1}: {2}".format(idx, feature, importance))

# write every estimator of the forest to its own numbered .dot file
for tree_no, tree in enumerate(classifier.estimators_):
    dot_path = '../../Data/Chapter03/randomForest/tree_' \
        + str(tree_no) + '.dot'
    sk.export_graphviz(tree, out_file=dot_path)
# split the original (non-reduced) data into training and testing
(train_x_orig, train_y_orig,
 test_x_orig, test_y_orig,
 labels_orig) = hlp.split_data(csv_read, y='credit_application')

# reduce the dimensionality: store the reducer's transform of x
# in a new 'reduced' column
csv_read['reduced'] = reduce_LDA(x, y).transform(x)

# split again, this time using only the 'reduced' column as input
(train_x_r, train_y_r,
 test_x_r, test_y_r,
 labels_r) = hlp.split_data(
    csv_read, y='credit_application', x=['reduced'])

# train one linear SVM per data variant (reduced first, then original)
classifier_r = fitLinearSVM((train_x_r, train_y_r))
classifier_orig = fitLinearSVM((train_x_orig, train_y_orig))

# classify the unseen data with each model
predicted_r = classifier_r.pred(test_x_r)
predicted_orig = classifier_orig.pred(test_x_orig)

# print both summaries so the two variants can be compared
hlp.printModelSummary(test_y_r, predicted_r)
hlp.printModelSummary(test_y_orig, predicted_orig)
# Example #5
0
# baseline: train/test split of the data with all of its columns
split_orig = hlp.split_data(csv_read, y='credit_application')
train_x_orig, train_y_orig, test_x_orig, test_y_orig, labels_orig = \
    split_orig

# add a 'reduced' column holding the transform of x produced by the
# object that reduce_LDA(x, y) returns
csv_read['reduced'] = reduce_LDA(x, y).transform(x)

# second split, restricted to the single 'reduced' input column
split_r = hlp.split_data(
    csv_read,
    y='credit_application',
    x=['reduced'],
)
train_x_r, train_y_r, test_x_r, test_y_r, labels_r = split_r

# fit the linear SVMs: reduced data first, then the original data
classifier_r = fitLinearSVM((train_x_r, train_y_r))
classifier_orig = fitLinearSVM((train_x_orig, train_y_orig))

# predict on each model's own test subset
predicted_r = classifier_r.pred(test_x_r)
predicted_orig = classifier_orig.pred(test_x_orig)

# summarize the reduced model, then the original one
hlp.printModelSummary(test_y_r, predicted_r)
hlp.printModelSummary(test_y_orig, predicted_orig)