コード例 #1
0
ファイル: main.py プロジェクト: neerajkesav/PythonML_Examples
    def run(self):
        """Run the full predictive-modeling pipeline on the Pima Indians
        diabetes data set: load, explore, preprocess, evaluate candidate
        models, tune, and finally train/save/predict with the chosen model.

        Relies on the project classes DatasetLoader, DataExplorer,
        DataPreprocessor, Evaluator, ModelSelector, AccuracyImprover and
        ModelFinalizer being importable, and on the data file existing at
        the hard-coded path below.
        """
        # Path to the data set file.
        path = "../../neeraj/resource/pima-indians-diabetes.data"
        # Column names of the data set (standard Pima-Indians attribute
        # abbreviations; scrape-introduced padding spaces removed).
        column_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass',
                        'pedi', 'age', 'class']
        # Load the data set using the DatasetLoader class.
        load_data = DatasetLoader(path, column_names)
        data = load_data.load()
        load_data.print_shape(data)

        # Understand the data using the DataExplorer class.
        explore_data = DataExplorer()
        explore_data.print_data_statistics(data)
        explore_data.visualize(data)

        # Data preprocessing: columns 0-7 are predictors, column 8 is the
        # class label (arguments 0, 8, 8 to split_dataset).
        process_data = DataPreprocessor()
        input_set, output_set = process_data.split_dataset(data, 0, 8, 8)
        process_data.display_dataset()
        process_data.summarize(input_set, 0, 5, 3)

        # Model evaluation using the Evaluator class (10 folds, seed 7).
        evaluator = Evaluator()
        evaluator.validate(LogisticRegression(), input_set, output_set, 10, 7)
        # NOTE(review): recent scikit-learn spells this scorer 'neg_log_loss';
        # 'log_loss' is kept as-is since the installed version is unknown.
        evaluator.evaluate(LogisticRegression(), input_set, output_set, 10, 7, 'log_loss')

        # Select the best model among several candidates via ModelSelector.
        model = ModelSelector()
        # The candidate models for selection.
        models = []
        models.append(('LR', LogisticRegression()))
        models.append(('LDA', LinearDiscriminantAnalysis()))
        models.append(('RF', RandomForestClassifier(n_estimators=100, max_features=3)))
        selected_model = model.select_model(models, input_set, output_set, 10, 7)
        # BUGFIX: the original applied % to print()'s return value (None),
        # which raises TypeError; the formatting belongs inside the call.
        print("\nSelected Model:\n %s" % selected_model)

        # Improve accuracy using the AccuracyImprover class.
        improve_accuracy = AccuracyImprover()
        improve_accuracy.tuning(Ridge(), input_set, output_set)
        improve_accuracy.ensemble_prediction(RandomForestClassifier(n_estimators=100, max_features=3), input_set, output_set, 10, 7)

        # Finalize the model: hold out 33% for testing (seed 7), fit on the
        # training split, persist to disk, then predict on the test split.
        finalize_model = ModelFinalizer()
        input_train, input_test, output_train, output_test = finalize_model.split_train_test_sets(input_set, output_set, 0.33, 7)
        finalize_model.finalize_and_save(LogisticRegression(), "../../neeraj/resource/pima_model.sav", input_train, output_train)
        finalize_model.predict("../../neeraj/resource/pima_model.sav", input_test, output_test)
コード例 #2
0
import argparse

# Parse command-line arguments: dataset location plus k-NN hyperparameters.
# NOTE(review): this chunk references names with no visible import —
# paths, DatasetLoader, LabelEncoder, train_test_split, KNeighborsClassifier,
# classification_report — presumably imported earlier in the original file
# (scikit-learn / imutils-style helpers); confirm against the full source.
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, 
    help="path to input dataset")
ap.add_argument("-k", "--neighbors", type=int, default=1,
    help="# of nearest neighbors for classification")
ap.add_argument("-j", "--jobs", type=int, default=-1,
    help="# of CPU cores used for classification")
args = vars(ap.parse_args())

# Load every image under the dataset directory, then flatten each image
# to a 1-D feature vector (assumes 32x32 RGB images — TODO confirm the
# loader's output shape).
print("[INFO] Loading images")
image_paths = list(paths.list_images(args["dataset"]))
sdl = DatasetLoader()
(data, labels) = sdl.load(image_paths, verbose=500)
data = data.reshape((data.shape[0], 32*32*3))

# Encode string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Partition the data.
# training: 75%, testing: 25% (fixed seed for reproducibility)
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

# Fit and evaluate a distance-weighted k-NN classifier with the
# user-supplied neighbor count and CPU-core parallelism.
model = KNeighborsClassifier(n_neighbors=args["neighbors"], n_jobs=args["jobs"], weights = 'distance')
model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX), target_names=le.classes_))
コード例 #3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Main entry script: load the thyroid (ann-train / ann-test) data sets,
preprocess them, split predictors from the target, carve out a
validation set, and scale the features.

@author: neeraj
"""
from dataset_loader import DatasetLoader
from data_preprocessor import DataPreprocessor
from model_builder import ModelBuilder

# Loading the training data set (UCI "ann" thyroid format — TODO confirm).
data_loader = DatasetLoader('../resources/ann-train.data')
train = data_loader.load()

# Loading the held-out test data set with a fresh loader instance.
data_loader = DatasetLoader('../resources/ann-test.data')
test = data_loader.load()

# Preprocessing both splits together so they receive identical transforms.
dp = DataPreprocessor()
train, test = dp.preprocess(train, test)

# Splitting data into predictor (X) and target (y) variables.
train_X, train_y = dp.split_predictors(train)
test_X, test_y = dp.split_predictors(test)

# Splitting the training data further to obtain a validation set.
X_train, X_val, y_train, y_val = dp.validation_split(train_X, train_y)

# Scaling train and validation features (scaler presumably fit on the
# training split only — verify inside DataPreprocessor.scale_data).
X_train, X_val = dp.scale_data(X_train, X_val)