Example #1
0
    save_dict = {}
    save_dict['train_features'] = train_features
    save_dict['train_labels'] = train_labels
    save_dict['test_features'] = test_features
    save_dict['test_labels'] = test_labels
    scio.savemat(save_path, save_dict)
    return train_features, train_labels, test_features, test_labels


if __name__ == '__main__':
    # Step 1: extract visual words (one-time preprocessing; left disabled).
    # extract_words(
    #    '/home/give/homework/CV/dataset/affNIST/training_and_validation_batches',
    #    '/home/give/homework/CV/dataset/affNIST/test.mat',
    #     '/home/give/homework/CV/dataset/affNIST/patches/'
    # )

    # Step 2: randomly sample words and fit the K-Means vocabulary (disabled).
    # patches = load_words('/home/give/homework/CV/dataset/affNIST/patches', save_path='./vocabulary.npy')
    # print(np.shape(patches))
    #
    # get_KMeans_model(patches)

    # Step 3: build Bag-of-Visual-Words feature vectors for the affNIST
    # train/test splits, then train and evaluate an SVM on them.
    # NOTE(review): reload=True presumably forces recomputation instead of
    # loading cached features — confirm against get_features.
    train_features, train_labels, test_features, test_labels = get_features(
        '/home/give/homework/CV/dataset/affNIST/training_and_validation_batches',
        '/home/give/homework/CV/dataset/affNIST/test.mat',
        './BoVW_model.m',
        reload=True)
    from classification import SVM
    SVM.do(train_features, train_labels, test_features, test_labels)
Example #2
0
def main():
    """End-to-end SVM training pipeline (Python 2 script).

    Driven entirely by the dict returned from parse_option():
      1. load training features, targets and (optionally) a test set from CSV;
      2. optional preprocessing: sparse filtering, PCA, scaling;
      3. optional feature selection: random forests, recursive elimination;
      4. SVM model selection over C (and gamma for the RBF kernel), either
         interactively or automatically ("overnight_simulation" mode);
      5. final training, optional test-set prediction + submission CSV;
      6. persistence of the pickled model pieces and an experiment summary
         as JSON files under params_dict["dest_path"].

    Takes no arguments and returns nothing; progress is reported via prints
    and files written to disk.
    """

    params_dict = parse_option()

    # Read data
    train = np.genfromtxt(open(params_dict['training_set_fn'],'rb'), delimiter=',')
    print "Number of training samples: {0}.".format(train.shape[0])
    print "Number of features: {0}.".format(train.shape[1])
    target = np.genfromtxt(open(params_dict['target_set_fn'],'rb'), delimiter=',')
    len_train_set = train.shape[0]
    if params_dict["test_set_flag"]:
        test = np.genfromtxt(open(params_dict['test_set_fn'],"rb"), delimiter=',')

    # In interactive mode, show the raw features so the user can judge the
    # most suitable scaling strategy before any preprocessing happens.
    if not params_dict["overnight_simulation"]:
        print "Visualizing features for understanding the most suitable scaling type."
        if params_dict["test_set_flag"]:
            plot_features(np.vstack((train,test)))
        else:
            plot_features(train)
        plt.show()

    # Report how balanced the target classes are.
    balances = classes_balance(target)
    counter = 0
    for b in balances:
        print "For class {0} the balance is {1:.4f}.".format(counter, b)
        counter += 1

    n_feat = train.shape[1]
    num_samples = train.shape[0]

    #features scaling
    print "Starting features preprocessing ..."

    if params_dict["sparse_filtering_flag"]:

        print "Performing sparse filtering..."

        if params_dict["load_sf_flag"]:
            # Reuse previously computed sparse-filtering features.
            sf, train_sf, test_sf = load_sf_features(params_dict["load_sf_path"])
        else:
            sf = SparseFilter(n_layers=params_dict["n_layers_sf"],n_features=params_dict["n_features_sf"], n_iterations=params_dict["n_iterations_sf"])
            if params_dict["test_set_flag"]:
                # Fit on train+test jointly, then transform each separately.
                sf.fit(np.r_[train,test])
                train_sf = sf.transform(train)
                test_sf = sf.transform(test)
            else:
                sf.fit(train)
                train_sf = sf.transform(train)

        if params_dict["save_sf_flag"]:
            if params_dict["test_set_flag"]:
                save_sf_features(sf, train_sf, test_sf, params_dict["save_sf_path"])
            else:
                save_sf_features(sf, train_sf, None, params_dict["save_sf_path"])
        print "Features sparse filtering performed!"

        print train_sf.shape

    if params_dict["test_set_flag"]:
        dataset = np.r_[train, test]
    else:
        dataset = train

    if params_dict["pca_flag"]:

        print "Performing PCA..."

        pca = PCA(variance_retain = params_dict["pca_variance_retain"])
        pca.fit(dataset)
        dataset_pca = pca.transform(dataset)
        if params_dict["test_set_flag"]:
            # Split the jointly transformed matrix back into train/test rows.
            train_pca = dataset_pca[:len_train_set,:]
            test_pca = dataset_pca[len_train_set:,:]
        else:
            train_pca = dataset_pca

        n_feat_pca = dataset_pca.shape[1]
        print "Number of features after PCA: {0}.".format(n_feat_pca)

    else:

        # PCA disabled: pass the data through unchanged.
        dataset_pca = dataset
        train_pca = train
        if params_dict["test_set_flag"]:
            test_pca = test

        n_feat_pca = dataset_pca.shape[1]
        print "Number of features after PCA: {0}.".format(n_feat_pca)

    if params_dict["pca_flag"]:

        if not params_dict["overnight_simulation"]:
            print "Visualizing features after PCA..."

            plot_features(dataset_pca)
            plt.show()

    if params_dict["scaling_flag"]:
        # NOTE(review): Scaler.fit is used here as if it returned the scaled
        # data (fit-and-transform in one call) — confirm against Scaler's API.
        scaler = Scaler(bias_and_variance_flag = True, log10_flag = False, log2_flag = False, log1p_flag = False)
        if params_dict["test_set_flag"]:
            dataset_scaled = scaler.fit(np.r_[train_pca,test_pca])
            train_scaled = dataset_scaled[:len_train_set,:]
            test_scaled = dataset_scaled[len_train_set:,:]
        else:
            dataset_scaled = scaler.fit(train_pca)
            train_scaled = dataset_scaled
    else:
        train_scaled = train_pca
        if params_dict["test_set_flag"]:
            test_scaled = test_pca

    if params_dict["scaling_flag"]:

        if not params_dict["overnight_simulation"]:
            print "Visualizing features after features preprocessing.."

            plot_features(dataset_scaled)
            plt.show()

    if params_dict["sparse_filtering_flag"]:

        # Sparse-filtering features are appended column-wise to the scaled ones.
        train_data = np.c_[train_scaled, train_sf]
        if params_dict["test_set_flag"]:
            test_data = np.c_[test_scaled, test_sf]

    else:

        train_data = train_scaled
        if params_dict["test_set_flag"]:
            test_data = test_scaled

    print "Features preprocessing done!"

    if params_dict["rf_features_selection_flag"]:

        print "Starting features selection by means of random forests..."

        fsrf = FeaturesSelectionRandomForests()
        fsrf.fit(train_data, target)

        if not params_dict["overnight_simulation"]:
            fsrf.plot_features_importance()

        # Keep the mask so the selection can be re-applied at prediction time.
        fsrf_mask = fsrf.features_mask

        train_data = fsrf.transform(train_data)
        if params_dict["test_set_flag"]:
            test_data = fsrf.transform(test_data)

        n_feat_fsrf = train_data.shape[1]

        print "Random forests features selection done!"

    classification_obj=SVM()

    # NOTE(review): if skip_model_selection is True and the user supplies no
    # "C" (and "gamma" for RBF) in params_dict, C/gamma are undefined when the
    # training step below runs — confirm parse_option guarantees them.
    if not params_dict["skip_model_selection"]:

        print "Starting model selection ..."

        if not params_dict.has_key("C_list"):
            C_list = [0.0001, 0.001,0.01,0.1,1,10,100,1000,10000]
        else:
            C_list = params_dict["C_list"]

        if params_dict["kernel"] == SVM_RBF:
            if not params_dict.has_key("gamma_list"):
                gamma_list = [0.0001, 0.001,0.01,0.1,1,10,100,1000,10000]
            else:
                gamma_list = params_dict["gamma_list"]
        else:
            # Non-RBF kernels take no gamma.
            gamma_list = None

        #performing model selection
        ms_result = classification_obj.model_selection(train_data,target, kernel = params_dict["kernel"], 
                                                       n_iterations=params_dict["n_iterations_ms"],
                                                       C_list = C_list,
                                                       gamma_list = gamma_list,
                                                       show_accuracy_flag = params_dict["show_accuracy_flag"], 
                                                       show_precision_flag = params_dict["show_precision_flag"], 
                                                       show_recall_flag = params_dict["show_recall_flag"], 
                                                       show_f1_score_flag = params_dict["show_f1score_flag"],
                                                       max_num_cpus = params_dict["max_num_cpus"])

        if not params_dict["overnight_simulation"]:
            #displaying model selection
            if params_dict["kernel"] == SVM_RBF:
                if params_dict["show_accuracy_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["acc_by_C_and_gamma"], zlabel="accuracy", title="Accuracy by C and gamma")
                # NOTE(review): the next two branches look swapped — the
                # precision flag plots the recall surface and the recall flag
                # plots the precision surface; the f1 plot also carries
                # zlabel "accuracy". Confirm before trusting these plots.
                if params_dict["show_precision_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["recall_by_C_and_gamma"], zlabel="recall", title="Recall by C and gamma")
                if params_dict["show_recall_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["prec_by_C_and_gamma"], zlabel="precision", title="Precision by C and gamma")
                if params_dict["show_f1score_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["f1_by_C_and_gamma"], zlabel="accuracy", title="f1 score by C and gamma")
                if params_dict["show_trerr_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["tr_err_by_C_and_gamma"], zlabel="training error", title="Training error score by C and gamma")
                if params_dict["show_cverr_flag"]:
                    plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["cv_err_by_C_and_gamma"], zlabel="cross-validation error", title="Cross-validation error score by C and gamma")
            elif params_dict["kernel"] == SVM_linear  or params_dict["kernel"] == SVM_RBF_Chi2_squared:
                if params_dict["show_accuracy_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["acc_by_C"], ylabel="accuracy", title="Accuracy by C")
                # NOTE(review): same apparent precision/recall swap and f1
                # ylabel "accuracy" as in the 3-D branch above — confirm.
                if params_dict["show_precision_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["recall_by_C"], ylabel="recall", title="Recall by C")
                if params_dict["show_recall_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["prec_by_C"], ylabel="precision", title="Precision by C and gamma")
                if params_dict["show_f1score_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["f1_by_C"], ylabel="accuracy", title="f1 score by C")
                if params_dict["show_trerr_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["tr_err_by_C"], ylabel="training error", title="Training error score by C")
                if params_dict["show_cverr_flag"]:
                    plot_2d(x=ms_result["C_list"], y=ms_result["cv_err_by_C"], ylabel="cross-validation error", title="Cross-validation error score by C")
            else:
                raise Exception("Unsupported kernel type!")

            plt.show()

        if not params_dict["overnight_simulation"]:
            #entering the C and gamma chosen
            print "Plotted graphics for model selection. Choose the best C and gamma ..."

            # Prompt until the user types a parseable float.
            while True:
                C_str = raw_input("Enter the C value suggested by model selection:")
                try:
                    C = float(C_str)
                except Exception as e:
                    print "Invalid C inserted. C has to be numeric. Exception: {0}".format(e)
                    continue
                break

            if params_dict["kernel"] == SVM_RBF:
                while True:
                    gamma_str = raw_input("Enter the gamma value suggested by model selection:")
                    try:
                        gamma = float(gamma_str)
                    except Exception as e:
                        print "Invalid gamma inserted. gamma has to be numeric. Exception: {0}".format(e)
                        continue
                    break

            if params_dict["kernel"] == SVM_linear or params_dict["kernel"] == SVM_RBF_Chi2_squared:
                print "Parameters selection performed! C = {0}.".format(C)
            else:
                print "Parameters selection performed! C = {0}, gamma = {1}".format(C, gamma)

        else:

            # Overnight mode: pick the parameters with the best accuracy
            # automatically instead of asking the user.
            if params_dict["kernel"] == SVM_linear  or params_dict["kernel"] == SVM_RBF_Chi2_squared:
                C,accuracy = classification_obj.best_accuracy_C(ms_result)
            elif params_dict["kernel"] == SVM_RBF:
                C,gamma,accuracy = classification_obj.best_accuracy_C_and_gamma(ms_result)
            else:
                raise Exception("Unsupported kernel type!")

            print "C automatically selected equals to {0}.".format(C)
            if params_dict["kernel"] == SVM_RBF:
                print "gamma automatically selected equals to {0}.".format(gamma)
            print "The accuracy attained by those parameters during model selection is {0}.".format(accuracy)

    else:

        # Model selection skipped: take C/gamma straight from the options.
        if params_dict.has_key("C"):
            C = params_dict["C"]
            print "C specified by the user: {0}.".format(C)
        if params_dict.has_key("gamma"):
            gamma = params_dict["gamma"]
            print "gamma specified by the user: {0}".format(gamma)

    if params_dict["rfe_features_selection_flag"]:
        print "Performing recursive features elimination..."

        # For linear/Chi2 kernels RFE is run with a plain linear kernel.
        if params_dict["kernel"] == SVM_linear or params_dict["kernel"] == SVM_RBF_Chi2_squared:
            rfe = RecursiveFeaturesElimination(C=C,kernel=SVM_linear,
                                               n_iterations=params_dict["n_iterations_rfe"],
                                               test_size=0.3)
        elif params_dict["kernel"] == SVM_RBF:
            rfe = RecursiveFeaturesElimination(C=C,gamma=gamma,kernel=params_dict["kernel"],
                                               n_iterations=params_dict["n_iterations_rfe"],
                                               test_size=0.3)
        else:
                raise Exception("Unsupported kernel type!")

        tr_err_rfe, cv_err_rfe, accuracy_rfe,recall_rfe, precision_rfe, f1_score_rfe = rfe.rfe_curves(train_data, target) 

        if not params_dict["overnight_simulation"]:
            if params_dict["show_accuracy_flag"]:
                plot_rfe_curve(accuracy_rfe,"accuracy")
            if params_dict["show_precision_flag"]:
                plot_rfe_curve(precision_rfe,"precision")
            if params_dict["show_recall_flag"]:
                plot_rfe_curve(recall_rfe,"recall")
            if params_dict["show_f1score_flag"]:
                plot_rfe_curve(f1_score_rfe,"f1 score")
            if params_dict["show_trerr_flag"]:
                plot_rfe_curve(tr_err_rfe,"training error")
            if params_dict["show_cverr_flag"]:
                plot_rfe_curve(cv_err_rfe,"cross-validation error")
            plt.show()

        # Features are selected on the accuracy curve; the mask is kept for
        # persistence below and re-applied to the test set.
        train_data, rfe_mask = rfe.select_features(train_data, accuracy_rfe)
        if params_dict["test_set_flag"]:
            test_data = rfe.apply_features_selection(test_data)

        n_feat_rfe = train_data.shape[1]
        print "Number of features after Recursive Features Elimination: {0}.".format(n_feat_rfe)

        print "Recursive features elimination done!."

    #training
    print "Performing training..."

    # NOTE(review): for the Chi2 kernel the final model is trained with
    # kernel = SVM_linear (not params_dict["kernel"]) — confirm intentional.
    if params_dict["kernel"] == SVM_linear or params_dict["kernel"] == SVM_RBF_Chi2_squared:
        model = classification_obj.training(train_data, target, kernel = SVM_linear, C=C)
    elif params_dict["kernel"] == SVM_RBF:
        model = classification_obj.training(train_data, target, kernel = params_dict["kernel"], C=C, gamma=gamma)
    else:
        raise Exception("Unsupported kernel type!")

    print "Training performed!"

    if params_dict["test_set_flag"]:

        #prediction on kaggle test set
        print "Performing classification on the test set..."

        predicted = classification_obj.classify(test_data)

        print "Classification performed on the test set!"

        #save data in the submission format
        save_csv_submitted_labels(predicted, os.path.join(params_dict["dest_path"],params_dict["predicted_set_fn"]))

    # Estimate generalization performance on the training data.
    if params_dict["kernel"] == SVM_linear or params_dict["kernel"] == SVM_RBF_Chi2_squared:
        acc, prec, rec, f1 = classification_obj.performance_estimation(train_data, target, kernel = params_dict["kernel"], C = C, n_iterations = params_dict["n_iterations_performance_estimation"])
    elif params_dict["kernel"] == SVM_RBF:
        acc, prec, rec, f1 = classification_obj.performance_estimation(train_data, target, kernel = params_dict["kernel"], C = C, gamma = gamma, n_iterations = params_dict["n_iterations_performance_estimation"])
    print "Estimated performances:\nAccuracy: {0}\nPrecision: {1}\nRecall: {2}\nf1 Score: {3}".format(acc, prec, rec, f1)

    # Build a unique basename for the output files: model name, zero-padded
    # date, a random seed and the estimated scores (dots stripped).
    seedid = random.randint(0,100)
    today = date.today()
    if today.day < 10:
        day = "0%s" % today.day
    else:
        day = "%s" % today.day
    if today.month < 10:
        month = "0%s" % today.month
    else:
        month = "%s" % today.month
    bn = "{name}_{year}_{month}_{day}_rand{seed}_acc{acc:.4f}_prec{prec:4f}_rec{rec:4f}".format(name=get_model_name(params_dict),seed=seedid,year=today.year, month=month, day=day, acc=acc, prec=prec, rec=rec)
    bn = bn.replace(".","")

    """
        FILLING MODEL DICT
        Making the predicted model persistent!
    """

    model_dict = dict()

    # NOTE(review): pickled byte strings are embedded directly in a JSON
    # document below — this relies on the pickle output being ASCII-safe
    # (Python 2 protocol 0); confirm loadability on the consumer side.
    dumped_model = pickle.dumps(model)
    model_dict["classifier"] = dumped_model

    model_dict["scaling_flag"] = params_dict["scaling_flag"]
    if model_dict["scaling_flag"]:
        dumped_scaler = pickle.dumps(scaler)
        model_dict["scaler"] = dumped_scaler

    model_dict["pca_flag"] = params_dict["pca_flag"]
    if params_dict["pca_flag"]:
        dumped_pca = pickle.dumps(pca)
        model_dict["pca"] = dumped_pca

    model_dict["fsrf_flag"] = params_dict["rf_features_selection_flag"]
    if params_dict["rf_features_selection_flag"]:
        dumped_fsrf_mask = pickle.dumps(fsrf_mask)
        model_dict["fsrf_mask"] = dumped_fsrf_mask
    else:
        model_dict["fsrf_mask"] = None

    model_dict["rfe_flag"] = params_dict["rfe_features_selection_flag"]
    if params_dict["rfe_features_selection_flag"]:
        dumped_rfe_mask = pickle.dumps(rfe_mask)
        model_dict["rfe_mask"] = dumped_rfe_mask
    else:
        model_dict["rfe_mask"] = None

    json_model = json.dumps(model_dict, sort_keys=True, indent=4, separators=(',', ': '))
    models_path = os.path.join(params_dict["dest_path"],"models")
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    fn = "model_%s.json" % bn

    f = open(os.path.join(models_path, fn),"w")
    f.write(json_model)
    f.close()

    """
        FILLING EXPERIMENT DICT
        Saving a summary of the experiment, useful for the data scientist.
    """

    # Keys carry numeric prefixes so sort_keys=True renders the summary in
    # a fixed, human-friendly order.
    experiment_dict = dict()

    experiment_dict["01) number of samples dataset"] = num_samples
    experiment_dict["02) number of features dataset"] = n_feat
    balances_dict = dict()
    for i in xrange(len(balances)):
        balances_dict["{0}".format(i)] = balances[i]
    experiment_dict["01b) Balance of the classes of the dataset"] = balances_dict

    if params_dict["kernel"] == SVM_linear:
        experiment_dict["03) classifier type"] = "SVM linear"
    elif params_dict["kernel"] == SVM_RBF:
        experiment_dict["03) classifier type"] = "SVM RBF"
    elif params_dict["kernel"] == SVM_RBF_Chi2_squared:
        experiment_dict["03) classifier type"] = "SVM RBF Chi2"
    else:
        experiment_dict["03) classifier type"] = "Not specified"

    if not params_dict["skip_model_selection"]:
        experiment_dict["04) C list"] = C_list
    if params_dict["kernel"] == SVM_RBF and not params_dict["skip_model_selection"]:
        experiment_dict["05) gamma list"] = gamma_list

    experiment_dict["06) selected_C"] = C
    if params_dict["kernel"] == SVM_RBF:
        experiment_dict["07) selected gamma"] = gamma

    experiment_dict["08) accuracy"] = acc
    experiment_dict["09) precision"] = prec
    experiment_dict["10) recall"] = rec
    experiment_dict["11) f1 score"] = f1

    experiment_dict["12) number iterations in model selection"] = params_dict["n_iterations_ms"]

    experiment_dict["13) pca flag"] = params_dict["pca_flag"]
    if params_dict["pca_flag"]:
        experiment_dict["14) pca retain"] = params_dict["pca_variance_retain"]
        experiment_dict["16) number of features after pca"] = n_feat_pca

    experiment_dict["17) features scaling"] = params_dict["scaling_flag"]

    experiment_dict["18) random forests features selection"] = params_dict["rf_features_selection_flag"]
    if params_dict["rf_features_selection_flag"]:
        experiment_dict["19) number of features after random forests features selection"] = n_feat_fsrf

    experiment_dict["20) recursive features elimination"] = params_dict["rfe_features_selection_flag"]
    if params_dict["rfe_features_selection_flag"]:
        experiment_dict["21) number of iterations in rfe"] = params_dict["n_iterations_rfe"]

    json_experiment = json.dumps(experiment_dict, sort_keys=True, indent=4, separators=(',', ': '))
    experiments_path = os.path.join(params_dict["dest_path"],"experiments")
    if not os.path.exists(experiments_path):
        os.makedirs(experiments_path)
    fn = "experiment_%s.json" % bn
    f = open(os.path.join(experiments_path, fn),"w")
    f.write(json_experiment)
    f.close()

    # Additionally dump the raw model-selection tables as plain text.
    if not params_dict["skip_model_selection"]:
        if params_dict["kernel"] == SVM_RBF:
            acc_table = print_model_selection_results(results = ms_result["acc_by_C_and_gamma"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = ms_result["gamma_list"] ) 
            prec_table = print_model_selection_results(results = ms_result["prec_by_C_and_gamma"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = ms_result["gamma_list"] ) 
            recall_table = print_model_selection_results(results = ms_result["recall_by_C_and_gamma"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = ms_result["gamma_list"] ) 
            f1_table = print_model_selection_results(results = ms_result["f1_by_C_and_gamma"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = ms_result["gamma_list"] ) 
        else:
            acc_table = print_model_selection_results(results = ms_result["acc_by_C"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = None )
            prec_table = print_model_selection_results(results = ms_result["prec_by_C"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = None )
            recall_table = print_model_selection_results(results = ms_result["recall_by_C"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = None )
            f1_table = print_model_selection_results(results = ms_result["f1_by_C"], 
                                          C_list = ms_result["C_list"], 
                                          gamma_list = None )
        acc_str = "Accuracy:\n{0}\n".format(acc_table)
        prec_str = "Precision:\n{0}\n".format(prec_table)
        recall_str = "Recall:\n{0}\n".format(recall_table)
        f1_str = "f1_score:\n{0}\n".format(f1_table)

        fn = "experiment_%s_results.txt" % bn
        f = open(os.path.join(experiments_path, fn),"w")
        f.write(acc_str)
        f.write(prec_str)
        f.write(recall_str)
        f.write(f1_str)
        f.close()    

        basename = os.path.basename(bn)

        print "Results saved in %s." % basename
Example #3
0
# Evaluate four classifiers (naive Bayes, KNN, SVM, random forest) on a
# slice of the document corpus and print a report plus accuracy for each.
from classification import Naive_bayesian, KNN, random_forest, SVM
from sklearn.metrics import accuracy_score, classification_report
from classification import test_document_list, train_document_list
# NOTE(review): this import shadows classification.Naive_bayesian imported
# above — confirm that search.Naive_bayesian is the intended implementation.
from search import Naive_bayesian

# Ground-truth class is stored as the first element of each test document.
classes_test = [test_document[0] for test_document in test_document_list]
# NOTE(review): all slices below start at index 1, silently skipping the
# first train/test document — confirm this is deliberate.
classes_pred_1 = Naive_bayesian(train_document_list[1:500],
                                test_document_list[1:20])
# print(classes_test, classes_pred_1)
print(classification_report(classes_test[1:20], classes_pred_1))
print(accuracy_score(classes_test[1:20], classes_pred_1))

# KNN with k=5; labels are cast to int to match its prediction type.
classes_pred_2 = KNN(train_document_list[1:200], test_document_list[1:20], 5)
# print(classes_test[1:20], classes_pred_2)
print(
    classification_report([int(c) for c in classes_test[1:20]],
                          classes_pred_2))
print(accuracy_score(classes_test[1:20], classes_pred_2))

classes_pred_3 = SVM(train_document_list[1:500], test_document_list[1:20])
print(classification_report(classes_test[1:20], classes_pred_3))
print(accuracy_score(classes_test[1:20], classes_pred_3))

classes_pred_4 = random_forest(train_document_list[1:500],
                               test_document_list[1:20])
print(classification_report(classes_test[1:20], classes_pred_4))
print(accuracy_score(classes_test[1:20], classes_pred_4))
Example #4
0
kernel_func = linear                                 # Use this for clusters data.
output_path = '../outputs/'                          # Where to save the plots.
class_colors = {-1: 'b', 1: 'r'}                     # Colors for plotting.

# Load data.
x_train, y_train = load_data_csv(os.path.join(data_folder, dataset_name+'_train.csv'))
x_test, y_test = load_data_csv(os.path.join(data_folder, dataset_name+'_test.csv'))

# Show the ground-truth labelling of both splits.
plot_points(x_train, y_train, class_colors=class_colors, title='Train - correct labels')
plot_points(x_test, y_test, class_colors=class_colors, title='Test - correct labels')

# Train the SVM classifier on the training data.
# One SVM per log-spaced regularization strength C, from 1e-4 to 1e4.
C_group = [1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4]
for i in range(len(C_group)):
    C = C_group[i]
    svm = SVM(kernel_func=kernel_func, C=C)
    print('Training...')
    svm.train(x_train, y_train)
    print('Plotting...')
    # NOTE(review): the output file name does not include C, so each
    # iteration overwrites the previous boundary plot — confirm intended.
    plot_svm_decision_boundary(svm, x_train, y_train,
        title='SVM decision boundary on training data', output_path=output_path,
        file_name=str(dataset_name) + '_support_vectors_train.png',
        class_colors=class_colors)

    # Make predictions on train and test data.
    y_train_pred = svm.predict(x_train)
    y_test_pred = svm.predict(x_test)

    plot_points(x_train, y_train_pred, class_colors=class_colors,
        title='Your predictions for training data')
    plot_points(x_test, y_test_pred, class_colors=class_colors,
Example #5
0
        # Fragment of a larger function: either compute training feature
        # vectors from raw images or load a precomputed CSV (the `if` head
        # pairing with the `else` below is above this view).
        feature_df = generate_training_feature_vectors(training_dir)
    else:
        feature_df = pd.read_csv(training_features)

    # Interactive driver: each stage is gated on a y/n prompt.
    user_input = input('Perform Classification (y/n)?')
    if (user_input == "y"):
        # process() presumably returns per-class train/test splits —
        # data[0] for class 0 and data[1] for class 1; confirm its contract.
        data = process()
        c0 = data[0]
        c1 = data[1]
        X_train_0, X_test_0, y_train_0, y_test_0 = c0[0], c0[1], c0[2], c0[3]
        X_train_1, X_test_1, y_train_1, y_test_1 = c1[0], c1[1], c1[2], c1[3]

        user_input = input('Perform SVM Classification (y/n)?')

        if (user_input == "y"):
            SVM(X_train_0, X_test_0, y_train_0, y_test_0)

        user_input = input(
            'Perform Grid Search Classification (this can take a while) (y/n)? '
        )

        if (user_input == "y"):
            GridSearch(X_train_0, X_test_0, y_train_0, y_test_0)

        user_input = input('Perform Sequential Classification (y/n)?')

        if (user_input == "y"):
            # NOTE(review): mixes class-1 features (X_train_1/X_test_1) with
            # class-0 labels (y_train_0/y_test_0) — looks like a bug; confirm.
            seq(X_train_1, X_test_1, y_train_0, y_test_0)

    # For each image in training set
    ##### Image Pre-processing #####
Example #6
0
# Smoke-test script (Python 2): load a CSV dataset, scale it, then run SVM
# model selection over a C/gamma grid and plot the resulting surfaces.
print "Reading the dataset from file..."
# Read data
train = np.genfromtxt(open(os.path.join(testdir, 'train.csv'),'rb'), delimiter=',')
target = np.genfromtxt(open(os.path.join(testdir, 'trainLabels.csv'),'rb'), delimiter=',')
test = np.genfromtxt(open(os.path.join(testdir, 'test.csv'),'rb'), delimiter=',')
print "Dataset loaded!"
#features scaling
print "Starting features preprocessing ..."

# Train and test are scaled together, then split back apart.
# NOTE(review): the 1000-row split is hard-coded — assumes exactly 1000
# training samples; confirm against the dataset.
dataset_scaled, scaler = dataset_scaling(np.vstack((train,test)))
train_scaled = dataset_scaled[:1000]
test_scaled = dataset_scaled[1000:]

print "Features preprocessing done!"

classification_obj=SVM()

print "Starting model selection ..."

#performing model selection

# Log-spaced hyper-parameter grids.
# NOTE(review): gamma_list jumps from 100 straight to 10000, skipping 1000 —
# confirm this gap is intended.
C_list = [0.0001,0.001,0.01,0.1,1,10,100,1000,10000]
gamma_list = [0.0001,0.001,0.01,0.1,1,10, 100,10000]

# No kernel argument: relies on model_selection's default (the gamma grid
# and the 3-D results below suggest RBF — confirm against SVM's API).
ms_result = classification_obj.model_selection(train_scaled,target,n_iterations=3, 
                                               C_list=C_list, 
                                               gamma_list=gamma_list)

#displaying model selection
plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["acc_by_C_and_gamma"], zlabel="accuracy", title="Accuracy by C and gamma")
plot_3d(x=ms_result["gamma_list"], y=ms_result["C_list"], z=ms_result["recall_by_C_and_gamma"], zlabel="recall", title="Recall by C and gamma")