# Example #1 (header residue from code-sharing site, commented out so the file can run)
def validate_feature_linear(features,
                            labels,
                            classes,
                            n_folds=5,
                            print_folds=True,
                            print_absolute=True,
                            print_logloss=True):
    """Cross-validate an LDA classifier with a label-grouped K-fold split.

    Prints (depending on the flags) the per-fold accuracy scores, their
    mean/std, and the per-fold log-loss scores with mean/std.

    Parameters:
        features: sample feature matrix passed to the classifier.
        labels: group labels used to build the LabelKFold split
            (samples sharing a label stay in the same fold).
        classes: target class per sample.
        n_folds: number of cross-validation folds (default 5).
        print_folds: print the per-fold score arrays.
        print_absolute: print the accuracy section (header + mean/std).
        print_logloss: print the log-loss section (header + mean/std).

    Returns:
        None — output is printed only.
    """
    kfold = cv.LabelKFold(labels, n_folds)
    model = lda.LDA()
    # BUGFIX: `score` used to be computed only under `print_absolute`,
    # so print_folds=True with print_absolute=False raised NameError.
    # Compute it whenever any accuracy output is requested.
    if print_absolute or print_folds:
        score = cross_validation.cross_val_score(model,
                                                 features,
                                                 classes,
                                                 cv=kfold)
        if print_absolute: print("absolute scores")
        if print_folds: print("\tfolds:", score)
        if print_absolute: print("\tmean:", score.mean(), "std:", numpy.std(score))

    # Project helper: per-fold log-loss reusing the same label-grouped split.
    scores = score_calculation.loglossKFold(features,
                                            classes,
                                            model,
                                            kfold,
                                            given_kfold=True)
    if print_logloss: print("logloss scores")
    if print_folds: print("\tfolds", scores)
    if print_logloss:
        print("\tmean:", numpy.mean(scores), "std:", numpy.std(scores))
    # BUGFIX: removed three stray lines (paste artifact) that referenced
    # undefined names (`thumbs`, `i`, `splits`, `extractor`) and mutated
    # `features` — they raised NameError on every call.

#model = grid_search.GridSearchCV(svm.SVC(),{'kernel' : ['poly'], 'C' : [1, 10, 100, 1000], 'degree' : [4,7,10], 'shrinking' : [True, False]})
#model.fit(features, classes)
#print(model.best_estimator_)
#print('\a')

# --- Script-level evaluation of LDA on the assembled feature matrix ---
# NOTE(review): written against the pre-0.18 scikit-learn API
# (`cross_validation` module, `lda.LDA`). Assumes `cv`, `lda`,
# `cross_validation`, `numpy`, `score_calculation`, and the data
# (`features`, `classes`, `amount`) are defined earlier in the file —
# verify against the imports/feature-extraction section.
print("Producing KFold indexes")
kfold = cv.KFold(amount, n_folds=5, shuffle=True)  # `amount` presumably = number of samples; confirm
model = lda.LDA()  # Linear Discriminant Analysis classifier
#model = svm.SVC(kernel = 'linear')
#model = qda.QDA()
# Per-fold classification accuracy over the shuffled 5-fold split.
score = cross_validation.cross_val_score(model, features, classes, cv=kfold)
print("scores ", score)
print("mean score ", score.mean())

#model = svm.SVC(kernel = 'linear', probability = True)
model = lda.LDA()  # fresh (unfitted) model for the log-loss evaluation
#model = neighbors.KNeighborsClassifier(n_neighbors = 1)
# Project helper: per-fold log-loss; 5 here is a fold count, so a new
# split is built internally (not the `kfold` object above).
scores = score_calculation.loglossKFold(features, classes, model, 5)
print("logloss scores ", scores)
print("logloss score mean ", numpy.mean(scores), " ", numpy.std(scores))

#predictions = cross_validation.cross_val_predict(model, features, classes, cv = kfold)
#wrongIndexes = numpy.nonzero(predictions != classes)
#uniqueWrongs, counts = numpy.unique(numpy.append(predictions[[wrongIndexes]], numpy.array(classes)[[wrongIndexes]]), return_counts = True)
#wrongs = uniqueWrongs[counts > 10]

print('\a')  # terminal bell: audible "run finished" signal