예제 #1
0
파일: Exercise1.py 프로젝트: Broshen/CS480
def testAllClassifiers(Xfile, yfile):
    """Fit a fixed set of classifiers and print each one's test score.

    The data is obtained from ``loadAndSplitData(Xfile, yfile)``; only the
    train and test splits it returns are used here. Each classifier is
    fitted on the training split and its ``score`` on the test split is
    printed next to a human-readable name.
    """
    X, Xtrain, Xtest, y, ytrain, ytest = loadAndSplitData(Xfile, yfile)

    # Keep each display name next to the estimator it describes.
    named_classifiers = [
        ("perceptron", linear_model.Perceptron(max_iter=1000)),
        ("kNN, k=15", neighbors.KNeighborsClassifier(15, weights='uniform')),
        ("logistic regression", linear_model.LogisticRegression()),
        ("decision tree", tree.DecisionTreeClassifier()),
        ("bagging", ensemble.BaggingClassifier()),
        ("boosting", ensemble.AdaBoostClassifier()),
        ("random forest", ensemble.RandomForestClassifier()),
        ("support vector machines", svm.LinearSVC()),
    ]

    for label, estimator in named_classifiers:
        estimator.fit(Xtrain, ytrain)
        print(label + " :", estimator.score(Xtest, ytest))
예제 #2
0
def main():
    """Train one Perceptron per iris species and print its test metrics.

    The training CSV is shuffled with a fixed NumPy seed, then both CSVs are
    run through ``data_preprocessing``. For each of the three target vectors
    it returns, a fresh Perceptron is fitted and accuracy, precision and
    recall on the test set are printed as percentages.
    """
    train_frame = pd.read_csv("./SimplePerceptron/iris_train.txt", header=None)
    np.random.seed(0)
    train_frame = train_frame.sample(frac=1)
    x_train, y_train = data_preprocessing(train_frame)

    test_frame = pd.read_csv("./SimplePerceptron/iris_test.txt", header=None)
    x_test, y_test = data_preprocessing(test_frame)

    # Position in this tuple matches the index of the target vector.
    species_prefixes = ("Iris-setosa ", "Iris-versicolor ", "Iris-virginica ")

    for species_idx, prefix in enumerate(species_prefixes):
        model = linear_model.Perceptron()
        model.fit(x_train, y_train[species_idx])
        guesses = model.predict(x_test)

        expected = y_test[species_idx]
        accuracy = accuracy_score(expected, guesses)
        precision = precision_score(expected, guesses)
        recall = recall_score(expected, guesses)

        print(prefix, end='')
        print("accuracy:", accuracy * 100, "% precision:", precision * 100, "% recall:", recall * 100, "%")
예제 #3
0
def examples():
    """Walk through a minimal Perceptron fit and a StandardScaler usage demo.

    Fits a Perceptron on a three-sample toy set, prints its predictions,
    computes their accuracy, and then shows fitting a scaler on training
    data and reusing it on test data.

    Returns:
        Always 0.
    """
    X = np.array([[1, 2], [3, 4], [5, 6]])
    y = np.array([0, 1, 0])
    clf = sllm.Perceptron()
    clf.fit(X, y)
    predictions = clf.predict(X)

    print(predictions)

    # accuracy_score requires the true and predicted labels; calling it
    # without arguments raises TypeError.
    slm.accuracy_score(y, predictions)  # metrics of accuracy
    scaler = slp.StandardScaler()
    # fit_transform finds the normalization parameters (mean and variance of
    # each feature) from the sample and immediately normalizes that sample
    # with them. It needs the sample as its argument.
    scaler.fit_transform(X)

    # Use the same module alias as above so the name is guaranteed to resolve.
    scaler = slp.StandardScaler()
    X_train = np.array([[100.0, 2.0], [50.0, 4.0], [70.0, 6.0]])
    X_test = np.array([[90.0, 1], [40.0, 3], [60.0, 4]])
    # Fit on the training data only, then apply the same parameters to test.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return 0
예제 #4
0
    def kappa_pruning(self, k_fold, n_times, pool_size, m):
        """Rank pairs of bagged Perceptrons by Cohen's kappa agreement.

        For each of ``n_times`` repetitions, the first stratified fold of
        ``self.x`` / ``self.y`` is oversampled with SMOTE, a bagging ensemble
        of ``pool_size`` Perceptrons is fitted on it, and the kappa score
        between the training-set predictions of every pair of estimators is
        recorded as ``(i, j, kappa)``.

        Args:
            k_fold: Number of stratified folds.
            n_times: Number of repetitions.
            pool_size: Number of estimators in the bagging ensemble.
            m: Number of entries to keep from each result list.

        Returns:
            A 3-tuple ``(pruning[:m], pruningKdnGreater[:m],
            pruningKdnLess[:m])``. ``pruning`` is sorted by ascending kappa.
            The two kDN lists are never populated in this method, so they
            are always empty.
        """
        # combinations() returns a one-shot iterator. Materialise it so that
        # every repetition sees all pairs; otherwise only the first of the
        # n_times repetitions would contribute any kappa scores.
        pairs = list(combinations(range(pool_size), 2))
        pruning = []
        pruningKdnGreater = []
        pruningKdnLess = []
        for _ in range(n_times):
            skf = StratifiedKFold(n_splits=k_fold, shuffle=True)

            for train_index, _test_index in skf.split(self.x, self.y):
                X_train = self.x[train_index]
                Y_train = self.y[train_index]

                x_train, y_train = SMOTE().fit_sample(X_train, Y_train)

                # The result is not used here; the call is kept in case the
                # helper has side effects on the instance.
                self.k_Disagreeing_neighbors_kDN(x_train, y_train)

                BagPercep = BaggingClassifier(
                    linear_model.Perceptron(max_iter=5), pool_size)
                BagPercep.fit(x_train, y_train)
                for first, second in pairs:
                    kappa = cohen_kappa_score(
                        BagPercep.estimators_[first].predict(x_train),
                        BagPercep.estimators_[second].predict(x_train))
                    pruning.append((first, second, kappa))
                # Only the first fold of each repetition is used.
                break

        pruning.sort(key=lambda tup: tup[2])

        return (pruning[:m], pruningKdnGreater[:m], pruningKdnLess[:m])
예제 #5
0
파일: ner_w2v.py 프로젝트: zbhno37/paper_z
def rbm_lr(train_set, test_set):
    """Fit a Perceptron -> LogisticRegression pipeline and predict on test_set.

    ``train_set[0]`` and ``train_set[1]`` are passed to ``fit`` as the two
    positional arguments; ``test_set`` is passed whole to ``predict``.
    Timestamped progress messages are printed along the way.

    Returns the pipeline's predictions for ``test_set``.

    NOTE(review): despite the function name and the first log message
    ("RBM->LR"), the ``rbm`` estimator built below is never placed in the
    pipeline.
    NOTE(review): a Perceptron is used as an intermediate pipeline step;
    presumably this relies on a scikit-learn version in which linear models
    expose ``transform`` -- confirm against the installed version.
    NOTE(review): this block uses Python 2 print statements.
    """
    logistic = linear_model.LogisticRegression(C=1000.0,
                                               penalty='l1',
                                               tol=1e-6)
    # Constructed but unused by the pipeline below.
    rbm = BernoulliRBM(random_state=0,
                       verbose=True,
                       learning_rate=0.6,
                       n_iter=5,
                       n_components=256)
    perce = linear_model.Perceptron()
    classifier = Pipeline(steps=[('Percetron', perce), ('logistic', logistic)])
    print '....start training ner_model[RBM->LR] at ', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    classifier.fit(train_set[0], train_set[1])
    print '....finished training ner_model[Percetron->LR] at ', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print '..predicting'
    ner_rs = classifier.predict(test_set)
    # Print the first two predictions as a quick sanity check.
    print ner_rs[0], ner_rs[1]
    print '....finished predicting ner_model[Percetron->LR] at ', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    return ner_rs
def LinearModel_classify(dataset):
    """Fit four classifiers on the labelled data and print their scores.

    Each estimator is fitted on ``dataset['train_labled_x']`` /
    ``dataset['train_labled_y']`` and then scored on the test set and on the
    unlabelled training set. Two lists are printed: first the scores on the
    unlabelled set, then the scores on the test set.

    Note that the values collected in ``unlabel_error`` are the raw results
    of ``score`` and are printed as-is.
    """
    estimators = (
        linear_model.LogisticRegression(random_state=0,
                                        solver='lbfgs',
                                        multi_class='multinomial'),
        linear_model.Perceptron(tol=1e-3, random_state=0),
        ensemble.RandomForestClassifier(n_estimators=10,
                                        max_depth=50,
                                        random_state=0),
        naive_bayes.GaussianNB(),
    )

    test_acc = []
    unlabel_error = []
    # Fit and score one estimator at a time.
    for estimator in estimators:
        fitted = estimator.fit(dataset['train_labled_x'],
                               dataset['train_labled_y'])
        test_acc.append(fitted.score(dataset['test_x'], dataset['test_y']))
        unlabel_error.append(
            fitted.score(dataset['train_unlabled_x'],
                         dataset['train_unlabled_y']))

    print(unlabel_error)
    print(test_acc)
예제 #7
0
    def fit(self, X1, Y1, X2, Y2):
        """Plot mean per-label Perceptron accuracy against the epoch count.

        For each of the two datasets ``(X1, Y1)`` and ``(X2, Y2)`` the data
        is split 67/33, and for every epoch count in 500, 1000, ..., 5000 a
        Perceptron is trained separately on each label column (binary
        relevance). The mean per-label accuracy, its variance and the
        accuracy over the whole label matrix are printed, and the mean
        accuracy is plotted against the epoch count.

        If a dataset has more than 10 labels, only the first epoch count is
        evaluated; the remaining points of the curve stay at zero.

        Args:
            X1, X2: Feature matrices.
            Y1, Y2: 2-D label matrices, one column per label.
        """
        for X, Y in zip([X1, X2], [Y1, Y2]):
            X_train, X_test, y_train, y_test = train_test_split(
                X, Y, test_size=0.33, random_state=42)

            _, n_labels = np.shape(y_test)

            y_pred = np.zeros(np.shape(y_test))
            plt.figure()
            iter_range = np.arange(500, 5500, 500)
            m = np.zeros(len(iter_range))
            for j, n_epochs in enumerate(iter_range):
                # The ``n_iter`` argument was removed from scikit-learn;
                # ``max_iter`` with ``tol=None`` runs the same fixed number
                # of epochs.
                per = linear_model.Perceptron(max_iter=int(n_epochs),
                                              tol=None)
                acc = np.zeros(n_labels)
                for i in range(n_labels):
                    # Train and evaluate one binary classifier per label.
                    per.fit(X_train, y_train[:, i])

                    y_pred[:, i] = per.predict(X_test)
                    # Accuracy for this label.
                    acc[i] = accuracy_score(y_test[:, i],
                                            y_pred[:, i],
                                            normalize=True)

                m[j] = np.mean(acc)
                print("iteration {}".format(n_epochs))
                print("accuracy mean {}, variance {}".format(
                    m[j], np.var(acc)))
                # Accuracy over all labels at once.
                print("accuacy {}".format(
                    accuracy_score(y_test, y_pred, normalize=True)))
                if n_labels > 10:
                    break
            plt.plot(iter_range, m, '-o')
            plt.title("BR perceptron for \"emotions\"")
            plt.yticks(np.arange(0, 1.1, 0.1))
            plt.show()
예제 #8
0
    def decide_model(self, datapoints, labels):
        """Pick the candidate classifier with the best held-out score.

        For each candidate the data is reshuffled, the first tenth is held
        out as a test set, the classifier is fitted on the rest, and a score
        is taken from element ``[1]`` of
        ``self.get_precission_from_report`` applied to the classification
        report of the held-out predictions.

        Args:
            datapoints: Array of samples, indexable by an integer array.
            labels: Array of labels aligned with ``datapoints``.

        Returns:
            The fitted classifier with the highest score, or ``None`` if no
            candidate scored above 0.
        """
        classifiers = [
            RandomForestClassifier(n_estimators=4),
            svm.SVC(probability=True),
            linear_model.Perceptron(),
            linear_model.SGDClassifier(shuffle=True),
        ]

        best = 0
        model = None
        for clf in classifiers:
            # Reshuffle so each candidate gets its own random split.
            p = np.random.permutation(len(labels))
            datapoints = datapoints[p]
            labels = labels[p]

            # Floor division keeps the slice index an int on Python 3 too.
            partition = datapoints.shape[0] // 10
            Tr_data = datapoints[partition:]
            Tr_labels = labels[partition:]
            Te_data = datapoints[:partition]
            Te_labels = labels[:partition]

            fit = clf.fit(Tr_data, Tr_labels)
            Te_pred = fit.predict(Te_data)

            cr = classification_report(Te_labels, Te_pred)
            trues = self.get_precission_from_report(cr)[1]

            if trues > best:
                best = trues
                model = fit

        # A single-argument print call gives the same output on Python 2
        # and Python 3.
        print('%s %s' % (type(model), best))
        return model
예제 #9
0
    def Classify(self, N, D, Distance):
        """Train a Perceptron on generated data and evaluate it.

        Training data comes from ``SimClasses().GetData(N, D, Distance)``.
        The model is then evaluated on a fresh sample of 100 points from the
        same generator.

        Args:
            N: Number of training samples requested from the generator.
            D: Passed through to ``GetData``.
            Distance: Passed through to ``GetData``.

        Returns:
            A tuple ``(accuracy, elapsed, parameters)``: the number of
            correctly classified test points (a count, not a fraction), the
            training time in seconds, and the fitted ``coef_``.
        """
        # Generate the train data
        cp = SimClasses()
        Xtr, Ytr = cp.GetData(N, D, Distance)

        # Train the model. time.clock() was removed in Python 3.8;
        # perf_counter() is the recommended clock for measuring intervals.
        pr = lm.Perceptron()
        start = time.perf_counter()
        pr.fit(Xtr, Ytr)
        elapsed = time.perf_counter() - start

        # Test on a fixed-size sample of 100 points.
        N = 100
        Xte, Yte = cp.GetData(N, D, Distance)
        Z = pr.predict(Xte)
        parameters = pr.coef_

        # Count the test points whose prediction matches the label.
        accuracy = int(np.count_nonzero(Yte.reshape(1, N) == Z))
        return accuracy, elapsed, parameters
def perceptron():
    """Fit a Perceptron on the classification data and print its accuracy.

    The data comes from ``load_classification_data()``. The model is
    evaluated on the same data it was trained on. Each prediction is mapped
    to -1 if negative and +1 otherwise before being compared with the
    label. A hard-coded reference accuracy for a from-scratch implementation
    is printed alongside.
    """
    print(' ')
    print('===== Perceptron =====')

    (X, L) = load_classification_data()

    # Instantiate and fit a perceptron classifier.
    # X:  Training data.
    # L:  Training labels.
    model = linear_model.Perceptron()
    model.fit(X, L)

    # Predict every sample in one call rather than one call per row, then
    # count the predictions whose sign matches the label.
    predictions = model.predict(X)
    correct = sum(
        1 for predicted, label in zip(predictions, L)
        if (-1 if predicted < 0.0 else +1) == label)

    print(' ')
    print('Perceptron using Sklearn:')
    print('\tAccuracy:', round((correct / len(X)), 2))
    print(' ')
    print('Perceptron from Scratch:')
    print('\tAccuracy:', 0.99)
    print(' ')
예제 #11
0
# Gradient Boosting Classifier: fit on the training split, predict the test
# split, and report 10-fold cross-validation scores on the training split.
# NOTE(review): X_train, Y_train, X_test, Y_test and `classifiers` are defined
# outside this excerpt.
clf = GradientBoostingClassifier()
clf.set_params(n_estimators=100, learning_rate=0.25)
gb_clf = clf.fit(X_train, Y_train)
gb_predict = gb_clf.predict(X_test)
# Test-set accuracy; stored but not printed in this excerpt.
gb_acc = accuracy_score(Y_test, gb_predict)
accuracy = cross_val_score(clf, X_train, Y_train, cv=10, scoring='accuracy')
f_score = cross_val_score(clf, X_train, Y_train, cv=10, scoring='f1_micro')
print("GradientBoostingClassifier:")
print("Accuracy" + "                " + "F-Score")
print(accuracy.mean(), " - ", f_score.mean())
# Register the estimator under its name.
classifiers["GradientBoostingClassifier"] = clf

# Perceptron: same procedure with default hyper-parameters.
clf = linear_model.Perceptron()
#clf.set_params(max_iter = 1000,alpha = 0.01)
pt_clf = clf.fit(X_train, Y_train)
pt_predict = pt_clf.predict(X_test)
# Test-set accuracy; stored but not printed in this excerpt.
pt_acc = accuracy_score(Y_test, pt_predict)
accuracy = cross_val_score(clf, X_train, Y_train, cv=10, scoring='accuracy')
f_score = cross_val_score(clf, X_train, Y_train, cv=10, scoring='f1_micro')
print("Perceptron:")
print("Accuracy" + "                " + "F-Score")
print(accuracy.mean(), " - ", f_score.mean())
classifiers["Perceptron"] = clf

print(
    "Next we proceed to apply Feature Scaling to see if the performance of our various classifiers improves"
)
# Feature scaling aims to bring the values of our numerical features between 0 and 1.
import sklearn.linear_model as lm
import numpy as np
import SimplePerceptron as sp

# Load the MNIST train and test sets through the project's SimplePerceptron
# helper, fit a scikit-learn Perceptron and report its scores.
print("load training data")
trainData, trainLabel = sp.loadData('mnist_train.csv', 'training')
print("load test data")
testData, testLabel = sp.loadData('mnist_test.csv', 'test')

perceptron = lm.Perceptron()
perceptron.fit(trainData, trainLabel)
# Learned weights, bias, and the iteration count reported by the estimator.
w = perceptron.coef_
b = perceptron.intercept_
print("w:",w,"\n", "b:", b, "\n", "n_iter:", perceptron.n_iter_)

# score() on the training data, printed as a whole-number percentage.
res = perceptron.score(trainData, trainLabel)
print("correct rate on training set:{:.0%}".format(res))

# score() on the test data, printed the same way.
res2 = perceptron.score(testData, testLabel)
print("correct rate on test set:{:.0%}".format(res2))
예제 #13
0
    ensemble.BaggingClassifier(),
    ensemble.ExtraTreesClassifier(),
    ensemble.GradientBoostingClassifier(),
    ensemble.RandomForestClassifier(),

    #Gaussian Processes
    #gaussian_process.GaussianProcessClassifier(),

    #GLM
    linear_model.LogisticRegressionCV(),
    linear_model.LogisticRegression(C=1000, random_state=0,
                                    solver='liblinear'),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),

    #Navies Bayes
    naive_bayes.BernoulliNB(),
    #naive_bayes.GaussianNB(),

    #Nearest Neighbor
    neighbors.KNeighborsClassifier(),

    #SVM
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),

    #Trees
    tree.DecisionTreeClassifier(),
예제 #14
0
 def get_skl_estimator(self, **default_parameters):
     """Build a scikit-learn Perceptron from the given keyword arguments.

     All keyword arguments are forwarded unchanged to the Perceptron
     constructor, and the new estimator is returned.
     """
     estimator = linear_model.Perceptron(**default_parameters)
     return estimator
예제 #15
0
파일: usps.py 프로젝트: ATidiane/ARF
def error_curves(class1, whichclf="implemented_perceptron"):
    """Plot learning and test curves for ``class1`` against other classes.

    For every class ``i`` in ``range(9)`` other than ``class1``, the USPS
    train and test sets restricted to the pair ``(class1, i)`` are loaded
    and a perceptron is trained with ``max_iter`` set to 1, 2, ..., 39.
    One curve per pair is drawn on each of two side-by-side axes (training
    on the left, test on the right) and the figure is shown.

    With ``whichclf == "implemented_perceptron"`` the project's own
    ``Perceptron`` is used and the value of its ``score`` method is plotted
    as-is. Otherwise scikit-learn's Perceptron is used and ``1 - score`` is
    plotted.

    :param class1: the class compared against every other class.
    :param whichclf: selects the perceptron implementation.
    """
    # One colour and one marker per opposing class index.
    palette = [
        "red", "green", "blue", "orange", "black", "cyan", "yellow", "magenta",
        "green"
    ]
    glyphs = [".", "+", "*", "o", "x", "^", ',', 'v', '^']

    fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True)
    plt.suptitle(
        "Courbes d’erreurs en apprentissage et en test en\n fonction du "
        "nombre d'itérations")

    ax1.set_title("Apprentissage")
    ax2.set_title("Test")

    use_custom = whichclf == "implemented_perceptron"
    # Iteration budgets evaluated for every pair of classes.
    budgets = range(1, 40)

    for other in range(9):
        if other == class1:
            continue
        # Load the USPS samples belonging to the two classes.
        datax, datay = extract_usps("USPS_train.txt", class1, other)
        dataTx, dataTy = extract_usps("USPS_test.txt", class1, other)

        # One value per iteration budget, for training and for test.
        train_curve, test_curve = [], []
        for budget in budgets:
            if use_custom:
                clf = Perceptron(loss=hinge,
                                 loss_g=hinge_g,
                                 max_iter=budget,
                                 eps=0.1,
                                 kernel=None)
                clf.fit(datax, datay)
                train_curve.append(clf.score(datax, datay))
                test_curve.append(clf.score(dataTx, dataTy))
            else:
                clf = linear_model.Perceptron(max_iter=budget, n_jobs=-1)
                clf.fit(datax, datay)
                train_curve.append(1 - clf.score(datax, datay))
                test_curve.append(1 - clf.score(dataTx, dataTy))

        pair_label = '{} vs {}'.format(class1, other)
        for axis, curve in ((ax1, train_curve), (ax2, test_curve)):
            axis.plot(budgets,
                      curve,
                      c=palette[other],
                      marker=glyphs[other],
                      label=pair_label)

    for axis in (ax1, ax2):
        axis.legend(loc='upper right', ncol=1, fancybox=True, shadow=True)
    fig.tight_layout(rect=[0, 0.03, 1, 0.85])
    # Saving the figure to disk is intentionally left disabled.
    plt.show()
                            s=70,
                            alpha=.07)
# Finish the scatter plot started above this excerpt: title, axis labels and
# tick positions.
plt.title('Testing set age range vs degree malignant')
plt.xlabel('age range')
plt.ylabel('degree malignant')
plt.xticks(np.arange(0, 100, 10.0))
plt.yticks(np.arange(0, 3, 1.0))
plt.show()

#########################
# Logistic Regression
#########################
# NOTE(review): the section header says "Logistic Regression", but the code
# visible in this excerpt fits a Perceptron.

# Perceptron
# NOTE(review): X_train, Y_train, X_test, Y_test and confusion_matrix are
# defined outside this excerpt.
print("Perceptron")
perceptron = lm.Perceptron(verbose=1)
perceptron.fit(X_train, Y_train)

Y_pred = perceptron.predict(X_test)
# Dump the fitted model's attributes.
print("\n\nPerceptron")
print("\tNumber of Features...", perceptron.n_features_in_)
# X_train exposes `.columns`, so it is presumably a pandas DataFrame -- confirm.
print("\tColumns", X_train.columns)
print("\tCoefficients", perceptron.coef_)
print("\tIntercept", perceptron.intercept_)

print('\nAccuracy of perceptron on test set: {:.2f}'.format(
    perceptron.score(X_test, Y_test)))

print("Confusion_Matrix...")
confusion_matrixP = confusion_matrix(Y_test, Y_pred)
print(confusion_matrixP)
from sklearn import datasets, linear_model, svm, neighbors, naive_bayes
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset bundled with scikit-learn.
breast_cancer = datasets.load_breast_cancer()
x, y = breast_cancer.data, breast_cancer.target

# Split the train and test samples: the last 100 samples are held out for
# testing, everything before them is used for training.
test_samples = 100
x_train, y_train = x[:-test_samples], y[:-test_samples]
x_test, y_test = x[-test_samples:], y[-test_samples:]

# Instantiate the learners (classifiers)
learner_1 = neighbors.KNeighborsClassifier(n_neighbors=5)
learner_2 = linear_model.Perceptron(tol=1e-2, random_state=0)
learner_3 = svm.SVC(gamma=0.001)

# Instantiate the voting classifier from (name, estimator) pairs.
voting = VotingClassifier([('KNN', learner_1), ('Prc', learner_2),
                           ('SVM', learner_3)])

# Fit classifier with the training data
voting.fit(x_train, y_train)

# Predict the most voted class
hard_predictions = voting.predict(x_test)

# Accuracy of hard voting
print('Hard Voting:', accuracy_score(y_test, hard_predictions))
예제 #18
0
파일: ldf.py 프로젝트: shubham1310/mlt
# NOTE(review): this block uses Python 2 print statements. count_vect,
# count_vect2, ttdata and the initial train_data are defined outside this
# excerpt.
train_data = count_vect2.fit_transform(train_data)
train_data = train_data.toarray()
# for x in train_data[0]:
#     print x
# print train_data[0]
# print train_data.shape
# vocab=count_vect.get_feature_names()
#print vocab
# dist = np.sum(train_data, axis=0)
# for tag, count in zip(vocab, dist):
#     print count, tag

# Evaluate a Perceptron once for each part index 1..10 returned by ttdata.
for i in range(1, 11):
    [data, label, size] = ttdata(i)
    # Both vectorizers are re-fitted on every iteration's data.
    data = count_vect.fit_transform(data)
    data = data.toarray()
    data = count_vect2.fit_transform(data)
    data = data.toarray()
    # The first `size` rows are used for training.
    traindata = data[:size]
    trainlabel = label[:size]
    # NOTE(review): the test slice starts at size - 1, so row size - 1 is in
    # both the training and the test slice -- confirm this overlap is intended.
    testdata = data[size - 1:]
    testlabel = label[size - 1:]
    clf = linear_model.Perceptron(alpha=0, fit_intercept=True)
    clf.fit(traindata, trainlabel)
    prediction = clf.predict(testdata)
    # The trailing comma keeps the accuracy on the same output line.
    print 'Taking part %d as the test data the accuracy is:' % (i),
    print '%.4f' % (metrics.accuracy_score(prediction, testlabel))
    # print(metrics.classification_report(testlabel,prediction))
# for i in range(577,867):
#     print prediction[i-577],label[i]
예제 #19
0
def perceptron(data_train, data_test, target_train, target_test):
    """Fit a class-balanced, unpenalised Perceptron and summarise its results.

    The model is trained on ``data_train`` / ``target_train`` and used to
    predict ``data_test``. The true test targets and the predictions are
    handed to ``classifierStats``, whose result is returned.
    """
    classifier = linear_model.Perceptron(penalty=None,
                                         class_weight='balanced')
    classifier.fit(data_train, target_train)
    predicted = classifier.predict(data_test)
    stats = classifierStats(target_test, predicted)
    return stats
 def __init__(self):
     """Set the two class labels and create an L1-penalised Perceptron."""
     self.classes = [True, False]
     perceptron_options = dict(penalty='l1', alpha=0.0008)
     self.classifier = linear_model.Perceptron(**perceptron_options)
 def model(self, **kwargs):
     """Return a Perceptron configured from ``kwargs`` with fallbacks.

     Recognised keys are ``alpha`` (default 0.1), ``max_iter`` (default 100)
     and ``tol`` (default 0.01). Any other key is ignored.
     """
     fallbacks = {'alpha': 0.1, 'max_iter': 100, 'tol': 0.01}
     settings = {
         key: kwargs.get(key, fallback)
         for key, fallback in fallbacks.items()
     }
     return linear_model.Perceptron(**settings)
예제 #22
0
 def __init__(self):
     """Create the underlying Perceptron with ``warm_start`` enabled."""
     estimator = linear_model.Perceptron(warm_start=True)
     self.model = estimator
예제 #23
0
    'features': {
        'C': [0.1, 1, 100],
        'gamma': [0.001, 0.1, 10],
        'kernel': ['rbf'],
        'class_weight': ['auto'],
        'random_state': [random_seed]
    },
    'model': svm.SVC()
}

# Each entry pairs an estimator ('model') with a dict of candidate parameter
# values ('features').
# NOTE(review): the consumer of these dicts is outside this excerpt;
# presumably 'features' is used as a hyper-parameter search grid -- confirm.
# random_seed is defined outside this excerpt.
perceptron = {
    'features': {
        'penalty': [None, 'l2', 'l1', 'elasticnet'],
        'random_state': [random_seed]
    },
    'model': linear_model.Perceptron()
}

# Gaussian naive Bayes with no candidate parameters.
n_bayes = {'features': {}, 'model': naive_bayes.GaussianNB()}

# PCA followed by logistic regression; the parameter keys are prefixed with
# the pipeline step name and a double underscore.
pca_logit = {
    'features': {
        'pca__n_components': [0.001, 0.33, 0.67],
        'logit__C': [0.001, 0.01, 1, 100, 1000],
        'logit__random_state': [random_seed]
    },
    'model':
    Pipeline(steps=[(
        'pca',
        decomposition.PCA()), ('logit', linear_model.LogisticRegression())])
}
예제 #24
0
    mean_squared_error(datas['Livraisons réelles'].values, predicto)
)  #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto)

## LINEAR MODEL SGDRegressor ----> 10^5
# NOTE(review): the number after the arrow is presumably the error obtained
# with this model -- confirm. `datas`, `sqrt` and `mean_squared_error` are
# defined outside this excerpt.
# The model is fitted and evaluated on the same data.
from sklearn import linear_model
regr = linear_model.SGDRegressor()
regr.fit(list(datas['Historique'].values),
         list(datas['Livraisons réelles'].values))
predicto = regr.predict(list(datas['Historique'].values))
# Root of the mean squared error between the targets and the predictions.
ecartmoy = sqrt(
    mean_squared_error(datas['Livraisons réelles'].values, predicto)
)  #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto)

## LINEAR MODEL Perceptron ----> 307
# NOTE(review): Perceptron is a classifier, yet it is scored here with the
# same squared-error measure as the regressor above -- confirm this is intended.
from sklearn import linear_model
regr = linear_model.Perceptron()
regr.fit(list(datas['Historique'].values),
         list(datas['Livraisons réelles'].values))
predicto = regr.predict(list(datas['Historique'].values))
ecartmoy = sqrt(
    mean_squared_error(datas['Livraisons réelles'].values, predicto)
)  #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto)

## MODELE LINEAIRE PassiveAggressiveClassifier ----> 260
from sklearn import linear_model
regr = linear_model.PassiveAggressiveClassifier()
regr.fit(list(datas['Historique'].values),
         list(datas['Livraisons réelles'].values))
predicto = regr.predict(list(datas['Historique'].values))
ecartmoy = sqrt(
    mean_squared_error(datas['Livraisons réelles'].values, predicto)
예제 #25
0
 def reinit(self):
     """Re-run base-class initialisation and rebuild the Perceptron learner.

     The current parameters are read through ``get_params``. The
     ``data_opts`` entry is handed to the parent initialiser and removed,
     and every remaining parameter is forwarded to the Perceptron
     constructor.
     """
     learner_kwargs = self.get_params()
     super().__init__(learner_kwargs["data_opts"])
     # data_opts belongs to the parent class only, not to the Perceptron.
     learner_kwargs.pop("data_opts")
     self.learner_ = sklm.Perceptron(**learner_kwargs)
# NOTE(review): min_max_scaler, X, X_test, y_train, y_test and confusionMatrix
# are defined outside this excerpt.
X = min_max_scaler.fit_transform(X)

# Remove the columns that contribute nothing (VarianceThreshold with its
# default settings).
selector = VarianceThreshold()
X = selector.fit_transform(X)
# Expand the features with polynomial terms of degree 2.
poly = PolynomialFeatures(degree=2)
X = poly.fit_transform(X)
# Once normalized, split back into the same train and test groups: the first
# X_test.shape[0] rows are the test set, the remaining rows the training set.
X_test = X[:X_test.shape[0], :]
X_train = X[X_test.shape[0]:, :]

#------------- Choice of function class

model = linear_model.LogisticRegression(penalty='l2', multi_class='ovr', C=1)
model.fit(X_train, y_train.ravel())
print("---------Regresion logistica---------")

prediccion = model.predict(X_test)
confusionMatrix(prediccion, y_test, "CM de la Regresion Logistica")
# The values printed under the "E_in" / "E_out" labels are the results of
# model.score on the training and test sets.
print("E_in: " + str(model.score(X_train, y_train)))
print("E_out: " + str(model.score(X_test, y_test)))

print("---------Perceptron---------")
perceptron = linear_model.Perceptron(tol=0, penalty='l1')
perceptron.fit(X_train, y_train.ravel())
prediccion = perceptron.predict(X_test)
confusionMatrix(prediccion, y_test, "CM del Perceptron")

print("E_in: " + str(perceptron.score(X_train, y_train)))
print("E_out: " + str(perceptron.score(X_test, y_test)))
# Parameterize the data: keep only the first 1000 rows.
# NOTE(review): `data` is defined outside this excerpt; it exposes `.values`
# and `.label`.
data = data.head(1000)
#Display the values and lables
#print('Image Values: ', data.values)
#print('Image label : ', data.label)

# Per-fold measurements. Only trainingTime is filled in within this excerpt.
trainingTime = []
predictionTime = []
predictionAccuracy = []

# K-Fold cross validation with two shuffled folds.
kf = model_selection.KFold(n_splits=2, shuffle=True)
for train_index,test_index in kf.split(data.values):
    # Linear perceptron
    clf1 = linear_model.Perceptron()
    #clf2 = svm.SVC(kernel="rbf", gamma=1e-3)
    #clf3 = svm.SVC(kernel="sigmoid", gamma=1e-4)

    # Starting time for train. The value printed is the raw time.time()
    # reading, not a duration.
    trainStartTime = time.time()
    print('\nTrain Start Time was %g seconds :'%trainStartTime )

    clf1.fit(data.values[train_index], data.label[train_index ])

    # End time for train
    trainEndTime = time.time()
    print('Train End Time was %g seconds'%trainEndTime )
    print('Total Proess Elapsed time was %g seconds '% (trainEndTime - trainStartTime ))
    # Add the elapsed training time to the list.
    trainingTime.append(trainEndTime - trainStartTime )
예제 #28
0
from sklearn import tree, neighbors, svm, metrics, linear_model
import numpy as np


# [height, width, shoe size]
# NOTE(review): "width" is presumably weight -- confirm.
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40], [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37],
     [171, 75, 42], [181, 85, 43]]

# One label per row of X.
Y = ['male', 'female', 'female', 'female', 'male', 'male', 'male', 'female', 'male', 'female', 'male']

# Classifiers, all with default hyper-parameters.
clf = tree.DecisionTreeClassifier()
clf_svc = svm.SVC()
clf_pt = linear_model.Perceptron()
neigh = neighbors.KNeighborsClassifier()

# Training models
clf = clf.fit(X, Y)
clf_svc = clf_svc.fit(X, Y)
clf_pt = clf_pt.fit(X, Y)
neigh = neigh.fit(X, Y)

# Testing the same data: accuracy is measured on the training samples
# themselves and expressed as a percentage. One extra sample is also
# classified and printed.
predict_clf = clf.predict(X)
acc_dtc = metrics.accuracy_score(Y, predict_clf) * 100
result_dtc = clf.predict([[190, 70, 43]])
print('Accuracy for DecisionTreeClassifier: {}'.format(acc_dtc))
print(result_dtc)

predict_svc = clf_svc.predict(X)
acc_svc = metrics.accuracy_score(Y, predict_svc) * 100
예제 #29
0
def main():
    ####################
    #####  Task 1  #####
    ####################
    data = pd.read_csv(
        "product_images.csv")  # Load the product images and labels

    labels = data["label"]  # Keep only the labels
    feature_vectors = data.drop("label", axis=1)  # Keep the pixel values
    print(labels.head())
    print(type(labels))
    print(feature_vectors.head())
    print(type(feature_vectors))

    # Print amount of each type of images
    print("There are:"
          "\n\t%d images of sneakers"
          "\n\t%d images of ankle boots" %
          (labels[labels == 0].size, labels[labels == 1].size))

    # Get and show first sneaker
    plt.imshow(
        np.array(feature_vectors.iloc[labels[labels == 0].index[0]]).reshape(
            28, 28))
    plt.show()

    # Get and show first ankle boot
    plt.imshow(
        np.array(feature_vectors.iloc[labels[labels == 1].index[0]]).reshape(
            28, 28))
    plt.show()

    # Parameterised data
    feature_vectors_parameterised = feature_vectors.sample(6000)
    labels_parameterised = labels[feature_vectors_parameterised.index]

    # Print parameterised statistics
    print("Parameterised dataset contains:"
          "\n\t%d images of sneakers"
          "\n\t%d images of ankle boots" %
          (labels_parameterised[labels_parameterised == 0].size,
           labels_parameterised[labels_parameterised == 1].size))

    ####################
    #####  Task 2  #####
    ####################
    number_of_kfolds = 5
    print("#######################")
    print("#     Perceptron      #")
    print("# Number of kfolds: %d #" % number_of_kfolds)
    print("#######################")

    perceptron_training_times = []
    perceptron_prediction_times = []
    perceptron_prediction_accuracies = []
    current_fold = 0
    kf = model_selection.KFold(n_splits=number_of_kfolds, shuffle=True)
    for train_index, test_index in kf.split(feature_vectors_parameterised,
                                            labels_parameterised):
        current_fold += 1
        feature_vectors_parameterised_train_fold = feature_vectors_parameterised.iloc[
            train_index]
        feature_vectors_parameterised_test_fold = feature_vectors_parameterised.iloc[
            test_index]
        labels_parameterised_train_fold = labels_parameterised.iloc[
            train_index]
        labels_parameterised_test_fold = labels_parameterised.iloc[test_index]

        perceptron_classifier = linear_model.Perceptron()

        perceptron_fit_start_time = time.time()
        perceptron_classifier.fit(feature_vectors_parameterised_train_fold,
                                  labels_parameterised_train_fold)
        perceptron_fit_end_time = time.time()

        perceptron_predict_start_time = time.time()
        perceptron_prediction = perceptron_classifier.predict(
            feature_vectors_parameterised_test_fold)
        perceptron_predict_end_time = time.time()

        perceptron_accuracy_score = metrics.accuracy_score(
            labels_parameterised_test_fold, perceptron_prediction)

        perceptron_training_times.append(perceptron_fit_end_time -
                                         perceptron_fit_start_time)
        perceptron_prediction_times.append(perceptron_predict_end_time -
                                           perceptron_predict_start_time)
        perceptron_prediction_accuracies.append(perceptron_accuracy_score)

        true_negative, false_positive, false_negative, true_positive = confusion_matrix(
            labels_parameterised_test_fold, perceptron_prediction).ravel()
        print("\t## Fold number: %d ##" % current_fold)
        print("\t\t# Training time",
              perceptron_fit_end_time - perceptron_fit_start_time)
        print("\t\t# Predicting time",
              perceptron_predict_end_time - perceptron_predict_start_time)
        print("\t\t# Perceptron accuracy score: ", perceptron_accuracy_score)
        print("\t\t# true negative", true_negative)
        print("\t\t# false positive", false_positive)
        print("\t\t# false negative", false_negative)
        print("\t\t# true positive", true_positive)

    print("### Training Times (in ms) ###")
    print("# Minimum: ", min(perceptron_training_times))
    print("# Maximum: ", max(perceptron_training_times))
    print("# Average: ",
          sum(perceptron_training_times) / len(perceptron_training_times))
    print("### Prediction Times (in ms) ###")
    print("# Minimum: ", min(perceptron_prediction_times))
    print("# Maximum: ", max(perceptron_prediction_times))
    print("# Average: ",
          sum(perceptron_prediction_times) / len(perceptron_prediction_times))
    print("### Accuracies ###")
    print("# Minimum: ", min(perceptron_prediction_accuracies))
    print("# Maximum: ", max(perceptron_prediction_accuracies))
    print(
        "# Average: ",
        sum(perceptron_prediction_accuracies) /
        len(perceptron_prediction_accuracies))

    ####################
    #####  Task 3  #####
    ####################
    print()
    print("#########################")
    print("#    Linear Kernel      #")
    print("#         And           #")
    print("# Radial Basis Function #")
    print("#  Number of kfolds: %d  #" % number_of_kfolds)
    print("#########################")
    linear_kernel_training_times = []
    linear_kernel_prediction_times = []
    linear_kernel_prediction_accuracies = []
    radial_basis_function_training_times = []
    radial_basis_function_prediction_times = []
    radial_basis_function_prediction_accuracies = []
    current_fold = 0
    kf = model_selection.KFold(n_splits=number_of_kfolds, shuffle=True)
    for train_index, test_index in kf.split(feature_vectors_parameterised,
                                            labels_parameterised):
        current_fold += 1
        feature_vectors_parameterised_train_fold = feature_vectors_parameterised.iloc[
            train_index]
        feature_vectors_parameterised_test_fold = feature_vectors_parameterised.iloc[
            test_index]
        labels_parameterised_train_fold = labels_parameterised.iloc[
            train_index]
        labels_parameterised_test_fold = labels_parameterised.iloc[test_index]

        linear_kernel_classifier = svm.SVC(kernel="linear", gamma=1e-3)
        radial_basis_function_classifier = svm.SVC(kernel="rbf", gamma=1e-7)

        linear_kernel_fit_start_time = time.time()
        linear_kernel_classifier.fit(feature_vectors_parameterised_train_fold,
                                     labels_parameterised_train_fold)
        linear_kernel_fit_end_time = time.time()
        radial_basis_function_fit_start_time = time.time()
        radial_basis_function_classifier.fit(
            feature_vectors_parameterised_train_fold,
            labels_parameterised_train_fold)
        radial_basis_function_fit_end_time = time.time()

        linear_kernel_predict_start_time = time.time()
        linear_kernel_prediction = linear_kernel_classifier.predict(
            feature_vectors_parameterised_test_fold)
        linear_kernel_predict_end_time = time.time()
        radial_basis_function_predict_start_time = time.time()
        radial_basis_function_prediction = radial_basis_function_classifier.predict(
            feature_vectors_parameterised_test_fold)
        radial_basis_function_predict_end_time = time.time()

        linear_kernel_accuracy_score = metrics.accuracy_score(
            labels_parameterised_test_fold, linear_kernel_prediction)
        radial_basis_function_accuracy_score = metrics.accuracy_score(
            labels_parameterised_test_fold, radial_basis_function_prediction)

        linear_kernel_training_times.append(linear_kernel_fit_end_time -
                                            linear_kernel_fit_start_time)
        linear_kernel_prediction_times.append(linear_kernel_predict_end_time -
                                              linear_kernel_predict_start_time)
        linear_kernel_prediction_accuracies.append(
            linear_kernel_accuracy_score)
        radial_basis_function_training_times.append(
            radial_basis_function_fit_end_time -
            radial_basis_function_fit_start_time)
        radial_basis_function_prediction_times.append(
            radial_basis_function_predict_end_time -
            radial_basis_function_predict_start_time)
        radial_basis_function_prediction_accuracies.append(
            radial_basis_function_accuracy_score)

        l_true_negative, l_false_positive, l_false_negative, l_true_positive = confusion_matrix(
            labels_parameterised_test_fold, linear_kernel_prediction).ravel()
        rbf_true_negative, rbf_false_positive, rbf_false_negative, rbf_true_positive = confusion_matrix(
            labels_parameterised_test_fold,
            radial_basis_function_prediction).ravel()

        print("\t## Fold number: %d ##" % current_fold)
        print("\t\t# Linear Kernel")
        print("\t\t\t# Training time",
              linear_kernel_fit_end_time - linear_kernel_fit_start_time)
        print(
            "\t\t\t# Predicting time",
            linear_kernel_predict_end_time - linear_kernel_predict_start_time)
        print("\t\t\t# Perceptron accuracy score: ",
              linear_kernel_accuracy_score)
        print("\t\t\t# true negative", l_true_negative)
        print("\t\t\t# false positive", l_false_positive)
        print("\t\t\t# false negative", l_false_negative)
        print("\t\t\t# true positive", l_true_positive)
        print("\t\t# Radial Basis Function Kernel")
        print(
            "\t\t\t# Training time", radial_basis_function_fit_end_time -
            radial_basis_function_fit_start_time)
        print(
            "\t\t\t# Predicting time", radial_basis_function_predict_end_time -
            radial_basis_function_predict_start_time)
        print("\t\t\t# Perceptron accuracy score: ",
              radial_basis_function_accuracy_score)
        print("\t\t\t# true negative", rbf_true_negative)
        print("\t\t\t# false positive", rbf_false_positive)
        print("\t\t\t# false negative", rbf_false_negative)
        print("\t\t\t# true positive", rbf_true_positive)

    print("##### Linear Kernel")
    print("\t### Training Times (in ms) ###")
    print("\t\t# Minimum: ", min(linear_kernel_training_times))
    print("\t\t# Maximum: ", max(linear_kernel_training_times))
    print(
        "\t\t# Average: ",
        sum(linear_kernel_training_times) / len(linear_kernel_training_times))
    print("\t### Prediction Times (in ms) ###")
    print("\t\t# Minimum: ", min(linear_kernel_prediction_times))
    print("\t\t# Maximum: ", max(linear_kernel_prediction_times))
    print(
        "\t\t# Average: ",
        sum(linear_kernel_prediction_times) /
        len(linear_kernel_prediction_times))
    print("\t### Accuracies ###")
    print("\t\t# Minimum: ", min(linear_kernel_prediction_accuracies))
    print("\t\t# Maximum: ", max(linear_kernel_prediction_accuracies))
    print(
        "\t\t# Average: ",
        sum(linear_kernel_prediction_accuracies) /
        len(linear_kernel_prediction_accuracies))
    print("##### Radial Basis Function Kernel")
    print("\t### Training Times (in ms) ###")
    print("\t\t# Minimum: ", min(radial_basis_function_training_times))
    print("\t\t# Maximum: ", max(radial_basis_function_training_times))
    print(
        "\t\t# Average: ",
        sum(radial_basis_function_training_times) /
        len(radial_basis_function_training_times))
    print("\t### Prediction Times (in ms) ###")
    print("\t\t# Minimum: ", min(radial_basis_function_prediction_times))
    print("\t\t# Maximum: ", max(radial_basis_function_prediction_times))
    print(
        "\t\t# Average: ",
        sum(radial_basis_function_prediction_times) /
        len(radial_basis_function_prediction_times))
    print("\t### Accuracies ###")
    print("\t\t# Minimum: ", min(radial_basis_function_prediction_accuracies))
    print("\t\t# Maximum: ", max(radial_basis_function_prediction_accuracies))
    print(
        "\t\t# Average: ",
        sum(radial_basis_function_prediction_accuracies) /
        len(radial_basis_function_prediction_accuracies))
예제 #30
0
def mx_Perceptron(train_x, train_y):
    """Train a default-configured Perceptron and hand back the fitted model.

    ``linear_model`` is resolved from the enclosing module (presumably
    scikit-learn's ``sklearn.linear_model`` -- confirm against the file's
    imports).

    Args:
        train_x: Training samples, passed unchanged to ``fit``.
        train_y: Training targets, passed unchanged to ``fit``.

    Returns:
        The ``Perceptron`` instance after ``fit`` has been called on it.
    """
    classifier = linear_model.Perceptron()
    # Fit in place and return the same object.
    classifier.fit(train_x, train_y)
    return classifier