コード例 #1
0
class ClassificationPLA(ClassficationBase.ClassificationBase):
    """Perceptron (PLA) classifier built on the project's ClassificationBase.

    Data loading/splitting comes from the base class; this subclass only
    defines preprocessing, training and prediction.
    """

    def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval)

        # data preprocessing
        self.dataPreprocessing()

        # underlying scikit-learn perceptron
        self.clf = Perceptron()


    def dataPreprocessing(self):
        # deal with unbalanced data (helper provided by the base class)
        self.dealingUnbalancedData()

        # Standardization is intentionally disabled
        #self.Standardization()



    def training(self):
        # train the perceptron model (original comment wrongly said KNN)
        self.clf.fit(self.X_train, self.y_train.ravel())

    def predict(self):
        # predict the test data
        self.y_pred = self.clf.predict(self.X_test)

        # reshape to a column vector so it compares elementwise with y_test
        self.y_pred = self.y_pred.reshape((self.y_pred.shape[0], 1))
        err = 1 - np.sum(self.y_test == self.y_pred) * 1.0 / self.y_pred.shape[0]
        # print() call runs on both Python 2 and 3; the original used a
        # Python-2-only print statement (SyntaxError under Python 3)
        print("Error rate: {}".format(err))
コード例 #2
0
def PERCEPTRON(data_train, data_train_vectors, data_test_vectors, **kwargs):
    """Fit a Perceptron on the training vectors and return test predictions."""
    model = Perceptron()
    model.fit(data_train_vectors, data_train.target)
    return model.predict(data_test_vectors)
コード例 #3
0
    def run(self):
        """Run the task: fit a perceptron and write its test accuracy."""
        train_data = pd.read_csv(self.param.get('train'))
        test_data = pd.read_csv(self.param.get('test'))

        X_train, y_train = train_data[['1', '2']], train_data['0']
        X_test, y_test = test_data[['1', '2']], test_data['0']

        # optional feature scaling: fit on train only, reuse stats on test
        if self.param.get('scale') is True:
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

        model = Perceptron(random_state=241)
        model.fit(X_train, y_train)
        accuracy = accuracy_score(y_test, model.predict(X_test))

        with self.output().open('w') as output:
            output.write(str(accuracy))
コード例 #4
0
ファイル: Models.py プロジェクト: ineilm/BountyApp
def Perceptron_1(train_predictors,test_predictors,train_target,test_target):
    """Train a Perceptron and return (accuracy, predictions) on the test set."""
    clf = Perceptron()
    clf.fit(train_predictors,train_target)
    predicted = clf.predict(test_predictors)
    accuracy = accuracy_score(test_target, predicted)
    # print() call is valid on both Python 2 and 3; the original
    # Python-2-only print statement is a SyntaxError under Python 3
    print("Accuracy for Linear Model Perceptron: "+str(accuracy))
    return accuracy,predicted
コード例 #5
0
ファイル: main.py プロジェクト: wenzhengong/salary
def percep(X_tr, y_tr, X_te):
    """Perceptron with an explicit bias column added to both feature sets."""
    # NOTE(review): n_iter was renamed max_iter in scikit-learn >= 0.21 —
    # confirm the pinned version before upgrading
    model = Perceptron(n_iter = 1000)
    model.fit(add_dummy_feature(X_tr), y_tr)
    return model.predict(add_dummy_feature(X_te))
コード例 #6
0
ファイル: s1-8.py プロジェクト: wargile/ML1
    def t():
        """Week-2 exercise: Perceptron accuracy with and without feature scaling."""
        # 1 — load train/test CSVs; column 0 is the label, the rest are features
        from pandas import read_csv
        df = read_csv('w2/perceptron-train.csv', header=None)
        dt = read_csv('w2/perceptron-test.csv', header=None)
        yf = df[0]
        xf = df.drop([0], axis=1)
        # print(yf, xf)
        yt = dt[0]
        xt = dt.drop([0], axis=1)
        # print(yt, xt)

        # 2 — fit a fixed-seed perceptron on the raw (unscaled) features
        from sklearn.linear_model import Perceptron
        clf = Perceptron(random_state=241)
        clf.fit(xf, yf)
        af1 = clf.score(xf, yf)
        at1 = clf.score(xt, yt)
        rf = clf.predict(xf)
        rt = clf.predict(xt)
        # print(list(yf))
        # print(pf)
        # print(list(yt))
        # print(pt)

        # 3 — accuracy via accuracy_score; should match clf.score above
        from sklearn.metrics import accuracy_score
        af = accuracy_score(yf, rf)
        at = accuracy_score(yt, rt)
        print(af, at)
        print(af1, at1)

        # 4 — repeat with standardised features (scaler fit on train only)
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        xfs = scaler.fit_transform(xf)
        xts = scaler.transform(xt)
        clf.fit(xfs, yf)
        afs1 = clf.score(xfs, yf)
        ats1 = clf.score(xts, yt)
        pfs = clf.predict(xfs)
        pts = clf.predict(xts)
        afs = accuracy_score(yf, pfs)
        ats = accuracy_score(yt, pts)
        print(afs, ats)
        print(afs1, ats1)
        # 5 — report the accuracy gain from scaling; `pf` is presumably an
        # external print/report helper — TODO confirm it is defined elsewhere
        pf('5', round(ats - at, 3))
コード例 #7
0
def main():
    """Train a Perceptron on iris petal features and print one prediction."""
    iris = load_iris()
    # petal length and petal width only
    features = iris.data[:, (2, 3)]
    # binary target: 1 for Iris setosa, 0 otherwise
    labels = (iris.target == 0.).astype(np.int32)
    model = Perceptron(random_state=42)
    model.fit(features, labels)
    print(model.predict([[2, 0.5]]))
コード例 #8
0
def classify_perceptron():
    """Train a Perceptron on the project's cached features and report accuracy."""
    # print() calls run on both Python 2 and 3; the original Python-2-only
    # print statements are SyntaxErrors under Python 3
    print("perceptron")
    (X_train, y_train), (X_test, y_test) = util.load_all_feat()
    print("original X_train shape {}".format(X_train.shape))
    clf = Perceptron()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print("accuracy score: {}".format(accuracy_score(y_test, pred)))
コード例 #9
0
class PerceptronModel(BaseModel):
    """L2-penalised Perceptron wrapped in the project's BaseModel interface."""

    def __init__(self, cached_features):
        BaseModel.__init__(self, cached_features)
        # fixed seed for reproducible fits
        self.model = Perceptron(penalty="l2", random_state=1)

    def _predict_internal(self, X_test):
        # delegate straight to the fitted scikit-learn estimator
        return self.model.predict(X_test)
コード例 #10
0
def solve(train_set_x, train_set_y, test_set_x, test_set_y):
    """Fit a fixed-seed Perceptron and return its test-set accuracy."""
    model = Perceptron(random_state=241)
    model.fit(X=train_set_x, y=train_set_y)
    return accuracy_score(test_set_y, model.predict(test_set_x))
コード例 #11
0
ファイル: s1-8.py プロジェクト: wargile/ML1
 def t1():
     """Sanity-check Perceptron on a tiny hand-made data set."""
     from sklearn.linear_model import Perceptron
     features = np.array([[1, 2], [3, 4], [5, 6]])
     labels = np.array([0, 1, 0])
     model = Perceptron()
     model.fit(features, labels)
     print(model.predict(features))
コード例 #12
0
ファイル: w2.py プロジェクト: romos/Coursera.IntroML
def perceptron_classifier(data_train, data_test):
    """Compare Perceptron test accuracy with and without feature scaling.

    Returns the accuracy difference (scaled minus unscaled).
    """
    # first column is the label, the remaining columns are features
    X_train = data_train.iloc[:, 1:].values
    y_train = data_train.iloc[:, :1].values.ravel()
    X_test = data_test.iloc[:, 1:].values
    y_test = data_test.iloc[:, :1].values.ravel()

    clf = Perceptron(random_state=241)

    # --- Perceptron without normalization ---
    clf.fit(X_train, y_train)
    accuracy_notnorm = metrics.accuracy_score(y_test, clf.predict(X_test))

    # --- Perceptron with normalization ---
    # FEATURE SCALING: fit_transform() on the training set only — the scaler
    # learns mean/std there ('fit') and normalizes it ('transform'). The test
    # set must NOT re-fit the scaler; re-use the statistics via transform().
    scaler = preprocessing.StandardScaler()
    clf.fit(scaler.fit_transform(X_train), y_train)
    accuracy_norm = metrics.accuracy_score(y_test, clf.predict(scaler.transform(X_test)))

    print('Accuracy (non-normalized):', accuracy_notnorm)
    print('Accuracy (normalized):', accuracy_norm)
    diff = accuracy_norm - accuracy_notnorm
    print('Diff:', diff)

    return diff
コード例 #13
0
ファイル: task5.py プロジェクト: aleksaad4/ML
def get_accuracy(_data_train_features, _data_train_labels, _data_test_features, _data_test_labels):
    """Fit a Perceptron (standard params, random_state=241) and return its
    test-set accuracy (fraction of correctly classified objects)."""
    model = Perceptron(random_state=241, shuffle=True)
    # labels are flattened to 1-D as fit() expects
    model.fit(_data_train_features, numpy.ravel(_data_train_labels))
    return accuracy_score(_data_test_labels, model.predict(_data_test_features))
コード例 #14
0
def neural_net(train, test):
    """Train a Perceptron and return test predictions; prints the test error."""
    xTrain, yTrain = loadData(train)
    xTest, yTest = loadData(test)
    nN = Perceptron()
    nN.fit(xTrain, yTrain)
    y = nN.predict(xTest)
    testError = 1 - nN.score(xTest, yTest)
    # print() call works on Python 2 and 3 (original used a Python-2-only
    # print statement); the dead `y = []` initialiser was dropped
    print('Test error: {}'.format(testError))
    return y
コード例 #15
0
def test():
    """Smoke-test Perceptron on three hand-written samples."""
    samples = np.array([[1, 2], [3, 4], [5, 6]])
    targets = np.array([0, 1, 0])
    model = Perceptron()
    model.fit(samples, targets)

    predictions = model.predict(samples)

    print("Predictions: %s" % predictions)

    print("Accuracy: %s" % accuracy_score(targets, predictions))
def neural_net():
    """Perceptron on the project's scaled train/test split; prints accuracy."""
    Xtrain, ytrain, Xtest, ytest = getSplitData()
    Xtrain, Xtest = getScaledData(Xtrain, Xtest)
    clf = Perceptron()
    clf.fit(Xtrain, ytrain)

    yPredict = clf.predict(Xtest)

    # print() form is valid on Python 2 and 3 (original used a Python-2-only
    # print statement); the unused `ntest` local was removed
    print("neural_net classification accuracy: {}".format(accuracy_score(ytest, yPredict)))
コード例 #17
0
def neural_net(train, test):
    """Train a Perceptron and return test predictions; prints the test error."""
    trainY, trainX = loadData(train)
    testY, testX = loadData(test)

    neuralNet = Perceptron()
    neuralNet.fit(trainX, trainY)
    y = neuralNet.predict(testX)

    testError = 1 - neuralNet.score(testX, testY)
    # print() call behaves identically on Python 2 and 3; the original
    # Python-2-only print statement is a SyntaxError under Python 3
    print('Test error: ' + str(testError))
    return y
コード例 #18
0
def main():
	"""Train a Perceptron on Stack-Overflow question features and write
	per-class predictions to the submission file.

	Reads globals: train_file, test_file, submission_file, feature_names,
	ques_status; sets globals fea, y, probs. Python 2 only (print statements).
	"""
	start = time.time()

	print "Reading train data and its features from: " + train_file
	data = cu.get_dataframe(train_file)
	global fea
	fea = features.extract_features(feature_names,data)

	# NOTE(review): seed= / n_iter= / class_weight="auto" belong to a very old
	# scikit-learn Perceptron API — confirm the pinned version
	percep = Perceptron(penalty=None, alpha=0.0001, fit_intercept=False, n_iter=5, shuffle=False, verbose=1, eta0=1.0, n_jobs=-1, seed=0, class_weight="auto", warm_start=False)

	# X is built but not used below; fea is what gets fitted — presumably a
	# leftover from an earlier version. TODO confirm before deleting.
	X = []
	for i in data["OwnerUndeletedAnswerCountAtPostTime"]:
		X.append([i])
	# Must be array type object. Strings must be converted to
	# to integer values, otherwise fit method raises ValueError
	global y
	y = [] 

	print "Collecting statuses"
	
	# encode each status string as its index in ques_status
	for element in data["OpenStatus"]:
            for index, status in enumerate(ques_status):
                if element == status:
                    y.append(index)
            
	print "Fitting"
	percep.fit(fea, y)
	
	'''Make sure you have the up to date version of sklearn; v0.12 has the
           predict_proba method; http://scikit-learn.org/0.11/install.html '''   
	
	print "Reading test data and features"
	test_data = cu.get_dataframe(test_file)
	test_fea = features.extract_features(feature_names,test_data)

	print "Making predictions"
	global probs
	#probs = percep.predict_proba(test_fea) # only available for binary classification
	probs = percep.predict(test_fea)
	# shape of probs is [n_samples]
	# convert probs to shape [n_samples,n_classes]
	probs = np.resize(probs, (len(probs) / 5, 5))
	
	#if is_full_train_set == 0:
	#	print("Calculating priors and updating posteriors")
	#	new_priors = cu.get_priors(full_train_file)
	#	old_priors = cu.get_priors(train_file)
	#	probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)	

	print "writing submission to " + submission_file
	cu.write_submission(submission_file, probs)
	finish = time.time()
	print "completed in %0.4f seconds" % (finish-start)
コード例 #19
0
ファイル: perceptron.py プロジェクト: unrealwork/ML_VSE
 def __test_perceptron(self, normalized):
     """Return Perceptron test accuracy, optionally standardising features."""
     clf = Perceptron()
     # column 0 holds the label, the remaining columns the features
     X_train = self.train_data.iloc[:, 1:]
     y_train = self.train_data.iloc[:, 0]
     X_test = self.test_data.iloc[:, 1:]
     y_test = self.test_data.iloc[:, 0]
     if normalized:
         # fit the scaler on training data only, then apply to both sets
         scaler = StandardScaler()
         X_train = scaler.fit_transform(X_train)
         X_test = scaler.transform(X_test)
     clf.fit(X_train, y_train)
     return accuracy_score(y_test, clf.predict(X_test))
コード例 #20
0
class learn_by_perceptron:
    """Perceptron wrapper: trains on (X, Y) when given, otherwise loads a
    previously pickled model from `path`."""

    def __init__(self, X=None, Y=None, path=r"..\..\per_dump.pkl", penalty='l1', alpha=0.00001, fit=True):
        # without data, restore a previously dumped classifier from disk
        if X is None or Y is None:
            self.clf = joblib.load(path)
        else:
            # NOTE(review): class_weight='auto' was renamed 'balanced' in
            # newer scikit-learn — confirm the pinned version
            self.clf = Perceptron(penalty=penalty, alpha=alpha, n_jobs=6, class_weight='auto', shuffle=True)
            if fit:
                self.clf.fit(X, Y)
                self.dump(path)

    def predict(self, X):
        # delegate to the wrapped scikit-learn estimator
        return self.clf.predict(X)

    def cross_val(self, X, Y, n, cpus=6):
        # n-fold cross-validated F1 scores (old sklearn.cross_validation API)
        return cross_validation.cross_val_score(self.clf, X, Y, cv=n, n_jobs=cpus, scoring='f1')

    def dump(self, path=r"..\..\svm_dump.pkl"):
        # persist the trained classifier to disk
        joblib.dump(self.clf, path)
コード例 #21
0
def perecptronClassification():
  """Perceptron on three 20-newsgroups categories using TF-IDF features."""
  from sklearn.datasets import fetch_20newsgroups
  # sklearn.metrics.metrics was removed long ago; import from sklearn.metrics
  from sklearn.metrics import f1_score, classification_report
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.linear_model import Perceptron

  categories = ['rec.sport.hockey','rec.sport.baseball','rec.autos']
  newsgroups_train = fetch_20newsgroups(subset='train',categories=categories,remove=('headers','footers','quotes'))
  newsgroups_test = fetch_20newsgroups(subset='test',categories=categories,remove=('headers','footers','quotes'))

  # TF-IDF vocabulary is learned on the training corpus only
  vectorizer = TfidfVectorizer()
  X_train = vectorizer.fit_transform(newsgroups_train.data)
  X_test = vectorizer.transform(newsgroups_test.data)

  # NOTE(review): n_iter was renamed max_iter in scikit-learn >= 0.21 — confirm version
  classifier = Perceptron(n_iter=100,eta0=0.1)
  classifier.fit(X_train,newsgroups_train.target)
  predictions = classifier.predict(X_test)
  # print() works on Python 2 and 3 (original print statement is Py2-only)
  print(classification_report(newsgroups_test.target,predictions))
コード例 #22
0
def main():
    """Iris petal features → standardised Perceptron; report and plot results."""
    # import data: petal length / petal width, all three classes
    iris = datasets.load_iris()
    X = iris.data[:,[2,3]]
    y = iris.target

    # hold out 30% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # standardise features using training-set statistics only
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    # train the model and persist it to disk
    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)
    joblib.dump(ppn, 'ppn.pkl')

    # evaluate on the held-out set
    y_pred = ppn.predict(X_test_std)
    print("Misclassified samples: %d" %(y_test != y_pred).sum())

    print(y_test)
    print(y_pred)
    print("Accuracy: %.2f" % accuracy_score(y_test, y_pred))

    # visualise decision regions over train + test combined
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105,150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
コード例 #23
0
def thePerceptron( irisData ):
    """Fit a Perceptron to iris petal features (setosa vs rest) and print a
    prediction for one probe point."""
    print("\n####################")
    print("thePerceptron():\n")

    # petal length / petal width; binary target: is it setosa?
    X = irisData.data[:,(2,3)]
    y = (irisData.target == 0).astype(np.int)

    myPerceptron = Perceptron(random_state=1234567)
    myPerceptron.fit(X,y)

    # classify a single hand-picked point
    y_predicted = myPerceptron.predict([[2,0.5]])
    print( "y_predicted = " + str(y_predicted) )

    print("\nexiting: thePerceptron()")
    print("####################")
    return( None )
コード例 #24
0
ファイル: run.py プロジェクト: ks6g10/classify
def train(a, sizel, intercept):
    """Split `a` into train/holdout parts, fit a Perceptron, return (score, AUC).

    The label is whether the last column exceeds 1; that column is zeroed
    before fitting so it cannot leak into the features.
    """
    d = a.copy()
    pes = Perceptron(n_jobs=4, n_iter=500, fit_intercept=intercept)
    # `//` keeps the split index an int on Python 3 as well; plain `/`
    # yields a float there, which is invalid as a slice bound
    split = len(d) // sizel
    train = d[:split]
    C = d[split:]
    train_res = numpy.zeros(shape=(len(train)))
    C_res = numpy.zeros(shape=(len(C)))
    class_index = len(d[0]) - 1
    # NOTE(review): C is indexed with the same i that walks `train`; this only
    # works when len(C) >= len(train) — confirm the intended coupling
    for i in range(len(train)):
        train_res[i] = (train[i][class_index] > 1)
        train[i][class_index] = 0
        C_res[i] = (C[i][class_index] > 1)
        C[i][class_index] = 0

    pes.fit(train, train_res)
    output = pes.predict(C)
    (falsepr, truepr, thr) = roc_curve(C_res, output, 1)
    area = auc(falsepr, truepr)
    # overwrite predictions with the mean-accuracy score on the holdout
    output = pes.score(C, C_res)
    return (output, area)
コード例 #25
0
ファイル: SciPerceptron.py プロジェクト: vsharma2013/pyapps
def run():
    """Standardised Perceptron on iris petal features; prints test metrics."""
    iris = datasets.load_iris()
    X = iris.data[:,[2,3]]
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # scale using training-set statistics only
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    y_pred = ppn.predict(X_test_std)

    print ('Missclassified samples = %d' % (y_test != y_pred).sum())
    print('Accuracy = %.2f' % accuracy_score(y_test, y_pred))

    print(y_pred)
コード例 #26
0
def predict_with_perceptron():
    """Train a Perceptron on the (module-level) standardised data and plot the
    resulting decision regions."""
    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    # predict on the held-out set and compare against the ground truth
    y_pred = ppn.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

    # plot decision regions over train + test combined
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
コード例 #27
0
def Perceptron_Parejas(pareja1, pareja2):
    """Fit a Perceptron on one pair of iris features, plot the per-class
    decision lines, and print accuracy / per-class recall and precision."""
    datos,etiquetas,tamanio=CargarBaseDatos("iris")
    X_train,y_train,X_test,y_test=particionarDatos(tamanio,0.7, datos, etiquetas) 
    X= X_train[:,[pareja1,pareja2]]
    Y= X_test[:,[pareja1,pareja2]]
    tamTrain=len(X_train)

    prc = Perceptron().fit(X,y_train)
    coef = prc.coef_
    intercept = prc.intercept_
    color = "rgb"

    # scatter each training point coloured by its class (0/1/2)
    for i in range(0,tamTrain,1):
        if(y_train[i]==0):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
        elif(y_train[i]==1):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
        elif(y_train[i]==2):
            pl.scatter(X_train[i,pareja1], X_train[i,pareja2], color=color[y_train[i]])
    pl.axis('tight')
    
    xmin, xmax = pl.xlim()
    ymin, ymax = pl.ylim()    
    
    # one decision line per class (one-vs-rest coefficients)
    for i in range(0,3,1):
        pl.plot([xmin, xmax], [((-(xmin * coef[i, 0]) - intercept[i]) / coef[i, 1]), ((-(xmax * coef[i, 0]) - intercept[i]) / coef[i, 1])],ls="--", color=color[i])
    pl.show()
    
    y_ = prc.predict(Y)
    accuracy = accuracy_score(y_test, y_)
    recall = recall_score(y_test, y_, average=None)
    precision = precision_score(y_test, y_, average=None)
    # print() calls run on both Python 2 and 3; the originals were
    # Python-2-only print statements (SyntaxErrors under Python 3)
    print("accuracy: "+str(accuracy))
    print("recall: "+str(recall))
    print("precision por clase: "+str(precision))
コード例 #28
0
ファイル: incDriver.py プロジェクト: mitchlam/INCREMENT
def classify_data(X, Y, args, holdout=0.5):
    """Train the classifier selected by args.initial on a random split of
    (X, Y) and return the test points grouped by predicted label.

    Returns a list of clusters, each a list of Instance(x, true_label).
    Raises ValueError for an unsupported args.initial.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=holdout, random_state=np.random.RandomState())

    # list(map(...)) keeps this working on Python 3, where bare map() returns
    # a lazy iterator that np.array would wrap as a 0-d object array
    train_data = np.array(list(map(lambda x: x.flatten(), X_train)))
    test_data = np.array(list(map(lambda x: x.flatten(), X_test)))

    model = None
    if (args.initial == "perceptron"):
        model = Perceptron()
    elif(args.initial == "svm"):
        model = SVC(kernel="poly")
    elif (args.initial == "GMM"):
        model = GMM(n_components=args.K)
    else:
        # `raise("...")` raised a TypeError (strings are not exceptions);
        # raise a real exception instead
        raise ValueError("Model Not Supported.")

    model.fit(train_data, Y_train)

    Y_pred = model.predict(test_data)

    # one cluster slot per distinct predicted label
    labels = list(set(Y_pred))
    clusters = [[] for _ in range(len(labels))]

    # group the test instances (with their true labels) by predicted label
    for x, y, t in zip(X_test, Y_pred, Y_test):
        clusters[labels.index(y)].append(Instance(x, t))

    return clusters
コード例 #29
0
from sklearn.metrics import accuracy_score


def generate(count):
    """Generate `count` random applicants and their admission labels.

    Each sample is [math, physics, russian, disabled] with grades in 1..5 and
    disabled in {0, 1}. The label is 1 when the applicant is admitted under
    the rule encoded below, else 0. Returns (X, y) as numpy arrays of shape
    (count, 4) and (count,).
    """
    # the original `x = []s` was a syntax error (stray trailing `s`)
    x = []
    y = []
    for ir in range(0, count):
        math = np.random.randint(1, 6)
        physics = np.random.randint(1, 6)
        russian = np.random.randint(1, 6)
        disabled = np.random.randint(0, 2)
        x.append([math, physics, russian, disabled])
        # admitted if disabled with all grades >= 3, or strong math+physics
        # with a total of at least 11
        y.append(1 if (disabled == 1 and math >= 3 and physics >= 3 and russian>=3) or (math >= 4 and physics >= 4 and math + physics + russian >= 11) else 0)
    return np.array(x), np.array(y)


if __name__ == '__main__':
    # fixed seed so the generated data set is reproducible
    np.random.seed(42)
    X, y = generate(400)
    X_test, y_test = generate(50)
    model = Perceptron(tol=1e-7)
    model.fit(X, y)
    print(accuracy_score(model.predict(X_test), y_test))
    # spot-check a few hand-picked applicants
    print(model.predict([[3,3,3,1]])) #prints 1
    print(model.predict([[5,5,5,0]])) #prints 1
    print(model.predict([[4,4,3,0]])) #prints 1
    print(model.predict([[3,4,3,0]])) #prints 0
    print(model.predict([[3,4,2,1]])) #prints 0

コード例 #30
0
clf_percept = clf_percept.fit(X, Y)
clf_KNN = clf_KNN.fit(X, Y)

#Test models on the same training set to find the training accuracy
# The original lines were mechanically converted Python-2 print statements:
# `print("label:"), value, "%"` prints only the label and silently discards
# the value (the trailing tuple is an unused expression). Pass everything to
# print() instead.

# Decision Trees
clf_tree_prediction = clf_tree.predict(X)
acc_tree = accuracy_score(Y, clf_tree_prediction) * 100
print("Accuracy using Decision Trees:", acc_tree, "%")

#SVM
clf_svm_prediction = clf_svm.predict(X)
acc_svm = accuracy_score(Y, clf_svm_prediction) * 100
print("Labels for training set using SVM:'", acc_svm, "%")

#Perceptron
clf_percept_prediction = clf_percept.predict(X)
acc_per = accuracy_score(Y, clf_percept_prediction) * 100
print("Labels for training set using Perceptron:", acc_per, "%")

#KNN: label each point with its nearest neighbour's label
distances, indices = clf_KNN.kneighbors(X)
new_label = indices[:, 0]
clf_KNN_prediction = [Y[i][:] for i in new_label]
acc_knn = accuracy_score(Y, clf_KNN_prediction) * 100
print("Labels for training set using K-nearst neighbour:", acc_knn, "%")

#All accuracies
acc_all = [acc_tree, acc_svm, acc_per, acc_knn]

#Chosing the best among all
score_bestmethod = np.max(acc_all)
コード例 #31
0
#testX =pd.read_csv("data/perceptron-test.csv")

# column 0 is the label; columns 1-2 are the features
Xtrain = pd.read_csv('data/perceptron-train.csv',
                     header=None,
                     usecols=np.arange(1, 3))
ytrain = pd.read_csv('data/perceptron-train.csv', header=None, usecols=[0])

Xtest = pd.read_csv('data/perceptron-test.csv',
                    header=None,
                    usecols=np.arange(1, 3))
ytest = pd.read_csv('data/perceptron-test.csv', header=None, usecols=[0])

clf = Perceptron(random_state=241)
clf.fit(Xtrain, ytrain.values.ravel())

predictions = clf.predict(Xtest)

accuracy = accuracy_score(ytest, predictions)

print(accuracy)

# classification_report expects (y_true, y_pred); the original passed the
# predictions first, which swaps the per-class precision and recall values
print(classification_report(ytest, clf.predict(Xtest)))
"""
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(Xtrain)
X_test_scaled = scaler.transform(Xtest)
clf.fit(X_train_scaled, ytrain.values.ravel())
predictions_scaled = clf.predict(X_test_scaled)
accuracy_scaled = accuracy_score(ytest,predictions_scaled)

print(accuracy_scaled)
コード例 #32
0
clf_svm = SVC()
clf_perceptron = Perceptron()
clf_KNN = KNeighborsClassifier()

# Train every model on the full data set
for model in (clf_tree, clf_svm, clf_perceptron, clf_KNN):
    model.fit(X, Y)

# Evaluate each model on the training data itself
pred_tree = clf_tree.predict(X)
acc_tree = accuracy_score(Y, pred_tree) * 100
print('Accuracy for DecisionTree: {}'.format(acc_tree))

pred_svm = clf_svm.predict(X)
acc_svm = accuracy_score(Y, pred_svm) * 100
print('Accuracy for SVM: {}'.format(acc_svm))

pred_per = clf_perceptron.predict(X)
acc_per = accuracy_score(Y, pred_per) * 100
print('Accuracy for perceptron: {}'.format(acc_per))

pred_KNN = clf_KNN.predict(X)
acc_KNN = accuracy_score(Y, pred_KNN) * 100
print('Accuracy for KNN: {}'.format(acc_KNN))

# Pick the best of the four (ties go to the earlier entry)
index = np.argmax([acc_svm, acc_per, acc_KNN, acc_tree])
classifiers = {0: 'SVM', 1: 'Perceptron', 2: 'KNN', 3: 'DecisionTree'}
print('Best gender classifier is {}'.format(classifiers[index]))
コード例 #33
0
    #Training Classifiers

    svm.fit(features_train, train_labels)
    mnb.fit(features_train, train_labels)
    bnb.fit(features_train, train_labels)
    logit.fit(features_train, train_labels)
    percept.fit(features_train, train_labels)
    sgd.fit(features_train, train_labels)

    #Predicting output on test data

    svm_predict = svm.predict(features_test)
    mnb_predict = mnb.predict(features_test)
    bnb_predict = bnb.predict(features_test)
    logit_predict = logit.predict(features_test)
    percept_predict = percept.predict(features_test)
    sgd_predict = sgd.predict(features_test)

    classifier_names = [
        "SVM", "Multinomial NB", "Bernoulli NB", "Logistic Regression",
        "Perceptron", "SGD"
    ]
    classifiers_predictions = [
        svm_predict, mnb_predict, bnb_predict, logit_predict, percept_predict,
        sgd_predict
    ]

    for predict, name in zip(classifiers_predictions, classifier_names):

        #Performance Metrics of Classifiers
# Split the data set into features and labels
# Features: every column except the last
X = datosFull.iloc[:, 0:-1]
# Labels: the last column
Y = datosFull.iloc[:, -1]

# Random train/test split (30% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.3,
                                                    random_state=0)

# Train the perceptron on the training split
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

# Report mean accuracy on the held-out split
print(perceptron.score(X_test, y_test))

# Print the raw prediction for the input image (-1 / 0 / 1)
print("La predicción es: ", int(perceptron.predict(imT)))

# Friendlier output: say whether the prediction means "human"
if ((int(perceptron.predict(imT))) == 1):
    print("Es un humano =D")
else:
    print("No es humano")
コード例 #35
0
# logistic regression
logreg = LogisticRegression()
logreg.fit(X_train, y_train.ravel())
y_pred = logreg.predict(X_test)
print()
# score() is a project helper returning (accuracy, specificity, sensitivity)
acc, spe, sen = score(y_pred, y_test)
print(
    'Accuracy, specifity, sensitivity of logistic regression classifier on test set: ',
    round(acc, 4), round(spe[0], 4), round(sen[0], 4))

#Perceptron
# flatten the label column (each entry is a 1-element sequence)
for i in range(len(y_train)):
    y_train[i] = y_train[i][0]
clf = Perceptron(tol=1e-3, random_state=0)
clf.fit(X_train, y_train.ravel())
y_pred = clf.predict(X_test)
acc, spe, sen = score(y_pred, y_test)
print('Accuracy, specifity, sensitivity of Perceptron on test set: ',
      round(acc, 4), round(spe[0], 4), round(sen[0], 4))

#SVM
# NOTE(review): this flattening loop repeats the one above on the already
# flattened y_train — confirm it is still needed
for i in range(len(y_train)):
    y_train[i] = y_train[i][0]
clf = SVC(kernel='linear')
clf.fit(X_train, y_train.ravel())
y_pred = clf.predict(X_test)
acc, spe, sen = score(y_pred, y_test)
print(
    'Accuracy, specifity, sensitivity of Support Vector Classification on test set: ',
    round(acc, 4), round(spe[0], 4), round(sen[0], 4))
コード例 #36
0
# MODEL-4) Linear SVC: fit on train, report rounded validation accuracy
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_val)
acc_linear_svc = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-4: Accuracy of LinearSVC : ", acc_linear_svc)

#OUTPUT:-
#MODEL-4: Accuracy of LinearSVC :  78.68

#MODEL-5) Perceptron
#------------------------------------------
from sklearn.linear_model import Perceptron

perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_val)
acc_perceptron = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-5: Accuracy of Perceptron : ", acc_perceptron)

#OUTPUT:-
#MODEL-5: Accuracy of Perceptron :  79.19

#MODEL-6) Decision Tree Classifier
#------------------------------------------
from sklearn.tree import DecisionTreeClassifier

decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_val)
acc_decisiontree = round(accuracy_score(y_pred, y_val) * 100, 2)
print("MODEL-6: Accuracy of DecisionTreeClassifier : ", acc_decisiontree)
コード例 #37
0
def perceptron(train_instances, train_labels, test_instances):
    """Fit a deterministic Perceptron and return predictions for the test set."""
    model = Perceptron(tol=1e-3, random_state=0)
    model.fit(train_instances, train_labels)
    return model.predict(test_instances)
コード例 #38
0
# Metrics for the (previously computed) lt_predictions
print("Accuracy:", metrics.accuracy_score(y_test, lt_predictions))
print("Precision:", metrics.precision_score(y_test, lt_predictions))
print("Recall:", metrics.recall_score(y_test, lt_predictions))

# ROC curve / AUC for the logistic-regression positive-class probabilities
y_pred_proba = lr_classifier.predict_proba(X_test)[::, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

# Perceptron: fit, predict, report accuracy and micro-averaged F1
perceptron_classifier = Perceptron(random_state=11)
perceptron_classifier.fit(X_train, y_train)
perc_predictions = perceptron_classifier.predict(X_test)
score = accuracy_score(y_test, perc_predictions)
f_score = f1_score(y_test, perc_predictions, average='micro')
print("The accuracy score (Perceptron) is:", score)
print("The F score-Micro (Perceptron) is:", f_score)

# Support Vector Machine: same metrics for comparison
svm_classifier = svm.SVC(gamma='scale')
svm_classifier.fit(X_train, y_train)
svm_predictions = svm_classifier.predict(X_test)
score = accuracy_score(y_test, svm_predictions)
f_score = f1_score(y_test, svm_predictions, average='micro')
print("The accuracy score (SVM) is:", score)
print("The F score-Micro (SVM) is:", f_score)

#print('Number of spam messages: %s' % df[df[0] == 1][0].count())
# In[73]:

# Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(train_x, train_y)
y_pred = gaussian.predict(test_x)
# NOTE(review): score() here is TRAINING accuracy while y_pred (the test
# predictions) goes unused — confirm this is intentional
acc_gaussian = round(gaussian.score(train_x, train_y) * 100, 2)
print(acc_gaussian)

# In[74]:

# Perceptron (same pattern: training accuracy reported)
perceptron = Perceptron()
perceptron.fit(train_x, train_y)
y_pred = perceptron.predict(test_x)
acc_perceptron = round(perceptron.score(train_x, train_y) * 100, 2)
print(acc_perceptron)

# In[75]:

# Linear SVC (same pattern: training accuracy reported)
linear_svc = LinearSVC()
linear_svc.fit(train_x, train_y)
y_pred = linear_svc.predict(test_x)
acc_linear_svc = round(linear_svc.score(train_x, train_y) * 100, 2)
print(acc_linear_svc)

# In[76]:

# Stochastic Gradient Descent
# Show the target vector, then shuffle data and targets in unison
print("tvec's shape : ", tvec.shape, "tvec : ", tvec)

shuffle_index = np.random.permutation(number_of_data)
xdata, tvec = xdata[shuffle_index], tvec[shuffle_index]

#   Train the classifier
classifier = Perceptron(tol=1e-3, random_state=0)

print(
    "\n\n******************************************* Paramesters ****************************************************"
)
# fit() returns the estimator itself, so this line prints its parameters
print("Perceptron Parameters    : ", classifier.fit(xdata, tvec))
print("Classiifer Coefficient   : ", classifier.coef_)
print("Classiifer Intercept     : ", classifier.intercept_)
print("Classiifer Iteration     : ", classifier.n_iter_)
# elementwise correctness mask on the training data
print("Classiifer Correctness   : ", np.equal(classifier.predict(xdata), tvec))

print(
    "\n\n********************************************* Accuracy *****************************************************"
)
#   Accuracy Score: mean accuracy on the given test data and labels
print("Accuracy                                                     : ",
      classifier.score(testData, testLabel))

#   n-fold cross validation : Accuracy
print(
    "Average of accuracies found from 10-fold cross-validation    : ",
    np.average(
        cross_val_score(classifier, xdata, tvec, cv=10, scoring="accuracy")))

print(
コード例 #41
0
# Hold out 30% of the data for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

#Standardize the dataset
# Fit the scaler on the training split only, then apply the identical
# transformation to the test split (avoids test-set leakage).
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#Perceptron One-Vs-Rest
from sklearn.linear_model import Perceptron
# FIX: the `n_iter` constructor argument was renamed `max_iter` in
# scikit-learn 0.19 and removed in 0.21; `n_iter=40` raises TypeError there.
ppn = Perceptron(max_iter = 40, eta0 = 0.1, random_state = 0)
ppn.fit(X_train_std, y_train)

# Test model
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d'% (y_test != y_pred).sum())

#Accuracy
from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))


from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
	markers = ('s', 'x', 'o', '^', 'v')
	colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
	cmap = ListedColormap(colors[:len(np.unique(y))])
	
コード例 #42
0
plt.grid(True)
plt.title('31240232 / Sunwung Lee')
plt.gca().legend(('Training Set', 'Test Set'))
#plt.savefig('2_percentage_correct')
#plt.clf()

#plt.scatter(Y_train[:,0], Y_train[:,1], s=3, c='r')
#plt.plot(yy,xx, c='k')
# ---------------------------------------------
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
model = Perceptron()
model.fit(Y_train, f_train) # SGD, 
# parameter: training data, target values
# return: self(returns an instance of self).
fh_train = model.predict(Y_test) 
# parameter: samples.
# return: predicted class label per sample
print(accuracy_score(f_test, fh_train))
# parameter: Ground truth labels, Predicted labels
# return: score (if normalize = True, return fraction of correctly classified samples(float), 
#         else returns the number of correctly classified samples(int))


# Translation of the original Korean notes:
# What the code above does:
#   create a Perceptron named `model`, then train it on Y_train / f_train;
#   model.predict -> feed samples in and obtain predicted class labels = fh_train
#   (the original note says Y_train here, but the code predicts on Y_test);
#   then compare how closely fh_train matches the true labels (f_test).
#
#
コード例 #43
0
# Fit Gaussian Naive Bayes; `gaussian` is constructed earlier in the notebook.
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
# NOTE(review): accuracy is measured on the *training* split, not on
# X_test/Y_pred -- these numbers report training fit.
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian  # bare expression: displayed by the notebook, no-op as a script


# The perceptron is an algorithm for supervised learning of binary classifiers (functions that can decide whether an input, represented by a vector of numbers, belongs to some specific class or not). It is a type of linear classifier, i.e. a classification algorithm that makes its predictions based on a linear predictor function combining a set of weights with the feature vector. The algorithm allows for online learning, in that it processes elements in the training set one at a time. Reference [Wikipedia](https://en.wikipedia.org/wiki/Perceptron).

# In[ ]:


# Perceptron

perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron


# In[ ]:


# Linear SVC

linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc
コード例 #44
0
    for b in r: #iterates over r

        #Create the perceptron classifier
        clf = Perceptron(eta0=a, random_state=b, max_iter=1000) #eta0 = learning rate, random_state = used to shuffle the training data

        #Fitperceptron to the training data
        clf.fit(X_training, y_training)

        #make the classifier prediction for each test sample and start computing its accuracy
        #hint: to iterate over two collections simultaneously with zip() Example:
        #for (x_testSample, y_testSample) in zip(X_test, y_test):
        #to make a prediction do: clf.predict([x_testSample])
        #--> add your Python code here
        # Count test samples whose predicted label matches the true label.
        counter = 0
        for (x_testSample, y_testSample) in zip(X_test, y_test):
            prediction = clf.predict([x_testSample])
            if prediction == y_testSample:
                counter += 1
        accuracy = counter/len(y_test)

        #check if the calculated accuracy is higher than the previously one calculated. If so, update the highest accuracy and print it together with the perceprton hyperparameters
        #Example: "Highest Perceptron accuracy so far: 0.88, Parameters: learning rate=00.1, random_state=True"
        #--> add your Python code here
        # Track the best (learning rate, random_state) combination seen so far.
        # NOTE(review): `a` presumably iterates over learning rates in an
        # enclosing loop outside this fragment -- confirm.
        if accuracy > highestAccuracy:
            highestAccuracy = accuracy
            highestAccuracyLR = a
            highestAccuracyRS = b
            print("Highest Perceptron accuracy so far: " + str(highestAccuracy) + ", Parameters: learning rate=" + str(a) + ", random_state=" + str(b))


print("\nHighest Perceptron accuracy: " + str(highestAccuracy) + ", Parameters: learning rate=" + str(highestAccuracyLR) + ", random_state=" + str(highestAccuracyRS))
コード例 #45
0
class NaiveNLP:
    """Harness that fits and evaluates classical scikit-learn classifiers.

    ``train_set`` and ``valid_set`` are ``(features, labels)`` pairs.
    Each ``method_*`` fits its pre-configured estimator on the training
    pair, predicts on the validation pair, stores the predictions and
    accuracy as attributes, and prints the accuracy.  When
    ``multi_classification`` is true, a penalized accuracy that treats
    the 'unrelated' class specially is printed as well.
    """

    def __init__(self, train_set, valid_set, multi_classification=False):
        self.train_set = train_set
        self.valid_set = valid_set
        self.multi_classification = multi_classification
        # FIX: use the public import path. The private module
        # sklearn.linear_model.logistic was deprecated in scikit-learn
        # 0.22 and removed in 0.24, so the old attribute chain breaks.
        self.my_LR = sklearn.linear_model.LogisticRegression()
        self.my_RF = RandomForestClassifier(criterion='entropy',
                                            max_depth=50,
                                            min_samples_leaf=1,
                                            min_samples_split=3,
                                            n_estimators=50)
        self.my_P = Perceptron(max_iter=10000, tol=0.1)
        self.my_SVM_rbf = SVC(kernel='rbf', gamma=0.03, C=30, max_iter=10000)
        self.my_SVM_linear = SVC(kernel='linear',
                                 gamma=0.03,
                                 C=30,
                                 max_iter=10000)
        self.my_DT = DecisionTreeClassifier()
        self.my_NB = GaussianNB()
        self.my_KNN = KNeighborsClassifier(n_neighbors=3)

    def penalized_accuracy(self, predict, target):
        """Return ``str(0.25 * acc1 + 0.75 * acc2)``.

        ``acc1`` scores the binary related/'unrelated' decision over all
        samples; ``acc2`` scores the exact label, restricted to the
        samples that were *predicted* as related.
        """
        index_un_p = predict == 'unrelated'
        index_un = target == 'unrelated'
        index_re = np.where(predict != 'unrelated')
        acc1 = np.mean(index_un_p == index_un)
        acc2 = np.mean(predict[index_re] == np.array(target)[index_re])
        return (str(0.25 * acc1 + 0.75 * acc2))

    def method_KNeighborsClassifier(self):
        """Fit/evaluate the 3-nearest-neighbours classifier."""
        self.my_KNN.fit(self.train_set[0], self.train_set[1])
        self.my_KNN_pred = self.my_KNN.predict(self.valid_set[0])
        self.my_KNN_acc = accuracy_score(self.my_KNN_pred, self.valid_set[1])
        print('KNeighborsClassifier accuracy is: ' + str(self.my_KNN_acc))
        if self.multi_classification:
            print('KNeighborsClassifier penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_KNN_pred, self.valid_set[1]))

    def method_GaussianNB(self):
        """Fit/evaluate Gaussian naive Bayes."""
        self.my_NB.fit(self.train_set[0], self.train_set[1])
        self.my_NB_pred = self.my_NB.predict(self.valid_set[0])
        self.my_NB_acc = accuracy_score(self.my_NB_pred, self.valid_set[1])
        print('GaussianNB accuracy is: ' + str(self.my_NB_acc))
        if self.multi_classification:
            print('GaussianNB penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_NB_pred, self.valid_set[1]))

    def method_LogisticRegression(self):
        """Fit/evaluate logistic regression (no penalized accuracy here,
        matching the original behavior)."""
        self.my_LR.fit(self.train_set[0], self.train_set[1])
        self.my_LR_pred = self.my_LR.predict(self.valid_set[0])
        self.my_LR_acc = accuracy_score(self.my_LR_pred, self.valid_set[1])
        print('LogisticRegression accuracy is: ' + str(self.my_LR_acc))

    def method_DecisionTreeClassifier(self):
        """Fit/evaluate a decision tree."""
        self.my_DT.fit(self.train_set[0], self.train_set[1])
        self.my_DT_pred = self.my_DT.predict(self.valid_set[0])
        self.my_DT_acc = accuracy_score(self.my_DT_pred, self.valid_set[1])
        print('DecisionTreeClassifier accuracy is: ' + str(self.my_DT_acc))
        if self.multi_classification:
            print('DecisionTreeClassifier penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_DT_pred, self.valid_set[1]))

    def method_RandomForestClassifier(self):
        """Fit/evaluate the entropy-criterion random forest."""
        self.my_RF.fit(self.train_set[0], self.train_set[1])
        self.my_RF_pred = self.my_RF.predict(self.valid_set[0])
        self.my_RF_acc = accuracy_score(self.my_RF_pred, self.valid_set[1])
        print('RandomForestClassifier accuracy is: ' + str(self.my_RF_acc))
        if self.multi_classification:
            print('RandomForestClassifier penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_RF_pred, self.valid_set[1]))

    def method_Perception(self):
        """Fit/evaluate the perceptron.

        NOTE: the name keeps the original 'Perception' misspelling because
        external callers may rely on it.
        """
        self.my_P.fit(self.train_set[0], self.train_set[1])
        self.my_P_pred = self.my_P.predict(self.valid_set[0])
        self.my_P_acc = accuracy_score(self.my_P_pred, self.valid_set[1])
        print('Perception accuracy is: ' + str(self.my_P_acc))
        if self.multi_classification:
            print('Perception penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_P_pred, self.valid_set[1]))

    def method_SVM_rbf(self):
        """Fit/evaluate the RBF-kernel SVM."""
        self.my_SVM_rbf.fit(self.train_set[0], self.train_set[1])
        self.my_SVM_rbf_pred = self.my_SVM_rbf.predict(self.valid_set[0])
        self.my_SVM_rbf_acc = accuracy_score(self.my_SVM_rbf_pred,
                                             self.valid_set[1])
        print('SVM_rbf accuracy is: ' + str(self.my_SVM_rbf_acc))
        if self.multi_classification:
            print('SVM_rbf penalized accuracy is: ' + self.penalized_accuracy(
                self.my_SVM_rbf_pred, self.valid_set[1]))

    def method_SVM_linear(self):
        """Fit/evaluate the linear-kernel SVM."""
        self.my_SVM_linear.fit(self.train_set[0], self.train_set[1])
        self.my_SVM_linear_pred = self.my_SVM_linear.predict(self.valid_set[0])
        self.my_SVM_linear_acc = accuracy_score(self.my_SVM_linear_pred,
                                                self.valid_set[1])
        print('SVM_linear accuracy is: ' + str(self.my_SVM_linear_acc))
        if self.multi_classification:
            print('SVM_linear penalized accuracy is: ' +
                  self.penalized_accuracy(self.my_SVM_linear_pred,
                                          self.valid_set[1]))
コード例 #46
0
# Train the models on data X and Y
clf_tree.fit(X, Y)
clf_svm.fit(X, Y)
clf_perceptron.fit(X, Y)
clf_KNN.fit(X, Y)

# Testing
# NOTE(review): every model is evaluated on the same data it was trained
# on, so these accuracies measure training fit, not generalization.
pred_tree = clf_tree.predict(X)
acc_tree = accuracy_score(Y, pred_tree) * 100
print('Acurácia do método DecisionTree: {}'.format(acc_tree))

pred_svm = clf_svm.predict(X)
acc_svm = accuracy_score(Y, pred_svm) * 100
print('Acurácia do método SVM: {}'.format(acc_svm))

pred_per = clf_perceptron.predict(X)
acc_per = accuracy_score(Y, pred_per) * 100
print('Acurácia do método perceptron: {}'.format(acc_per))

pred_KNN = clf_KNN.predict(X)
acc_KNN = accuracy_score(Y, pred_KNN) * 100
print('Acurácia do método KNN: {}'.format(acc_KNN))

# The best classifier from svm, per, KNN
# argmax over the three accuracies maps onto the names in `classifiers`.
index = np.argmax([acc_svm, acc_per, acc_KNN])
classifiers = {0: 'SVM', 1: 'Perceptron', 2: 'KNN'}
print('\nO melhor método foi: {}\n'.format(classifiers[index]))

# Sample to classify -- presumably [height, weight, shoe size]; confirm
# against where X is built.
test = [168, 60, 39]

prediction = clf_tree.predict([test])
コード例 #47
0
#classifiers
# One instance per algorithm; all are fitted on the same X / Y below.
dtc = tree.DecisionTreeClassifier()
svmc = SVC()
perC = Perceptron()
KNNc = KNeighborsClassifier()

#model training
# fit() returns the estimator itself, so the rebinding is a stylistic no-op.
dtc = dtc.fit(X, Y)
svmc = svmc.fit(X, Y)
perC = perC.fit(X, Y)
KNNc = KNNc.fit(X, Y)

# prediction
# NOTE(review): predictions are made on the training data X, so the
# accuracies below measure training fit, not generalization.
prediction1 = dtc.predict(X)
prediction2 = svmc.predict(X)
prediction3 = perC.predict(X)
prediction4 = KNNc.predict(X)

#accuracy
acc_dtc = accuracy_score(Y, prediction1)
print("dtc", acc_dtc)
acc_svmc = accuracy_score(Y, prediction2)
print("svmc", acc_svmc)
acc_perC = accuracy_score(Y, prediction3)
print("perC", acc_perC)
acc_knnc = accuracy_score(Y, prediction4)
print("knnc", acc_knnc)

# print best result
# Highest of the four accuracies; presumably used to pick a winner below.
mval = max(acc_dtc, acc_svmc, acc_perC, acc_knnc)
acc = {
コード例 #48
0
pn = Perceptron()
pn.fit(X, y)
# NOTE(review): scikit-learn's Perceptron has no `errors` attribute; this
# plot assumes `Perceptron` here is a custom implementation that records
# per-epoch misclassification counts -- confirm which class is in scope.
plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.show()

#metrics calculation
# Hold out 30% of the data; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

ppn = Perceptron()
ppn.fit(X_train, y_train)
y_pred = ppn.predict(X_test)

#printing the results
print('Metrics for perceptron classifier\n\nMisclassified samples: %d' %
      (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Confusion matrix: \n%s' % confusion_matrix(y_test, y_pred))

#metrics calculation
# Reload the Iris data: the first 100 rows are the two linearly separable
# classes; label setosa as 0 and versicolor as 1, keep 3 feature columns.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', 0, 1)
X = df.iloc[0:100, [0, 1, 2]].values
コード例 #49
0
print()

# Gaussian Naive Bayes
# Each section fits on X/Y and predicts two hand-written samples.
gaussian = GaussianNB()
gaussian.fit(X, Y)
prediction = gaussian.predict([[190, 70, 43], [186, 65, 39]])
acc_gaussian = round(gaussian.score(X, Y) * 100, 2)
print("Naive Bayes: ")
print(prediction)
print(acc_gaussian)
print()

#Perceptron
perceptron = Perceptron()
perceptron.fit(X, Y)
prediction = perceptron.predict([[190, 70, 43], [186, 65, 39]])
acc_perceptron = round(perceptron.score(X, Y) * 100, 2)
print("Perceptron: ")
print(prediction)
print(acc_perceptron)
print()

# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X, Y)
Y_pred = linear_svc.predict([[190, 70, 43], [186, 65, 39]])
acc_linear_svc = round(linear_svc.score(X, Y) * 100, 2)
print("LinearSVC: ")
# BUG FIX: this section previously printed the stale `prediction` left
# over from the perceptron section; print this model's own predictions.
print(Y_pred)
print(acc_linear_svc)
print()
コード例 #50
0
    # Random ~80/20 train/test split via a boolean mask.
    msk = np.random.rand(len(dataSet)) < 0.8

    trainData = dataSet[msk]
    testData = dataSet[~msk]

    X_train, Y_train = getXandY(trainData)
    X_test, Y_test = getXandY(testData)

    # NOTE(review): scikit-learn expects random_state to be None, an int,
    # or a RandomState instance; passing the np.random *module* raises a
    # ValueError in current versions -- verify against the sklearn in use.
    # warm_start=True makes each fit() below continue from the prior state.
    pla = Perceptron(max_iter=1000, random_state=np.random, warm_start=True)
    print(pla.get_params())

    for i in range(0, 700):
        pla = pla.fit(X_train, Y_train)
        score = pla.score(X_test, Y_test)
        Y_pred = pla.predict(X_test)
        # F1 Measure
        Y_test = pd.Series(Y_test)
        series = Y_test.value_counts()
        # Null accuracy: accuracy of always predicting the majority class.
        null_accuracy = (series[0] / (series[0] + series[1]))
        print('Null Acuuracy: ', str(null_accuracy))
        cm = confusion_matrix(Y_test, Y_pred)
        print(cm)
        print('Confusion matrix\n\n', cm)

        # NOTE(review): with sklearn's rows=truth / columns=prediction
        # layout, these TP/TN/FP labels only hold for a particular label
        # ordering -- verify cm[0, 0] really is TP for this encoding.
        print('\nTrue Positives(TP) = ', cm[0, 0])

        print('\nTrue Negatives(TN) = ', cm[1, 1])

        print('\nFalse Positives(FP) = ', cm[0, 1])
コード例 #51
0
# Standardize features: fit the scaler on the training split only, then
# apply the identical transformation to the test split.
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)



### 004 train a perceptron model
from sklearn.linear_model import Perceptron

# FIX: `n_iter` was renamed `max_iter` in scikit-learn 0.19 and removed
# in 0.21; `n_iter=40` raises TypeError on current versions.
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)



### 005 make prediction
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())



### 006 calculate the classification accuracy
from sklearn.metrics import accuracy_score

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))



### 007 plot decision regions
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt
コード例 #52
0
X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                      y,
                                                      shuffle=True,
                                                      random_state=10,
                                                      test_size=0.1)
print(X_train.shape, X_valid.shape, len(y_train), len(y_valid))

# 4.2 train a linear model using Perceptron
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score, precision_recall_fscore_support, classification_report, confusion_matrix

model_pc = Perceptron(tol=1e-4, random_state=42,
                      penalty='l2')  # initialize perceptron model
model_pc.fit(X_train, y_train)  # train model (learning)
y_pred_pc = model_pc.predict(X_valid)

#4.3 evaluate performance by printing f1 score, confusion matrix and classification report
confusion_matrix(y_valid, y_pred_pc)
f1_score(y_valid, y_pred_pc)
print(classification_report(y_valid, y_pred_pc))

#5. RandomForestClassifier

from sklearn.ensemble import RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators=500,
                                  criterion='entropy',
                                  random_state=10,
                                  n_jobs=-1,
                                  max_depth=10,
                                  verbose=1)
コード例 #53
0
# Compare perceptron accuracy on raw vs. standardized features.
from sklearn.linear_model import Perceptron
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

scaler = StandardScaler()
clf = Perceptron()
# FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0 --
# use positional .iloc. Also read each CSV once instead of twice.
# Column 0 is the target label; the remaining columns are features.
df_train = pd.read_csv('perceptron-train.csv', header=None)
df_test = pd.read_csv('perceptron-test.csv', header=None)
X_train = df_train.iloc[:, 1:]
X_test = df_test.iloc[:, 1:]
y_train = df_train.iloc[:, 0]
y_test = df_test.iloc[:, 0]
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, predictions))
# Standardize (fit on train, transform both) and retrain the same model.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
clf.fit(X_train_scaled, y_train)
predict_scaled_data = clf.predict(X_test_scaled)
# (also fixes the "scalibng" typo in the output message)
print('Accuracy after scaling: ', accuracy_score(y_test, predict_scaled_data))
print(
    'Difference: ',
    abs(
        accuracy_score(y_test, predictions) -
        accuracy_score(y_test, predict_scaled_data)).round(3))
コード例 #54
0
# Scale features using the already-fitted StandardScaler `sc`.
X_train_std = sc.transform(X_train)        # apply to the training data
X_test_std = sc.transform(X_test)          # and SAME transformation of test data!!!

# perceptron linear
# epoch is one forward and backward pass of all training samples (also an iteration)
# eta0 is rate of convergence
# max_iter, tol, if it is too low it is never achieved
# and continues to iterate to max_iter when above tol
# fit_intercept, fit the intercept or assume it is 0
# slowing it down is very effective, eta is the learning rate

ppn = Perceptron(max_iter=10, tol=1e-3, eta0=0.001, fit_intercept=True, random_state=100, verbose=True)
ppn.fit(X_train_std, y_train)              # do the training

print('Number in test ',len(y_test))
y_pred = ppn.predict(X_test_std)           # now try with the test data

# Note that this only counts the samples where the predicted value was wrong
print('Misclassified samples: %d' % (y_test != y_pred).sum())  # how'd we do?
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

# vstack puts first array above the second in a vertical stack
# hstack puts first array to left of the second in a horizontal stack
# NOTE the double parens!
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
print('Number in combined ',len(y_combined))

# we did the stack so we can see how the combination of test and train data did
# (train+test combined, so this count partially reflects training fit)
y_combined_pred = ppn.predict(X_combined_std)
print('Misclassified combined samples: %d' % (y_combined != y_combined_pred).sum())
コード例 #55
0
# produce diagram with the parameters and score
showDiagram(parameters_array,
            score_array,
            title='F1 score - Random Forest - Validation set',
            parameters="n_estimators, max_depth",
            color='brown')

# Reset the shared parameter/score accumulators before the next sweep.
clear_arrays()

# Perceptron
# Sweep the iteration cap; record micro-averaged F1 on the validation set.
for max_it in (100, 500, 2000):
    parameters_array.append(max_it)

    Per_classifier = Perceptron(max_iter=max_it)
    Per_classifier = Per_classifier.fit(X_train, Y_train)
    Per_prediction = Per_classifier.predict(X_validation)
    f1 = f1_score(Y_validation, Per_prediction, average='micro')

    score_array.append(f1)

# produce diagram with the parameters and score
showDiagram(parameters_array,
            score_array,
            title='F1 score - Perceptron - Validation set',
            parameters="max_iter",
            color='brown')

clear_arrays()

# GaussianNB
for var_sm in (1e-9, 1e-10, 1e-11):
コード例 #56
0
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length and width
# Binary target: is this sample Iris setosa?
# FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24 -- use the
# builtin int (behavior is identical).
y = (iris.target == 0).astype(int)

# Equivalent to SGDClassifier with loss="perceptron",
# learning_rate="constant", eta0=1 (learning rate), penalty=None (no
# regularization). Unlike logistic regression it provides no
# probabilities; predictions are made against a fixed threshold.
per_clf = Perceptron()

per_clf.fit(X, y)

y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)
コード例 #57
0
#est = CalibratedClassifierCV (rf, method='isotonic', cv=5)

# FIX: `n_iter` was renamed `max_iter` in scikit-learn 0.19 and removed in
# 0.21; n_iter=100 raises TypeError on current versions.
# fit_intercept=False assumes the data is already centered; shuffle=False
# keeps sample order fixed between epochs.
est = Perceptron(fit_intercept=False, max_iter=100, shuffle=False)

est.fit(data_x, data_y)

# Mean accuracy over default cross-validation folds.
scores = cross_val_score(est, data_x, data_y)
print("score mean  =    %f" % scores.mean())

#Z = hierarchy.linkage(data_x, 'single')

#plt.figure()

#dn = hierarchy.dendrogram(Z)

#plt.show()

#print (est.score (data_x,data_y))

xxx = est.predict(data_xt)

y2 = xxx.tolist()

#scores = cross_val_score(est, data_x, data_y, cv=5, scoring='accuracy')
#print("Accuracy: %0.2f (+/- %0.2f) " % (scores.mean(), scores.std()))

#print ("AUC-ROC (oob) = %0.2f" % est.oob_score_)

# Emit one rounded integer label per test sample.
for i in y2:
    print(int(round(i, 0)))
コード例 #58
0
# Ground-truth labels for dataset 2's labelled test split.
# NOTE: the bare triple-quoted strings below are no-op section markers,
# not docstrings.
actual_targets = capture_targets(
    'test_with_label_2.csv')  # pass test set with targets
"""
Run GNB model
"""
fitted_gnb = GaussianNB().fit(
    train_features, train_targets)  # fit model with training set values
predicted_targets = list(fitted_gnb.predict(
    test_features))  # get predictions from model and record them
export_results(actual_targets, predicted_targets, 'GNB-DS2.csv')
"""
Run PER model
"""
fitted_per = Perceptron().fit(
    train_features, train_targets)  # fit model with training set values
predicted_targets = list(fitted_per.predict(
    test_features))  # get predictions from model and record them
export_results(actual_targets, predicted_targets, 'PER-DS2.csv')
"""
Run BaseDT model
"""
fitted_baseDT = DecisionTreeClassifier(criterion='entropy').fit(
    train_features, train_targets)  # fit model with training set values
predicted_targets = list(fitted_baseDT.predict(
    test_features))  # get predictions from model and record them
export_results(actual_targets, predicted_targets, 'Base-DT-DS2.csv')
"""
Find best hyperparameters for the BestDT model

Parameter options to tune:
  • splitting criterion: gini and entropy
  • maximum depth of the tree: 10 and no maximum
コード例 #59
0
File: a3.py  Project: tz3/sandbox
# Minimal perceptron example: fit on three 2-D points and predict them back.
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()  # created but never applied to X below
X = np.array([[1, 2], [3, 4], [5, 6]])
y = np.array([0, 1, 0])
clf = Perceptron()
clf.fit(X, y)
predictions = clf.predict(X)
コード例 #60
0
# VisualizeResult(X_test, y_test, clf2,'SVM(Testing set)' )

#Perceptron #########################################################################
# Stratified k-fold CV: each fold preserves the class ratio of y.
skf = StratifiedKFold(shuffle=True)
table = []
for train_index, val_index in skf.split(X, y):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    clf3 = Perceptron()
    clf3.fit(X_train, y_train)
    val_acc = clf3.score(X_val, y_val)
    table.append(val_acc)

# NOTE(review): the test-set predictions below come from the classifier
# fitted on the *last* fold only, not from any combination of the folds.
y_pred3 = clf3.predict(X_test)
acc3 = accuracy_score(y_test, y_pred3)
cm3 = confusion_matrix(y_test, y_pred3)
print("Perceptron:", round(100 * acc3, 2), "%")
print("cross_val_acc mean:", round(np.mean(table), 3))
print("cross_val_acc std:", round(np.std(table), 3))
print(cm3, "\n")

# VisualizeResult(X_test, y_test, clf3,'Perceptron(Testing set)' )

#OVR #########################################################################
skf = StratifiedKFold(shuffle=True)
table = []
for train_index, val_index in skf.split(X, y):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]