예제 #1
0
def perceptron_histo():
    """Classify images described by colour histograms with a Perceptron.

    The histogram features are loaded through
    ``utils.chargementHistogrammesImages`` and 30% of the samples are held
    out for testing. Every combination of 1 to 4 training iterations and of
    the ``alpha`` values produced by ``np.arange(0.01, 1.01, 0.1)`` is tried;
    the combination with the highest test score is printed as one table row,
    together with the wall-clock time of its conversion + fit + score step.
    """
    alpha_grid = np.arange(0.01, 1.01, 0.1)

    # Best result seen so far; a candidate must be strictly better to replace it.
    top_score = 0.0
    top_alpha = 0.0
    top_iterations = 0.0
    top_elapsed = 0.0

    _, data, target, _ = utils.chargementHistogrammesImages(mer, ailleurs, 1, -1)
    # random.seed() returns None, so the split differs from run to run.
    X_train, X_test, Y_train, Y_test = train_test_split(
        data, target, test_size=0.3, random_state=random.seed())

    for n_epochs in range(1, 5):
        for alpha in alpha_grid:
            tick = time.time()

            model = Perceptron(alpha=alpha, n_iter=n_epochs,
                               random_state=random.seed(), n_jobs=-1)

            train_matrix = np.array(X_train)
            test_matrix = np.array(X_test)

            model.fit(X=train_matrix, y=Y_train)
            score = model.score(test_matrix, Y_test)

            tock = time.time()
            if score > top_score:
                top_score = score
                top_alpha = alpha
                top_iterations = n_epochs
                top_elapsed = tock - tick

    row = "| Perceptron simple               | V.Histo    | alpha={:1.2f} iterations={:1.0f}            | {:10.3f}ms | {:1.3f} |"
    print(row.format(top_alpha, top_iterations, top_elapsed * 1000, top_score))
예제 #2
0
def perceptron_vecteur():
    """Classify images described by raw pixel vectors with a Perceptron.

    For each image size ``npix`` in 50, 100, 150 the pixel vectors are loaded
    through ``utils.chargementVecteursImages`` and 30% of the samples are held
    out for testing. Every combination of 1 to 4 training iterations and of
    the ``alpha`` values produced by ``np.arange(0.01, 1.01, 0.1)`` is tried;
    the best-scoring combination over all sizes is printed as one table row.
    """
    alpha_grid = np.arange(0.01, 1.01, 0.1)

    # Best result seen so far; a candidate must be strictly better to replace it.
    top_score = 0.0
    top_alpha = 0.0
    top_iterations = 0.0
    top_elapsed = 0.0
    top_npix = 0.0

    for npix in range(50, 200, 50):
        _, data, target, _ = utils.chargementVecteursImages(mer, ailleurs, 1, -1, npix)
        # random.seed() returns None, so the split differs from run to run.
        X_train, X_test, Y_train, Y_test = train_test_split(
            data, target, test_size=0.3, random_state=random.seed())

        for n_epochs in range(1, 5):
            for alpha in alpha_grid:
                tick = time.time()

                model = Perceptron(alpha=alpha, n_iter=n_epochs,
                                   random_state=random.seed(), n_jobs=-1)

                # X_train and X_test are 3-dimensional by default, e.g.
                # (93, 1, 30000); drop the middle axis to get 2-D matrices.
                train_matrix = np.array(X_train)
                train_matrix = train_matrix.reshape(
                    (train_matrix.shape[0], train_matrix.shape[2]))
                test_matrix = np.array(X_test)
                test_matrix = test_matrix.reshape(
                    (test_matrix.shape[0], test_matrix.shape[2]))

                model.fit(X=train_matrix, y=Y_train)
                score = model.score(test_matrix, Y_test)

                tock = time.time()
                if score > top_score:
                    top_score = score
                    top_alpha = alpha
                    top_iterations = n_epochs
                    top_elapsed = tock - tick
                    top_npix = npix

    row = "| Perceptron simple              | V.Pix {:4.0f} | alpha={:1.2f} iterations={:1.0f}              | {:10.3f}ms | {:1.3f} |"
    print(row.format(top_npix, top_alpha, top_iterations, top_elapsed * 1000, top_score))
예제 #3
0
파일: s1-8.py 프로젝트: wargile/ML1
    def t():
        """Compare Perceptron accuracy before and after feature standardisation.

        Reads the train/test CSV files (column 0 is the label, the remaining
        columns are features), fits a Perceptron on the raw features, then
        re-fits the same estimator on standardised features. Accuracies are
        printed for each stage and the test-accuracy gain, rounded to three
        decimals, is handed to ``pf`` under the key ``'5'``.
        """
        # 1: load the data and separate labels from features
        from pandas import read_csv
        train_frame = read_csv('w2/perceptron-train.csv', header=None)
        test_frame = read_csv('w2/perceptron-test.csv', header=None)
        train_labels = train_frame[0]
        train_features = train_frame.drop([0], axis=1)
        test_labels = test_frame[0]
        test_features = test_frame.drop([0], axis=1)

        # 2: fit on the raw features
        from sklearn.linear_model import Perceptron
        model = Perceptron(random_state=241)
        model.fit(train_features, train_labels)
        raw_train_score = model.score(train_features, train_labels)
        raw_test_score = model.score(test_features, test_labels)
        raw_train_pred = model.predict(train_features)
        raw_test_pred = model.predict(test_features)

        # 3: the same accuracies computed through accuracy_score
        from sklearn.metrics import accuracy_score
        raw_train_acc = accuracy_score(train_labels, raw_train_pred)
        raw_test_acc = accuracy_score(test_labels, raw_test_pred)
        print(raw_train_acc, raw_test_acc)
        print(raw_train_score, raw_test_score)

        # 4: standardise (statistics come from the training set only) and re-fit
        from sklearn.preprocessing import StandardScaler
        standardiser = StandardScaler()
        train_scaled = standardiser.fit_transform(train_features)
        test_scaled = standardiser.transform(test_features)
        model.fit(train_scaled, train_labels)
        scaled_train_score = model.score(train_scaled, train_labels)
        scaled_test_score = model.score(test_scaled, test_labels)
        scaled_train_pred = model.predict(train_scaled)
        scaled_test_pred = model.predict(test_scaled)
        scaled_train_acc = accuracy_score(train_labels, scaled_train_pred)
        scaled_test_acc = accuracy_score(test_labels, scaled_test_pred)
        print(scaled_train_acc, scaled_test_acc)
        print(scaled_train_score, scaled_test_score)

        # 5: report the improvement in test accuracy
        gain = round(scaled_test_acc - raw_test_acc, 3)
        pf('5', gain)
def neural_net(train, test):
	"""Fit a default Perceptron on ``train`` and predict the labels of ``test``.

	Both arguments are handed to ``loadData``, whose result is unpacked here
	as ``(features, labels)``. The test error (1 - accuracy) is printed and
	the predicted labels for the test features are returned.
	"""
	y = []  # placeholder; replaced by the predictions below
	xTrain, yTrain = loadData(train)
	xTest, yTest = loadData(test)
	nN = Perceptron()
	nN.fit(xTrain, yTrain)
	y = nN.predict(xTest)
	# score() is the mean accuracy, so its complement is the error rate
	testError = 1 - nN.score(xTest, yTest)
	print 'Test error: ' , testError
	return y
def neural_net(train, test):
    """Fit a default Perceptron on ``train`` and predict the labels of ``test``.

    Both arguments are handed to ``loadData``, whose result is unpacked here
    as ``(labels, features)``. The test error (1 - accuracy) is printed and
    the predicted labels for the test features are returned.
    """
    y = []  # placeholder; replaced by the predictions below
    trainY, trainX = loadData(train)
    testY, testX = loadData(test)

    neuralNet = Perceptron()
    neuralNet.fit(trainX, trainY)
    y = neuralNet.predict(testX)

    # score() is the mean accuracy, so its complement is the error rate
    testError = 1 - neuralNet.score(testX, testY)
    print 'Test error: ' + str(testError)
    return y
예제 #6
0
def test_model(training_data, testing_data, word2vec_model):
    """Train a 5-iteration Perceptron and return its accuracy on the test data.

    Features and labels for both data sets come from ``build_features``, which
    receives the same ``DictVectorizer`` instance for both calls; only the
    training call passes the extra ``'train'`` argument.
    """
    vectorizer = DictVectorizer()
    train_features, train_labels = build_features(
        training_data, word2vec_model, vectorizer, 'train')
    test_features, test_labels = build_features(
        testing_data, word2vec_model, vectorizer)

    classifier = Perceptron(n_iter=5)
    classifier.fit(train_features, train_labels)
    return classifier.score(test_features, test_labels)
def __Accuracy(dataDict, parameterDict):
    """Return the cross-validation accuracy of a Perceptron.

    Args:
        dataDict: mapping with the keys ``train_X``, ``train_Y``, ``cross_X``
            and ``cross_Y``.
        parameterDict: mapping with the Perceptron hyper-parameters
            ``penalty``, ``alpha``, ``fit_intercept``, ``n_iter``, ``shuffle``
            and ``eta0``.

    The estimator is always built with ``random_state=1`` and
    ``warm_start=False``. The returned score is the accuracy
    (TP+TN)/(TP+TN+FP+FN) on the cross-validation data.
    """
    train_X, train_Y, cross_X, cross_Y = (
        dataDict[key] for key in ('train_X', 'train_Y', 'cross_X', 'cross_Y'))

    hyper_names = ('penalty', 'alpha', 'fit_intercept', 'n_iter', 'shuffle', 'eta0')
    hyper = {name: parameterDict[name] for name in hyper_names}

    perceptron = Perceptron(random_state=1, warm_start=False, **hyper)
    perceptron.fit(train_X, train_Y)  # All features must be float.
    return perceptron.score(cross_X, cross_Y)
예제 #8
0
def main( argv ):
	"""Train a Perceptron on one CSV file and report its accuracy on another.

	Args:
		argv: command-line argument list. ``argv[1]`` is the training CSV,
			``argv[2]`` the testing CSV and ``argv[3]`` an output filename
			(required, but not otherwise used in this function).

	In both CSV files the last column is taken as the label and all other
	columns as features. When fewer than three arguments are supplied a
	usage message is printed and the function returns.
	"""
	try:
		training_filename  = argv[ 1 ]
		testing_filename = argv[ 2 ]
		output_filename = argv[ 3 ]
	except IndexError:
		print( "Error, usage: \"python3 {} <training> <testing> <output>\"".format( argv[ 0 ] ) ) 
		return

	# Select columns by position with `.iloc`. The former `.ix` indexer has
	# been removed from pandas and switched between label and position lookup
	# depending on the index type.
	Training_DataFrame = pd.read_csv( training_filename )

	X = Training_DataFrame.iloc[:,0:-1]
	Y = Training_DataFrame.iloc[:,-1]

	Testing_DataFrame = pd.read_csv( testing_filename )

	testing_X = Testing_DataFrame.iloc[:,0:-1]
	testing_Y = Testing_DataFrame.iloc[:,-1]

	# Perceptron
	from sklearn.linear_model import Perceptron

	# Hyper Parameters:
	# NOTE(review): `n_iter` is the older scikit-learn spelling; newer
	# releases call it `max_iter` - confirm against the installed version.
	alpha 	= 0.0001
	n_iter 	= 20

	# Fit Classifier
	print( "{} Started training".format( str( datetime.now() ) ) )
	P_classifier = Perceptron( alpha = alpha, n_iter = n_iter )
	P_classifier.fit( X, Y )
	print( "{} Stopped training".format( str( datetime.now() ) ) )

	# Report results
	P_score = P_classifier.score( testing_X, testing_Y )

	print( "\nPerceptron Accuracy:", P_score )
예제 #9
0
파일: run.py 프로젝트: ks6g10/classify
def train(a,sizel,intercept):
    """Train a Perceptron on the first ``1/sizel`` of ``a`` and evaluate it on the rest.

    Args:
        a: row-indexable data set whose last column is the class value
            (presumably a 2-D numpy array - it must support ``.copy()`` and
            in-place item assignment). ``a`` itself is not modified.
        sizel: divisor giving the training share; the first
            ``len(a) // sizel`` rows are used for training.
        intercept: forwarded to ``Perceptron(fit_intercept=...)``.

    Returns:
        ``(accuracy, area)``: mean accuracy on the evaluation rows and the
        area under the ROC curve of the predictions.

    A row is a positive example when its class value is greater than 1. The
    class column is zeroed in both parts before fitting, so the label cannot
    leak into the features.
    """
    d = a.copy()
    pes = Perceptron(n_jobs=4,n_iter=500,fit_intercept=intercept)

    # Floor division keeps the slice bound an integer under true division too.
    split = int(len(d) // sizel)
    train_rows = d[:split]
    eval_rows = d[split:]
    class_index = len(d[0])-1

    train_res = numpy.zeros(shape=(len(train_rows)))
    eval_res = numpy.zeros(shape=(len(eval_rows)))

    # The two parts are labelled in separate loops: they have different
    # lengths whenever sizel != 2 (or the row count is odd). A single loop
    # sized by the training part would either overrun the evaluation part or
    # leave evaluation rows unlabelled with the class column still in place.
    for i in range(len(train_rows)):
        train_res[i] = (train_rows[i][class_index] > 1)
        train_rows[i][class_index] = 0
    for i in range(len(eval_rows)):
        eval_res[i] = (eval_rows[i][class_index] > 1)
        eval_rows[i][class_index] = 0

    pes.fit(train_rows,train_res)
    predictions = pes.predict(eval_rows)
    # pos_label is passed by keyword: recent scikit-learn makes it keyword-only.
    (falsepr, truepr, thr) = roc_curve(eval_res, predictions, pos_label=1)
    area = auc(falsepr, truepr)
    accuracy = pes.score(eval_rows,eval_res)
    return (accuracy, area)
# generate a random prediction (majority class)
# Baseline "no-skill" scores: a constant 0 for every test sample.
ns_probs = [0 for _ in range(len(y_test))]

# y_train / y_test are reduced with argmax(axis=1) wherever class indices are
# needed (presumably they are one-hot encoded - TODO confirm).
clf = Perceptron(eta0=0.1, random_state=0, max_iter=1000)
clf.fit(X_train, y_train.argmax(axis=1))

# NOTE(review): no split happens in this fragment; X_train / X_test presumably
# come from an earlier 75:25 train/test split.

# NOTE(review): predict() returns class labels, not probabilities, so y_score
# holds the same values as clf_predict below.
y_score = clf.predict(X_test)

clf_predict = clf.predict(X_test)
clf_predict_on_train = clf.predict(X_train)
# Accuracy on the training and test data
print('acc for training data: {:.3f}'.format(
    clf.score(X_train, y_train.argmax(axis=1))))
print('acc for test data: {:.3f}'.format(
    clf.score(X_test, y_test.argmax(axis=1))))
# NOTE(review): the report title says "MLP" although the model is a Perceptron.
print('MLP Classification report:\n\n',
      classification_report(y_test.argmax(axis=1), clf_predict))

# disp = metrics.plot_confusion_matrix(clf, X_test, y_test.argmax(axis=1))
# disp.figure_.suptitle("Confusion Matrix")
# print("Confusion matrix:\n%s" % disp.confusion_matrix)
# #
# plt.show()
#
#
# Confusion matrices for the test and the training predictions
cm = confusion_matrix(y_test.argmax(axis=1), clf_predict)
cm_on_train = confusion_matrix(y_train.argmax(axis=1), clf_predict_on_train)
# print(cm)
예제 #11
0
# Load the voting data set: the last column is the label, the others are features.
mat = Arff("standardVoting.arff",label_count=1)
data = mat.data[:,0:-1]
labels = mat.data[:,-1]
# Hold out 30% of the rows for testing (no random_state, so the split varies per run).
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)


# In[21]:


# Fit `ptron` (created in an earlier cell) on the training split.
ptron.fit(X_train,y_train)


# In[22]:


# Score the fitted model on the held-out split.
ptron.score(X_test,y_test)


# We see that our naive perceptron does fairly well compared to the sklearn version.

# # 6. Iris Data Set

# In[23]:


from sklearn.datasets import load_iris


# In[24]:

예제 #12
0
def test_perceptron_accuracy():
    """A Perceptron must exceed 70% training accuracy on both X and X_csr."""
    for features in (X, X_csr):
        model = Perceptron(max_iter=100, tol=None, shuffle=False)
        model.fit(features, y)
        assert model.score(features, y) > 0.7
예제 #13
0
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)  # hold out a random 30% of the data to see how the model does on unseen samples

# Scale the features to get the best performance out of the learning/optimisation algorithm
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)  # estimate the mean and standard deviation of every feature
sc.mean_  # inspect the feature means; only two Iris features are used, so the result is array([ 3.82857143,  1.22666667])
sc.scale_  # inspect the feature standard deviations; the result is array([ 1.79595918,  0.77769705])
X_train_std = sc.transform(X_train)
# Note: the test set is standardised with the same parameters so that it stays comparable with the training set
X_test_std = sc.transform(X_test)

# Train the perceptron model
from sklearn.linear_model import Perceptron
# n_iter: can be read as the number of gradient-descent iterations
# eta0: can be read as the gradient-descent learning rate
# random_state: random seed, so that every run shuffles the training set in the same order
ppn = Perceptron(n_iter=40, eta0=0.2, random_state=0)
ppn.fit(X_train_std, y_train)

# Classify the test set; this returns an array of predicted labels
y_pred = ppn.predict(X_test_std)
# Print the predictions and fitted parameters, then the test accuracy computed two ways
print(y_pred)
print(ppn.coef_)
print(ppn.n_iter_)
print(ppn.intercept_)
print(accuracy_score(y_test, y_pred))
print(ppn.score(X_test_std, y_test))
예제 #14
0
    def CheckingClassifer(ClassiferName):
        """Train the classifier selected by ``ClassiferName`` and print its results.

        Recognised names are "perceptron", "RBFSVC", "LinerSVC",
        "TreeDescion", "KNN" and "LG"; any other name does nothing. Each
        branch prints a banner, fits on ``X``/``y``, predicts ``Xtest``,
        prints the elapsed fit + predict time and finally the accuracy on
        ``Xtest``/``ytest``. The SVC and decision-tree branches also print
        the predicted labels.
        """
        if ClassiferName == "perceptron":
            # Perceptron: time fit + predict, then print the accuracy
            print(
                "-----------------------------Perceptron------------------------------------------------"
            )
            started = time.time()
            perceptron = Perceptron(penalty=None, alpha=0.0001,
                                    fit_intercept=True, max_iter=50, tol=None,
                                    shuffle=True, verbose=0, eta0=0.01,
                                    n_jobs=None, random_state=0,
                                    early_stopping=False,
                                    validation_fraction=0.1,
                                    n_iter_no_change=5, class_weight=None,
                                    warm_start=False)
            perceptron.fit(X, y)
            perceptron.predict(Xtest)
            print("--- %s seconds ---" % (time.time() - started))

            print(perceptron.score(Xtest, ytest))
            return

        if ClassiferName == "RBFSVC":
            # SVC with its default kernel, gamma='auto' and C=15
            print(
                "------------------------NON linear SVC-------------------------------------"
            )
            started = time.time()
            rbf_svc = SVC(gamma='auto', C=15)
            rbf_svc.fit(X, y)
            predicted = rbf_svc.predict(Xtest)
            print(predicted)

            print("--- %s seconds ---" % (time.time() - started))

            print(rbf_svc.score(Xtest, ytest))
            return

        if ClassiferName == "LinerSVC":
            # SVC with a linear kernel
            print(
                "------------------------linear SVC-------------------------------------"
            )
            started = time.time()
            linear_svc = SVC(gamma='auto', kernel='linear')
            linear_svc.fit(X, y)
            predicted = linear_svc.predict(Xtest)
            print(predicted)

            print("--- %s seconds ---" % (time.time() - started))

            print(linear_svc.score(Xtest, ytest))
            return

        if ClassiferName == "TreeDescion":
            # Decision tree limited to depth 15
            print(
                "-----------------------------TreeDescion---------------------------------------"
            )
            started = time.time()
            tree_clf = DecisionTreeClassifier(random_state=0, max_depth=15)
            tree_clf.fit(X, y)
            predicted = tree_clf.predict(Xtest)
            print(predicted)

            print("--- %s seconds ---" % (time.time() - started))

            print(tree_clf.score(Xtest, ytest))
            return

        if ClassiferName == "KNN":
            # 1-nearest-neighbour classifier
            print(
                "--------------------------KNN--------------------------------------------"
            )
            started = time.time()
            knn_clf = KNeighborsClassifier(n_neighbors=1)
            knn_clf.fit(X, y)
            predicted = knn_clf.predict(Xtest)
            print("--- %s seconds ---" % (time.time() - started))

            print("Accuracy:", metrics.accuracy_score(ytest, predicted))
            return

        if ClassiferName == "LG":
            # Logistic regression with the liblinear solver
            print(
                "-------------------------LG---------------------------------------------"
            )
            started = time.time()
            logreg = LogisticRegression(penalty='l2', dual=False, tol=0.0001,
                                        C=1.0, fit_intercept=True,
                                        intercept_scaling=1,
                                        class_weight=None, random_state=None,
                                        max_iter=500, solver='liblinear')
            logreg.fit(X, y)
            predicted = logreg.predict(Xtest)

            print("--- %s seconds ---" % (time.time() - started))

            print("Accuracy:", metrics.accuracy_score(ytest, predicted))
예제 #15
0
파일: gadioux.py 프로젝트: S-GADIOUX/m1-all
    'examples_file',
    default=None,
    help=
    'Exemples utilisés comme voisins pour la prédiction KNN (au format .examples)'
)

parser.add_argument('test_file',
                    default=None,
                    help='Exemples de test (au format .examples)')

# The help text of --tfidf used to be a copy of the test_file help; it now
# describes what the flag does (it selects TfidfVectorizer below).
parser.add_argument('--tfidf',
                    '-i',
                    action='store_true',
                    help='Utiliser une pondération TF-IDF au lieu des comptages bruts')

args = parser.parse_args()

#------------------------------------------------------------
# Pick the text vectorizer: TF-IDF weights when --tfidf is given, raw counts otherwise.
if args.tfidf:
    vectorizer = TfidfVectorizer(token_pattern=r"\w+")
else:
    vectorizer = CountVectorizer(token_pattern=r"\w+")
# Load the training examples
Y_train, X_train = read_examples(args.examples_file, vectorizer)
# Load the test examples (the extra False argument is only passed for the test
# set; presumably it reuses the vocabulary fitted on the training examples - TODO confirm)
Y_test, X_test = read_examples(args.test_file, vectorizer, False)

# Fit a default Perceptron and print its accuracy on the test examples
perceptron = Perceptron().fit(X_train, Y_train)
print(perceptron.score(X_test, Y_test))

# Load the train/test CSV files; they have no header row.
data_train = read_csv('perceptron-train.csv', header=None)
data_test = read_csv('perceptron-test.csv', header=None)

# The first column is the label ...
y_train = data_train[data_train.columns[0]]
y_test = data_test[data_test.columns[0]]

# ... and the remaining columns are the features.
X_train = data_train[data_train.columns[1:]]
X_test = data_test[data_test.columns[1:]]


# Baseline: Perceptron on the raw features.
perceptron = Perceptron(random_state=241)
perceptron.fit(X_train, y_train)

accuracy = perceptron.score(X_test, y_test)

print()
print('Accuracy:', accuracy)


# Standardise the features; the scaler is fitted on the training data only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same model on the standardised features.
perceptron2 = Perceptron(random_state=241)
perceptron2.fit(X_train_scaled, y_train)

# Test-accuracy gain obtained from standardisation.
accuracy2 = perceptron2.score(X_test_scaled, y_test)
accuracy_delta = accuracy2 - accuracy
예제 #17
0
파일: train.py 프로젝트: bdepwgjqet/fondue
# NOTE: every "Acc" value in this fragment is measured on the TRAINING data
# (score(x_train, y_train)), so it is not a held-out accuracy. y_pred is
# overwritten by each model's predictions for x_test.
y_pred = knn.predict(x_test)
acc_knn = round(knn.score(x_train, y_train)*100, 2)
print("KNN Acc: ", acc_knn)

# Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(x_train, y_train)
y_pred = gaussian.predict(x_test)
acc_gaussian = round(gaussian.score(x_train, y_train)*100, 2)
print("Gaussian NB Acc: ", acc_gaussian)

# perceptron
perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = round(perceptron.score(x_train, y_train)*100, 2)
print("Perceptron Acc: ", acc_perceptron)

# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
acc_linear_svc = round(linear_svc.score(x_train, y_train)*100, 2)
print("Linear SVC Acc: ", acc_linear_svc)

# SGD
sgd = SGDClassifier()
sgd.fit(x_train, y_train)
y_pred = sgd.predict(x_test)
acc_sgd = round(sgd.score(x_train, y_train)*100, 2)
print("SGD Acc: ", acc_sgd)
예제 #18
0
# Predict the standardised test set and count the samples whose prediction is wrong.
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())


# In[10]:


from sklearn.metrics import accuracy_score

# Accuracy computed from the stored predictions ...
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))


# In[11]:


# ... and the same figure obtained directly from the estimator.
print('Accuracy: %.2f' % ppn.score(X_test_std, y_test))


# In[12]:


from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
예제 #19
0
# Perceptron
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Perceptron
# load the diabetes datasets
dataset = datasets.load_diabetes()
# fit a Perceptron model to the data
# NOTE(review): Perceptron is a classifier; the diabetes target is presumably
# a continuous regression value, so each distinct value would be treated as
# its own class - confirm this is intended.
model = Perceptron()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions (on the same data the model was fitted on)
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model: mean squared error, then mean accuracy
mse = np.mean((predicted-expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
예제 #20
0
# 1. Load the train/test sets; the label is column 'class', the features are 'p1' and 'p2'.
# NOTE(review): the double-bracket selection makes y and y_test one-column
# DataFrames rather than Series.
train = pandas.read_csv('perceptron-train.csv')
test = pandas.read_csv('perceptron-test.csv')
y = train[['class']]
X = train[['p1', 'p2']]
y_test = test[['class']]
X_test = test[['p1', 'p2']]
# Step 2 (English): train a perceptron with default parameters and random_state=241.
'''
2. Обучите персептрон со стандартными параметрами и random_state=241.
'''
clf = Perceptron(random_state=241)
clf.fit(X, y)
# Step 3 (English): compute the quality (share of correctly classified
# objects, i.e. accuracy) of the classifier on the test set.
'''
3. Подсчитайте качество (долю правильно классифицированных объектов, accuracy)
 полученного классификатора на тестовой выборке.
'''
scores = clf.score(X_test, y_test)
print("score of simple data clf = %0.3f" % scores)
print("use metric acc = %0.3f " % accuracy_score(y_test, clf.predict(X_test)))

# Step 4 (English): normalise the training and test sets with StandardScaler.
'''
4. Нормализуйте обучающую и тестовую выборку с помощью класса StandardScaler.
'''
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# Step 5 (English): train the perceptron on the new sets and find the share
# of correct answers on the test set.
'''
5. Обучите персептрон на новых выборках. Найдите долю правильных ответов на тестовой выборке.
'''
clf2 = Perceptron(random_state=241)
clf2.fit(X_train_scaled, y)
classifierNB=LinearSVC(C=5.0) #0.75
#classifier= GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
#                             max_depth=1, random_state=0)
#classifier= RandomForestClassifier()
classifier=Perceptron(penalty=None, alpha=0.0001, fit_intercept=True, n_iter=5, shuffle=True, verbose=0, eta0=1.0, n_jobs=1, random_state=0, class_weight=None, warm_start=False)
#classifierKNN=KNeighborsClassifier(n_neighbors=3, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None)
#classifierNB=MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
classifierSGDC=SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, n_iter=5, shuffle=True, verbose=0, epsilon=0.1, n_jobs=1, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, class_weight=None, warm_start=False) #0.65
#classifier=LDA(n_components=None, priors=None, shrinkage=None,store_covariance=False, tol=0.0001)

#fit the model on the training data
classifier.fit(transformed_train,pol_train)
classifierSGDC.fit(transformed_train,pol_train)
classifierNB.fit(transformed_train,pol_train)
#get the accuracy on the test data
print ' lregACCURACY:\t',classifier.score(transformed_test,pol_test)
#0.866095238095
#print 'PREDICTED:\t',classifier.predict(transformed_test)
#print 'CORRECT:\t', array(pol_test)
logit_list = list(array(classifier.predict(transformed_test)))
#print logit_list
print 'sgdc ACCURACY:\t',classifierSGDC.score(transformed_test,pol_test)

#print 'PREDICTED:\t',classifierSGDC.predict(transformed_test)
#print 'CORRECT:\t', array(pol_test)
#print 'SGDC_List'
SGDC_list = list(array(classifier.predict(transformed_test)))
print ' nbACCURACY:\t',classifierNB.score(transformed_test,pol_test)

#print 'PREDICTED:\t',classifierNB.predict(transformed_test)
#print 'NB_List'
예제 #22
0
# Toy data set: four 2-D points with labels -1 / +1.
X = np.array([[1,1],
              [2,2],
              [4,4],
              [5,5]])
y = np.array([-1, -1, 1, 1])
# Initial weight vector handed to train().
# NOTE(review): `0,99` creates two separate entries (0 and 99); confirm that
# 0.99 was not intended.
w = np.array([0,99, 5])
# At this point `Perceptron` is a class defined or imported earlier (not the
# scikit-learn estimator): it takes the data in its constructor and has train().
Example2Perceptron = Perceptron(X,y,plot_data_lines = True, plot_errors = True)
w_ex2 = Example2Perceptron.train(w,epochs = 20)
print(w_ex2)



# From here on the name `Perceptron` is rebound to the scikit-learn estimator.
from sklearn.linear_model import Perceptron
sk_perceptron = Perceptron(tol=1e-5, random_state=0)
sk_perceptron.fit(X,y)
print(sk_perceptron.score(X,y))
print(sk_perceptron.get_params())
print([sk_perceptron.coef_, sk_perceptron.intercept_])
print(sk_perceptron.n_iter_)

# Same estimator on the X_train / X_test split defined elsewhere; accuracies are printed as percentages.
sk_bigdata = Perceptron(max_iter = 1000, eta0=0.1, tol=1e-5, random_state=0)
sk_bigdata.fit(X_train, y_train)
print([sk_bigdata.coef_, sk_bigdata.intercept_])
print('Accuracy Training= ',sk_bigdata.score(X_train, y_train)*100)
print('Accuracy Testing= ',sk_bigdata.score(X_test, y_test)*100)


# Covariance of X computed by hand: centre the columns, then divide the
# product of the centred matrix with itself by the number of rows.
ciplakAyak = X-np.mean(X, axis = 0)
cov = np.dot(ciplakAyak.T,ciplakAyak)/X.shape[0]
print(cov)
# The same quantity from numpy (ddof=0 also divides by the number of rows).
cov_numpy = np.cov(X, rowvar = False, ddof = 0)
예제 #23
0
def test_perceptron_accuracy():
    """A Perceptron must exceed 70% training accuracy on both X and X_csr."""
    for features in (X, X_csr):
        model = Perceptron(max_iter=100, tol=None, shuffle=False)
        model.fit(features, y)
        accuracy = model.score(features, y)
        assert_greater(accuracy, 0.7)
예제 #24
0
# Ten rounds of shuffle / split / train / score. In every row of rawData the
# first element is the class and the remaining elements are the features.
print "Training..."
for i in range(10):
	random.shuffle(rawData)
	trainClass = []
	trainData = []
	testClass = []
	testData = []
	# Every 10th row goes to the test set, the others to the training set.
	# (The inner loop reuses the name `i`; the outer loop is unaffected because
	# `for` rebinds it from range(10) at the start of each round.)
	for i in range(len(rawData)):
		if i%10 == 0:
			testClass.append(rawData[i][0])
			testData.append(rawData[i][1:])
		else:
			trainClass.append(rawData[i][0])
			trainData.append(rawData[i][1:])

	trainClass = np.array(trainClass)
	trainData = np.array(trainData)
	testClass = np.array(testClass)
	testData = np.array(testData)

	# Only the Perceptron's test accuracy is printed; the depth-3 decision
	# tree is fitted so that the last round's tree can be exported below.
	model = Perceptron()
	model.fit(trainData, trainClass)
	model1 = tree.DecisionTreeClassifier(max_depth=3)
	model1.fit(trainData, trainClass)
	print model.score(testData, testClass)


# Export the decision tree from the final round to tree.pdf via Graphviz dot data.
dot_data = StringIO() 
tree.export_graphviz(model1, out_file=dot_data) 
graph = pydot.graph_from_dot_data(dot_data.getvalue()) 
graph.write_pdf("tree.pdf") 
예제 #25
0
# Each model below is fitted on the training split and its accuracy on the
# test split is stored as a percentage rounded to two decimals. The bare
# expressions (Y_pred.tolist(), acc_knn, ...) have no effect in a script;
# presumably they are notebook cells that display their value.

# k-nearest neighbours (19 neighbours, distance-weighted)
knn = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
Y_pred.tolist()
Y_test.tolist()
acc_knn = round(knn.score(X_test, Y_test) * 100, 2)
acc_knn
#-------------------------------------------------------------------------------------------------------

# Perceptron
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
Y_pred.tolist()
Y_test.tolist()
acc_perceptron = round(perceptron.score(X_test, Y_test) * 100, 2)
acc_perceptron

#-------------------------------------------------------------------------------------------------------
# NOTE(review): the original comment on this line was mis-encoded and is unreadable.
# Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
Y_pred.tolist()
Y_test.tolist()
acc_linear_svc = round(linear_svc.score(X_test, Y_test) * 100, 2)
acc_linear_svc

#-------------------------------------------------------------------------------------------------------

#-------------------------------------------------------------------------------------------------------
예제 #26
0
def main( argv ):
	"""Compare several classifiers, plus a majority vote, on one CSV data set.

	Args:
		argv: command-line argument list. ``argv[1]`` is the input CSV and
			``argv[2]`` an output name (required, but not otherwise used in
			this function).

	The last CSV column is the label and every other column is a feature
	(here: the predictions of individual classifiers). All values are mapped
	through ``str_to_num``. Rows are assigned at random, roughly 80/20, to a
	training and a testing part. Each model is fitted on the training part
	and its test accuracy is reported through ``printAccuracy``. Finally a
	per-row majority vote over the feature columns is scored against the
	label. When fewer than two arguments are supplied a usage message is
	printed and the function returns.
	"""
	try:
		input_csv  = argv[ 1 ]
		output_model = argv[ 2 ]
	except IndexError:
		print( "Error, usage: \"python3 {} <input_csv> <output_csv>\"".format( argv[ 0 ] ) ) 
		return

	df = pd.read_csv( input_csv )

	convert_to_num = True

	print_individual_classifier_accuracies( df )

	# Split data into test and train: a random boolean mask, True for ~80% of the rows
	msk = np.random.rand( len( df ) ) < 0.8

	# Columns are selected by position with `.iloc`. The former `.ix` indexer
	# has been removed from pandas and switched between label and position
	# lookup depending on the index type.
	Training_DataFrame = df[ msk ].copy()
	if convert_to_num:
		X = Training_DataFrame.iloc[:,0:-1].applymap( str_to_num )
		Y = Training_DataFrame.iloc[:,-1].map( str_to_num )
	else:
		X = Training_DataFrame.iloc[:,0:-1]
		Y = Training_DataFrame.iloc[:,-1]

	Testing_DataFrame = df[ ~msk ].copy()
	if convert_to_num:
		testing_X = Testing_DataFrame.iloc[:,0:-1].applymap( str_to_num )
		testing_Y = Testing_DataFrame.iloc[:,-1].map( str_to_num )
	else:
		testing_X = Testing_DataFrame.iloc[:,0:-1]
		testing_Y = Testing_DataFrame.iloc[:,-1]

	print( "\nTraining on Classifier Predictions:" )

	# ---- LINEAR CLASSIFIERS ----
	print( "Linear Classifiers\n" )

	# Logistic Regression
	from sklearn.linear_model import LogisticRegression

	# Hyper Parameters:
	# NOTE(review): `tol` is declared but never passed to the classifier.
	tol = 0.0001

	# Fit Classifier
	LR_classifier = LogisticRegression( )
	LR_classifier.fit( X, Y )

	# Report results
	LR_score = LR_classifier.score( testing_X, testing_Y )

	printAccuracy( "Logistic Regression", LR_score )

	# Perceptron
	from sklearn.linear_model import Perceptron

	# Fit Classifier
	P_classifier = Perceptron( )
	P_classifier.fit( X, Y )

	# Report results
	P_score = P_classifier.score( testing_X, testing_Y )

	printAccuracy( "Perceptron", P_score )

	# Gaussian Naive Bayes
	from sklearn.naive_bayes import GaussianNB

	# Fit Classifier
	MNB_classifier = GaussianNB( )
	MNB_classifier.fit( X, Y )

	# Report results
	MNB_score = MNB_classifier.score( testing_X, testing_Y )
	
	printAccuracy( "Gaussian Naive Bayes", MNB_score )

	# Linear Support Vector Machine ( SVM )
	from sklearn.svm import LinearSVC

	# Fit Classifier
	LSVC_classifier = LinearSVC( )
	LSVC_classifier.fit( X, Y )

	# Report results
	LSVC_score = LSVC_classifier.score( testing_X, testing_Y )
	
	printAccuracy( "Linear SVM", LSVC_score )

	# ---- NONLINEAR ALGOS ----
	print( "\nNonlinear Classifiers\n" )

	# Decision Tree
	from sklearn.tree import DecisionTreeClassifier

	# Fit Classifier
	DT_classifier = DecisionTreeClassifier( )
	DT_classifier.fit( X, Y )

	# Report results
	DT_score = DT_classifier.score( testing_X, testing_Y )
	
	printAccuracy( "Decision Tree", DT_score )

	# Random Forest
	from sklearn.ensemble import RandomForestClassifier

	# Hyper Parameters
	n_estimators = 22

	# Fit Classifier
	RF_classifier = RandomForestClassifier( 
		n_estimators=n_estimators 
	)
	RF_classifier.fit( X, Y )

	# Report results
	RF_score = RF_classifier.score( testing_X, testing_Y )
	
	printAccuracy( "Random Forest", RF_score )

	# KNN
	from sklearn.neighbors import KNeighborsClassifier

	# Hyper Parameters
	# NOTE(review): `n_neighbors` is declared but never passed, so the
	# classifier runs with its library default. Left unchanged so the
	# reported results stay the same - confirm which was intended.
	n_neighbors = 20

	# Fit Classifier
	KNN_classifier = KNeighborsClassifier( )
	KNN_classifier.fit( X, Y )

	# Report results
	KNN_score = KNN_classifier.score( testing_X, testing_Y )
	
	printAccuracy( "KNN", KNN_score )

	# ---- VOTING ----
	print( "\nMajority Vote Classifier\n" )

	V_correct = 0
	V_incorrect = 0
	V_total = len( testing_X )

	# For each test row the most frequent feature value is the vote; it is
	# compared with the label stored under the same index.
	for idx, row in testing_X.iterrows():
		prediction = Counter( row ).most_common()[0][0]
		if testing_Y[ idx ] == prediction:
			V_correct += 1
		else:
			V_incorrect += 1

	printAccuracy( "Voting", V_correct / V_total )

	print( "\n\nDone." )
예제 #27
0
# NOTE: every acc_* value in this fragment is the accuracy on the TRAINING
# data (score(X_train, Y_train)), as a percentage rounded to two decimals.
# Y_pred is overwritten by each model's predictions for X_test.

# k-nearest neighbours (3 neighbours)
knn = KNeighborsClassifier(n_neighbors = 3) 
knn.fit(X_train, Y_train)  
Y_pred = knn.predict(X_test)  
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)

#Gaussian Naive Bayes:
gaussian = GaussianNB() 
gaussian.fit(X_train, Y_train)  
Y_pred = gaussian.predict(X_test)  
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)

#Perceptron:
perceptron = Perceptron(max_iter=5)
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)

#Linear Support Vector Machine:
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)

#Decision Tree
decision_tree = DecisionTreeClassifier() 
decision_tree.fit(X_train, Y_train)  
Y_pred = decision_tree.predict(X_test)  
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)

"""==================================== Which is the best Model ? ================================"""
예제 #28
0
    # Step 1: Fit the CountVectorizer to the trainTweets
    countVec.fit(trainTweets)

    print "Vocab of countVec"
    print countVec.get_feature_names()


    # Step 2: Implement getFeautres() to return a feature matrix for any
    # list of tweets.

    #Now get train features.
    trainX = getFeatures(trainTweets, countVec, dictVec, True, True)
    
    perceptron.fit(trainX, trainY)

    #Get features and labels for development set.
    devSet = p.load(open(devSetPath, 'rb'))
    devTweets = [d[0] for d in devSet]

    devX = getFeatures(devTweets, countVec, dictVec)
    devY = [d[1] for d in devSet]

    print "Train label distribution", getLabelDist(devY)

    # Predict labels for devSet
    perceptron.predict(devX)

    #Print out accuracy for trainSet
    print "Train set accuracy:", perceptron.score(trainX, trainY)
    #Print out accuracy for devSet
    print "Dev set accuracy:", perceptron.score(devX, devY)
예제 #29
0
# Standardizing the features:
# the scaler is fitted on the training data only and then applied to both splits.

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# ## Training a perceptron via scikit-learn

ppn = Perceptron(max_iter=40, eta0=0.1, random_state=1)
ppn.fit(X_train_std, y_train)

# Evaluate on the standardized test split; the accuracy is printed twice,
# once via accuracy_score and once via the estimator's own score().
y_pred = ppn.predict(X_test_std)
print('Misclassified examples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))
print('Accuracy: %.3f' % ppn.score(X_test_std, y_test))

# Plot the decision regions of the fitted perceptron over the combined
# (train + test) standardized data; rows 105-149 are passed as test_idx.

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
예제 #30
0
def main( argv ):
    """Cross-validate three classifiers on a CSV and dump their predictions.

    ``crossValidationGenerator.splitData`` is called on the input CSV, after
    which ``train_split_<i>.csv`` / ``test_split_<i>.csv`` are read for every
    split (presumably written by ``splitData`` -- confirm). In each file all
    columns but the last are used as features and the last column as label.
    A Random Forest, a Perceptron and a K-NN classifier are trained per split;
    their test predictions are accumulated over all splits and written, next
    to the labels returned by ``getY``, to the output CSV.

    :param argv: command-line argument list; ``argv[1]`` is the input CSV path
        and ``argv[2]`` the output CSV path. When either is missing a usage
        message is printed and the function returns without doing any work.
    :return: None
    """
    try:
        input_csv_filename = argv[ 1 ]
        output_csv_filename = argv[ 2 ]
    except IndexError:
        print( "Error, usage: \"python3 {} <CSV> <output_CSV>\"".format( argv[ 0 ] ) )
        return

    import crossValidationGenerator as cvg
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.neighbors import KNeighborsClassifier

    # Cross validation parameters
    split_count = 3

    # Hyper parameters
    n_estimators = 60    # Random Forest
    alpha = 0.0001       # Perceptron (only used by scikit-learn when a penalty is set)
    n_iter = 20          # Perceptron: number of passes over the training data
    n_neighbors = 20     # K-NN

    cvg.splitData( input_csv_filename, split_count )

    # NOTE(review): the output pairs Y_results[i] with the i-th accumulated
    # prediction, so getY() must return labels in the same order as the
    # concatenated test splits -- confirm against crossValidationGenerator.
    Y_results = getY( input_csv_filename )
    RF_predictions = []
    P_predictions = []
    KNN_predictions = []

    for set_idx in range( split_count ):

        print( "\n{} Starting split {}:".format( str( datetime.now() ), set_idx + 1 ) )

        train_filename = "train_split_{}.csv".format( set_idx )
        test_filename = "test_split_{}.csv".format( set_idx )

        # Read training data (positional indexing; .ix no longer exists in pandas)
        train_df = pd.read_csv( train_filename )
        X = train_df.iloc[ :, 0:-1 ]
        Y = train_df.iloc[ :, -1 ]

        # Read test data
        test_df = pd.read_csv( test_filename )
        test_X = test_df.iloc[ :, 0:-1 ]
        test_Y = test_df.iloc[ :, -1 ]

        # (name in the "Training" message, name in the "completed" message,
        #  fresh estimator, list collecting its predictions across splits)
        classifiers = [
            ( "Random Forest", "Random forest",
              RandomForestClassifier( n_estimators = n_estimators ),
              RF_predictions ),
            # max_iter with tol=None runs exactly n_iter epochs, which is what
            # the removed `n_iter` argument used to mean.
            ( "Perceptron", "Perceptron",
              Perceptron( alpha = alpha, max_iter = n_iter, tol = None ),
              P_predictions ),
            ( "KNN", "K-NN",
              KNeighborsClassifier( n_neighbors = n_neighbors ),
              KNN_predictions ),
        ]

        for train_label, done_label, classifier, predictions in classifiers:
            print( "{} | Training {}".format( str( datetime.now() ), train_label ) )
            classifier.fit( X, Y )
            predictions.extend( classifier.predict( test_X ) )
            print( "{} > {} completed for split {} with accuracy {}%\n".format(
                str( datetime.now() ), done_label, set_idx + 1,
                100 * classifier.score( test_X, test_Y ) ) )

    # Write header and rows through a single file handle. Values are passed
    # through str() because str.join() rejects numeric predictions/labels.
    with open( output_csv_filename, 'w' ) as output_stream:
        output_stream.write( "Random_Forest,Perceptron,KNN,Label\n" )
        for idx in range( len( RF_predictions ) ):
            row = ( RF_predictions[ idx ], P_predictions[ idx ],
                    KNN_predictions[ idx ], Y_results[ idx ] )
            output_stream.write( ','.join( str( value ) for value in row ) )
            output_stream.write( '\n' )

    print( "\n\nComplete at {}\n\n".format( str( datetime.now() ) ) )
예제 #31
0
    [0.7, 0.8]
])

Y = np.array([1, 1, 1, 0])
h = 0.02  # step size of the plotting mesh


# create a mesh to plot in, padded by 1 around the data range
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
fig = plt.figure()

# Retrain from scratch with an epoch budget of 1..6 and plot the predicted
# class over the mesh after each run, stopping once every training point is
# classified correctly.
for e in range(1, 7):
    print('\nStarting epoch %s' % e)
    # max_iter with tol=None runs exactly `e` epochs (the meaning of the
    # removed `n_iter` argument).
    clf = Perceptron(max_iter=e, tol=None, verbose=5).fit(X, Y)
    print('%s %s' % (clf.intercept_, clf.coef_))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    plt.title('Epoch %s' % e)

    converged = clf.score(X, Y) == 1
    if converged:
        print('converged in epoch %s' % e)
    # Show the figure before leaving the loop: previously `break` ran first,
    # so the plot of the converged epoch was never displayed.
    plt.show()
    if converged:
        break
예제 #32
0
파일: lesson2.2.py 프로젝트: rema7/coursera
# Load the test set; the CSV is read without a header row.
X_test = pd.read_csv('perceptron-test.csv', header=None)

# The first column is used as the target, the remaining columns as features.
y = X_train[X_train.columns[0]]
X_train = X_train.drop(X_train.columns[0], axis=1, inplace=False)
print X_train


# Baseline: perceptron fitted on the raw (unscaled) features.
clf = Perceptron(random_state=42)
clf.fit(X_train, y)

print clf.predict(X_train)

# Accuracy on the raw test set (the original author noted 0.34 here).
y1 = X_test[X_test.columns[0]]
X_test = X_test.drop(X_test.columns[0], axis=1, inplace=False)
score = clf.score(X_test, y1)

print score

# Fit the scaler on the training features only, then reuse it for the test set.
# NOTE(review): `scaler` is created outside this excerpt -- presumably a
# StandardScaler; confirm.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

clf = Perceptron(random_state=42)
clf.fit(X_train_scaled, y)

# Accuracy after scaling (the original author noted 0.89 here).
score_scaled = clf.score(X_test_scaled, y1)
print score_scaled

# Accuracy gained by scaling the features.
print (score_scaled - score)
예제 #33
0
#!/usr/bin/env python
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
import numpy as np

from titanic import answer

if __name__ == '__main__':
    # Column 0 of each CSV is used as the target, the remaining columns as
    # features.
    train_data = np.genfromtxt('perceptron-train.csv', delimiter=',')
    test_data = np.genfromtxt('perceptron-test.csv', delimiter=',')

    X_train_data = train_data[:, 1:]
    Y_train_data = train_data[:, 0]
    X_test_data = test_data[:, 1:]
    Y_test_data = test_data[:, 0]

    scaler = StandardScaler()
    clf = Perceptron(random_state=241)

    # Baseline accuracy on the raw features. score() already returns a single
    # accuracy value, so it is used directly instead of calling .mean() on it
    # (a plain Python float has no .mean()).
    clf.fit(X_train_data, Y_train_data)
    scores = clf.score(X_test_data, Y_test_data)
    print(scores)

    # The scaler is fitted on the training features only and reused for the
    # test features.
    X_train_data_scaled = scaler.fit_transform(X_train_data)
    X_test_data_scaled = scaler.transform(X_test_data)

    clf.fit(X_train_data_scaled, Y_train_data)
    scaled_scores = clf.score(X_test_data_scaled, Y_test_data)
    print(scores, scaled_scores)
    # Submit the accuracy gained by feature normalization.
    answer(scaled_scores - scores, 'feature_normalization.txt')
예제 #34
0
# Perceptron trained and evaluated on the standardized features.
# NOTE(review): the original heading said "without Normalization", but the
# code uses X_train_std / X_test_std -- confirm which was intended.

ppn = Perceptron(max_iter=100, eta0=0.01, random_state=0)
ppn.fit(X_train_std, y_train)  # train the model
y_pred = ppn.predict(X_test_std)  # predict on the held-out test set

# Print the results

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

print('Misclassified samples: %d' % (y_test != y_pred).sum())

print('Accuracy for Perceptron: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy for Perceptron: %.2f' % ppn.score(X_test_std, y_test))
# The scored model is the perceptron, so label it as such (was "Tree").
print('Train Accuracy for Perceptron: %.2f' % ppn.score(X_train_std, y_train))


# plot data
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
예제 #35
0
    ['Nominated Best Picture', 'Won Best Picture', 'Num of Awards'], [
        'genres', 'plot_keywords', 'movie_imdb_link', 'director_name',
        'actor_3_facebook_likes', 'actor_2_name', 'actor_1_facebook_likes',
        'actor_1_name', 'movie_title', 'cast_total_facebook_likes',
        'actor_3_name', 'facenumber_in_poster', 'language', 'country',
        'content_rating', 'budget', 'actor_2_facebook_likes', 'aspect_ratio'
    ], 'movies_original.csv')
preprocessor.preprocess()

preprocessor.numerify()

# Create the test set:

preprocessor.create_test_set(0.3, 0, True)

# Fit a single perceptron on the numerical features; the cross-validation
# variant below is disabled (kept as a string literal).
# NOTE(review): the fit uses labels_numerical[0] while the disabled
# cross-validation used labels_numerical[2] -- confirm which label is meant.

clf = Perceptron()
clf = clf.fit(preprocessor.features_numerical,
              preprocessor.labels_numerical[0])
"""
scores = cross_validation.cross_val_score(clf, preprocessor.features_numerical,
        preprocessor.labels_numerical[2], cv=10)

print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
"""

score = clf.score(preprocessor.test_features, preprocessor.test_labels)

# Format inside the call: applying % to the result of print() raises a
# TypeError on Python 3 after printing the raw template.
print("Accuracy after testing (no CV): %3.2f%%" % (score * 100))
예제 #36
0
파일: lab14.py 프로젝트: liviubouruc/AI
    plt.show()


# %%
# load the training data
X = np.loadtxt('./datalab14/3d-points/x_train.txt')
y = np.loadtxt('./datalab14/3d-points/y_train.txt', 'int')

plot3d_data(X, y)
# load the test data
X_test = np.loadtxt('./datalab14/3d-points/x_test.txt')
y_test = np.loadtxt('./datalab14/3d-points/y_test.txt', 'int')

# %%
# Fit the model, print training and test accuracy, then print the learned
# weights and bias and plot the test points with the decision function.
# NOTE(review): `perceptron_model` is created outside this excerpt.
perceptron_model.fit(X, y)
print(perceptron_model.score(X, y))
print(perceptron_model.score(X_test, y_test))
W = perceptron_model.coef_
b = perceptron_model.intercept_
epochs = perceptron_model.n_iter_  # not used further in this excerpt
print(W)
print(b)
plot3d_data_and_decision_function(X_test, y_test, W[0], b)
# %%
# The 3d-points arrays are replaced by the MNIST data from here on.
X = np.loadtxt('./datalab14/MNIST/train_images.txt')
y = np.loadtxt('./datalab14/MNIST/train_labels.txt', 'int')

# load the test data
X_test = np.loadtxt('./datalab14/MNIST/test_images.txt')
y_test = np.loadtxt('./datalab14/MNIST/test_labels.txt', 'int')
예제 #37
0
# NOTE: every *_score in this section is computed on (x_train, y_train), i.e.
# it is training accuracy; the y_pred_* predictions on x_test are not scored.
y_pred_svc = svc.predict(x_test)
svc_score = svc.score(x_train, y_train)
print("Linear SVC Score:", svc_score, round(svc_score * 100, 2))

#KNN
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
y_pred_knn = knn.predict(x_test)
knn_score = knn.score(x_train, y_train)
print("KNN Score:", knn_score, round(knn_score * 100, 2))

#Perceptron
pt = Perceptron(max_iter=5)
pt.fit(x_train, y_train)
y_pred_pt = pt.predict(x_test)
pt_score = pt.score(x_train, y_train)
print("Perceptron Score:", pt_score, round(pt_score * 100, 2))

#Decision Tree Classifier
tree = DecisionTreeClassifier()
tree.fit(x_train, y_train)
y_pred_tree = tree.predict(x_test)
tree_score = tree.score(x_train, y_train)
print("Decision Tree Classifier Score:", tree_score,
      round(tree_score * 100, 2))

#GaussianNB
nb = GaussianNB()
nb.fit(x_train, y_train)
y_pred_nb = nb.predict(x_test)
nb_score = nb.score(x_train, y_train)
예제 #38
0
EPOCHS = 200  # epoch budget shared by both perceptrons

# Initialize perceptrons: the hand-written one and scikit-learn's
# NOTE(review): LR is defined outside this excerpt.
my_p = my_perceptron.MyPerceptron(LR, EPOCHS)
sk_p = Perceptron(max_iter=EPOCHS, tol=1e-3)

# MY DATA

# Six 3-feature samples; the first three are labelled +1, the last three -1.
train = [[5, 5, 1], [6, 5, 0], [5, 5, 3], [1, 2, 1], [2, 2, 3], [0, 1, 2]]

labels = [1, 1, 1, -1, -1, -1]

# Both models are fitted and scored on the same (training) samples.
my_p.fit(train, labels)
sk_p.fit(train, labels)
my_score = my_p.test(train, labels)
sk_score = sk_p.score(train, labels)

print("\nMy dataset:")
print("My perceptron score: " + str(my_score))
print("Sklearn perceptron score: " + str(sk_score))

# IRIS DATA STUFF

# Download iris dataset, define labels
iris = load_iris()
data = iris['data']

# Named constants for the three class labels
SETOSA = 0
VERSICOLOR = 1
VIRGINICA = 2
예제 #39
0
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    iteration_values.append(acc)
    print(i, acc)

# Plot accuracy against max_iter.
# NOTE(review): the x axis is range(1, 30), so iteration_values must hold 29
# entries -- confirm against the loop that fills it.
plt.plot(range(1, 30), iteration_values)
plt.xlabel('max_iter')
plt.ylabel('Accuracy')

# In[88]:

# Perceptron limited to 4 epochs (tol=None disables the tolerance-based stop);
# test accuracy is printed via score() and via accuracy_score().
per_clf = Perceptron(max_iter=4, tol=None)
per_clf.fit(X_train, y_train)
y_pred = per_clf.predict(X_test)
print('Score: %.2f%%' % (round(per_clf.score(X_test, y_test) * 100, 4)))
print('Accuracy: %.2f' % (accuracy_score(y_test, y_pred)))

# ### 11. Stochastic Gradient Descent (SGD)

# In[89]:

# Same evaluation for an SGD classifier limited to 8 epochs.
sgd_clf = SGDClassifier(max_iter=8, tol=None)
sgd_clf.fit(X_train, y_train)
y_pred = sgd_clf.predict(X_test)
print('Score: %.2f%%' % (round(sgd_clf.score(X_test, y_test) * 100, 4)))
print('Accuracy: %.2f' % (accuracy_score(y_test, y_pred)))

# ### 12. Bagging

# In[90]:
예제 #40
0
"""
... needs import np statement 
For example, when dealing with boolean features, x_i^n = x_i for all n and is therefore useless; but x_i x_j represents the conjunction of two booleans. This way, we can solve the XOR problem with a linear classifier:
"""


from sklearn.linear_model import Perceptron
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# XOR of two boolean inputs: the target is the bitwise XOR of the two columns.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = X[:, 0] ^ X[:, 1]
# Add the bias column and the interaction term x_0 * x_1 as extra features.
X = PolynomialFeatures(interaction_only=True).fit_transform(X)
print(X)
# array([[1, 0, 0, 0],
#        [1, 0, 1, 0],
#        [1, 1, 0, 0],
#        [1, 1, 1, 1]])
# max_iter with tol=None runs exactly 10 epochs, which is what the removed
# `n_iter` argument used to mean. fit_intercept=False because the transformed
# X already carries a constant column.
clf = Perceptron(fit_intercept=False, max_iter=10, tol=None,
                 shuffle=False).fit(X, y)
print(clf.score(X, y))
예제 #41
0
        return  max_count_tp

    def predict(self, x_array):
        """Predict a label for every row of ``x_array``.

        Each row ``x_array[i]`` is passed to ``self.predict_point`` and the
        results are returned as a flat float array with one entry per row.
        """
        n_samples = x_array.shape[0]
        labels = np.ones((n_samples, 1))
        for row in range(n_samples):
            labels[row] = self.predict_point(x_array[row])
        return labels.reshape(-1)

    def score(self, x_te, y_te):
        """Return the fraction of rows in ``x_te`` whose prediction equals ``y_te``."""
        predicted = self.predict(x_te)
        n_correct = sum(predicted == y_te)
        return n_correct / len(y_te)

# Fit the hand-written k-NN (3 neighbours, p=2), classify the query point
# (6, 3) and score the model on the test split. The results of
# predict_point() and score() are not stored or printed here.
clf = M_KNN(n_neighbor = 3, p = 2)
clf.fit(x_tr, y_tr)
clf.predict_point(np.array([6, 3]))
clf.score(x_te, y_te)


# draw it: scatter of sepal length vs. sepal width coloured by label, with
# the query point (6, 3) overlaid in red
sns.lmplot(x='sepal length', y='sepal width', hue='label',
        data=df.loc[:100, :], fit_reg=False)
plt.scatter(6, 3 , color='red', edgecolors='grey')
plt.show()


### 3.2.1 iris k-nearest-neighbours: scikit-learn example
from sklearn.neighbors import KNeighborsClassifier
clf_sk = KNeighborsClassifier(p=2)
clf_sk.fit(x_tr, y_tr)
clf_sk.score(x_te, y_te)
예제 #42
0
def test_perceptron_accuracy():
    """Perceptron must reach at least 70% training accuracy on both X and X_csr."""
    for data in (X, X_csr):
        clf = Perceptron(n_iter=30, shuffle=False, seed=0)
        clf.fit(data, y)
        accuracy = clf.score(data, y)
        is_accurate_enough = accuracy >= 0.7
        assert_true(is_accurate_enough)
예제 #43
0
from sklearn.linear_model import Perceptron
import numpy as np

# Three 2-D samples: two labelled +1 and one labelled -1.
X_train = np.array([[3, 3], [4, 3], [1, 1]])
y = np.array([1, 1, -1])

# Fit with default settings, then print the learned weights, the bias and
# the number of epochs that were actually run.
perceptron = Perceptron()
perceptron.fit(X_train, y)
print(
    "w:", perceptron.coef_, "\n",
    "b:", perceptron.intercept_, "\n",
    "n_iter:", perceptron.n_iter_,
)

# Accuracy on the training samples, printed as a whole percentage.
res = perceptron.score(X_train, y)
message = "correct rate:{:.0%}".format(res)
print(message)

# from sklearn.linear_model import Perceptron
# from sklearn.linear_model import SGDClassifier
# import numpy as np
#
# X_train = np.array([[3, 3], [4, 3], [1, 1]])
# y = np.array([1, 1, -1])
# #perceptron=Perceptron(penalty="l2",alpha=0.01,eta0=1,max_iter=50,tol=1e-3)
# #perceptron=Perceptron()
# perceptron=SGDClassifier(loss="perceptron",eta0=1, learning_rate="constant", penalty=None)
# perceptron.fit(X_train,y)
# print(perceptron.coef_)
# print(perceptron.intercept_)
# print(perceptron.n_iter_)
# X=np.array([[2,2]])
# y=perceptron.predict(X)
예제 #44
0
파일: Lab4.py 프로젝트: jackslinger/NLP
        save_sparse_vectors('/home/jack/NLP/csr.npz', vectors, labels)
        save_sparse_vectors('/home/jack/NLP/dev_csr.npz', dev_vectors, dev_labels)

    return (vectors, labels, dev_vectors, dev_labels)



# Load cached feature vectors when requested; otherwise (or when loading
# fails) compute them from the raw training and dev files.
if LOAD_FROM_FILE:
    try:
        vectors, labels = load_sparse_vectors(TRAINING_VECTORS_PATH)
        dev_vectors, dev_labels = load_sparse_vectors(DEV_VECTORS_PATH)
    except Exception:
        # Deliberate best-effort: any load failure falls back to recomputing.
        print('Failed to load from File calculating feature vectors')
        vectors, labels, dev_vectors, dev_labels = calculate_vectors(TRAINING_PATH, DEV_PATH)
else:
    # This message used to be a bare string expression and was never printed.
    print('Computing feature vectors')
    vectors, labels, dev_vectors, dev_labels = calculate_vectors(TRAINING_PATH, DEV_PATH)


# random_state gives the seed; None seems to always give the same result.
# max_iter with tol=None runs exactly 5 epochs, which is what the removed
# `n_iter` argument used to mean.
perceptron = Perceptron(shuffle=True, max_iter=5, tol=None, random_state=1000)
perceptron = perceptron.fit(vectors, labels)
predictions = perceptron.predict(dev_vectors)
score = perceptron.score(dev_vectors, dev_labels)
print(score)


# NOTE(review): `loaded_dev_labels` is not assigned anywhere in this excerpt
# (the score above uses `dev_labels`) -- confirm it is defined elsewhere.
print(confusion_matrix(loaded_dev_labels, predictions, labels=['entailment', 'contradiction', 'neutral']))

print(classification_report(loaded_dev_labels, predictions,labels=['entailment', 'contradiction', 'neutral']))
예제 #45
0
# 1. Load the training and test sets (no header row). Column 0 is used as
# the target, columns 1.. as the features.
data = pandas.read_csv('perceptron-train.csv', header=None)
train, test = Bunch(), Bunch()
train.data, train.target = data.loc[:, 1:], data.loc[:, 0]
data = pandas.read_csv('perceptron-test.csv', header=None)
test.data, test.target = data.loc[:, 1:], data.loc[:, 0]

# 2. Train a perceptron with default parameters and random_state=241

perc = Perceptron(random_state=241)
perc.fit(train.data, train.target)  # learning

# 3. Compute the quality (share of correctly classified objects, accuracy)
# of the resulting classifier on the test set.

accuracy = perc.score(test.data, test.target)  # predicting
print accuracy

# 4. Normalize the training and test sets using the StandardScaler class.
# The scaler is fitted on the training data and reused for the test data.

scaler = StandardScaler()  # scaling
train_scaled, test_scaled = Bunch(), Bunch()
train_scaled.data = scaler.fit_transform(train.data)
test_scaled.data = scaler.transform(test.data)
train_scaled.target, test_scaled.target = train.target, test.target

# 5. Train the perceptron on the new sets. Find the share of correct
# answers on the test set.

perc.fit(train_scaled.data, train_scaled.target)
예제 #46
0
파일: Perceptron.py 프로젝트: samoubiza/ML
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler
import pandas as pd
# Column 0 of each CSV is taken as the class label, the rest as features.
data_test = pd.read_csv('C:/temp/machine learning/courseraYa/perceptron-test.csv', header=0)
data_train = pd.read_csv('C:/temp/machine learning/courseraYa/perceptron-train.csv', header=0)
X_train, y_train = data_train.iloc[:, 1:], data_train.iloc[:, 0]
X_test, y_test = data_test.iloc[:, 1:], data_test.iloc[:, 0]

# Baseline: perceptron on the raw features, scored on the test set.
clf = Perceptron(random_state=241, shuffle=True)
clf.fit(X_train, y_train)
acur = clf.score(X_test, y_test)
print(acur)

# Same model on standardized features; the scaler is fitted on the training
# features and reused for the test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

clf_scaled = Perceptron(random_state=241, shuffle=True)
clf_scaled.fit(X_train_scaled, y_train)
acur_scaled = clf_scaled.score(X_test_scaled, y_test)
print(acur_scaled)
예제 #47
0
from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
import numpy as np

# Fit a perceptron on the digits data and print its accuracy on that same data.
X, Y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0)
clf.fit(X, Y)
train_accuracy = clf.score(X, Y)
print(train_accuracy)

# Inspect the feature matrix: its shape first, then its number of dimensions.
for attribute in (X.shape, X.ndim):
    print(attribute)
# Build the datasets and time the step. Despite its name, end_time holds the
# elapsed seconds (time.time() - start_time), not a timestamp.
# NOTE(review): the messages say "Training Dataset" while the call receives
# `testing_directory` -- confirm what traverse_over_files() populates.
print "Reading files to make Training Dataset "
start_time = time.time()
traverse_over_files(str(testing_directory))
end_time = time.time() - start_time
print "It took "+ str(end_time) + " to make the Training Dataset"
print "Training Dataset completed"

# Fit a default Perceptron and time the training.
print '\nTraining data'
start_time = time.time()
perceptron_classifier = Perceptron()
perceptron_classifier.fit(final_training_dataset_keys, final_training_dataset_values)
end_time = time.time() - start_time
print "It took "+ str(end_time) + " to train the classifiers"
print 'Training Completed'

print '\nTesting data '
start_time = time.time()
# Calculating accuracy on the testing dataset
perceptron_classifier_accuracy = perceptron_classifier.score(final_testing_dataset_keys, final_testing_dataset_values)

end_time = time.time() - start_time
print "It took "+ str(end_time) + " to test the data "
print 'Testing Completed'

# print '\nprinting Accuracy'
print "\nCase "+str(testing_directory)+": Testing folder is part"+str(testing_directory)
print "-------------------------------------------------"
print "Perceptron accuracy : "+ str(perceptron_classifier_accuracy)


# print 'Training Size:'+str(len(final_training_dataset_keys))+' and Testing size = '+str(len(final_testing_dataset_keys))
예제 #49
0
print(RF_train_score)

end = time.process_time()
print("total time taken Random Forest Search: {} min".format(
    (end - start) / 60))

# Perceptron: fit with an L1 penalty, spot-check one sample, then report
# test/train accuracy and the CPU time spent in this section.
print("=== Perceptron===")
start = time.process_time()
per_clf = Perceptron(penalty='l1', verbose=1)
per_clf.fit(X_train, y_train)

# Prediction for test row 332 next to its true label.
sample_prediction = per_clf.predict(X_test[[332]])
print(sample_prediction)
print(y_test[332])

per_test_score = per_clf.score(X_test, y_test)
print(per_test_score)

per_train_score = per_clf.score(X_train, y_train)
print(per_train_score)

end = time.process_time()
elapsed_minutes = (end - start) / 60
print("total time taken for Perceptron: {} min".format(elapsed_minutes))

# visualization
import matplotlib.pyplot as plt

N = 6

train_acc = [
    RF_train_score, svm_linear_train_score, per_train_score, NB_train_score,
예제 #50
0
# Each stanza fits one classifier, predicts on X_test (Y_pred is overwritten
# every time) and stores its accuracy as a percentage rounded to 2 decimals.
# NOTE: every acc_* value is scored on (X_train, Y_train), i.e. it is
# training accuracy, not accuracy of the X_test predictions.
knn=KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train,Y_train)
Y_pred=knn.predict(X_test)
acc_knn=round(knn.score(X_train,Y_train)*100,2)
#print(acc_knn)

gaussian=GaussianNB()
gaussian.fit(X_train,Y_train)
Y_pred=gaussian.predict(X_test)
acc_gaussian=round(gaussian.score(X_train,Y_train)*100,2)
#print(acc_gaussian)

perceptron=Perceptron()
perceptron.fit(X_train,Y_train)
Y_pred=perceptron.predict(X_test)
acc_perceptron=round(perceptron.score(X_train,Y_train)*100,2)
#print(acc_perceptron)

linear_svc=LinearSVC()
linear_svc.fit(X_train,Y_train)
Y_pred=linear_svc.predict(X_test)
acc_linear_svc=round(linear_svc.score(X_train,Y_train)*100,2)
#print(acc_linear_svc)

sgd=SGDClassifier()
sgd.fit(X_train,Y_train)
Y_pred=sgd.predict(X_test)
acc_sgd=round(sgd.score(X_train,Y_train)*100,2)
#print(acc_sgd)

decision_tree=DecisionTreeClassifier()
예제 #51
0
from sklearn.metrics import accuracy_score

# Test accuracies collected over 100 repetitions: x_std for the model trained
# on standardized features, x for the model trained on the raw features.
x_std = []
x = []
for _ in range(100):
    # Classify test samples with a model trained on standardized features
    ppn = Perceptron(max_iter=1000, eta0=0.001, random_state=np.random)
    ppn.fit(X_train_std, y_train)
    y_pred = ppn.predict(X_test_std)

    # Training without normalization
    ppn_z_out = Perceptron(max_iter=1000, eta0=0.001, random_state=np.random)
    ppn_z_out.fit(X_train, y_train)
    y_pred_z_out = ppn_z_out.predict(X_test)

    x_std.append(ppn.score(X_test_std, y_test))
    # Score the raw-feature model on the raw test set. This used to call
    # ppn.score(), i.e. the standardized model on unstandardized inputs.
    x.append(ppn_z_out.score(X_test, y_test))

# The bare np.array(x_std) / np.array(x) expressions that stood here discarded
# their results and were removed; x_std and x remain plain lists.

print('Normalizado Media: ')

# Measuring the accuracy in 3 different ways (models from the last repetition)
print('\nClassificador normalizado')
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy: %.2f' % ppn.score(X_test_std, y_test))

print('\nClassificador sem z-score')
print('Misclassified samples: %d' % (y_test != y_pred_z_out).sum())
# Perceptron with PCA reduction
perc.fit(pca_x_train, y_train)
y_pred7 = perc.predict(pca_x_test)
# Score the perceptron's own predictions; this used to pass y_pred6, which
# belongs to a different model.
m7_acc = accuracy_score(y_test, y_pred7)
m7_acc

# Random forest with PCA reduction
rf.fit(pca_x_train, y_train)
y_pred6 = rf.predict(pca_x_test)
m8_acc = accuracy_score(y_test, y_pred6)
m8_acc
"""Tuning hyperparameters of some models."""

from sklearn.linear_model import Perceptron
# Perceptron with an explicit epoch cap and learning rate; the bare score()
# expression below is not stored or printed (its value only shows up when
# run as a notebook cell).
perceptron = Perceptron(max_iter=1000, eta0=0.01)
perceptron.fit(x_train, y_train)
perceptron.score(x_test, y_test)

from sklearn.metrics import classification_report
y_pred = perceptron.predict(x_test)
print(classification_report(y_test, y_pred))

from sklearn.svm import SVC
# Support vector classifier with C=0.8, evaluated the same way.
svc = SVC(C=0.8, gamma='auto')
svc.fit(x_train, y_train)
svc.score(x_test, y_test)

y_pred = svc.predict(x_test)
print(classification_report(y_test, y_pred))
"""Group Members: \
Chirag (B19CSE026)
Gautam Kumar (B19EE031)
예제 #53
0
def test_perceptron_accuracy():
    """Perceptron must reach at least 70% training accuracy on both X and X_csr."""
    for data in (X, X_csr):
        clf = Perceptron(n_iter=30, shuffle=False, seed=0)
        clf.fit(data, y)
        accuracy = clf.score(data, y)
        is_accurate_enough = accuracy >= 0.7
        assert_true(is_accurate_enough)
예제 #54
0
# Scatter rows 50-99 of X_train (labelled 'versicolor') using the first two
# feature columns, then show the figure.
plt.scatter(X_train[50:100, 0],
            X_train[50:100, 1],
            color='blue',
            marker='x',
            label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
plt.show()

# Fit a perceptron and report its error count and accuracy on the training
# data itself (no held-out set is used here).
pn = Perceptron(max_iter=10, eta0=0.1, random_state=0)
pn.fit(X_train, y_train)

# The misclassification count is printed twice: bare, then labelled.
print((y_train != pn.predict(X_train)).sum())
print("Error :", (y_train != pn.predict(X_train)).sum())
print("SCORE :", pn.score(X_train, y_train))
'''
plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.show()
'''

# Decision regions of the fitted perceptron over the training data.
plot_decision_regions(X_train, y_train, classifier=pn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import confusion_matrix
예제 #55
0
from sklearn.linear_model import Perceptron
import matplotlib.pyplot as plt
import numpy as np
from itertools import product

# Truth table of a logical OR: only the input (0, 0) maps to 0.
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels = [0, 1, 1, 1]

# Scatter the four input points, coloured by label.
first_coords = [point[0] for point in data]
second_coords = [point[1] for point in data]
plt.scatter(first_coords, second_coords, c=labels)
plt.show()
plt.clf()

classifier = Perceptron(max_iter=40)
classifier.fit(data, labels)
training_accuracy = classifier.score(data, labels)
print(training_accuracy)

# Signed decision-function values for three probe points.
probe_points = [[0, 0], [1, 1], [0.5, 0.5]]
print(classifier.decision_function(probe_points))

# Evaluate the decision function on a 100 x 100 grid over the unit square,
# printing the size of every intermediate collection.
x_values = np.linspace(0, 1, 100)
print(len(x_values))
y_values = np.linspace(0, 1, 100)
print(len(y_values))
point_grid = list(product(x_values, y_values))
print(len(point_grid))
distances = classifier.decision_function(point_grid)
print(len(distances))
abs_distances = [abs(distance) for distance in distances]

print(len(abs_distances))
distances_matrix = np.reshape(abs_distances, (100, 100))
예제 #56
0
from sklearn.linear_model import Perceptron


# In[13]:


# Load the first dataset: every column but the last is a feature, the last
# column is the target.
dataset_1 = np.loadtxt('sampleQuadData2.txt')
(numSamples_1, numFeatures_1) = dataset_1.shape
feat_1 = dataset_1[:,range(numFeatures_1-1)].reshape((numSamples_1, numFeatures_1-1))
output_1 = dataset_1[:, numFeatures_1-1].reshape((numSamples_1,))

# From here on numFeatures_1 counts feature columns only (target excluded).
(numSamples_1, numFeatures_1) = feat_1.shape

# Perceptron without an intercept term, fitted and scored on the same data;
# the bare score() expression is not stored or printed.
perceptron_1 = Perceptron(fit_intercept=False)
perceptron_1.fit(feat_1,output_1)
perceptron_1.score(feat_1,output_1)


# In[14]:


# Same procedure for the second file.
# NOTE(review): judging by the file name this is a transformed version of the
# first dataset -- confirm.
dataset_2 = np.loadtxt('sampleQuadData2Transformed.txt')
(numSamples_2, numFeatures_2) = dataset_2.shape
feat_2 = dataset_2[:,range(numFeatures_2-1)].reshape((numSamples_2, numFeatures_2-1))
output_2 = dataset_2[:, numFeatures_2-1].reshape((numSamples_2,))

perceptron_2 = Perceptron(fit_intercept=False)
perceptron_2.fit(feat_2,output_2)
perceptron_2.score(feat_2,output_2)

예제 #57
0
def do_prob1_a(data_X, data_y, N_in, N_tol=10**6, N_out=10**5, exp_num=1000):
    """
       Finishes the problem1 part(a).

       Computes the theoretical generalization bound for a range of tolerance
       values, measures |E_out - E_in| of a Perceptron over ``exp_num``
       experiments, and plots both curves. The figure is saved as
       ``problem1_a_Nin<N_in>.png`` and then shown.
    :param data_X: features of data
    :param data_y: real values of data
    :param N_in: total number of in-sample data points
    :param N_tol: number of total data points
    :param N_out: number of out-of-sample data points
    :param exp_num: number of experiments
    """
    # Part(i): compute theoretical generalization bound values as a function of delta.
    # NOTE(review): sqrt/log are applied to an array here, so they are
    # presumably the numpy versions -- confirm the file's imports.
    tolerance_seq = np.arange(0.01, 0.501, 0.01)
    vc_num = 4
    t_gb_values = sqrt(8 * (log(4) + vc_num * log(2 * N_in) - log(tolerance_seq)) / N_in)
    print("t_gb_values", t_gb_values)

    # Part(ii): extract D_out with N_out samples and the first d (d_vc-1) features.
    d_num = vc_num - 1
    _, out_X, _, out_y = train_test_split(data_X, data_y, test_size=N_out, random_state=660)
    out_X = out_X[:, 0:d_num]

    # Part(iii) and Part(iv): per experiment, draw one random fold of size
    # ~N_in as the in-sample set and record in-sample / out-of-sample error.
    E_in_results = np.zeros(exp_num, dtype='float')
    E_out_results = np.zeros(exp_num, dtype='float')
    fold_num = int(N_tol / N_in)
    for exp_i in range(exp_num):
        stfied_k_fold = KFold(n_splits=fold_num, shuffle=True, random_state=exp_i)
        np.random.seed(exp_i)
        # Pick the fold by the randomly drawn index. The loop used to compare
        # against the constant 1, which left target_index unused.
        target_index = np.random.randint(fold_num)
        folds = stfied_k_fold.split(X=data_X, y=data_y)
        for fold_index, (_, test_index) in enumerate(folds):
            if fold_index == target_index:
                in_bag_X = data_X[test_index]
                in_bag_y = data_y[test_index]
                break

        in_bag_X = in_bag_X[:, 0:d_num]
        perceptron_model = Perceptron(random_state=660)
        perceptron_model.fit(X=in_bag_X, y=in_bag_y)
        E_in_results[exp_i] = 1 - perceptron_model.score(X=in_bag_X, y=in_bag_y)
        E_out_results[exp_i] = 1 - perceptron_model.score(X=out_X, y=out_y)

    # Part(v): for every tolerance delta, read the value at position
    # int((1 - delta) * exp_num) - 1 of the sorted |E_out - E_in| gaps.
    diff_results = np.absolute(E_out_results - E_in_results)
    diff_results.sort()
    max_values = np.zeros(t_gb_values.shape[0], dtype='float')
    for tolerance_i, tolerance_value in enumerate(tolerance_seq):
        position = int((1 - tolerance_value) * exp_num)
        max_values[tolerance_i] = diff_results[position - 1]

    plt.figure(0)
    plt.subplot(211)  # rows, columns, th_plot
    plt.plot(tolerance_seq, t_gb_values, 'b')
    plt.ylabel('Generalization bounds')
    plt.xlabel('Tolerance values')
    plt.title(r'The plot for the prob1_a_i when $N_i$$_n$=' + str(N_in))
    plt.subplot(212)
    plt.plot(tolerance_seq, max_values, 'b')
    plt.ylabel('Max values')
    plt.xlabel('Tolerance values')
    plt.title(r'The plot for the prob1_a_v when $N_i$$_n$=' + str(N_in))
    plt.tight_layout()
    plt.savefig('problem1_a_Nin' + str(N_in) + '.png')
    plt.show()
예제 #58
0
# output:
#   [0 0 1 0 1 1 1 1 0 0 0 1 1 1 0 1 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 1 1
#    1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0]
print("predictions_val shape: {}".format(predictions_val.shape))  # (57,)
print("predictions_val unique values: {}".format(
    np.unique(predictions_val)))  # [0 1]

# Check our accuracy: mark every prediction as right or wrong and average
# If classification was correct, then give score of 1; else 0
scores_val = np.where(predictions_val == y_val, 1, 0)
mean_accuracy_val = np.mean(scores_val)
# This will be different every time depending on how data is split in random permutation
print("mean accuracy on validation set: {}".format(mean_accuracy_val))

# We can also use scikit-learn's built-in function; it does the same thing!
# (This overwrites the value computed by hand above; it is not printed again.)
mean_accuracy_val = model.score(x_val, y_val)

# Typically, we'd then use the results of validation to tweak hyperparameters and repeat
"""
Evaluate on testing set
"""
# Predict the class/labels
predictions_test = model.predict(x_test)

# Check accuracy
scores_test = np.where(predictions_test == y_test, 1, 0)
mean_accuracy_test = np.mean(scores_test)
# or simply: mean_accuracy_test = model.score(x_test, y_test)

print("mean accuracy on test set: {}".format(mean_accuracy_test))
예제 #59
0
# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# NOTE(review): `alpha` is the regularization multiplier and is only used by
# scikit-learn when a penalty is set; if a learning rate of 0.1 was intended,
# the parameter is `eta0` -- confirm.
ppn = Perceptron(max_iter=40, alpha=0.1, random_state=1)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
n_misclassified = (y_test != y_pred).sum()
print('Misclassified samples: %d' % n_misclassified)

# Derive the error rate from the actual predictions instead of the former
# hard-coded 5 / 45, which was only right for one particular split/result.
error_rate = float(n_misclassified) / len(y_test)
print('Misclassification Error: ', error_rate, ' Accuracy is: ', (1 - error_rate))

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

print('Accuracy score: %.2f' % ppn.score(X_test_std, y_test))

# def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
#     markers = ('s', 'x', 'o', '^', 'v')
#     colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
#     cmap = ListedColormap(colors[:len(np.unique(y))])

#     x1_min, x1_max = X[:, 0].min() - 1, X[:,0].max() + 1
#     x2_min, x2_max = X[:, 1].min() - 1, X[:,1].max() + 1

#     xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
#                            np.arange(x2_min, x2_max, resolution))
#     z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
#     z = z.reshape(xx1.shape)
#     plt.contourf(xx1,xx2, z, alpha=0.3, cmap=cmap)
#     plt.xlim(xx1.min(), xx1.max())
예제 #60
0
def classify(title, train, test, train_labels, test_labels):
    """Fit a default Perceptron on the training data and print ``title``
    followed by its accuracy on the test data."""
    model = Perceptron()
    model.fit(train, train_labels)
    accuracy = model.score(test, test_labels)
    print("{} {}".format(title, accuracy))