Example #1
class PerceptronClassifier(AbstractSKLearnClassifier):

    def __init__(self):
        AbstractSKLearnClassifier.__init__(self)
        self.model = False

    def set_label_encoder(self, labels):
        AbstractSKLearnClassifier.set_label_encoder(self, labels)

    def return_label_encoding(self, labels):
        return AbstractSKLearnClassifier.return_label_encoding(self, labels)

    def train_classifier(self, trainvectors, labels, alpha='1.0', iterations=10, jobs=1, v=2):
        iterations = int(iterations)
        jobs = int(jobs)
        if alpha == 'search':
            paramsearch = GridSearchCV(estimator=Perceptron(), param_grid=dict(alpha=numpy.linspace(0,2,20)[1:],n_iter=[iterations]), n_jobs=jobs)
            paramsearch.fit(trainvectors,labels)
            alpha = paramsearch.best_estimator_.alpha
        else:
            alpha = float(alpha)
        self.model = Perceptron(alpha=alpha,n_iter=iterations,n_jobs=jobs)
        self.model.fit(trainvectors, labels)

    def return_classifier(self):
        return self.model
    
    def return_model_insights(self,vocab=False):
        model_insights = []
        return model_insights
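A hedged sketch of the same alpha search against the current scikit-learn API (0.21 and later, where Perceptron's n_iter argument was removed in favour of max_iter/tol). The synthetic data and the max_iter value here are illustrative assumptions, not part of the original class:

import numpy
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron
from sklearn.model_selection import GridSearchCV

trainvectors, labels = make_classification(n_samples=200, n_features=20, random_state=0)
paramsearch = GridSearchCV(estimator=Perceptron(max_iter=10),
                           param_grid=dict(alpha=numpy.linspace(0, 2, 20)[1:]),
                           n_jobs=1)
paramsearch.fit(trainvectors, labels)
# note: alpha only affects the Perceptron when a penalty is configured
model = Perceptron(alpha=paramsearch.best_estimator_.alpha, max_iter=10, n_jobs=1)
model.fit(trainvectors, labels)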
class ClassificationPLA(ClassficationBase.ClassificationBase):
    def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval)

        # data preprocessing
        self.dataPreprocessing()

        # PLA object
        self.clf = Perceptron()


    def dataPreprocessing(self):
        # deal with unbalanced data
        self.dealingUnbalancedData()

        # Standardization
        #self.Standardization()



    def training(self):
        # train the Perceptron (PLA) model
        self.clf.fit(self.X_train, self.y_train.ravel())

    def predict(self):
        # predict the test data
        self.y_pred = self.clf.predict(self.X_test)

        # print the error rate
        self.y_pred = self.y_pred.reshape((self.y_pred.shape[0], 1))
        err = 1 - np.sum(self.y_test == self.y_pred) * 1.0 / self.y_pred.shape[0]
        print "Error rate: {}".format(err)
def PERCEPTRON(data_train, data_train_vectors, data_test_vectors, **kwargs):
    # Implementing classification model- using Perceptron
    clf_p =  Perceptron()
    clf_p.fit(data_train_vectors, data_train.target)
    y_pred = clf_p.predict(data_test_vectors)
    
    return y_pred
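A hedged usage sketch for the helper above, assuming the vectors come from a text vectorizer over a scikit-learn Bunch such as 20 newsgroups; the dataset and vectorizer choices are illustrative assumptions, not taken from the original project:

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Perceptron

data_train = fetch_20newsgroups(subset='train')
data_test = fetch_20newsgroups(subset='test')
vectorizer = TfidfVectorizer()
data_train_vectors = vectorizer.fit_transform(data_train.data)
data_test_vectors = vectorizer.transform(data_test.data)
y_pred = PERCEPTRON(data_train, data_train_vectors, data_test_vectors)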
    def run(self):
        """
        Run the task
        """
        train_data = pd.read_csv(self.param.get('train'))
        test_data = pd.read_csv(self.param.get('test'))
        X_train = train_data[['1', '2']]
        y_train = train_data['0']

        X_test = test_data[['1', '2']]
        y_test = test_data['0']


        if self.param.get('scale') is True:

            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)

            X_test = scaler.transform(X_test)

        perceptron = Perceptron(random_state=241)
        perceptron.fit(X_train, y_train)

        predictions = perceptron.predict(X_test)

        accuracy = accuracy_score(y_test, predictions)
        with self.output().open('w') as output:
            output.write(str(accuracy))
Example #5
def percep(X_tr, y_tr, X_te):
    clf = Perceptron(n_iter = 1000)
    X_tr_aug = add_dummy_feature(X_tr)
    X_te_aug = add_dummy_feature(X_te)
    clf.fit(X_tr_aug, y_tr)
    y_pred = clf.predict(X_te_aug)
    return y_pred
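For reference, add_dummy_feature prepends a constant column (1.0 by default), which is a common way to fold the bias term into the weight vector. A tiny illustrative check, assuming a scikit-learn version that still ships sklearn.preprocessing.add_dummy_feature:

import numpy as np
from sklearn.preprocessing import add_dummy_feature

print(add_dummy_feature(np.array([[2.0, 3.0]])))  # expected: [[1. 2. 3.]]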
Example #6
def perceptron_histo():
    "Interpret the images as color histograms and classify them with the Perceptron"
    alphas = np.arange(0.01,1.01,0.1)
    best=np.zeros(4)
    
    _, data, target, _ = utils.chargementHistogrammesImages(mer,ailleurs,1,-1)
    X_train,X_test,Y_train,Y_test=train_test_split(data,target,test_size=0.3,random_state=random.seed())
    
    
    for iterations in range(1,5):
        for a in alphas:
            start_time = time.time()
            
            p = Perceptron(alpha=a, n_iter=iterations, random_state=random.seed(), n_jobs=-1)
            
            x1=np.array(X_train)
            x2=np.array(X_test)
            
            p.fit(X=x1, y=Y_train)
            score = p.score(x2,Y_test)
            
            end_time = time.time()
            if score>best[0]:
                best[0] = score
                best[1] = a
                best[2] = iterations
                best[3] = end_time-start_time
        
    print("| Perceptron simple               | V.Histo    | alpha={:1.2f} iterations={:1.0f}            | {:10.3f}ms | {:1.3f} |".format(best[1],best[2],best[3]*1000,best[0]))
Example #7
def Perceptron_1(train_predictors,test_predictors,train_target,test_target):
    clf = Perceptron()
    clf.fit(train_predictors,train_target)
    predicted = clf.predict(test_predictors)
    accuracy = accuracy_score(test_target, predicted)
    print "Accuracy for Linear Model Perceptron: "+str(accuracy)
    return accuracy,predicted  
Example #8
def perceptron_vecteur():
    "Interpret the images as pixel vectors and classify them with the Perceptron"
    alphas = np.arange(0.01,1.01,0.1)
    best=np.zeros(5)
    
    for npix in range(50,200,50):
        _, data, target, _ = utils.chargementVecteursImages(mer,ailleurs,1,-1,npix)
        X_train,X_test,Y_train,Y_test=train_test_split(data,target,test_size=0.3,random_state=random.seed())
        
        
        for iterations in range(1,5):
            for a in alphas:
                start_time = time.time()
                
                p = Perceptron(alpha=a, n_iter=iterations, random_state=random.seed(), n_jobs=-1)
                
                #X_train etc. are 3-dimensional arrays by default, e.g. (93, 1, 30000); they must be reshaped to 2 dimensions
                x1=np.array(X_train)
                x1 = np.reshape(x1, (x1.shape[0],x1.shape[2]))
                x2=np.array(X_test)
                x2 = np.reshape(x2, (x2.shape[0],x2.shape[2]))
                
                p.fit(X=x1, y=Y_train)
                score = p.score(x2,Y_test)
                
                end_time = time.time()
                if score>best[0]:
                    best[0] = score
                    best[1] = a
                    best[2] = iterations
                    best[3] = end_time-start_time
                    best[4] = npix
        
    print("| Perceptron simple              | V.Pix {:4.0f} | alpha={:1.2f} iterations={:1.0f}              | {:10.3f}ms | {:1.3f} |".format(best[4],best[1],best[2],best[3]*1000,best[0]))
Example #9
File: s1-8.py Project: wargile/ML1
def t1():
    import numpy as np
    from sklearn.linear_model import Perceptron
    X = np.array([[1, 2], [3, 4], [5, 6]])
    y = np.array([0, 1, 0])
    clf = Perceptron()
    clf.fit(X, y)
    predictions = clf.predict(X)
    print(predictions)
def perceptron(kf,data,label,k):
	for train, test in kf:
		X_train, X_test, y_train, y_test = data[train,:], data[test,:], label[train], label[test]
		log = Perceptron(penalty="l2", alpha=0.003)
		logit = log.fit(X_train,y_train)
		y_pred =  logit.predict(X_test)
	scores = cross_validation.cross_val_score(log, data, label, cv=k)
	return scores.mean()
Example #11
def main():
    iris = load_iris()
    X = iris.data[:, (2, 3)]  # petal length, petal width
    y = (iris.target == 0.).astype(np.int32)
    perceptron_classifier = Perceptron(random_state=42)
    perceptron_classifier.fit(X, y)
    y_prediction = perceptron_classifier.predict([[2, 0.5]])
    print(y_prediction)
def classify_perceptron():
    print "perceptron"
    (X_train, y_train), (X_test, y_test) = util.load_all_feat()
    print "original X_train shape", X_train.shape
    clf = Perceptron()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print "accuracy score:", accuracy_score(y_test, pred)
def train_data_perceptron( tup, penalty ):

	df, features, label = tup

	percep = Perceptron( penalty = penalty, fit_intercept = True, eta0 = ETA, n_iter = CYCLES, n_jobs = 2 )
	percep.fit( df[features], df[label] )

	return percep
def solve(train_set_x, train_set_y, test_set_x, test_set_y):
    clf = Perceptron(random_state=241)
    clf.fit(X=train_set_x, y=train_set_y)
    prediction = clf.predict(test_set_x)

    accuracy = accuracy_score(test_set_y, prediction)

    return accuracy
Example #15
def get_accuracy(_data_train_features, _data_train_labels, _data_test_features, _data_test_labels):
    # Train a perceptron with default parameters and random_state=241.
    clf = Perceptron(random_state=241, shuffle=True)
    clf.fit(_data_train_features, numpy.ravel(_data_train_labels))

    # Compute the quality (the share of correctly classified objects, i.e. accuracy)
    # of the resulting classifier on the test set.
    predictions = clf.predict(_data_test_features)
    score = accuracy_score(_data_test_labels, predictions)
    return score
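A hedged usage sketch for the helper above: it is typically called twice, once on the raw features and once on standardized ones, to see how much feature scaling helps the perceptron. The file names and column layout below are assumptions, not taken from the original script:

import pandas
from sklearn.preprocessing import StandardScaler

train = pandas.read_csv('perceptron-train.csv', header=None)
test = pandas.read_csv('perceptron-test.csv', header=None)
X_train, y_train = train.iloc[:, 1:], train.iloc[:, :1]
X_test, y_test = test.iloc[:, 1:], test.iloc[:, :1]

print(get_accuracy(X_train, y_train, X_test, y_test))

scaler = StandardScaler()
print(get_accuracy(scaler.fit_transform(X_train), y_train,
                   scaler.transform(X_test), y_test))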
Example #16
def neural_net(train, test):
	y = []
	xTrain, yTrain = loadData(train)
	xTest, yTest = loadData(test)
	nN = Perceptron()
	nN.fit(xTrain, yTrain)
	y = nN.predict(xTest)
	testError = 1 - nN.score(xTest, yTest)
	print 'Test error: ' , testError
	return y
def test():
    X = np.array([[1, 2], [3, 4], [5, 6]])
    y = np.array([0, 1, 0])
    clf = Perceptron()
    clf.fit(X, y)

    predictions = clf.predict(X)

    print("Predictions: %s" % predictions)

    print("Accuracy: %s" % accuracy_score(y, predictions))
Example #18
def perceptron(trainingData,trainingLabels):
    """
    Implements a linear perceptron model as the
    machine learning algorithm.
    """
    from sklearn.linear_model import Perceptron
    clf = Perceptron()
    clf.fit(trainingData,trainingLabels)
    
    print "Perceptron has been generated with a training set size of",len(trainingLabels)
    return clf
Example #19
def test_perceptron_correctness():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    clf1 = MyPerceptron(n_iter=2)
    clf1.fit(X, y_bin)

    clf2 = Perceptron(n_iter=2)
    clf2.fit(X, y_bin)

    assert_array_almost_equal(clf1.w, clf2.coef_.ravel())
Example #20
def linear_train(features_train, target_train):
	data_f = pandas.read_csv(features_train, header=None, sep=';')
	features = data_f.iloc[:, 1:]
	features = scale(features)

	data_t = pandas.read_csv(target_train, header=None, sep=';')
	target = data_t.iloc[:, 1]

	perc = Perceptron(random_state=242)
	perc.fit(features, target)
	return perc
def neural_net():
    Xtrain,ytrain,Xtest,ytest = getSplitData()
    Xtrain, Xtest = getScaledData(Xtrain, Xtest)
    ntest = Xtest.shape[0]
    #Your code here
    clf = Perceptron()
    clf.fit(Xtrain, ytrain) 
    
    yPredict = clf.predict(Xtest)
    
    #print "parameter: n_neighbors = ",n
    print "neural_net classification accuracy: ", accuracy_score(ytest,yPredict)
Example #22
def train(im_features, image_classes):
	
	# Train the Perceptron
	clf = Perceptron(n_iter=100, eta0=0.1)
	
	clf.fit(im_features, np.array(image_classes))
	n_folds = 10
	kFoldScore = evaluate_cross_validation(clf, im_features, np.array(image_classes), n_folds)
	
	#print 'SVM Score:',clf.score(im_features, np.array(image_classes))
	#print 'SVM Score:', kFoldScore
	return clf
Example #23
def neural_net(train, test):
    y = []
    trainY, trainX = loadData(train)
    testY, testX = loadData(test)

    neuralNet = Perceptron()
    neuralNet.fit(trainX, trainY)
    y = neuralNet.predict(testX)

    testError = 1 - neuralNet.score(testX, testY)
    print 'Test error: ' + str(testError)
    return y
class DrunkLearningOnline(DrunkLearningBatch):
    """drunk_learning class for online learning"""
    def __init__(self):
        super(DrunkLearningOnline, self).__init__()
        self.clf = Perceptron()
        self.filename = 'modelPerceptron.pkl'

    def partial_fit(self, X, y):
        X = np.array([X])
        y = np.array(y)
        self.clf.partial_fit(X, y, [0, 1])
        joblib.dump(self.clf, self.filename, compress=9)
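The class above relies on scikit-learn's incremental-learning API: partial_fit has to be told the full set of labels (here [0, 1]) because each call only sees one example. A minimal self-contained sketch of that pattern, with made-up feature vectors:

import numpy as np
from sklearn.linear_model import Perceptron

clf = Perceptron()
stream = [([0.1, 0.2, 0.3], 0), ([0.9, 0.8, 0.7], 1), ([0.2, 0.1, 0.4], 0)]
for features, label in stream:
    # classes is required on the first call and must stay consistent afterwards
    clf.partial_fit(np.array([features]), np.array([label]), classes=[0, 1])
print(clf.predict(np.array([[0.8, 0.9, 0.6]])))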
def main():
	start = time.time()

	print "Reading train data and its features from: " + train_file
	data = cu.get_dataframe(train_file)
	global fea
	fea = features.extract_features(feature_names,data)

	percep = Perceptron(penalty=None, alpha=0.0001, fit_intercept=False, n_iter=5, shuffle=False, verbose=1, eta0=1.0, n_jobs=-1, seed=0, class_weight="auto", warm_start=False)

	X = []
	for i in data["OwnerUndeletedAnswerCountAtPostTime"]:
		X.append([i])
	# Must be array type object. Strings must be converted to
	# to integer values, otherwise fit method raises ValueError
	global y
	y = [] 

	print "Collecting statuses"
	
	for element in data["OpenStatus"]:
            for index, status in enumerate(ques_status):
                if element == status:
                    y.append(index)
            
	print "Fitting"
	percep.fit(fea, y)
	
	'''Make sure you have the up to date version of sklearn; v0.12 has the
           predict_proba method; http://scikit-learn.org/0.11/install.html '''   
	
	print "Reading test data and features"
	test_data = cu.get_dataframe(test_file)
	test_fea = features.extract_features(feature_names,test_data)

	print "Making predictions"
	global probs
	#probs = percep.predict_proba(test_fea) # only available for binary classification
	probs = percep.predict(test_fea)
	# shape of probs is [n_samples]
	# convert probs to shape [n_samples,n_classes]
	probs = np.resize(probs, (len(probs) / 5, 5))
	
	#if is_full_train_set == 0:
	#	print("Calculating priors and updating posteriors")
	#	new_priors = cu.get_priors(full_train_file)
	#	old_priors = cu.get_priors(train_file)
	#	probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)	

	print "writing submission to " + submission_file
	cu.write_submission(submission_file, probs)
	finish = time.time()
	print "completed in %0.4f seconds" % (finish-start)
Example #26
def test_model(training_data, testing_data, word2vec_model):
    v = DictVectorizer()
    train_features, train_labels = build_features(training_data, word2vec_model, v, 'train')
    test_features, test_labels = build_features(testing_data, word2vec_model, v)
    
    # create the perceptron model
    model = Perceptron(n_iter = 5)
    # fit the model to the training data
    model.fit(train_features, train_labels)
    # get the accuracy on the testing data
    accuracy = model.score(test_features, test_labels)

    return accuracy
Example #27
    def __test_perceptron(self, normalized):
        clf = Perceptron()
        X_train = self.train_data.iloc[:, 1:]
        y_train = self.train_data.iloc[:, 0]
        X_test = self.test_data.iloc[:, 1:]
        y_test = self.test_data.iloc[:, 0]
        if normalized:
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        return accuracy_score(y_test, predictions)
Example #28
    def __init__(self, zmq_sub_string, channel):

        self.classes = ["pos", "neg"]
        self.re_emoticons = re.compile(r":\)|:\(")
        self.vec = HashingVectorizer(n_features=2 ** 20, non_negative=True)
        self.clf = Perceptron()

        self.count = {
            "train": {
                "pos": 0,
                "neg": 0,
            },
            "test": {
                "pos": 0,
                "neg": 0,
            }
        }

        self.train = 1
        self.eval_count = {
            "pos": {"tp": 0, "fp": 0, "fn": 0},
            "neg": {"tp": 0, "fp": 0, "fn": 0},
        }

        super(StreamingLearner, self).__init__(zmq_sub_string, channel)
Example #29
def runTrial(numberOfTestPoints, iterationLimit, showChart = False):
    x1, y1, x2, y2, points = generatePoints(numberOfTestPoints)
    pclf = Perceptron()
    clf = SVC(C = 1000, kernel = 'linear')  
    sample = np.array(points)
    X = np.c_[sample[:,1], sample[:,2]]
    y = sample[:,3]
        #print(y)
    pclf.fit(X,y)
    clf.fit(X,y)
    
    iterations, w = train(points, iterationLimit)
    #print("weights ", w)
    #print("coefficients", pclf.coef_)
    errorProb = findErrorProbability(x1,y1,x2,y2,w, 50000, clf, pclf)

    if showChart:
        if iterations == iterationLimit:
            print( "No solution found in " + str(iterations) + " iterations!")
        print( "Iterations: " + str(iterations) + ' | Weights: ' + str(w))

        # plot points above(green) and below(blue) the target function.
        green_x = []
        green_y = []
        blue_x = []
        blue_y = []
        for x in points:
            if x[3] == 1:
                green_x.append(x[1])
                green_y.append(x[2])
            else:
                blue_x.append(x[1])
                blue_y.append(x[2])
        pylab.plot(green_x, green_y, 'go')
        pylab.plot(blue_x, blue_y, 'bo')

        # plot target function(black) and hypothesis function(red) lines
        x = np.array( [-1,1] )
        slope = (y2-y1)/(x2-x1)
        intercept = y2 - slope * x2
        pylab.plot(x, slope*x + intercept, 'k--')
        pylab.plot( x, -w[1]/w[2] * x - w[0] / w[2] , 'r' ) # this will throw an error if w[2] == 0
        pylab.ylim([-1,1])
        pylab.xlim([-1,1])
        pylab.show()

    return iterations, w, errorProb
Example #31
tuned_param_knn = [{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]

models = {
    'dt': {
        'name': 'Decision Tree       ',
        'estimator': DecisionTreeClassifier(),
        'param': tuned_param_dt,
    },
    'nb': {
        'name': 'Gaussian Naive Bayes',
        'estimator': GaussianNB(),
        'param': tuned_param_nb
    },
    'lp': {
        'name': 'Linear Perceptron   ',
        'estimator': Perceptron(),
        'param': tuned_param_lp,
    },
    'svc': {
        'name': 'Support Vector      ',
        'estimator': SVC(),
        'param': tuned_param_svc
    },
    'knn': {
        'name': 'K Nearest Neighbor ',
        'estimator': KNeighborsClassifier(),
        'param': tuned_param_knn
    }
}

scores = ['precision', 'recall']
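A hedged sketch of how a table like this is usually consumed, shown only for the KNN entry (the one whose parameter grid appears above) on synthetic data, since the other tuned_param_* lists are defined outside this excerpt:

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

X_train, y_train = make_classification(n_samples=300, n_features=10, random_state=0)
for score in scores:  # ['precision', 'recall'] as defined above
    search = GridSearchCV(KNeighborsClassifier(), tuned_param_knn,
                          scoring='%s_macro' % score, cv=5)
    search.fit(X_train, y_train)
    print(score, search.best_params_)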
Example #32

# Gaussian Naive Bayes

gaussian = GaussianNB()
scores = cross_val_score(gaussian, X_train, Y_train, cv=10)
acc_gaussian = round(scores.mean() * 100, 2)
acc_gaussian


# In[ ]:


# Perceptron (a single layer neural net)

perceptron = Perceptron()
scores = cross_val_score(perceptron, X_train, Y_train, cv=10)
acc_perceptron = round(scores.mean() * 100, 2)
acc_perceptron


# In[ ]:


# Neural Network (a multi layer neural net)

neural_net = MLPClassifier()
scores = cross_val_score(neural_net, X_train, Y_train, cv=10)
acc_neural_net = round(scores.mean() * 100, 2)
acc_neural_net
Example #33
def IncreasingFIT():
    global total_vect_time
    classifiers = {
        'SGD': SGDClassifier(),
        'Perceptron': Perceptron(),
        'NB Multinomial': MultinomialNB(alpha=0.01),
        'Passive-Aggressive': PassiveAggressiveClassifier(),
    }

    Vocubularysave = []
    if os.path.exists("VocubularySave.v"):
        Vocubularysave = joblib.load("VocubularySave.v")
    VocubularyList = []
    for numV in Vocubularysave:
        VocubularyList.append(numV['name'])
    vectorizer = CountVectorizer(stop_words=None, vocabulary=VocubularyList)
    transformer = TfidfTransformer()

    count = vectorizer.fit_transform(xtest)
    X_test = transformer.fit_transform(count)

    for i in range(TrainDataSize):
        tick = time.time()

        # X_train = vectorizer.transform(xtrain[i])
        count = vectorizer.fit_transform(xtrain[i])
        X_train = transformer.fit_transform(count)

        total_vect_time += time.time() - tick

        for cls_name, cls_useless in partial_fit_classifiers.items():
            cls = classifiers[cls_name]

            tick = time.time()
            # update estimator with examples in the current mini-batch
            # print(X_train)

            cls.partial_fit(X_train, ytrain[i], classes=all_classes)

            # if i % printjumpsize == 0:
            if i == (TrainDataSize - 1):
                # accumulate test accuracy stats
                cls_stats[cls_name]['total_fit_time'] += time.time() - tick
                cls_stats[cls_name]['n_train'] += X_train.shape[0]
                cls_stats[cls_name]['n_train_pos'] += sum(ytrain[i])
                tick = time.time()

                # compute test accuracy
                cls_stats[cls_name]['accuracy'] = cls.score(X_test, ytest)

                cls_stats[cls_name]['prediction_time'] = time.time() - tick
                acc_history = (cls_stats[cls_name]['accuracy'],
                               cls_stats[cls_name]['n_train'])
                cls_stats[cls_name]['accuracy_history'].append(acc_history)
                run_history = (cls_stats[cls_name]['accuracy'],
                               total_vect_time +
                               cls_stats[cls_name]['total_fit_time'])
                cls_stats[cls_name]['runtime_history'].append(run_history)

                # accumulate test accuracy stats
                if T == 0:
                    print(progress(cls_name, cls_stats[cls_name]))
                if T != 0:
                    AccuracyAverage[cls_name]['total_fit_time'] += time.time(
                    ) - tick
                    AccuracyAverage[cls_name]['n_train'] += X_train.shape[0]
                    AccuracyAverage[cls_name]['n_train_pos'] += sum(ytrain[i])
                    tick = time.time()

                    # compute test accuracy
                    AccuracyAverage[cls_name]['accuracy'] += cls.score(
                        X_test, ytest)
                    RecordOneAccuracy[cls_name]['accuracy'] += cls.score(
                        X_test, ytest)
                    acc_history = (AccuracyAverage[cls_name]['accuracy'],
                                   AccuracyAverage[cls_name]['n_train'])
                    AccuracyAverage[cls_name]['accuracy_history'].append(
                        acc_history)
                    run_history += (
                        AccuracyAverage[cls_name]['accuracy'],
                        total_vect_time +
                        AccuracyAverage[cls_name]['total_fit_time'])
                    AccuracyAverage[cls_name]['runtime_history'].append(
                        run_history)

                    recordAccuracy.append(RecordOneAccuracy)

                    print(progress2(cls_name, AccuracyAverage[cls_name], T))
Example #34
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import LogisticRegression

heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
rounds = 20
digits = datasets.load_digits()
X, y = digits.data, digits.target

classifiers = [("SGD", SGDClassifier(max_iter=100, tol=1e-3)),
               ("ASGD", SGDClassifier(average=True, max_iter=100, tol=1e-3)),
               ("Perceptron", Perceptron(tol=1e-3)),
               ("Passive-Aggressive I",
                PassiveAggressiveClassifier(loss='hinge', C=1.0, tol=1e-4)),
               ("Passive-Aggressive II",
                PassiveAggressiveClassifier(loss='squared_hinge',
                                            C=1.0,
                                            tol=1e-4)),
               ("SAG",
                LogisticRegression(solver='sag', tol=1e-1,
                                   C=1.e4 / X.shape[0]))]

xx = 1. - np.array(heldout)

for name, clf in classifiers:
    print("training %s" % name)
    rng = np.random.RandomState(42)
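    # (The excerpt stops here; what follows is a hedged reconstruction of how such a
    # heldout-fraction comparison is usually finished: average the test error over
    # several random splits and plot one curve per classifier.)
    yy = []
    for i in heldout:
        yy_ = []
        for r in range(rounds):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=i, random_state=rng)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            yy_.append(1 - np.mean(y_pred == y_test))
        yy.append(np.mean(yy_))
    plt.plot(xx, yy, label=name)

plt.legend(loc="upper right")
plt.xlabel("Proportion train")
plt.ylabel("Test Error Rate")
plt.show()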
Example #35
def main():

    # Number of hiddens
    n = 10

    # Instance of Read class to read in data
    trainData, testData, train, test = Read().read()

    # Ensure that both the training and testing dataframes have the same columns.
    # If we're adding any new columns through this, make all values in that column = 0.
    trainData, testData = trainData.align(testData,
                                          join='outer',
                                          fill_value=0,
                                          axis=1)

    # Run the training and testing data through an Sklearns model.
    model = SVC()
    model_GNB = GaussianNB()
    model_Bern = BernoulliNB()
    model_MNB = MultinomialNB()
    model_MLP = MLPClassifier(hidden_layer_sizes=(n, n),
                              activation='logistic',
                              max_iter=2000)
    testScore = 0
    targetLabels = train['Survived'].values
    testLabels = test['Survived'].values

    trainData, testData = dropData(trainData, testData)
    # just uncomment the data you want to use

    trainFeatures = trainData
    testFeatures = testData

    #    trainFeatures = train[["Pclass", "Age", "Sex", "Fare"]].values
    #    testFeatures = test[["Pclass", "Age", "Sex", "Fare"]].values

    print(trainFeatures.shape)
    print(trainFeatures)
    # Perceptron baseline (no hidden layers)
    model_Perceptron = Perceptron(n_iter_no_change=10)

    model.fit(trainFeatures, targetLabels)
    model_GNB.fit(trainFeatures, targetLabels)
    model_Bern.fit(trainFeatures, targetLabels)
    model_MNB.fit(trainFeatures, targetLabels)
    model_MLP.fit(trainFeatures, targetLabels)
    model_Perceptron.fit(trainFeatures, targetLabels)

    # Let's test a few different models ..

    # Now do support vector machine
    trainScore = model.score(trainFeatures, targetLabels) * 100
    testScore = model.score(testFeatures, testLabels) * 100
    print("Support vector results:\n", "Train: ", trainScore, "%", "Test: ",
          testScore, "%\n\n")

    # Now do Gaussian Naive Bayes
    trainScore = model_GNB.score(trainFeatures, targetLabels) * 100
    testScore = model_GNB.score(testFeatures, testLabels) * 100
    print("Gaussian NB results:\n", "Train: ", trainScore, "%", "Test: ",
          testScore, "%\n\n")

    # Now do Bernoulli Naive Bayes
    #    trainScore = model_Bern.score(trainFeatures, targetLabels)*100
    #    testScore = model_Bern.score(testFeatures, testLabels)*100
    #    print("Bernoulli NB results:\n", "Train: ", trainScore, "%", "Test: ", testScore, "%")

    # Now do Multinomial Naive Bayes
    #    trainScore = model_MNB.score(trainFeatures, targetLabels)*100
    #    testScore = model_MNB.score(testFeatures, testLabels)*100
    #    print("Multinomial NB results:\n", "Train: ", trainScore, "%", "Test: ", testScore, "%")

    # Now do MLP
    trainScore = model_MLP.score(trainFeatures, targetLabels) * 100
    testScore = model_MLP.score(testFeatures, testLabels) * 100
    print("Multi-Layer Perceptron results:\n", "Train: ", trainScore, "%",
          "Test: ", testScore, "%\n\n")

    # Now do Perceptron
    trainScore = model_Perceptron.score(trainFeatures, targetLabels) * 100
    testScore = model_Perceptron.score(testFeatures, testLabels) * 100
    print("Perceptron Learning Algorithm results:\n", "Train: ", trainScore,
          "%", "Test: ", testScore, "%\n\n\n\n")

    # Some PCA magic, reduce down to n parameters
    """ Everything below this point is using PCA for dimensionality reduction in the data

    """
    numComp = 2
    pca = PCA(n_components=numComp)
    pca.fit(trainFeatures)
    trainFeatures = pca.transform(trainFeatures)
    testFeatures = pca.transform(testFeatures)
    print(trainFeatures.shape)

    model.fit(trainFeatures, targetLabels)
    model_GNB.fit(trainFeatures, targetLabels)
    model_Bern.fit(trainFeatures, targetLabels)

    # Now do support vector machine
    trainScore = model.score(trainFeatures, targetLabels) * 100
    testScore = model.score(testFeatures, testLabels) * 100
    print("(PCA) Support vector results:\n", "Train: ", trainScore, "%",
          "Test: ", testScore, "%\n\n")

    # Now do Gaussian Naive Bayes
    trainScore = model_GNB.score(trainFeatures, targetLabels) * 100
    testScore = model_GNB.score(testFeatures, testLabels) * 100
    print("(PCA) Gaussian NB results:\n", "Train: ", trainScore, "%", "Test: ",
          testScore, "%\n\n")

    # Now do Bernoulli Naive Bayes
    #    trainScore = model_Bern.score(trainFeatures, targetLabels)*100
    #    testScore = model_Bern.score(testFeatures, testLabels)*100
    #    print("Bernoulli NB results:\n", "Train: ", trainScore, "%", "Test: ", testScore, "%")

    model_MLP.fit(trainFeatures, targetLabels)
    trainScore = model_MLP.score(trainFeatures, targetLabels) * 100
    testScore = model_MLP.score(testFeatures, testLabels) * 100
    print("(PCA) Multi-Layer Perceptron results:\n", "Train: ", trainScore,
          "%", "Test: ", testScore, "%\n\n")

    model_Perceptron.fit(trainFeatures, targetLabels)
    trainScore = model_Perceptron.score(trainFeatures, targetLabels) * 100
    testScore = model_Perceptron.score(testFeatures, testLabels) * 100
    print("(PCA) Perceptron Learning Algorithm results:\n", "Train: ",
          trainScore, "%", "Test: ", testScore, "%\n\n\n")

    #    fig = plt.figure(figsize = (8,8))
    #    ax = fig.add_subplot(1,1,1)
    #    ax.set_title("n="+str(numComp)+"  PCA")

    #    colors = itertools.cycle(['r', 'g', 'b'])
    #    plt.scatter(trainFeatures[:, 0], trainFeatures[:, 1], alpha = 0.5)
    #    print(trainFeatures)
    #    plt.show()
    plot(trainFeatures, targetLabels, "PCA", "MLP", numComp)
Example #36
print(np.shape(class_labels))
#print(class_labels)

# In[251]:


def calcul_accuracy(real_label, pre_label):
    cnt = 0
    for i in range(len(real_label)):
        if real_label[i] == pre_label[i]:
            cnt += 1
    return cnt / len(real_label)


#perceptron on the first two features
perceptron = Perceptron(max_iter=1000, tol=0.0001, random_state=None)
perceptron.fit(
    feature_2, class_labels
)  #fit(X, y[, coef_init, intercept_init, ...])	Fit linear model with Stochastic Gradient Descent.
print("feature_2's final Weight:")
#print(perceptron.intercept_)             # don't understand
final_wei1 = perceptron.coef_
print(final_wei1)

label_train_pred1 = perceptron.predict(
    feature_2)  #Predict class labels for samples in X.
#print(label_train_pred1)
mean_ar = perceptron.score(feature_2, class_labels)
#print(mean_ar)
mean_accuracy1 = calcul_accuracy(
    class_labels, label_train_pred1)
Example #37
import numpy as np
from sklearn.linear_model import Perceptron

X = [[0, 0.1], [0.1, 0.9], [0.02, 0], [0.9, 0], [1, 0.9]]
y = [0, 0, 0, 0, 1]

per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)

#y_pred = per_clf.predict([[2, 0.5]])
y_pred = per_clf.predict([[0.12, 0], [1, 0], [0, 1], [0.9, 0.9], [1, 1]])

print(y_pred)  # output expected to be [0 0 0 1 1]
Example #38
def test_multiclass(data):
    data['target'] = lb.fit_transform(data['target'])
    print('Classes: {}'.format(list(lb.classes_)))

    X = data.loc[:, data.columns != 'target']
    y = data['target']

    print('Decision tree - GINI')
    dt = DecisionTreeClassifier()
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('Decision tree - Entropy')
    dt = DecisionTreeClassifier(criterion='entropy')
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('SVM - One vs. all')
    dt = LinearSVC()
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('SVM - Crammer-Singer')
    dt = LinearSVC(multi_class='crammer_singer')
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('Perceptron')
    dt = Perceptron()
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('Random Forest - GINI')
    dt = RandomForestClassifier()
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('Random Forest - Entropy')
    dt = RandomForestClassifier(criterion='entropy')
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
    print('------------------------------')

    print('Naive Bayes')
    dt = BernoulliNB()
    scores = cross_validate(dt,
                            X,
                            y=y,
                            cv=10,
                            scoring={
                                'acc': 'accuracy',
                                'rec': make_scorer(multiclass_recall),
                                'f1': 'f1_weighted'
                            },
                            return_train_score=False)

    print('Accuracy: {:.2f} (+/- {:.2f})'.format(scores['test_acc'].mean(),
                                                 scores['test_acc'].std()))
    print('Recall: {:.2f} (+/- {:.2f})'.format(scores['test_rec'].mean(),
                                               scores['test_rec'].std()))
    print('F-score: {:.2f} (+/- {:.2f})'.format(scores['test_f1'].mean(),
                                                scores['test_f1'].std()))
Example #39
# Create the standardization object
sc = StandardScaler()

# Compute the mean and standard deviation needed for standardization from the existing data
# (the later steps cannot run without executing this first)
sc.fit(X_train)

# Inspect the mean and standard deviation (returned as arrays [mean, std])
sc.mean_

# Use the mean and std computed by sc.fit to standardize the training and test sets
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Train the model
ppn = Perceptron(max_iter=40, eta0=0.01, random_state=0)
ppn.fit(X_train_std, y_train)

# Predict
y_pred = ppn.predict(X_test_std)
print(accuracy_score(y_test, y_pred))

# Plot
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
pdr.plot_decision_regions(X=X_combined_std,
                          y=y_combined,
                          classifier=ppn,
                          test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
        self.model = model

    def FIT(self, X_train, y_train):
        self.model.fit(X_train, y_train)

    def predict(self, X_test, y_test):
        y_pred = self.model.predict(X_test)
        print(y_pred)
        print(f"Number of Misclassified Examples by {self.model} are {sum(y_pred!=y_test)}")

        print(f"Accuracy of {self.model} using Accuracy_score function is {accuracy_score(y_test, y_pred)}")

        print(f"Accuracy of {self.model} using Score is {self.model.score(X_test, y_test)}")



percp = Perceptron(eta0=0.1, random_state=1)
Logic = LogisticRegression(C=100.0, random_state=1, solver='lbfgs', multi_class='ovr')
Logic2 = LogisticRegression(C=100.0, random_state=1, solver='lbfgs', multi_class='multinomial',)

m1 = Model(percp)
m2 = Model(Logic)
m3 = Model(Logic2)

m1.FIT(X_train_std, y_train)
m2.FIT(X_train_std, y_train)
m3.FIT(X_train_std, y_train)

m1.predict(X_test_std, y_test)
m2.predict(X_test_std, y_test)
m3.predict(X_test_std, y_test)
Example #41
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)

X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0, shuffle=True)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
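The snippet above targets a pre-0.20 scikit-learn: sklearn.cross_validation and the Perceptron n_iter argument have since been removed. A hedged sketch of the same pipeline against the current API:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0, shuffle=True)
ppn.fit(X_train_std, y_train)
print('Accuracy: %.2f' % accuracy_score(y_test, ppn.predict(X_test_std)))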
Example #42
from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from sklearn import metrics

#


languages_data_folder = sys.argv[1] #this line is the cause of 90% of my stress in this class today
dataset = load_files(languages_data_folder)

docs_train, docs_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5)

vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer='char', use_idf=False)
# character 1-3 gram term-frequency features (no idf weighting, no stop-word removal)

clf = Pipeline([ ('vec', vectorizer), ('clf', Perceptron(tol=1e-3)), ]) #This is equivalent to one millitolerance

clf.fit(docs_train, y_train)

y_predicted = clf.predict(docs_test)

print(metrics.classification_report(y_test, y_predicted, target_names=dataset.target_names))
#this is the confusing confusion matrix

centimeter = metrics.confusion_matrix(y_test, y_predicted)

print(centimeter)


sentences = [
    u'Pikachu squirtle togepi',
Example #43
    print("accuracy:   %0.3f" % score)
    totalPredictions.append(predictions)
    return name, score, train_time, test_time

results = []

#Main Code

#Classifiers
clf1 = LogisticRegression()
clf2 = PassiveAggressiveClassifier()
clf3 = MultinomialNB(alpha=.01)
clf4 = BernoulliNB(alpha=.01)
clf5 = NearestCentroid()
clf6 = RidgeClassifier(tol=1e-2, solver="sag")
clf7 = Perceptron(n_iter=50)
clf8 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1)
clf9 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1)
clf10 = SGDClassifier(loss='hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
clf11 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1)
clf12 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1)
clf13 = SGDClassifier(loss='log',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
clf14 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1)
clf15 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1)
clf16 = SGDClassifier(loss='modified_huber',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
clf17 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1)
clf18 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1)
clf19 = SGDClassifier(loss='squared_hinge',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
clf20 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="l2",n_jobs=-1)
clf21 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="l1",n_jobs=-1)
clf22 = SGDClassifier(loss='perceptron',alpha=.0001, n_iter=50,shuffle=True,penalty="elasticnet",n_jobs=-1)
Example #44
def test_predict_proba():
    X = np.random.randn(5, 5)
    y = np.array([0, 1, 0, 0, 0])
    clf1 = Perceptron()
    clf1.fit(X, y)
    DESKNN([clf1, clf1, clf1])
Example #45
test_hy = hy[:8678, :]
X_train, X_test, y_train, y_test = train_test_split(hy, data, train_size=0.80, test_size=0.20, random_state=1234)
nb = MultinomialNB()
nb = nb.fit(X=X_train, y=y_train)
y_pred = nb.predict(X_test)
print("Bow + Tf-idf --->  " + str(accuracy_score(y_test, y_pred) * 100) + "%")


# -------------------------------------PERCEPTRON-------------------------------------

bow_vectorizer = CountVectorizer()
bow = bow_vectorizer.fit_transform(res)
train_bow = bow[8678:, :]
test_bow = bow[:8678, :]
X_train, X_test, y_train, y_test = train_test_split(bow, data, test_size=0.20, train_size=0.80, random_state=1234)
per = Perceptron(tol=1e-3, random_state=0)
per = per.fit(X=X_train, y=y_train)
y_pred = per.predict(X_test)
print("----------------------------------------------------------------------------------")
print("__________PERCEPTRON__________")
print("BagOfWords ----->  " + str(accuracy_score(y_test, y_pred) * 100) + "%")


tfidf_vectorizer = TfidfVectorizer()
tfidf = tfidf_vectorizer.fit_transform(res)
train_tfidf = tfidf[8678:, :]
test_tfidf = tfidf[:8678, :]
X_train, X_test, y_train, y_test = train_test_split(tfidf, data, test_size=0.20, train_size=0.80, random_state=1234)
per = Perceptron(tol=1e-3, random_state=0)
per = per.fit(X=X_train, y=y_train)
y_pred = per.predict(X_test)
Example #46
    def CheckingClassifer(ClassiferName):
        if ClassiferName == "Perceptron":
            # Running the Perceptron Classifier and computing the Test accuracy and CV accuracy
            print(
                "--------------------------------perceptron---------------------------------------------------"
            )
            start_time = time.time()
            eta_P = input("Please Insert the value of learning Rate : \n")
            pip_perce = make_pipeline(
                StandardScaler(), PCA(n_components=2),
                Perceptron(penalty=None,
                           alpha=0.0001,
                           fit_intercept=True,
                           max_iter=50,
                           tol=None,
                           shuffle=True,
                           verbose=0,
                           eta0=float(eta_P),
                           n_jobs=None,
                           random_state=0,
                           early_stopping=False,
                           validation_fraction=0.1,
                           n_iter_no_change=5,
                           class_weight=None,
                           warm_start=False))
            pip_perce.fit(X, y)
            y_pred = pip_perce.predict(Xtest)
            print("Test Accuracy :  ", metrics.accuracy_score(
                ytest, y_pred))  # Computing the Test accuracy
            print("The Running Time To compute the Test Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            K3 = StratifiedKFold(n_splits=10, random_state=1,
                                 shuffle=True).split(T, P)
            scores1 = []
            for K, (train_index, test_index) in enumerate(K3):
                pip_perce.fit(T[train_index], P[train_index])
                score = pip_perce.score(T[test_index], P[test_index])
                scores1.append(score)
            print(
                " The accuracy of Cross validation Perceptron : " +
                str(sum(scores1) / len(scores1)))  # Computing The CV Accuracy
            print("The Running Time To compute the CV Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))

            print(
                "-----------------------------------------------------------------------------------"
            )
        elif ClassiferName == "SVM":
            # Running the Support Vector Machine (SVM) Classifier and computing the Test accuracy and CV accuracy
            print(
                "--------------------------------SVM---------------------------------------------------"
            )
            start_time = time.time()
            C_SVM = input("Please input the value of C : \n")
            pip_SV = make_pipeline(
                StandardScaler(), PCA(n_components=2),
                LinearSVC(random_state=0, tol=1e-5, C=float(C_SVM)))
            pip_SV.fit(X, y)
            y_pred = pip_SV.predict(Xtest)
            print("Test Accuracy:  ", metrics.accuracy_score(
                ytest, y_pred))  # Computing The test accuracy
            print("The Running Time To compute the Test Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            K3 = StratifiedKFold(n_splits=10, random_state=1,
                                 shuffle=True).split(T, P)
            scores2 = []
            for K, (train_index, test_index) in enumerate(K3):
                pip_SV.fit(T[train_index], P[train_index])
                score = pip_SV.score(T[test_index], P[test_index])
                scores2.append(score)
            print(
                " The accuracy of Cross validation SVM :  " +
                str(sum(scores2) / len(scores2)))  # Computing The CV accuracy
            print("The Running Time To compute the CV Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            scores = []
        elif ClassiferName == "DecisionTree":
            # Running the Decision Tree algorithm (DT) Classifier and computing the Test accuracy and CV accuracy
            print(
                "--------------------------------DT---------------------------------------------------"
            )
            start_time = time.time()
            Max_depth = input("Please Insert the Depth of the Tree : \n")
            pip_DT = make_pipeline(
                StandardScaler(), PCA(n_components=2),
                DecisionTreeClassifier(random_state=0,
                                       max_depth=int(Max_depth)))
            pip_DT.fit(X, y)
            y_pred = pip_DT.predict(Xtest)
            print("Test Accuracy :  ", metrics.accuracy_score(
                ytest, y_pred))  # Computing The test accuracy
            print("The Running Time To compute the Test Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            K0 = StratifiedKFold(n_splits=10, random_state=1,
                                 shuffle=True).split(T, P)
            scores3 = []
            for K, (train_index, test_index) in enumerate(K0):
                pip_DT.fit(T[train_index], P[train_index])
                score = pip_DT.score(T[test_index], P[test_index])
                scores3.append(score)
            print(
                " The accuracy of Cross validation DT :  " +
                str(sum(scores3) / len(scores3)))  # Computing The CV accuracy
            print("The Running Time To compute the CV Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))

        elif ClassiferName == "KNN":
            # Running the k-nearest neighbors algorithm (k-NN) Classifier and computing the Test accuracy and CV accuracy
            print(
                "--------------------------------KNN---------------------------------------------------"
            )
            start_time = time.time()
            N_of_neg = input("Please insert the number of neighbours : \n")
            pip_KNN = make_pipeline(
                StandardScaler(), PCA(n_components=2),
                KNeighborsClassifier(n_neighbors=int(N_of_neg)))
            pip_KNN.fit(X, y)
            y_pred = pip_KNN.predict(Xtest)
            print("Test Accuracy: ", metrics.accuracy_score(
                ytest, y_pred))  # Computing the Test accuracy
            print("The Running Time To compute the Test Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            K1 = StratifiedKFold(n_splits=10, random_state=1,
                                 shuffle=True).split(T, P)
            scores4 = []
            for K, (train_index, test_index) in enumerate(K1):
                pip_KNN.fit(T[train_index], P[train_index])
                score = pip_KNN.score(T[test_index], P[test_index])
                scores4.append(score)
            print(" The accuracy of Cross validation KNN :  " +
                  str(sum(scores4) / len(scores4)))  #Computing the CV Accuracy
            print("--- %s seconds ---" % (time.time() - start_time))

        elif ClassiferName == "LG":
            #Running the Logistic Regression Classifier and computing the Test accuracy and CV accuracy
            print(
                "--------------------------------LG---------------------------------------------------"
            )
            start_time = time.time()
            C_LG = input("Please Insert C value :\n")
            pip_LG = make_pipeline(
                StandardScaler(), PCA(n_components=2),
                LogisticRegression(penalty='l2',
                                   dual=False,
                                   tol=0.0001,
                                   C=float(C_LG),
                                   fit_intercept=True,
                                   intercept_scaling=1,
                                   class_weight=None,
                                   random_state=None,
                                   max_iter=500,
                                   solver='liblinear'))
            pip_LG.fit(X, y)
            y_pred = pip_LG.predict(Xtest)
            print("Test Accuracy :",
                  metrics.accuracy_score(ytest,
                                         y_pred))  #Computing the test accuracy
            print("The Running Time To compute the Test Accuracy :")
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            kflod = StratifiedKFold(n_splits=10, random_state=1,
                                    shuffle=True).split(T, P)
            scores5 = []
            for K, (train_index, test_index) in enumerate(kflod):
                pip_LG.fit(T[train_index], P[train_index])
                score = pip_LG.score(T[test_index], P[test_index])
                scores5.append(score)
            print(" The accuracy of Cross validation LG : " +
                  str(sum(scores5) / len(scores5)))  #Computing The CV Accuracy
            print("--- %s seconds ---" % (time.time() - start_time))

            print(
                "----------------------------------------------------------------------"
            )
Example #47
X = np.array(cancer.ix[:, 0:28].values)
#output label
y = np.array(cancer.ix[:, 29].values)
#creating empty dictionary to append all classifiers
dictOfClassifiers = {}
knn = KNeighborsClassifier(n_neighbors=7, weights='uniform')
dictOfClassifiers.update({knn: "K nearest neighbour"})
svm = SVC(kernel="rbf", C=1)
dictOfClassifiers.update({svm: "SVM"})
lr = LogisticRegression(C=1, max_iter=100)
dictOfClassifiers.update({lr: "Logistic Regression"})
dt = DecisionTreeClassifier(criterion='entropy', max_depth=4)
dictOfClassifiers.update({dt: "Decision Tree"})
nb = GaussianNB()
dictOfClassifiers.update({nb: "Naive Bayes"})
p = Perceptron(eta0=1, alpha=0.0001)
dictOfClassifiers.update({p: "Perceptron"})
ann = MLPClassifier(hidden_layer_sizes=(3, 5),
                    activation='relu',
                    alpha=1,
                    max_iter=2000,
                    learning_rate='adaptive')
dictOfClassifiers.update({ann: "Neural Network"})
dnn = MLPClassifier(hidden_layer_sizes=(10, 10),
                    activation='relu',
                    alpha=1,
                    max_iter=2000,
                    learning_rate='adaptive')
dictOfClassifiers.update({dnn: "Deep Learning"})
rfc = RandomForestClassifier(max_depth=14,
                             n_estimators=19,
# In[ ]:

# Gaussian Naive Bayes

gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian

# In[ ]:

# Perceptron

perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron

# In[ ]:

# Linear SVC

linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc
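
# In[ ]:

# A minimal sketch (assumes the same X_train and Y_train used above): cross-validated
# accuracy is usually a less optimistic estimate than the training-set scores above.
from sklearn.model_selection import cross_val_score

for name, clf in [("Gaussian NB", gaussian), ("Perceptron", perceptron),
                  ("Linear SVC", linear_svc)]:
    cv_acc = cross_val_score(clf, X_train, Y_train, cv=5).mean()
    print(name, round(cv_acc * 100, 2))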
Ejemplo n.º 49
0
test_stats = {'n_test': 0, 'n_test_pos': 0}

tick = time.time()
parsing_time = time.time() - tick
tick = time.time()

# Vectorize the dataset text (hashing trick) -------------------------------------------------------
# vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,
#                                alternate_sign=False)
# X_test = vectorizer.transform(xtest)

# Here are some classifiers that support the `partial_fit` method
# Create a fresh container of classifiers
partial_fit_classifiers = {
    'SGD': SGDClassifier(),
    'Perceptron': Perceptron(),
    'NB Multinomial': MultinomialNB(alpha=0.01),
    'Passive-Aggressive': PassiveAggressiveClassifier(),
}
# Load a previously saved container of classifiers

# end vectorize the dataset text (hashing trick) -------------------------------------------------------
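
# A minimal, self-contained sketch of how classifiers like these are trained
# incrementally with partial_fit; the toy vectorizer and minibatches below are
# assumptions for illustration, not part of the original pipeline.
from sklearn.base import clone
from sklearn.feature_extraction.text import HashingVectorizer

toy_vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,
                                   alternate_sign=False)
toy_minibatches = [(["good film", "bad film"], [1, 0]),
                   (["great plot", "boring plot"], [1, 0])]
all_classes = [0, 1]
for cls_name, cls in partial_fit_classifiers.items():
    toy_cls = clone(cls)  # clone so the real classifiers above stay untouched
    for texts, labels in toy_minibatches:
        toy_cls.partial_fit(toy_vectorizer.transform(texts), labels, classes=all_classes)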

vectorizing_time = time.time() - tick
test_stats['n_test'] += len(ytest)
test_stats['n_test_pos'] += sum(ytest)


def progress(cls_name, stats):
    """Report progress information, return a string.报告进度信息,返回一个字符串。"""
    duration = time.time() - stats['t0']
Ejemplo n.º 50
0
   micro avg       0.84      0.84      0.84     16262
   macro avg       0.77      0.81      0.79     16262
weighted avg       0.85      0.84      0.84     16262

"""

from sklearn import metrics
fpr, tpr, thresholds = metrics.roc_curve(y_test, predicted, pos_label=1)
print("AUC :", metrics.auc(fpr, tpr))
"""
AUC : 0.803154835157
"""
from sklearn.linear_model import Perceptron

classifier5 = Perceptron(eta0=0.5, max_iter=75)
mlp = classifier5.fit(x_train, y_train)

print("Training accuracy: ", mlp.score(x_train, y_train))

predicted = mlp.predict(x_test)
print("Testing Accuracy score: ", accuracy_score(y_test, predicted))
print("Confusion Matrix: ")
print(confusion_matrix(y_test, predicted))

print("\nClassification Report: ")
print(classification_report(y_test, predicted))
"""
Training accuracy:  0.5
Testing Accuracy score:  0.23637928914
Confusion Matrix: 
Ejemplo n.º 51
0
    y = y.reshape(-1, 1)
    for idx, row in enumerate(tokens):
        for v in row:
            if v in feature_dict:
                X[idx, feature_dict[v]] += 1
    return X, y


if __name__ == '__main__':
    train_labels, train_tokens, train_vocab = convert_data(train_text)
    index_dir = {key: value for (value, key) in enumerate(train_vocab)}
    inversed_index_dir = {value: key for (key, value) in index_dir.items()}
    train_X, train_y = lists_to_arrays(train_labels, train_tokens, index_dir)

    # train perceptron
    perceptron = Perceptron(tol=1e-3, random_state=42)
    perceptron.fit(train_X, train_y)
    #print(perceptron.score(train_X,train_y))

    # train logistic regression
    lr = LogisticRegression(random_state=42, tol=1e-4)
    lr.fit(train_X, train_y)
    #print(lr.score(train_X, train_y))
    print('Top words for LR:')
    for coef in lr.coef_:
        print([inversed_index_dir[x] for x in np.argsort(coef)[::-1][:10]])

    print("Top words for Perceptron:")
    for coef in perceptron.coef_:
        print([inversed_index_dir[x] for x in np.argsort(coef)[::-1][:10]])
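
    # A small follow-up sketch using the same coefficient matrices: the lowest-weighted
    # (most negative) features are simply the start of the argsort order.
    print('Bottom words for Perceptron:')
    for coef in perceptron.coef_:
        print([inversed_index_dir[x] for x in np.argsort(coef)[:10]])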
Ejemplo n.º 52
0
            metrics.classification_report(y_test,
                                          pred,
                                          target_names=target_names))

    if opts.print_cm:
        print("confusion matrix:")
        print(metrics.confusion_matrix(y_test, pred))

    print()
    clf_descr = str(clf).split('(')[0]
    return clf_descr, score, train_time, test_time


results = []
for clf, name in ((RidgeClassifier(tol=1e-2, solver="lsqr"),
                   "Ridge Classifier"), (Perceptron(n_iter=50), "Perceptron"),
                  (PassiveAggressiveClassifier(n_iter=50),
                   "Passive-Aggressive"),
                  (KNeighborsClassifier(n_neighbors=10), "kNN"),
                  (RandomForestClassifier(n_estimators=100), "Random forest")):
    print('=' * 80)
    print(name)
    results.append(benchmark(clf))

for penalty in ["l2", "l1"]:
    print('=' * 80)
    print("%s penalty" % penalty.upper())
    # Train Liblinear model
    results.append(benchmark(LinearSVC(penalty=penalty, dual=False, tol=1e-3)))

    # Train SGD model
Ejemplo n.º 53
0
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8, c=cmap(idx), marker=markers[idx], label=cl)
    if test_id:
        X_test, y_test = X[test_id, :], y[test_id]
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none', edgecolors='black', s=55, alpha=1.0, linewidths=1, marker='o', label='test set')

if __name__ == '__main__':
    iris = datasets.load_iris()
    iris_X = iris.data[:, [2, 3]]  # take columns 2 and 3 of the data as features
    iris_y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3, random_state=0)  # split into training and test sets
    sc = StandardScaler()  # feature scaling
    sc.fit(X_train)  # learn the sample mean and standard deviation of each feature
    X_train_sc = sc.transform(X_train)  # standardize the training data
    X_test_sc = sc.transform(X_test)  # apply the same transform to the test set
    ppn = Perceptron(max_iter=50, eta0=0.1, random_state=0)  # perceptron model: 50 iterations, learning rate 0.1, training set reshuffled each epoch
    ppn.fit(X_train_sc, y_train)  # train
    y_pred = ppn.predict(X_test_sc)  # predict on the test set
    X_combined_std = np.vstack((X_train_sc, X_test_sc))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_id=range(105, 150))
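    # A small follow-up sketch (uses the variables defined above): report the
    # perceptron's accuracy on the standardized test set alongside the plot.
    print('Test accuracy: %.2f' % ppn.score(X_test_sc, y_test))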
    plt.show()
Ejemplo n.º 54
0
    def run(self):
        names = ['DNA_TO_CLASSIFY', 'attribute', 'sequence']
        data = pd.read_csv(self.file_name, names = names)

        print('Build our dataset using custom pandas dataframe')
        clases = data.loc[:,'DNA_TO_CLASSIFY']

        sequence = list(data.loc[:, 'sequence'])

        dic = {}
        for i, seq in enumerate(sequence):
            nucleotides = list(seq)
            nucleotides = [char for char in nucleotides if char != '\t']
            nucleotides.append(clases[i])
            
            dic[i] = nucleotides

        print('Convert Dict object into dataframe')
        df = pd.DataFrame(dic)

        print('transpose dataframe into correct format')
        df = df.transpose()
        df.rename(columns = {XXX__Length of sample__XX:'XXXXX__Sample DNA Name___XXXXX'}, inplace = True)
        

        print('Encoding')
        numerical_df = pd.get_dummies(df)
        numerical_df.drop('helitron_not-helitron', axis = 1, inplace = True)
        print(numerical_df)
        numerical_df.rename(columns = {'XXX__Fill_XXXX':'XXX__Fill__XXX'}, inplace = True)

        #Importing different classifier from sklearn
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.naive_bayes import BernoulliNB
        from sklearn.linear_model import Perceptron
        from sklearn.linear_model import SGDClassifier
        from sklearn.linear_model import PassiveAggressiveClassifier
        from sklearn.metrics import classification_report, accuracy_score

        from sklearn.model_selection import train_test_split
        X = numerical_df.drop(['heli'], axis = 1).values
        y = numerical_df['heli'].values
        

        #define a seed for reproducibility
        seed = 1

        print('Splitting data into training and testing data')
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = seed)
        print(X_test[0])  
        # Define scoring method
        scoring = 'accuracy'

        print('Model building to train')
        names = ['MultinomialNB', 'BernoulliNB', 'Perceptron', 'SGDClassifier', 'PassiveAggressiveClassifier']
        Classifiers = [
            MultinomialNB(),
            BernoulliNB(),
            Perceptron(),
            SGDClassifier(),
            PassiveAggressiveClassifier(),
            ]
        models = zip(names, Classifiers)
        from sklearn.model_selection import KFold, cross_val_score

        names = []
        result = []
        for name, model in models:
            kfold = KFold(n_splits = 5, shuffle = True, random_state = 1)
            cv_results = cross_val_score(model, X_train, y_train, cv = kfold, scoring = 'accuracy', verbose=2, n_jobs=-1)
            result.append(cv_results)
            names.append(name)
            msg = "{0}: {1} ({2})".format(name, cv_results.mean(), cv_results.std())
            print(msg)


        models = zip(names, Classifiers)
        for name, model in models:
            print("Training with: "+name)
            model.partial_fit(X_train, y_train, classes=np.unique(y_train))
            y_pred = model.predict(X_test)
            print('Exporting')
            joblib.dump(model, "Models/"+name + ".pkl")
            print(name)
            print(accuracy_score(y_test, y_pred))
            print(classification_report(y_test, y_pred))
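
        # A minimal sketch of reloading one of the exported models (assumes the
        # Models/ directory written above; 'Perceptron' is just an example file name).
        reloaded = joblib.load("Models/Perceptron.pkl")
        print(reloaded.predict(X_test[:5]))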
Ejemplo n.º 55
0
score.append(lr.score(x_test,y_test))
print('                 Logistic Regression      ')
print(cross_val_score(lr,x_train,y_train,cv=3))
print(f1(y_test,y_lr,average='micro'))

#LDA
lda = LinearDiscriminantAnalysis(solver='svd')
lda.fit(x_train,y_train)
y_lda = lda.predict(x_test)
score.append(lda.score(x_test,y_test))
print('                 Linear Discriminant Analysis         ')
print(cross_val_score(lda,x_train,y_train,cv=3))
print(f1(y_test,y_lda,average='micro'))

#Perceptron
per = Perceptron(penalty='l1',alpha=0.001)
per.fit(x_train,y_train)
y_per = per.predict(x_test)
score.append(per.score(x_test,y_test))
print('                 Perceptron Classifier         ')
print(cross_val_score(per,x_train,y_train,cv=3))
print(f1(y_test,y_per,average='micro'))

#SVM
svm = SVC(C=100,kernel='poly',degree=4,coef0 = 0)
svm.fit(x_train,y_train)
y_svm = svm.predict(x_test)
score.append(svm.score(x_test,y_test))
print('                Support Vector Machine           ')
print(f1(y_test,y_svm,average='micro'))
print(cross_val_score(svm,x_train,y_train,cv=3))
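
# A minimal summary sketch of the scores collected above (the name list here only
# covers the classifiers shown in this snippet).
for name, s in zip(['Logistic Regression', 'LDA', 'Perceptron', 'SVM'], score[-4:]):
    print(name, round(s, 4))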
Ejemplo n.º 56
0
'''
    Multi Layer Perceptron
      A single-layer Perceptron cannot classify XOR, but stacking layers makes it possible; this is called a Multi-Layer Perceptron (MLP).
'''
import numpy as np
from sklearn.linear_model import Perceptron

feature = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
label = np.array([0, 1, 1, 0])

ml = Perceptron(max_iter=1000).fit(feature, label)
print(ml.predict(feature))

print()
# Using a multi-layer neural network
from sklearn.neural_network import MLPClassifier

# ml2 = MLPClassifier(hidden_layer_sizes=50, verbose=2).fit(feature, label)  # verbose prints training progress (iteration count, loss = gap between actual and predicted values)
ml2 = MLPClassifier(
    hidden_layer_sizes=(10, 10, 10),
    learning_rate_init=0.01,
    max_iter=100,
    random_state=1,
    verbose=1).fit(
        feature,
        label)  # hidden_layer_sizes=(10, 10, 10): three hidden layers of 10 nodes each
print(ml2.predict(feature))
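
# A small follow-up sketch: report mean accuracy on the XOR table for both models.
print('Perceptron accuracy on XOR:', ml.score(feature, label))
print('MLP accuracy on XOR       :', ml2.score(feature, label))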
Ejemplo n.º 57
0
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42],
     [181, 85, 43]]

Y = [
    'male', 'male', 'female', 'female', 'male', 'male', 'female', 'female',
    'female', 'male', 'male'
]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4)

clf_tree = tree.DecisionTreeClassifier()
clf_linear = Perceptron()
clf_log = LogisticRegression()
clf_knn = KNeighborsClassifier(n_neighbors=2)
clf_svm = SVC()

clf_tree = clf_tree.fit(X_train, y_train)
clf_linear = clf_linear.fit(X_train, y_train)
clf_log = clf_log.fit(X_train, y_train)
clf_knn = clf_knn.fit(X_train, y_train)
clf_svm = clf_svm.fit(X_train, y_train)

y_pred1 = clf_tree.predict(X_test)
y_pred2 = clf_linear.predict(X_test)
y_pred3 = clf_log.predict(X_test)
y_pred4 = clf_knn.predict(X_test)
y_pred5 = clf_svm.predict(X_test)
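
# A minimal comparison sketch using the accuracy_score import above and the
# predictions already computed in this example.
for name, pred in [('Decision Tree', y_pred1), ('Perceptron', y_pred2),
                   ('Logistic Regression', y_pred3), ('KNN', y_pred4), ('SVM', y_pred5)]:
    print(name, accuracy_score(y_test, pred))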
Ejemplo n.º 58
0
#New Code For PAN
pan_train='/home/namrita/Downloads/AIdata/pan13-author-profiling-training-corpus-2013-01-09/en'
pan_temp='/home/namrita/Downloads/AIdata/pantemp'
# pan_test='/home/namrita/Downloads/AIdata/pan13-author-profiling-test-corpus2-2013-04-29/en'
print ('Reading database ...')
k,g,y_train = read_pan(pan_train)
# print (y_train)
# k_t, test_y, a_t = read_pan(pan_test)
print('Extracting features')
train_x,f_names,chi,transformer=feature_extraction2(givenlabel,k)
# print ('Writing database ...')
# writeTrainToTxt(train_x,'feature.txt')
# # writeTrainToTxt(y_train,'y.txt')
# print ('written')

train_X, test_X, train_y, test_y = train_test_split(train_x, y_train, train_size=0.80)
print ('Mission successful')
# print(len(X_train))
# test_x,_,_,_=feature_extraction2(givenlabel,k_t)
# train_x=train_x.toarray().astype(np.float)
# print (train_x.dtype)


# train_x,f_names,chi,transformer=feature_extraction(givenlabel,k)
# print(simple_classify(RandomForestClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(RidgeClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(Perceptron(),test_X,test_y,train_X,train_y))
print(simple_classify(PassiveAggressiveClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(RandomForestClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(KNeighborsClassifier(),test_X,test_y,train_X,train_y))
print(simple_classify(MultinomialNB(),test_X,test_y,train_X,train_y))
Ejemplo n.º 59
0
def get_model_from_name(model_name, training_params=None):

    # For Keras
    epochs = 250
    if 'is_test_suite' in sys.argv:
        print(
            'Heard that this is the test suite. Limiting epochs to 10, which will increase training speed dramatically at the expense of model accuracy'
        )
        epochs = 10

    all_model_params = {
        'LogisticRegression': {
            'n_jobs': -2
        },
        'RandomForestClassifier': {
            'n_jobs': -2
        },
        'ExtraTreesClassifier': {
            'n_jobs': -1
        },
        'AdaBoostClassifier': {
            'n_estimators': 10
        },
        'SGDClassifier': {
            'n_jobs': -1
        },
        'Perceptron': {
            'n_jobs': -1
        },
        'LinearRegression': {
            'n_jobs': -2
        },
        'RandomForestRegressor': {
            'n_jobs': -2
        },
        'ExtraTreesRegressor': {
            'n_jobs': -1
        },
        'MiniBatchKMeans': {
            'n_clusters': 8
        },
        'GradientBoostingRegressor': {
            'presort': False
        },
        'SGDRegressor': {
            'shuffle': False
        },
        'PassiveAggressiveRegressor': {
            'shuffle': False
        },
        'AdaBoostRegressor': {
            'n_estimators': 10
        },
        'XGBRegressor': {
            'nthread': -1,
            'n_estimators': 200
        },
        'XGBClassifier': {
            'nthread': -1,
            'n_estimators': 200
        },
        'LGBMRegressor': {},
        'LGBMClassifier': {},
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        }
    }

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'SGDClassifier': SGDClassifier(),
        'Perceptron': Perceptron(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),

        # Regressors
        # 'DeepLearningRegressor': KerasRegressor(build_fn=make_deep_learning_model, epochs=10, batch_size=10, **training_params, verbose=1),
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans()
    }

    if xgb_installed:
        model_map['XGBClassifier'] = xgb.XGBClassifier()
        model_map['XGBRegressor'] = xgb.XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = lgb.LGBMRegressor()
        model_map['LGBMClassifier'] = lgb.LGBMClassifier()

    if keras_installed:

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    model_without_params = model_map[model_name]
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
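
# A minimal usage sketch; the 'Perceptron' name and the max_iter override below are
# illustrative assumptions, not values taken from the surrounding project.
if __name__ == '__main__':
    example_clf = get_model_from_name('Perceptron', training_params={'max_iter': 1000})
    print(example_clf)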
Ejemplo n.º 60
0
y= iris.target

#splitting the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

#Feature scaling: standarization
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train) #estimates the parameters: mean and standard deviation for EACH FEATURE DIMENSION from the training data
X_train_std=sc.transform(X_train)
X_test_std=sc.transform(X_test)

#Perceptron
from sklearn.linear_model import Perceptron
ppn = Perceptron(max_iter = 40, eta0 = 0.1, random_state = 0) #random_state=0 gives the system...
#... reproducibility of the initial shuffling of the training dataset after each epoch
ppn.fit(X_train_std, y_train) #training the model on the standardized training data
y_pred = ppn.predict(X_test_std)
n_y_pred = y_pred.size
print(n_y_pred)

n_misclassification = (y_test != y_pred).sum()
print('Misclassified samples: %d' % n_misclassification)

#from the previous, the missclassification error can be calculated as:
misclassification_error = n_misclassification / n_y_pred

print('Misclassification error: %.2f' % misclassification_error)

#Therefore the accuracy in two forms:
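#A short sketch completing the comment above, using the variables already defined:
#form 1 is the complement of the misclassification error, form 2 uses sklearn's accuracy_score
from sklearn.metrics import accuracy_score
print('Accuracy (1 - error): %.2f' % (1 - misclassification_error))
print('Accuracy (accuracy_score): %.2f' % accuracy_score(y_test, y_pred))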