Example #1
def main(path,filename):

	batchsT = ['histogramaByN','histogramaColor','patrones2x2ByN','patrones3x3ByN','patronesCirculaesByN_2_5','patronesCirculaesByN_2_9','patronesCirculaesByN_3_9','patronesCirculaesByN_5_9','patronesCirculaesByN_3_5']
	batchsAux = ['histogramaByN','histogramaColor','patronesCirculaesByN_2_5','patrones2x2ByN','patrones3x3ByN','patronesCirculaesByN_2_9','patronesCirculaesByN_3_9','patronesCirculaesByN_5_9','patronesCirculaesByN_3_5','patronesCirculaesByN_6_12','patronesCirculaesByN_8_12']
	#batchs = ['patrones2x2ByN','patrones3x3ByN','patronesCirculaesByN_2_5','patronesCirculaesByN_2_9']
	#batchs = ['patrones2x2ByN','patrones3x3ByN','patronesCirculaesByN_2_5','patronesCirculaesByN_3_5']
	#for batch in batchsAux:


	#print batch
	batchs = batchsAux
	#batchs.remove(batch)
	X = []
	y = []
	load_batch(y,path,'clases',filename) 
	y = [j for i in y for j in i]
	for batch in batchs:
		load_batch(X,path,batch,filename)
	
	#X,y = load_images('/tmp/train/')
	est = [RandomForest(),Boosting()]
	for i in xrange(0,15):
		est.append(Gradient(i))
	for i in xrange(0,4):
		est.append(SVM(i))

	#scores = cross_validation.cross_val_score(clf, X, y, cv=5)
	#print scores
	clf = VotingClassifier(estimators=est)

	clf.fit(X,y)
	pickle.dump( clf, open( "clf_grande.p", "wb" ) )
	return
	X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, y, test_size=0.2,random_state=777)
	#print clf.sub_score(X_test,Y_test)
	print 'start'
	conf_matrix = metrics.confusion_matrix(Y_test,clf.predict(X_test))
	print 'confusion matrix'
	print conf_matrix
	return
	for name,estim in est:
		print name
		#estim.fit(X_train,Y_train)
		#print estim.score(X_test,Y_test)
		print cross_validation.cross_val_score(estim, X, y, cv=5,n_jobs=-1)
	print 'voter'
	print cross_validation.cross_val_score(clf, X, y, cv=5,n_jobs=-1)
	return
	#clf.fit(X_train,Y_train)
	print clf.score(X_test,Y_test)

	return
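A note on Example #1 (sketch, not the author's code): sklearn's VotingClassifier expects a list of (name, estimator) tuples rather than a bare list of the custom RandomForest/Boosting/Gradient/SVM wrappers built above. Plain sklearn estimators are assumed below purely for illustration.

from sklearn.ensemble import VotingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC

def build_voter():
    # each entry is a (name, estimator) pair, as VotingClassifier requires
    est = [('rf', RandomForestClassifier(n_estimators=100)),
           ('gb', GradientBoostingClassifier()),
           ('svc', SVC(probability=True))]
    return VotingClassifier(estimators=est, voting='hard')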
def vclas(w1,w2,w3, w4, w5):
    Xtrain,Xtest, ytrain,ytest= cv.train_test_split(trainX,trainY,test_size=0.4)

    clf1 = LogisticRegression()
    clf2 = GaussianNB()
    clf3 = RandomForestClassifier(n_estimators=10,bootstrap=True)
    clf4= ExtraTreesClassifier(n_estimators=10, bootstrap=True)
    clf5 = GradientBoostingClassifier(n_estimators=10)

    clfes=[clf1,clf2,clf3,clf4, clf5]

    eclf = VotingClassifier(estimators=[('lr', clf1), ('gnb', clf2), ('rf', clf3),('et',clf4), ('gb',clf5)],
                            voting='soft',
                            weights=[w1, w2, w3,w4, w5])

    [c.fit(Xtrain, ytrain) for c in (clf1, clf2, clf3,clf4, clf5, eclf)]
 
    N = 6
    ind = np.arange(N)
    width = 0.3
    fig, ax = plt.subplots()

    for i, clf in enumerate(clfes):
        print(clf,i)
        p1=ax.bar(i,clfes[i].score(Xtrain,ytrain,), width=width,color="blue", alpha=0.5)
        p2=ax.bar(i+width,clfes[i].score(Xtest,ytest,), width=width,color="red", alpha=0.5)
    ax.bar(len(clfes)+width,eclf.score(Xtrain,ytrain,), width=width,color="blue", alpha=0.5)
    ax.bar(len(clfes)+width *2,eclf.score(Xtest,ytest,), width=width,color="red", alpha=0.5)
    plt.axvline(4.8, color='k', linestyle='dashed')
    ax.set_xticks(ind + width)
    ax.set_xticklabels(['LogisticRegression',
                        'GaussianNB',
                        'RandomForestClassifier',
                        'ExtraTrees',
                        'GradientBoosting',
                        'VotingClassifier'],
                       rotation=40,
                       ha='right')
    plt.title('Training and Test Score for Different Classifiers')
    plt.legend([p1[0], p2[0]], ['training', 'test'], loc='lower left')
    plt.show()
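A possible way to call vclas() (sketch): the function reads the module-level trainX/trainY arrays and the cv/np/plt aliases from the surrounding script, so a toy dataset is bound to those names here before comparing two illustrative weightings.

from sklearn.datasets import load_iris

trainX, trainY = load_iris(return_X_y=True)
vclas(1, 1, 1, 1, 1)   # equal soft-voting weights
vclas(1, 1, 3, 3, 2)   # up-weight the tree-based ensembles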
Example #3
def run_voting(training_set, train_set_labels, validation_set, validation_set_labels):
    from sklearn.ensemble import VotingClassifier
    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)
    kknn_class = KNeighborsClassifier(weights='uniform', n_neighbors=5)

    logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.01, C=1.0, fit_intercept=True,
                                                                         intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
                                                                         max_iter=100, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    svm_class = svm.SVC(decision_function_shape='ovo', tol=0.001)
    eclf1 = VotingClassifier(estimators=[('knn', kknn_class), ('lr', logistic_regression_solver), ('svm', svm_class)], voting='hard')
    eclf1.fit(standard_train_inputs,train_set_labels.ravel())

    accuracy = eclf1.score(standard_valid_inputs,validation_set_labels.ravel())
    print accuracy
Example #4
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.25)


    #clf = neighbors.KNeighborsClassifier()
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())] )

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy', confidence)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))

    return confidence
Example #5
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    clf = neighbors.KNeighborsClassifier()

    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy:', confidence)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))
    return confidence
Example #6
def ensemble_voting(X_train,X_test,y_train,y_test):
	y_train = y_train.ravel()
	y_test = y_test.ravel()

	C_value, gamma_value,kernel_type = svc_param_selection(X_train, y_train, 5)
	######################
	# fit clf1 with df1
	pipe1 = Pipeline([
		('col_extract', ColumnExtractor( cols=range(0,34) )), # select feature columns 0-33 for the SVC (clf)
		('clf', SVC(C=C_value,kernel=kernel_type,gamma=gamma_value))
		])
	
	pipe1.fit(X_train, y_train) # sanity check
	print(' Sanity check')
	print(pipe1.score(X_test,y_test)) # sanity check
	

	######################
	# fit clf2 with df2
	pipe2 = Pipeline([
		('col_extract', ColumnExtractor( cols=range(35,47) )), # select feature columns 35-46 for the KNeighborsClassifier (clf)
		('clf', KNeighborsClassifier())
		])

	pipe2.fit(X_train, y_train) # sanity check
	print(' Sanity check')
	print(pipe2.score(X_test,y_test)) # sanity check
	
	######################
	# fit clf3 with df3
	pipe3 = Pipeline([
		('col_extract', ColumnExtractor( cols=range(48,95) )), # select feature columns 48-94 for the RandomForestClassifier (clf)
		('clf', RandomForestClassifier(n_estimators=20, random_state=0,criterion='entropy'))
		])

	pipe3.fit(X_train, y_train) # sanity check
	print(' Sanity check')
	print(pipe3.score(X_test,y_test)) # sanity check

	######################
	# ensemble/voting classifier combining the three feature-specific pipelines
	eclf = VotingClassifier(estimators=[('MIR-SVM', pipe1), ('SPO-kNN', pipe2), ('LYR-RF',pipe3)], voting='hard')
	eclf.fit(X_train, y_train)
	print(eclf.score(X_test,y_test))
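An alternative to the custom ColumnExtractor used above (sketch): sklearn's ColumnTransformer with a 'passthrough' step can do the same plain column slicing; only the first pipeline is shown, and the tuned SVC parameters are passed through unchanged.

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

def make_svc_pipe(C_value, kernel_type, gamma_value):
    # keep only feature columns 0-33, then feed them to the SVC, mirroring pipe1 above
    select = ColumnTransformer([('cols', 'passthrough', list(range(0, 34)))], remainder='drop')
    return Pipeline([('col_extract', select),
                     ('clf', SVC(C=C_value, kernel=kernel_type, gamma=gamma_value))])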
Example #7
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    clf = neighbors.KNeighborsClassifier(weights='distance')

    clf.fit(X_train, y_train)

    print("\n\n")
    print("Parameters of Kneighbors", clf.get_params())
    confidence = clf.score(X_test, y_test)
    print("Accuracy of Kneighbors", confidence)
    prediction = clf.predict(X_test)
    print("Predicted Spread of Kneighbors:", Counter(prediction))
    print("\n\n")

    print("Decision Tree")
    clf1 = DecisionTreeClassifier(max_depth=4)
    clf1.fit(X_train, y_train)
    print("Parameters of Decision Tree", clf1.get_params())
    print("Accuracy of Decision Tree", clf1.score(X_test, y_test))
    print("Predicted Spread of Decision Tree", Counter(clf1.predict(X_test)))
    print("\n\n")

    print("RandomForest")
    clf2 = RandomForestClassifier()
    clf2.fit(X_train, y_train)
    print("Parameters of RandomForest", clf2.get_params())
    print("Accuracy of RandomForest", clf2.score(X_test, y_test))
    print("Predicted Spread of RandomForest", Counter(clf2.predict(X_test)))

    print("Ensemble")
    clfn = VotingClassifier([('lsvc', svm.LinearSVC()),
                             ('knn', neighbors.KNeighborsClassifier()),
                             ('rfor', RandomForestClassifier())])

    clfn.fit(X_train, y_train)
    confidence = clfn.score(X_test, y_test)
    print("Accuracy of Ensembles", confidence)
    prediction = clfn.predict(X_test)

    print("Predicted Spread of ensembles:", Counter(prediction))

    return confidence
Example #8
def mlearn(ticker):
    #X is percent change, y is target classification: 1,-1,0
    X, y, df = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    clf = neighbors.KNeighborsClassifier()
    #sklearn has flags for linearSVC, KNN, random forest
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy', confidence)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))

    return confidence
Example #9
def train(x, y):
    logging.debug("X sample: \ {} ".format(len(x.shape)))
    logging.debug("y sample: \ {} ".format(len(y.shape)))

    # random shuffle and split
    test_size = int(len(y) * 0.2)
    x_train, x_test, y_train, y_test = x[test_size:], x[:test_size], y[test_size:], y[:test_size]

    # combine the predictions of several base estimators
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    clf.fit(x_train, y_train)

    # test data prediction
    np.set_printoptions(precision=2)
    confidence = clf.score(x_test, y_test)
    print('accuracy:', confidence)
    return confidence, clf
def do_ml(ticker):
    global clf
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:', confidence)
    predictions = clf.predict(X_test)
    print(np.shape(X_test))
    plt.plot(X_test, y_test, '-r')
    plt.show()
    print('predicted class counts:', Counter(predictions))
    print()
    return confidence
Example #11
def do_ml(ticker):
    X, y, fileDataSet = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    #clf = neighbors.KNeighborsClassifier()
    # Replace the single classifier with a voting classifier:
    # VotingClassifier takes a list of (name, classifier) tuples
    # (here: linear SVC, k-nearest neighbors, and random forest)
    #clf = VotingClassifier([('lsvc', svm.LinearSVC()),
    clf = VotingClassifier([('lsvc', LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy', confidence)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))

    return confidence
Example #12
def train_test(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:', confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:', Counter(predictions))
    #print()
    #print()

    #with open("clf.pickle","wb") as f:
    #    pickle.dump(clf,f)
    return predictions[-1], confidence
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
                                                                         y,
                                                                         test_size = 0.25)

    #clf = neighbors.KNeighborsClassifier()
    clf = VotingClassifier([("lsvc", svm.LinearSVC()),
                            ("knn", neighbors.KNeighborsClassifier()),
                            ("rfor", RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)
    print("Predicted Spread", Counter(predictions))
    print("Predicted Accuracy", confidence)

    return confidence
Example #14
def machining_the_data(stock):
    stock_data = precent_change(stock)
    X = np.array(stock_data.drop(['label'],1))
    y = np.array(stock_data['label'])
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = .95)

    clf = VotingClassifier([('lsvc',svm.LinearSVC()),
                            ('knn',neighbors.KNeighborsClassifier()),
                            ('rfor',RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)
    print(X_test)
    
    predictions = clf.predict(np.array([[1.44231225, -9.70757936, -1.00000000],
                                        [2.96192498, -2.77678573,  1.00000000],
                                        [2.50403844,  6.29763054, -1.00000000]]))
    print(predictions)
Example #15
class VotingClassifier3(AlgorithmInterface):
    def __init__(self, rfa, svma, lra):
        super(VotingClassifier3, self).__init__()
        self.accuracy_score = 0
        self.classifier = VotingClassifier(estimators=[(
            'rfa', rfa.classifier), ('svma',
                                     svma.classifier), ('lra',
                                                        lra.classifier)])

    def feature_engineering(self):
        self.convert_symbolic_feature_into_continuous()

    def train_phase(self):
        # note: this fits on test_data/test_label; training data would normally be used here
        self.classifier.fit(self.test_data, self.test_label)

    def test_phase(self):
        self.accuracy_score = self.classifier.score(self.test_data,
                                                    self.test_label)
        print("准确度: %f" % self.accuracy_score)
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)
    #clf = neighbors.KNeighborsClassifier()
    ## combine 3 classifiers and let them vote on each prediction
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy:', confidence)
    predictions = clf.predict(X_test)
    ## count the different predicted classes
    print('Predicted spread:', Counter(predictions))

    return confidence
def ml_operations(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    # classifier = neighbors.KNeighborsClassifier()
    classifier = VotingClassifier([('linearsvc', svm.LinearSVC()),
                                   ('randforest', RandomForestClassifier()),
                                   ('knearest',
                                    neighbors.KNeighborsClassifier())])
    classifier.fit(X_train, y_train)

    confidence = classifier.score(X_test, y_test)
    print('Accuracy', confidence)
    predictions = classifier.predict(X_test)

    print('Predicted spread', Counter(predictions))

    return confidence
Example #18
def voting(X_train, y_train, X_test, y_test):
    """Predict Dropouts using Voting Classifier with RandomForestClassifier, LogisticRegression, and XGBClassifier

    Args: 
        X_train: Training feature vetors
        y_train: Training label vetors
        X_test: Testing feature vetors
        y_test: Testing label vetors
    Returns:
        None, printing out the prediction results
    """
    t0 = time.time()
    clf1 = RandomForestClassifier(n_estimators=200,
                                  max_depth=12,
                                  random_state=0,
                                  min_samples_split=3,
                                  n_jobs=-1)
    clf2 = LogisticRegression(tol=1e-3, C=1.5, random_state=0)
    clf3 = XGBClassifier(n_estimators=200,
                         max_depth=6,
                         learning_rate=0.05,
                         min_child_weight=2,
                         n_jobs=-1,
                         max_delta_step=1,
                         objective='binary:logistic',
                         gamma=3,
                         subsample=1)

    clf = VotingClassifier(estimators=[('rf', clf1), ('lr', clf2),
                                       ('xgb', clf3)],
                           voting='hard')
    clf = clf.fit(X_train, y_train)
    expected = y_test
    predicted = clf.predict(X_test)

    print('Classifier: %s\n' % (clf, ))
    print('Classification report: \n %s \n' %
          (metrics.classification_report(expected, predicted), ))
    print('Confusion matrix:\n%s\n' %
          metrics.confusion_matrix(expected, predicted))
    print('Testing Score: %f' % clf.score(X_test, y_test))
    print('Time: %f seconds \n' % (time.time() - t0))
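A possible soft-voting variant of the ensemble above (sketch): RandomForestClassifier, LogisticRegression, and XGBClassifier all expose predict_proba, so the same estimators can be combined by averaging probabilities; the weights shown are illustrative, not tuned.

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

def voting_soft(X_train, y_train, X_test, y_test, weights=(2, 1, 2)):
    clf1 = RandomForestClassifier(n_estimators=200, max_depth=12, random_state=0,
                                  min_samples_split=3, n_jobs=-1)
    clf2 = LogisticRegression(tol=1e-3, C=1.5, random_state=0)
    clf3 = XGBClassifier(n_estimators=200, max_depth=6, learning_rate=0.05, n_jobs=-1)
    clf = VotingClassifier(estimators=[('rf', clf1), ('lr', clf2), ('xgb', clf3)],
                           voting='soft', weights=list(weights))
    clf.fit(X_train, y_train)
    print('Soft-voting test score: %f' % clf.score(X_test, y_test))
    return clf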
Example #19
def do_ml(ticker):

    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
                                                                         y,
                                                                         test_size=0.25)
    # defining classifiers used in voting classifier
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    # fit input (X_train: pct_change) to target (Y_train: 1, 0, -1)
    clf.fit(X_train, y_train)  # train classifier
    confidence = clf.score(X_test, y_test)  # get accuracy score
    print('accuracy:', confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:', Counter(predictions))
    print()
    print()
    return confidence
Example #20
    def evaluate(self):
        for ticker in self.tickers:
            self.ticker = ticker

            self.add_data_for_label_to_stock_table()
            self.create_features_and_label()

            features_train, features_test, label_train, label_test = train_test_split(
                self.features, self.label, test_size=0.25)

            clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                                    ('knn', neighbors.KNeighborsClassifier()),
                                    ('rfor',
                                     RandomForestClassifier(n_estimators=10))])
            clf.fit(features_train, label_train)

            confidence = clf.score(features_test, label_test)
            predictions = clf.predict(features_test)

            self.result_output(confidence, predictions)
Example #21
    def _ensemble_model(rf_model, knn_model, X_train, y_train, X_test, y_test):

        # Create a list of (name, model) tuples for the base models
        estimators = [('knn', knn_model), ('rf', rf_model)]

        # Create our voting classifier, inputting our models
        ensemble = VotingClassifier(estimators, voting='hard')

        # fit model to training data
        ensemble.fit(X_train, y_train)

        # test our model on the test data
        print(ensemble.score(X_test, y_test))

        prediction = ensemble.predict(X_test)

        print(classification_report(y_test, prediction))
        print(confusion_matrix(y_test, prediction))

        return ensemble
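A hypothetical call to _ensemble_model (sketch), treating it as a standalone function with synthetic data: VotingClassifier clones and refits the base estimators inside ensemble.fit, so unfitted models can be passed in.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

Xs, ys = make_classification(n_samples=400, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(Xs, ys, test_size=0.25, random_state=0)
ensemble = _ensemble_model(RandomForestClassifier(n_estimators=100),
                           KNeighborsClassifier(n_neighbors=5),
                           X_tr, y_tr, X_te, y_te)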
Example #22
def mlClassifiers(company):
    x, y, df_final, symbols = get_train_data(company)
    Xtrain, Xtest, Ytrain, Ytest = cross_validation.train_test_split(
        x, y, test_size=0.2)
    X = Xtrain.astype(int)
    Y = Ytrain.astype(int)
    Xt = Xtest.astype(int)
    Yt = Ytest.astype(int)

    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rf', RandomForestClassifier())])
    clf.fit(X, Y)
    accuracy = clf.score(Xt, Yt)
    print('Accuracy', accuracy)
    prediction = clf.predict(Xt)
    print('prediction spread:', Counter(prediction))
    backtest(df_final, company)
    backtest_result()
    return accuracy
Example #23
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25)
    #lsvc = linear support vector classifier, knn = k-nearest neighbors
    #rfor = random forest
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)  # train model
    confidence = clf.score(X_test, y_test)
    print('Accuracy:', confidence)

    predictions = clf.predict(
        X_test)  # can be called by itself with pickle to have all pred
    print('Predicted spread:', Counter(predictions))

    return confidence
Example #24
def doML(ticker):
    X, y, df = extractFeaturesets(ticker)

    XTrain, XTest, yTrain, yTest = cross_validation.train_test_split(
        X, y, test_size=0.25)

    clf = neighbors.KNeighborsClassifier()
    clf = VotingClassifier([("lsvc", svm.LinearSVC()),
                            ("Knn", neighbors.KNeighborsClassifier()),
                            ("Rfor", RandomForestClassifier())])

    clf.fit(XTrain, yTrain)
    confidence = clf.score(XTest, yTest)
    print("Accuracy: ", confidence)

    predictions = clf.predict(XTest)

    print("Predicted spread: ", Counter(predictions))

    return confidence
Example #25
def ensemble_(feat, tar, split):
    scaler = MinMaxScaler()
    x_tr,x_te,y_tr,y_te = train_test_split(feat,tar,test_size = split,shuffle = True)
    scaler.fit(x_tr)
    x_tr = scaler.transform(x_tr)
    x_te = scaler.transform(x_te)
    
    knn = KNeighborsClassifier()
    params_knn = {'n_neighbors': np.arange(1, 25)}
    knn_gs = GridSearchCV(knn, params_knn, cv=5)
    knn_gs.fit(x_tr, y_tr)
    knn_best = knn_gs.best_estimator_
    print(knn_gs.best_params_)
    
    rf = RandomForestClassifier()
    params_rf = {'n_estimators': [50, 100, 200,300,400]}
    rf_gs = GridSearchCV(rf, params_rf, cv=5)
    rf_gs.fit(x_tr, y_tr)
    rf_best = rf_gs.best_estimator_
    print(rf_gs.best_params_)
    
    
    log_reg = LogisticRegression()
    log_reg.fit(x_tr, y_tr)
    
    print('knn: {}'.format(knn_best.score(x_te, y_te)))
    print('rf: {}'.format(rf_best.score(x_te, y_te)))
    print('log_reg: {}'.format(log_reg.score(x_te, y_te)))
    
    estimators=[('knn', knn_best), ('rf', rf_best), ('log_reg', log_reg)]
    ensemble = VotingClassifier(estimators, voting='hard')
    ensemble.fit(x_tr, y_tr)
    print("ensemble voting score: ",str(ensemble.score(x_te, y_te)))
    
    ensemble_bagging = BaggingClassifier(base_estimator=RandomForestClassifier(), n_estimators=10) 
    ensemble_bagging.fit(x_tr, y_tr)
    print("ensemble bagging score: ",str(ensemble_bagging.score(x_te, y_te)))
    
    ensemble_stacking = StackingClassifier(estimators,LogisticRegression())
    ensemble_stacking.fit(x_tr, y_tr)
    print("ensemble stacking score: ", str(ensemble_stacking.score(x_te, y_te)))
Example #26
def main():
    train_dataset = pd.read_csv("../data/train.csv")
    test_dataset = pd.read_csv("../data/test.csv")

    # pre-processing
    X_train_processed, Y_train_processed, test_processed = preprocessor(
        train_dataset,
        test_dataset,
        fill_age_with='advanced_median_1',
        fill_cabin_with='X',
        dropPassengerID=False,
        dropName=True,
        dropTicket=True)

    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train_processed.drop(['PassengerId'], axis=1),
        Y_train_processed,
        test_size=0.2,
        random_state=np.random.seed())

    # log_clf = LogisticRegression(random_state=42)
    rnd_clf = RandomForestClassifier(random_state=42)
    svm_clf = SVC(random_state=42)
    gbm_clf = GradientBoostingClassifier(random_state=42)
    # cat_clf = CatBoostClassifier(random_state=42)
    xg_clf = XGBClassifier(random_state=42)
    voting_clf = VotingClassifier(estimators=[
        ('gbm', gbm_clf), ('rnd', rnd_clf), ('svm', svm_clf), ('xg', xg_clf)
    ],
                                  voting='hard')
    voting_clf.fit(X_train, y_train)
    print("Train score: {0.2f}", voting_clf.score(X_train, y_train))
    print("Valid score: {0.2f}", voting_clf.score(X_valid, y_valid))
    v = voting_clf.predict(test_processed.drop('PassengerId', axis=1))

    V = pd.DataFrame({
        'PassengerId': test_dataset['PassengerId'],
        'Survived': v
    })

    V.to_csv('../submission/vc_advanced.csv', index=False)
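An optional validation step that could sit inside main() before writing the submission (sketch): cross-validate the hard-voting ensemble on the processed training frame.

from sklearn.model_selection import cross_val_score

def report_cv(voting_clf, X_train, y_train, cv=5):
    # mean/std of fold accuracies for the voting ensemble
    scores = cross_val_score(voting_clf, X_train, y_train, cv=cv)
    print("CV accuracy: {:.3f} +/- {:.3f}".format(scores.mean(), scores.std()))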
Example #27
File: hello.py | Project: Saad-Mahmud/EasyML
def algo(lr, dt, rf, gnb):
    print('{} {} {} {}'.format(lr, dt, rf, gnb))
    X = iris.data
    Y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        stratify=Y,
                                                        random_state=42)

    ap = LogisticRegression()
    #ap2 = DecisionTreeClassifier()('dt', ap2),
    #ap3 = RandomForestClassifier(n_estimators=15)('rf', ap3),
    ap5 = GaussianNB()
    dt = VotingClassifier(estimators=[('lr', ap), ('gnb', ap5)],
                          voting='soft',
                          weights=[1, 1])

    t0 = time()
    dt.fit(X_train, y_train)
    Ac = dt.score(X_test, y_test)
    Tm = time() - t0
Example #28
def do_time_ml(ticker):
    X, y = extract_featuresets(ticker)
    # without test_size = the line crashes
    tscv = TimeSeriesSplit(n_splits=3)
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier()),
                            ('gap', GaussianProcessClassifier()),
                            ('bag', BaggingClassifier()),
                            ('nn', MLPClassifier(max_iter=2000))])
    for train_index, test_index in tscv.split(X):
        print(train_index, test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # need to have () after the classifier otherwise it gives an error
        # TypeError: get_params() missing 1 required positional argument: 'self'
        clf.fit(X_train, y_train)
        confidence = clf.score(X_test, y_test)
        predictions = clf.predict(X_test)
        print('Accuracy:', confidence)
        print("Predicted Spread:", Counter(predictions))
Example #29
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size = 0.25)

    # class fire:
    # clf = neighbors.KNeighborsClassifier()
    # changing this:
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    # y_train is 0, -1, or 1:
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('acc: ', confidence)

    predictions = clf.predict(X_test)

    print('predictions:', Counter(predictions))

    return confidence
Example #30
def do_ml_vote(ticker):
    features, target, df = extract_featuresets(ticker)

    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        test_size=0.25,
                                                        stratify=target)

    # x_train is the percent change
    classifier = VotingClassifier([('lvsc', svm.LinearSVC()),
                                   ('knn', neighbors.KNeighborsClassifier()),
                                   ('rfor', RandomForestClassifier())])

    classifier.fit(x_train, y_train)

    confidence = classifier.score(x_test, y_test)
    print('Accuracy', confidence)

    predictions = classifier.predict(x_test)
    print('Prediction Spread: ', Counter(predictions))

    return (confidence)
Example #31
def analysis_stock(tickers, df, start, end):
    for ticker in tickers:
        X, y, df = extract_featuresets(df, ticker)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25)

        #clf = neighbors.KNeighborsClassifier()
        clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                                ('knn', neighbors.KNeighborsClassifier()),
                                ('rfor', RandomForestClassifier())])

        clf.fit(X_train, y_train)
        confidence = clf.score(X_test, y_test)

        predictions = clf.predict(X_test)

        if (confidence > 0.6):
            print('accuracy:', confidence)
            print('predicted class counts:', Counter(predictions))
            print('Recommend investment for next 5-7 days:', ticker)
            print('Predictions for next 5-7 days: ', clf.predict(X[-1:]))
Example #32
def classifier5():
# Results
# 46.21 Seconds to train SVC...
# Test Accuracy of SVC =  0.9851
	clf1 = LogisticRegression(random_state=1)

	clf = tree.DecisionTreeClassifier(criterion="entropy", max_depth=1)
	svc = LinearSVC(C=0.1)
	svc.probability = True  # note: has no effect, LinearSVC does not implement predict_proba

	eclf1 = VotingClassifier(estimators=[('svc', svc), ('clf1', clf1)], voting='hard')

	# Check the training time for the SVC
	t=time.time()
	eclf1.fit(X_train, y_train)
	t2 = time.time()
	print(round(t2-t, 2), 'Seconds to train SVC...')
	# Check the score of the SVC
	acc=eclf1.score(X_test, y_test)
	print('Test Accuracy of SVC = ', round(acc, 4))
	# Check the prediction time for a single sample
	t=time.time()
def use_std_vote_clf(X, y):
    '''
    estimators takes a list; each element of the list is a ("name", model) tuple
    '''
    X_train, X_test, y_train, y_test = get_train_test(X, y)

    voting_clf = VotingClassifier(estimators=[
        ("log_clf", LogisticRegression()),
        ("svm_clf", SVC()),
        ("dt_clf", DecisionTreeClassifier())
    ], voting="hard")

    voting_clf.fit(X_train, y_train)

    score = voting_clf.score(X_test, y_test)

    print("sklearn_pipe_voting_classifier_score=", score)
Example #34
File: ml.py | Project: Felipeforerome/ACT
def do_ml(ticker):
    """
    Runs 3 machine learning algorithm, inside a Voting classifier, to learn when it should buy sell of hold
    :param ticker: Ticker of the Cryptocurrency to undergo this process
    :return: returns the confidence of the model describing the data
    """
    X, y, df = extract_features_sets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)

    print('Prediction:', Counter(predictions))
    print('Accuracy', confidence)

    return confidence
tree6 = GBC()
tree6.fit(xtrain,ytrain1)
print(tree6.score(xtest,ytest1))
# look at n_estimators and change that along with changing warmstart to be true


# In[31]:

# votingClassifiers combine completely different machine learning classifiers and use a majority vote
clff1 = SVC()
clff2 = RFC(bootstrap=False)
clff3 = ETC()
clff4 = neighbors.KNeighborsClassifier()
clff5 = quadda()
from sklearn.ensemble import VotingClassifier
from sklearn import cross_validation
eclf = VotingClassifier(estimators = [('svc',clff1),('rfc',clff2),('etc',clff3),('knn',clff4),('qda',clff5)])
eclf = eclf.fit(xtrain,ytrain1)
print(eclf.score(xtest,ytest1))
# for claf, label in zip([clff1,clff2,clff3,clff4,clff5,eclf],['SVC','RFC','ETC','KNN','QDA','Ensemble']):
#     cla
#     scores = crossvalidation.cross_val_score(claf,xtrain,ytrain1,scoring='accuracy')
#     print ()
    


Example #36
model = runModel(model=model, trainX=X_train[0:30000], trainY=y_train[0:30000],
                 optimize=False, parameters=None, scoring='roc_auc')


print "Applying Model ..."
start = time()
y_pred = model.predict(X_test)
print("Model took %.2f seconds to predict vals" % (time() - start))


### Evaluation
print "Scoring Classifier..."
start = time()

score = model.score(X_test, y_test)
recall = metrics.recall_score(y_test, y_pred, average='binary')
auc = metrics.roc_auc_score(y_test, y_pred, average='macro')
confusion = metrics.confusion_matrix(y_test, y_pred, labels=[0, 1])

print "Score: \t \t Recall: \t AUC:\n", score, recall, auc
print("Model took %.2f seconds to score" % (time() - start))

if plot_roc:

    fpr, tpr, thrsh = metrics.roc_curve(y_test, y_pred, pos_label=1)

    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    SelectFwe(score_func=f_classif, alpha=0.04),
    RandomForestClassifier(criterion="entropy",  max_features=0.6000000000000001, min_samples_split=5, n_estimators=100)
)

# 0.82
#clf4 = exported_pipeline = make_pipeline(
#    StackingEstimator(estimator=LogisticRegression(C=1.0, dual=True)),
#    RandomForestClassifier(max_features=0.6000000000000001, min_samples_leaf=20, min_samples_split=18)
#)

#eclf1 = VotingClassifier(estimators=[
#         ('lr', clf1), ('rf', clf2), ('gnb', clf3), ('rnd', clf4)], voting='hard')
eclf1 = VotingClassifier(estimators=[
         ('lr', clf1), ('gnb', clf2), ('rnd', clf3)], voting='hard')
eclf1 = eclf1.fit(X_train, y_train)
print(eclf1.score(X_test, y_test))

model1 = clf1.fit(X_train, y_train)
print(model1.score(X_test, y_test))

model2 = clf2.fit(X_train, y_train)
print(model2.score(X_test, y_test))

model3 = clf3.fit(X_train, y_train)
print(model3.score(X_test, y_test))

#model4 = clf4.fit(X_train, y_train)
#print(model4.score(X_test, y_test))

#tpot = TPOTClassifier(generations=20, population_size=50, verbosity=2)
#tpot.fit(X_train, y_train)
Example #38
"orig_destination_distance", "srch_ci", "srch_co"]
features = [column for column in features if column not in removelist]
print("The features considered are:")
print(features)

start_time = timeit.default_timer()

# Create and fit a decision tree to the set of data in those features
y = trainFull["hotel_cluster"] 
X = trainFull[features]

rf = RandomForestClassifier(n_estimators=20, n_jobs=-1, max_features=None, min_samples_split=250)
ovr = OneVsRestClassifier(RandomForestClassifier(n_estimators=10, n_jobs=-1, max_features=None, min_samples_split=250), n_jobs=-1)
dt = DecisionTreeClassifier(min_samples_split=250, criterion="entropy")

vc = VotingClassifier(estimators=[('rf', rf), ('ovr', ovr), ('dt', dt)], voting='hard')
vc.fit(X, y)

# Measure ability to predict the right hotel cluster for a new subset
testX = test_set[features]
testy = test_set["hotel_cluster"]
prediction = vc.predict(testX)

report = classification_report(testy, prediction, digits=5)
print(report)

elapsed = timeit.default_timer() - start_time
print(elapsed)

score = vc.score(testX, testy)
print("Score is " + str(score))
Example #39
def myclassify(numfiers,xtrain,ytrain,xtest,ytest):
    count = 0
    print numfiers

    ytrain = np.ravel(ytrain)
    ytest = np.ravel(ytest)


    bagging2 = BaggingClassifier(ETC(),bootstrap=False,bootstrap_features=False)
    bagging2.fit(xtrain,ytrain)
    #print bagging2.score(xtest,ytest)
    count += 1
    classifiers = [bagging2.score(xtest,ytest)]
    print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%"


    if count < numfiers:

        tree2 = ETC()
        tree2.fit(xtrain,ytrain)
        #print tree2.fit(xtrain,ytrain)
        #print tree2.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,tree2.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        bagging1 = BaggingClassifier(ETC())
        bagging1.fit(xtrain,ytrain)
        #print bagging1.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,bagging1.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        # votingClassifiers combine completely different machine learning classifiers and use a majority vote
        clff1 = SVC()
        clff2 = RFC(bootstrap=False)
        clff3 = ETC()
        clff4 = neighbors.KNeighborsClassifier()
        clff5 = quadda()



        eclf = VotingClassifier(estimators = [('svc',clff1),('rfc',clff2),('etc',clff3),('knn',clff4),('qda',clff5)])
        eclf = eclf.fit(xtrain,ytrain)
        #print(eclf.score(xtest,ytest))
        # for claf, label in zip([clff1,clff2,clff3,clff4,clff5,eclf],['SVC','RFC','ETC','KNN','QDA','Ensemble']):
        #     cla
        #     scores = crossvalidation.cross_val_score(claf,xtrain,ytrain,scoring='accuracy')
        #     print ()
        count+=1
        classifiers = np.append(classifiers,eclf.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"


    if count < numfiers:
        svc1 = SVC()
        svc1.fit(xtrain,ytrain)
        dec = svc1.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,svc1.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        # Quadradic discriminant analysis - classifier with quadratic decision boundary -
        qda = quadda()
        qda.fit(xtrain,ytrain)
        #print(qda.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,qda.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"



    if count < numfiers:

        tree1 = DTC()
        tree1.fit(xtrain,ytrain)
        #print tree1.fit(xtrain,ytrain)
        #print tree1.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,tree1.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        knn1 = neighbors.KNeighborsClassifier() # this classifies based on the k nearest neighbors, where k is defined by the user.
        knn1.fit(xtrain,ytrain)
        #print(knn1.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,knn1.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"


    if count < numfiers:
        # linear discriminant analysis - classifier with linear decision boundary -
        lda = linda()
        lda.fit(xtrain,ytrain)
        #print(lda.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,lda.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        tree3 = RFC()
        tree3.fit(xtrain,ytrain)
        #print tree3.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,tree3.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        bagging3 = BaggingClassifier(RFC(),bootstrap=False,bootstrap_features=False)
        bagging3.fit(xtrain,ytrain)
        #print bagging3.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,bagging3.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"


    if count < numfiers:
        bagging4 = BaggingClassifier(SVC(),bootstrap=False,bootstrap_features=False)
        bagging4.fit(xtrain,ytrain)
        #print bagging4.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,bagging4.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        tree4 = RFC(bootstrap=False)
        tree4.fit(xtrain,ytrain)
        #print tree4.score(xtest,ytest)
        count+=1
        classifiers = np.append(classifiers,tree4.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        tree6 = GBC()
        tree6.fit(xtrain,ytrain)
        #print(tree6.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,tree6.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        knn2 = neighbors.KNeighborsClassifier(n_neighbors = 10)
        knn2.fit(xtrain,ytrain)
        #print(knn2.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,knn2.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        knn3 = neighbors.KNeighborsClassifier(n_neighbors = 3)
        knn3.fit(xtrain,ytrain)
        #print(knn3.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,knn3.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        knn4 = neighbors.KNeighborsClassifier(algorithm = 'ball_tree')
        knn4.fit(xtrain,ytrain)
        #print(knn4.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,knn4.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        knn5 = neighbors.KNeighborsClassifier(algorithm = 'kd_tree')
        knn5.fit(xtrain,ytrain)
        #print(knn5.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,knn5.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        ncc1 = NearestCentroid()
        ncc1.fit(xtrain,ytrain)
        #print (ncc1.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,ncc1.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
    # Nearest shrunken Centroid
        for shrinkage in [None,0.05,0.1,0.2,0.3,0.4,0.5]:
            ncc2 = NearestCentroid(shrink_threshold = shrinkage)
            ncc2.fit(xtrain,ytrain)
            #print(ncc2.score(xtest,ytest))

        count+=1
        classifiers = np.append(classifiers,ncc2.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%" + "   " + str(numfiers-count) + "classifiers left to train"

    if count < numfiers:
        tree5 = ABC()
        tree5.fit(xtrain,ytrain)
        #print(tree5.score(xtest,ytest))
        count+=1
        classifiers = np.append(classifiers,tree5.score(xtest,ytest))
        print "percentage classifcation complete: %s" % str(round(100*(float(count)/numfiers))) + "%"

    classifierlabel = ["BaggingETC (with bootstraps set to false)","ETC","BaggingETC","Voting Classifier","svm","QDA","DTC","KNN (default)","LDA","RFC",
                       "BaggingRFC (with bootstraps set to false)","BaggingSVC (with bootstraps set to false)","RFC (bootstrap false)","GBC",
                        "knn (n_neighbors = 10)","knn (n_neighbors = 3)","knn (ball tree algorithm)","knn (kd_tree algorithm)",
                       "Nearest Centroid","Shrunken Centroid?","ABC"]


    classifierlabel = classifierlabel[:len(classifiers)]

    for i in range(len(classifiers)):


        print ("{} classifier has percent correct {}".format(classifierlabel[i],classifiers[i]))
Example #40
cl3 = GradientBoostingClassifier(n_estimators=1000, learning_rate=1,
                                 max_depth=10, random_state=0, min_samples_split=5)
cl4 = GaussianNB()
cl5 = MLPClassifier(solver='adam', alpha=0.01, max_iter=500,
                    learning_rate='constant', hidden_layer_sizes=(400,),
                    random_state=0, learning_rate_init=1e-2,
                    activation='logistic')


eclf1 = VotingClassifier(estimators=[
    ('rf', cl1), ('svc', cl2), ('gbc', cl3),
    ('gnb', cl4), ('mlp', cl5)
], voting='hard')

eclf1 = eclf1.fit(X, Y.values.ravel())
print ("Accuracy of Voting Ensemble: "+str(eclf1.score(P,Q)))



clf5 = SGDClassifier(loss="perceptron", penalty="elasticnet", 
	random_state=0).fit(X, Y.values.ravel())
print ("Accuracy of SGDClassifier: "+str(clf5.score(P,Q)))

gbc = GradientBoostingClassifier(loss='exponential').fit(X, Y.values.ravel())
adaboost = AdaBoostClassifier(n_estimators=10000, learning_rate=100).fit(X, Y.values.ravel())
print ("Accuracy of GBC: "+str(gbc.score(P,Q)))
print ("Accuracy of Adaboost: "+str(adaboost.score(P,Q)))


### Calculate MSE of different models
rf = clf.predict(P)