# AdaBoost demo on the iris data. The train/test split below is assumed,
# since the original snippet relies on X_train/X_test defined elsewhere.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=28)

# A depth-1 tree (decision stump) as the weak learner.
dtree = DecisionTreeClassifier(max_depth=1)

# AdaBoostClassifier constructor, for reference:
#   base_estimator=None    -- type of the sub-model (weak learner)
#   n_estimators=50        -- number of sub-models
#   learning_rate=1.       -- learning step / shrinkage factor
#   algorithm='SAMME.R'
#   random_state=None
algo = AdaBoostClassifier(base_estimator=dtree, n_estimators=10)

# Train the model.
algo.fit(X_train, y_train)

# Evaluate the model.
print('Accuracy on the training set: {}'.format(algo.score(X_train, y_train)))
print('Accuracy on the test set: {}'.format(algo.score(X_test, y_test)))

x_test = [[6.9, 3.1, 5.1, 2.3], [6.1, 2.8, 4.0, 1.3], [5.2, 3.4, 1.4, 0.2]]
print('Sample predictions:')
print(algo.predict(x_test))
print('Sample predicted probabilities:')
print(algo.predict_proba(x_test))
print('Log of the sample predicted probabilities:')
print(algo.predict_log_proba(x_test))

print('All trained sub-models:\n{}'.format(algo.estimators_))

x_test = [[6.9, 3.1, 5.1, 2.3], [6.1, 2.8, 4.0, 1.3], [5.2, 3.4, 2.9, 0.8]]
generator = algo.staged_predict(x_test)
print('Staged predictions (one row per boosting stage):')
for i in generator:
    print(i)

print('Feature importances: {}'.format(algo.feature_importances_))
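A short hedged follow-up, not part of the original demo: staged_score can track held-out accuracy per boosting stage, which is one way to choose n_estimators. The sketch reuses the algo, X_test, and y_test names from the snippet above.

# Sketch: pick the boosting stage with the best held-out accuracy.
# Assumes `algo`, `X_test`, `y_test` from the demo above.
import numpy as np

staged_acc = list(algo.staged_score(X_test, y_test))
best_stage = int(np.argmax(staged_acc)) + 1  # stages are 1-indexed
print('best n_estimators on this split: {} (accuracy {:.3f})'.format(
    best_stage, max(staged_acc)))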
# Imports added so the test is self-contained (the original relies on
# module-level imports from the surrounding test file).
import numpy as np
from numpy.testing import assert_array_equal
from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix
from sklearn import datasets
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


def test_sparse_classification():
    # Check classification with sparse input.

    class CustomSVC(SVC):
        """SVC variant that records the nature of the training set."""

        def fit(self, X, y, sample_weight=None):
            """Modification on fit carries data type for later verification."""
            super(CustomSVC, self).fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self

    X, y = datasets.make_multilabel_classification(n_classes=1, n_samples=15,
                                                   n_features=5,
                                                   random_state=42)
    # Flatten y to a 1d array
    y = np.ravel(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
                          dok_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        # Trained on sparse format
        sparse_classifier = AdaBoostClassifier(
            base_estimator=CustomSVC(probability=True),
            random_state=1,
            algorithm="SAMME").fit(X_train_sparse, y_train)

        # Trained on dense format
        dense_classifier = AdaBoostClassifier(
            base_estimator=CustomSVC(probability=True),
            random_state=1,
            algorithm="SAMME").fit(X_train, y_train)

        # predict
        sparse_results = sparse_classifier.predict(X_test_sparse)
        dense_results = dense_classifier.predict(X_test)
        assert_array_equal(sparse_results, dense_results)

        # decision_function
        sparse_results = sparse_classifier.decision_function(X_test_sparse)
        dense_results = dense_classifier.decision_function(X_test)
        assert_array_equal(sparse_results, dense_results)

        # predict_log_proba
        sparse_results = sparse_classifier.predict_log_proba(X_test_sparse)
        dense_results = dense_classifier.predict_log_proba(X_test)
        assert_array_equal(sparse_results, dense_results)

        # predict_proba
        sparse_results = sparse_classifier.predict_proba(X_test_sparse)
        dense_results = dense_classifier.predict_proba(X_test)
        assert_array_equal(sparse_results, dense_results)

        # score
        sparse_results = sparse_classifier.score(X_test_sparse, y_test)
        dense_results = dense_classifier.score(X_test, y_test)
        assert_array_equal(sparse_results, dense_results)

        # staged_decision_function
        sparse_results = sparse_classifier.staged_decision_function(
            X_test_sparse)
        dense_results = dense_classifier.staged_decision_function(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        # staged_predict
        sparse_results = sparse_classifier.staged_predict(X_test_sparse)
        dense_results = dense_classifier.staged_predict(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        # staged_predict_proba
        sparse_results = sparse_classifier.staged_predict_proba(X_test_sparse)
        dense_results = dense_classifier.staged_predict_proba(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        # staged_score
        sparse_results = sparse_classifier.staged_score(X_test_sparse, y_test)
        dense_results = dense_classifier.staged_score(X_test, y_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        # Verify sparsity of data is maintained during training
        types = [i.data_type_ for i in sparse_classifier.estimators_]

        assert all([(t == csc_matrix or t == csr_matrix) for t in types])
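As a minimal standalone sketch of what the test above verifies (all names here are illustrative, not from the test suite): training AdaBoost on a sparse matrix should match training on the equivalent dense array.

# Illustrative sketch: dense vs. CSR training give matching predictions.
from scipy.sparse import csr_matrix
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier

X, y = make_classification(n_samples=100, n_features=8, random_state=0)
dense_clf = AdaBoostClassifier(algorithm="SAMME", random_state=1).fit(X, y)
sparse_clf = AdaBoostClassifier(algorithm="SAMME", random_state=1).fit(csr_matrix(X), y)
# With the default tree stumps, the two models should agree everywhere.
print((dense_clf.predict(X) == sparse_clf.predict(csr_matrix(X))).mean())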
# lale wrapper around scikit-learn's AdaBoostClassifier; _FitSpecProxy and
# SKLModel (the underlying sklearn estimator) come from the surrounding module.
class _AdaBoostClassifierImpl:
    def __init__(
        self,
        base_estimator=None,
        *,
        n_estimators=50,
        learning_rate=1.0,
        algorithm="SAMME.R",
        random_state=None,
    ):
        # Wrap the lale operator so scikit-learn can call its fit/predict.
        if base_estimator is None:
            estimator_impl = None
        else:
            estimator_impl = _FitSpecProxy(base_estimator)

        self._hyperparams = {
            "base_estimator": estimator_impl,
            "n_estimators": n_estimators,
            "learning_rate": learning_rate,
            "algorithm": algorithm,
            "random_state": random_state,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)
        # Keep the original (unwrapped) operator around for get_params.
        self._hyperparams["base_estimator"] = base_estimator

    def get_params(self, deep=True):
        out = self._wrapped_model.get_params(deep=deep)
        # we want to return the lale operator, not the underlying impl
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

    def fit(self, X, y=None):
        if isinstance(X, pd.DataFrame):
            # Re-attach column names, since AdaBoost hands plain numpy
            # arrays to the base estimator.
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=X.columns),
                inverse_func=None,
                check_inverse=False,
            )
            self._hyperparams["base_estimator"] = _FitSpecProxy(
                feature_transformer >> self._hyperparams["base_estimator"])
            self._wrapped_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def predict_log_proba(self, X):
        return self._wrapped_model.predict_log_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)

    def score(self, X, y, sample_weight=None):
        return self._wrapped_model.score(X, y, sample_weight)
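The same DataFrame-restoring idea can be sketched in plain scikit-learn, without lale's _FitSpecProxy and >> pipeline operator. FrameProxy below is a hypothetical stand-in, not the lale implementation: it rebuilds the DataFrame inside fit and forwards sample_weight, which AdaBoost requires of its base estimator.

# Sketch (plain scikit-learn stand-in for _FitSpecProxy above).
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier


class FrameProxy(BaseEstimator, ClassifierMixin):
    """Hypothetical proxy: restores DataFrame columns for the weak learner."""

    def __init__(self, estimator=None, columns=None):
        self.estimator = estimator
        self.columns = columns

    def fit(self, X, y, sample_weight=None):
        # AdaBoost passes numpy arrays; rebuild the DataFrame and forward
        # the boosting sample weights to the wrapped estimator.
        self.model_ = clone(self.estimator)
        self.model_.fit(pd.DataFrame(X, columns=self.columns), y,
                        sample_weight=sample_weight)
        self.classes_ = self.model_.classes_
        return self

    def predict(self, X):
        return self.model_.predict(pd.DataFrame(X, columns=self.columns))


df = pd.DataFrame({"a": [0.0, 1.0, 2.0, 3.0], "b": [1.0, 0.0, 1.0, 0.0]})
y = [0, 0, 1, 1]
proxy = FrameProxy(DecisionTreeClassifier(max_depth=1), columns=df.columns)
# SAMME only needs predict, so the proxy can skip predict_proba.
clf = AdaBoostClassifier(base_estimator=proxy, algorithm="SAMME").fit(df, y)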
# Evaluation fragment: relies on kmeans, tfidf, tfidf1, clf, clf1 and the
# module-level helpers (makeSamples, addDescriptors, calculteCounts)
# defined elsewhere in the original script.
testSamples = makeSamples(testFiles)
testDescriptors = []
addDescriptors(testDescriptors, testSamples)
testClusters = kmeans.predict(testDescriptors)
testCounts = lil_matrix((len(testSamples), CLUSTERS_NUMBER))
testCounts1 = lil_matrix((len(testSamples), 256))
calculteCounts(testSamples, testCounts, testCounts1, testClusters)
testCounts = csr_matrix(testCounts)
testCounts1 = csr_matrix(testCounts1)

_tfidf = tfidf.transform(testCounts)
_tfidf1 = tfidf1.transform(testCounts1)

weights = clf.predict_log_proba(_tfidf)
weights1 = clf1.predict_log_proba(_tfidf1)

predictions = []
for i in range(0, len(weights)):
    # Log-probability margin of the negative class (column 0) over the
    # positive class (column 1) for each of the two classifiers.
    w = weights[i][0] - weights[i][1]
    w1 = weights1[i][0] - weights1[i][1]
    pred = w < 0
    pred1 = w1 < 0
    # If the two classifiers disagree, let the summed margins decide.
    if pred != pred1:
        pred = w + w1 < 0
    predictions.append(pred)

match = 0
dismatch = 0
if len(testFiles) == len(predictions):
    log = open('log.txt', 'w')
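The disagreement rule in the loop above can be factored into a helper. A sketch (fuse_predictions is an illustrative name, not from the original script):

import numpy as np

def fuse_predictions(log_proba_a, log_proba_b):
    """Illustrative helper: per-sample fusion of two binary classifiers.

    Each argument is an (n_samples, 2) array of log-probabilities with
    column 0 = negative class, column 1 = positive class, as produced by
    predict_log_proba above.
    """
    margin_a = log_proba_a[:, 0] - log_proba_a[:, 1]
    margin_b = log_proba_b[:, 0] - log_proba_b[:, 1]
    pred_a = margin_a < 0
    pred_b = margin_b < 0
    # Where the two classifiers disagree, the summed margin decides.
    disagree = pred_a != pred_b
    fused = pred_a.copy()
    fused[disagree] = (margin_a + margin_b)[disagree] < 0
    return fused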
class PCR:
    def __init__(self):
        self.__clustersNumber = CLUSTERS_NUMBER
        self.__queue = Queue()
        self.__verbose = VERBOSE
        self.__useCache = USE_CACHE
        for i in range(FILE_LOAD_THREADS):
            t = Thread(target=self.__worker)
            t.daemon = True
            t.start()
        self.__kmeans = MiniBatchKMeans(n_clusters=self.__clustersNumber,
                                        random_state=CLUSTER_SEED,
                                        verbose=self.__verbose)
        self.__tfidf = TfidfTransformer()
        self.__tfidf1 = TfidfTransformer()
        self.__clf = AdaBoostClassifier(MultinomialNB(alpha=BAYES_ALPHA),
                                        n_estimators=ADA_BOOST_ESTIMATORS)
        self.__clf1 = AdaBoostClassifier(MultinomialNB(alpha=BAYES_ALPHA),
                                         n_estimators=ADA_BOOST_ESTIMATORS)

    def __worker(self):
        while True:
            task = self.__queue.get()
            func, args = task
            try:
                func(args)
            except Exception as e:
                print('EXCEPTION:', e)
            self.__queue.task_done()

    def train(self, positiveFiles, negativeFiles):
        cachedData = self.__loadCache()
        if cachedData is None:
            self.__log('loading positives')
            positiveSamples = self.__loadSamples(positiveFiles)
            self.__log('loading negatives')
            negativeSamples = self.__loadSamples(negativeFiles)
            totalDescriptors = []
            self.__addDescriptors(totalDescriptors, positiveSamples)
            self.__addDescriptors(totalDescriptors, negativeSamples)
            self.__kmeans.fit(totalDescriptors)
            clusters = self.__kmeans.predict(totalDescriptors)
            self.__printDistribution(clusters)
            self.__saveCache((positiveSamples, negativeSamples,
                              self.__kmeans, clusters))
        else:
            self.__log('using cache')
            positiveSamples, negativeSamples, self.__kmeans, clusters = cachedData
        totalSamplesNumber = len(negativeSamples) + len(positiveSamples)
        counts = lil_matrix((totalSamplesNumber, self.__clustersNumber))
        counts1 = lil_matrix((totalSamplesNumber, 256))
        self.__currentSample = 0
        self.__currentDescr = 0
        self.__calculteCounts(positiveSamples, counts, counts1, clusters)
        self.__calculteCounts(negativeSamples, counts, counts1, clusters)
        counts = csr_matrix(counts)
        counts1 = csr_matrix(counts1)
        self.__log('training bayes classifier')
        tfidf = self.__tfidf.fit_transform(counts)
        tfidf1 = self.__tfidf1.fit_transform(counts1)
        classes = [True] * len(positiveSamples) + [False] * len(negativeSamples)
        self.__clf.fit(tfidf, classes)
        self.__clf1.fit(tfidf1, classes)
        self.__log('training complete')

    def predict(self, files):
        self.__log('loading files')
        samples = self.__loadSamples(files)
        totalDescriptors = []
        self.__addDescriptors(totalDescriptors, samples)
        self.__log('predicting classes')
        clusters = self.__kmeans.predict(totalDescriptors)
        counts = lil_matrix((len(samples), self.__clustersNumber))
        counts1 = lil_matrix((len(samples), 256))
        self.__currentSample = 0
        self.__currentDescr = 0
        self.__calculteCounts(samples, counts, counts1, clusters)
        counts = csr_matrix(counts)
        counts1 = csr_matrix(counts1)
        tfidf = self.__tfidf.transform(counts)
        tfidf1 = self.__tfidf1.transform(counts1)
        self.__log('classifying')
        weights = self.__clf.predict_log_proba(tfidf.toarray())
        weights1 = self.__clf1.predict_log_proba(tfidf1.toarray())
        predictions = []
        for i in range(0, len(weights)):
            w = weights[i][0] - weights[i][1]
            w1 = weights1[i][0] - weights1[i][1]
            pred = w < 0
            pred1 = w1 < 0
            if pred != pred1:
                pred = w + w1 < 0
            predictions.append(pred)
        self.__log('prediction complete')
        return predictions

    def saveModel(self, fileName):
        data = pickle.dumps((self.__clustersNumber, self.__kmeans,
                             self.__tfidf, self.__tfidf1, self.__clf,
                             self.__clf1), -1)
        data = zlib.compress(data)
        open(fileName, 'wb').write(data)

    def loadModel(self, fileName):
        data = open(fileName, 'rb').read()
        data = zlib.decompress(data)
        data = pickle.loads(data)
        # Unpack the model tuple produced by saveModel.
        (self.__clustersNumber, self.__kmeans, self.__tfidf, self.__tfidf1,
         self.__clf, self.__clf1) = data

    def __log(self, message):
        if self.__verbose:
            print(message)

    def __saveCache(self, data):
        if not self.__useCache:
            return
        data = pickle.dumps(data, -1)
        data = zlib.compress(data)
        # Binary mode: the payload is compressed bytes.
        open('cache.bin', 'wb').write(data)

    def __loadCache(self):
        if not self.__useCache:
            return None
        if not os.path.isfile('cache.bin'):
            return None
        data = open('cache.bin', 'rb').read()
        data = zlib.decompress(data)
        data = pickle.loads(data)
        return data

    def __calculteCounts(self, samples, counts, counts1, clusters):
        # Build per-sample visual-word counts (counts) and color-histogram
        # counts (counts1).
        for s in samples:
            currentCounts = {}
            for d in s[0]:
                currentCounts[clusters[self.__currentDescr]] = \
                    currentCounts.get(clusters[self.__currentDescr], 0) + 1
                self.__currentDescr += 1
            for clu, cnt in currentCounts.items():
                counts[self.__currentSample, clu] = cnt
            for i, histCnt in enumerate(s[1]):
                counts1[self.__currentSample, i] = histCnt[0]
            self.__currentSample += 1

    def __printDistribution(self, clusters):
        if not self.__verbose:
            return
        distr = {}
        for c in clusters:
            distr[c] = distr.get(c, 0) + 1
        v = sorted(distr.values(), reverse=True)
        print('distribution:', v[0:15], '...', v[-15:])

    def __addDescriptors(self, totalDescriptors, samples):
        for sample in samples:
            for descriptor in sample[0]:
                totalDescriptors.append(descriptor)

    def __loadSamples(self, files):
        samples = [[]] * len(files)
        n = 0
        for f in files:
            self.__queue.put((self.__loadSingleSample, (f, samples, n)))
            n += 1
        self.__queue.join()
        if _g_removed:
            print(' === REMOVED = TERMINATE')
            sys.exit(44)
        return samples

    def __loadSingleSample(self, args):
        global _g_removed
        fileName, samples, sampleNum = args
        des, hist = self.__getFeatures(fileName)
        if des is None:
            print('ERROR: failed to load', fileName)
            os.remove(fileName)
            _g_removed = True
            # sys.exit(44)
            des = []
            hist = [[0]] * 256
        samples[sampleNum] = (des, hist)

    def __getFeatures(self, fileName):
        # crc32 requires bytes on Python 3.
        fid = 'cache/' + str(zlib.crc32(fileName.encode('utf-8')))
        self.__log('loading %s' % fileName)
        if os.path.isfile(fid):
            des, hist = pickle.loads(open(fid, 'rb').read())
        else:
            img = cv2.imread(fileName)
            if img.shape[1] > 1000:
                # Downscale wide images. The original used numpy's in-place
                # resize, which does not rescale pixel data.
                cf = 1000.0 / img.shape[1]
                img = cv2.resize(img, None, fx=cf, fy=cf)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # OpenCV 2.4-era API; modern OpenCV exposes SIFT via
            # cv2.SIFT_create and no longer ships an OpponentSIFT extractor.
            s = cv2.SIFT(nfeatures=400)
            d = cv2.DescriptorExtractor_create("OpponentSIFT")
            kp = s.detect(gray, None)
            kp, des = d.compute(img, kp)
            hist = self.__getColorHist(img)
            # open(fid, 'wb').write(pickle.dumps((des, hist), -1))
        return des, hist

    def __getColorHist(self, img):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # Histogram over the hue channel only.
        dist = cv2.calcHist([hsv], [0], None, [256], [0, 256])
        return dist
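Stepping back, the class above is a bag-of-visual-words pipeline: cluster local descriptors with k-means, histogram the cluster assignments per image, tf-idf-weight the histograms, and boost a multinomial naive Bayes on top. A compressed sketch of that flow on synthetic descriptors (shapes and names here are illustrative):

# Illustrative bag-of-visual-words flow with synthetic "descriptors".
import numpy as np
from sklearn.cluster import MiniBatchKMeans
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(0)
n_images, n_clusters = 20, 8
# Each "image" yields 30 random 16-dim local descriptors.
descriptors = [rng.rand(30, 16) for _ in range(n_images)]

kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, n_init=3)
kmeans.fit(np.vstack(descriptors))

# Per-image histogram of visual-word assignments.
counts = np.zeros((n_images, n_clusters))
for i, des in enumerate(descriptors):
    words, freq = np.unique(kmeans.predict(des), return_counts=True)
    counts[i, words] = freq

tfidf = TfidfTransformer().fit_transform(counts)
labels = rng.randint(0, 2, n_images)
clf = AdaBoostClassifier(MultinomialNB(), n_estimators=10).fit(tfidf, labels)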
# Imports assumed by this fragment (xtrain/xtest/ytrain/ytest, the initial
# ypred, and the label encoder `le` come from earlier steps not shown here).
import math
import matplotlib.pyplot as plt
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, mean_squared_error)

# scikit-learn metrics expect y_true first, then y_pred.
confusionmatrix = confusion_matrix(ytest, ypred)
print(confusionmatrix)
rmse = math.sqrt(mean_squared_error(ytest, ypred))
print(rmse)
plt.plot(ypred)
plt.show()

from sklearn.ensemble import AdaBoostClassifier

adc = AdaBoostClassifier(random_state=0, learning_rate=1.0)
print(adc.fit(xtrain, ytrain))
ypred = adc.predict(xtest)
ypred1 = adc.predict(xtrain)
print(ypred)
print(list(le.inverse_transform(ypred)))
print(classification_report(ytest, ypred))
print(adc.predict_proba(xtest))
print(adc.predict_log_proba(xtest))
print(accuracy_score(ytest, ypred))
print(accuracy_score(ytrain, ypred1))

import xgboost as xgb
import lightgbm as lgb
from xgboost import plot_importance

xgb1 = xgb.XGBClassifier(booster='gbtree', n_jobs=-1, n_estimators=500,
                         max_depth=0, learning_rate=0.3, random_state=14,
                         max_leaves=5, grow_policy="lossguide")
print(xgb1.fit(xtrain, ytrain))
ypred = xgb1.predict(xtest)
ypred1 = xgb1.predict(xtrain)
print(ypred)
print(xgb1.predict_proba(xtest))
print(list(le.inverse_transform(ypred)))
print(accuracy_score(ytest, ypred))
print(accuracy_score(ytrain, ypred1))

lgb1 = lgb.LGBMClassifier(boosting_type="gbdt", num_leaves=5, n_estimators=500,
                          n_jobs=-1, learning_rate=0.3, max_depth=0,
                          random_state=14)
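A hedged follow-up sketch: the three boosters above can be compared under one cross-validation protocol instead of single train/test accuracies. It assumes the adc, xgb1, lgb1, xtrain, and ytrain names from the fragment above.

# Sketch: compare the three boosters with the same 5-fold CV protocol.
from sklearn.model_selection import cross_val_score

models = {
    'AdaBoost': adc,
    'XGBoost': xgb1,
    'LightGBM': lgb1,
}
for name, model in models.items():
    scores = cross_val_score(model, xtrain, ytrain, cv=5, scoring='accuracy')
    print('{}: {:.3f} +/- {:.3f}'.format(name, scores.mean(), scores.std()))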