def main():
    """Compare Adaboost against a single classification tree on digits 1 vs 8."""
    print('--- Adaboost ---')
    data = datasets.load_digits()
    X, y = data.data, data.target
    digit1 = 1
    digit2 = 8
    # Keep only the two chosen digit classes — this Adaboost is binary.
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Relabel to {+1, -1} as the boosting code expects.
    y[y == digit1] = 1
    y[y == digit2] = -1
    X = data.data[idx]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = Adaboost(n_estimators=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_pred, y_test)

    clf_tree = ClassificationTree()
    clf_tree.fit(X_train, y_train)
    y_pred_tree = clf_tree.predict(X_test)
    # BUG FIX: previously scored `y_pred` (the Adaboost predictions) here,
    # so the reported tree accuracy was just the Adaboost accuracy again.
    acc_tree = accuracy_score(y_pred_tree, y_test)

    print("Adaboost_Accuracy:", acc)
    print("Tree_Accuracy:", acc_tree)
def fit(self, X, y):
    """Train `self.n_clfs` weak classifiers with Adaboost-style reweighting.

    :param X: array of shape (n_samples, n_features)
    :param y: labels in {-1, +1} (binary problem only)
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    # Initialise all sample weights to a uniform distribution.
    w = np.full(n_samples, (1 / n_samples))
    # Store each trained weak classifier.
    self.clfs = []
    for i in tqdm(range(self.n_clfs)):
        # Instantiate and train one weak classifier.
        # NOTE(review): the tree never receives the sample weights `w`, so
        # every round fits the same unweighted data — confirm whether
        # ClassificationTree.fit should take `w` (standard Adaboost does).
        clf = ClassificationTree()
        clf.fit(X, y)
        y_pred = clf.predict(X)
        # Weighted training error: total weight of misclassified samples.
        # (Leftover debug print() calls removed from this loop.)
        error = sum(w[y != y_pred])
        # If the error exceeds 0.5, flip the predictions; because the
        # problem is binary, the flipped classifier has error 1 - error < 0.5.
        if error > 0.5:
            self.polarity[i] = -1
            y_pred *= -1
            error = 1 - error
        # Classifier weight; the epsilon guards against division by zero.
        self.alphas[i] = 0.5 * np.log((1.0 - error) / (error + 1e-10))
        # NOTE(review): when polarity is -1, `y_pred` was already negated
        # above, so multiplying by polarity here un-flips it — verify that
        # this double negation is intended.
        predictions = np.array(self.polarity[i] * y_pred)
        # Up-weight misclassified samples, down-weight the rest, then
        # renormalise so the weights remain a distribution.
        w *= np.exp(-self.alphas[i] * y * predictions)
        w /= sum(w)
        self.clfs.append(clf)
def main():
    """Train a RandomForest on the digits dataset and report test accuracy."""
    data = datasets.load_digits()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=10)
    clf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, clf.predict(X_test))
    print ("Accuracy:", accuracy)
def main():
    """Train an XGBoost classifier on the iris dataset and report accuracy."""
    print ("-- XGBoost --")
    data = datasets.load_iris()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, seed=3)

    clf = XGBoost(n_estimators=20)
    clf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, clf.predict(X_test))
    print ("Accuracy:", accuracy)
def main():
    """Train a ClassificationTree on the iris dataset and report accuracy."""
    print("-- Classification Tree --")
    data = datasets.load_iris()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, clf.predict(X_test))
    print("Accuracy:", accuracy)
def main():
    """Train a multilayer perceptron on digits and plot the test predictions."""
    data = datasets.load_digits()
    X = data.data
    y = data.target
    # Keep the raw digit labels for the plot legend before one-hot encoding.
    class_labels = np.unique(y)
    y = to_categorical(y.astype("int"))
    n_hidden = 512
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = MultilayerPerceptron(n_hidden)
    clf.fit(X_train, y_train)

    # Convert one-hot outputs back to class indices for scoring.
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    # BUG FIX: `np.unique(y)` was evaluated after one-hot encoding, which
    # yields only [0, 1]; use the original digit labels for the legend.
    Plot().plot_in_2d(X_test, y_pred, title="perceptron",
                      accuracy=accuracy, legend_labels=class_labels)
def main_classifier():
    """Train a gradient-boosting classifier on iris and report test accuracy."""
    print("-- Gradient Boosting Classification --")
    data = datasets.load_iris()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    # (Removed leftover debug print of y_train.shape.)
    clf = GradientBoostingClassifier(n_estimators=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
def main():
    """Train a single-layer perceptron on normalized digits and plot results."""
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    # Keep the raw digit labels for the plot legend before one-hot encoding.
    class_labels = np.unique(y)
    # data preprocess: One-hot encoding of nominal y-values
    y = to_categorical(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    # Convert one-hot outputs back to class indices for scoring.
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    # BUG FIX: `np.unique(y)` was evaluated after one-hot encoding, which
    # yields only [0, 1]; use the original digit labels for the legend.
    Plot().plot_in_2d(X_test, y_pred, title="perceptron",
                      accuracy=accuracy, legend_labels=class_labels)
def evaluate(self, sess, data_manager, id_to_tag):
    # Evaluate both slot-filling and intent-classification performance.
    """
    :param sess: session to run the model
    :param data: list of data
    :param id_to_tag: index to tag name
    :param id_to_intent: index to intent name
    :return: evaluate result
    """
    slot_results = []
    itent_results = []
    # Transition matrix used for Viterbi decoding of the slot scores.
    trans = self.trans.eval()
    for batch in data_manager.iter_batch():
        strings = batch[0]
        tags = batch[-2]  # gold slot labels
        intents = np.asarray(batch[-1])[:, 1]  # gold intent labels
        lengths, scores_slot, intent_idx, intent_rank = self.run_step(
            sess, False, batch)
        # Best tag path per sentence via Viterbi decoding.
        batch_paths = self.decode(scores_slot, lengths, trans)
        for i in range(len(strings)):
            result = []
            # Truncate to the true (unpadded) sequence length.
            string = strings[i][:lengths[i]]
            gold = iobes_iob(
                [id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
            pred = iobes_iob(
                [id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]])
            # NOTE(review): the loop variables below shadow `gold`/`pred`;
            # zip() captures the lists first, so behavior is correct but
            # the names are reused after this loop only as scalars.
            for char, gold, pred in zip(string, gold, pred):
                # "char gold pred" triples, the conlleval-style format.
                result.append(" ".join([char, gold, pred]))
            slot_results.append(result)
        # Per-batch intent accuracy (predicted index vs gold label).
        intent_acc = accuracy_score(intents, intent_idx)
        itent_results.append(intent_acc)
    return slot_results, itent_results
def main():
    """Run Adaboost_1 on the binary digits problem (1 vs 8) and print accuracy."""
    data = datasets.load_digits()
    X = data.data
    y = data.target
    digit1 = 1
    digit2 = 8
    # Restrict the dataset to the two chosen digit classes.
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost_1(n_clfs=5)
    clf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, clf.predict(X_test))
    print("Accuracy:", accuracy)
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

from deep.activations_functions import Sigmoid
from deep.loss_functions import CrossEntropy
from deep.perceptron import Perceptron
from utils.utils import accuracy_score

# Train a perceptron on the two-moons toy problem and draw its
# decision boundary over a dense grid.
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

clf = Perceptron(n_iterations=5000,
                 learning_rate=0.001,
                 loss=CrossEntropy,
                 activation_function=Sigmoid)
clf.fit(X, y)

# Grid covering the data, used only for plotting the decision surface.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
tmp = np.c_[xx.ravel(), yy.ravel()]
Z = clf.predict(tmp)
y_pred = np.argmax(Z, axis=1)  # predicted class for every grid point

# BUG FIX: accuracy was previously computed as accuracy_score(y, y_pred),
# comparing the 200 training labels against predictions for the whole
# meshgrid (different length, different points). Score on X instead.
accuracy = accuracy_score(y, np.argmax(clf.predict(X), axis=1))
print(f"Accuracy: {accuracy}")

plt.contourf(xx, yy, y_pred.reshape(xx.shape), cmap=plt.cm.get_cmap("Spectral"))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.get_cmap("Spectral"))
plt.show()
def accuracy(self, y, p):
    """Return classification accuracy for one-hot targets `y` and scores `p`."""
    true_labels = np.argmax(y, axis=1)
    pred_labels = np.argmax(p, axis=1)
    return accuracy_score(true_labels, pred_labels)
self.parameters[c][feature_i]['var'], feature_value) posterior *= likelihood else: posterior *= self.parameters[c][feature_i] # 储存x为类别c的概率 posteriors.append(posterior) # 返回概率最大的类别 return self.classes[np.argmax(posteriors)] def predict(self, X_test): y_pred = [self.get_label(x) for x in X_test] return y_pred if __name__ == '__main__': print("-- Navie-Bayes --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) clf = Navie_Bayes() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy)
def main(args):
    """Train and/or evaluate the code-comment classifier configured by `args`.

    Side effects: may save model weights (args.save) and pickled score
    files for the dev/test sets (args.eval_dev / args.eval_test).
    """
    torch.manual_seed(17)  # Randomly seed PyTorch

    # Load train, dev, test iterators with auto-batching and pretrained vectors
    (train, dev, test, code_vecs, comm_vecs) = utils.load_all_data(
        args.batch_size, args.corpus)

    # Create model
    model = cl.CodeCommClassifier(code_vecs, comm_vecs, gpu=args.use_gpu,
                                  model_type=args.model)

    # Load a saved model
    if args.load != "":
        model.load_state_dict(torch.load(args.load))

    # Discover available CUDA devices and use DataParallelism if possible
    devices = list(range(torch.cuda.device_count()))
    if len(devices) > 1:
        print("Using {} GPUs!!".format(len(devices)))
        model = nn.DataParallel(model)

    # Send model to GPU for processing
    if args.use_gpu:
        model.cuda()

    # Do training
    if args.epochs > 0:
        print("Training {} model w/ batch_sz={} on {} dataset".format(
            args.model, args.batch_size, args.corpus))

        # Adam optimizer works, SGD fails to train the network for any batch size
        optimizer = optim.Adam(model.parameters(), args.learning_rate)

        # Positive class is up-weighted in the loss (value 10 is hard-coded).
        class_weights = torch.tensor([10.])
        if args.use_gpu:
            class_weights = class_weights.cuda()

        for epoch in range(args.epochs):
            model.train()  # Set the model to training mode
            with tqdm(total=len(train),
                      desc="Epoch {}/{}".format(epoch + 1, args.epochs)) as pbar:
                total_loss = 0
                for b_idx, batch in enumerate(train):
                    optimizer.zero_grad()  # Clear current gradient

                    # Get the label into a tensor for loss prop
                    truth = torch.autograd.Variable(batch.label).float()
                    if args.use_gpu:
                        truth = truth.cuda()

                    # Run the model using the batch
                    outputs = model((batch.code, batch.comm))

                    # Get loss from log(softmax())
                    loss = F.binary_cross_entropy_with_logits(
                        outputs.view(-1), truth.view(-1),
                        pos_weight=class_weights)

                    loss.backward()   # Propagate loss
                    optimizer.step()  # Update the optimizer

                    # Running mean of the batch loss for the progress bar.
                    new_loss = loss.item()
                    total_loss += new_loss
                    curr_loss = total_loss / (b_idx + 1)
                    pbar.set_postfix(batch_loss=curr_loss)
                    pbar.update()

            # Check DEV accuracy after every epoch
            scores = score_dataset(model, dev)
            acc = utils.accuracy_score(scores)
            sys.stdout.write("Epoch {} -- dev acc: {}%\n".format(
                epoch + 1, acc))

        # Save the model weights
        if args.save != "":
            torch.save(model.state_dict(), args.save)

    # Save the DEV set evaluations
    if args.eval_dev:
        print(
            "Evaluating Dev for {} model w/ batch_sz={} on {} dataset".format(
                args.model, args.batch_size, args.corpus))
        scores = score_dataset(model, dev)
        dev_score_path = utils.CODE_CORPUS / "results" / "{}_{}_{}_{}_gpus_scores_dev.pkl".format(
            args.model, args.batch_size, args.corpus, len(devices))
        utils.save_scores(scores, dev_score_path)

    # Save the TEST set evaluations
    if args.eval_test:
        # BUG FIX: this branch previously printed "Evaluating Dev" — it was a
        # copy-paste of the dev branch's message.
        print(
            "Evaluating Test for {} model w/ batch_sz={} on {} dataset".format(
                args.model, args.batch_size, args.corpus))
        scores = score_dataset(model, test)
        test_score_path = utils.CODE_CORPUS / "results" / "{}_{}_{}_{}_gpus_scores_test.pkl".format(
            args.model, args.batch_size, args.corpus, len(devices))
        utils.save_scores(scores, test_score_path)