def RunSkMethod(s='ppn'):
    """Fit the estimator selected by ``s`` on the Iris data and plot it.

    Recognised values of ``s`` and the estimator each one builds:

    * ``'ppn'``    -- ``Perceptron(n_iter=40, eta0=0.1, shuffle=True)``
    * ``'lr'``     -- ``LogisticRegression(C=100.0)``
    * ``'svc'``    -- ``SVC(kernel='linear', C=1.0)``
    * ``'svm'``    -- ``SVC(kernel='rbf')`` with ``gamma``/``C`` read from the
      module-level ``args[2]`` and ``args[3]``
    * ``'tree'``   -- ``DTC(criterion='entropy', max_depth=3)``; also writes
      ``tree.dot``
    * ``'forest'`` -- ``RFC(criterion='entropy', n_estimators=10, n_jobs=2)``
    * ``'knn'``    -- ``KNC(n_neighbors=5, p=2, metric='minkowski')``
    * ``'pca'``    -- builds ``PCA(n_components=2)`` and returns immediately
      without fitting or plotting

    For ``'lr'`` the class probabilities of the first standardised test
    sample are printed as well.

    After running with ``s='tree'``, render the exported graph from a
    terminal with::

        dot -Tpng tree.dot -o tree.png

    Raises:
        ValueError: if ``s`` is not one of the names listed above.  (The
            previous code fell through and crashed later on an unbound
            local variable, after the data set had already been built.)
    """
    # Zero-argument factories: only the selected estimator is constructed, so
    # e.g. ``args`` is read only when 'svm' is actually requested.
    # NOTE(review): ``n_iter`` was replaced by ``max_iter`` in newer
    # scikit-learn releases -- confirm against the pinned version.
    builders = {
        'ppn': lambda: Perceptron(n_iter=40, eta0=0.1, random_state=0,
                                  shuffle=True),
        'lr': lambda: LogisticRegression(C=100.0, random_state=0),
        'svc': lambda: SVC(kernel='linear', C=1.0, random_state=0),
        'svm': lambda: SVC(kernel='rbf', random_state=0,
                           gamma=float(args[2]), C=float(args[3])),
        'tree': lambda: DTC(criterion='entropy', max_depth=3, random_state=0),
        'forest': lambda: RFC(criterion='entropy', n_estimators=10,
                              random_state=1, n_jobs=2),
        'knn': lambda: KNC(n_neighbors=5, p=2, metric='minkowski'),
        'pca': lambda: PCA(n_components=2),
    }
    if s not in builders:
        raise ValueError(
            'unknown method %r; expected one of %s' % (s, sorted(builders)))

    method = builders[s]()
    if s == 'pca':
        # The PCA object is only constructed; nothing is fitted or drawn.
        return

    dd = ir.IrisDataSets()
    dd.useFit(method)
    pdr.plot_decision_regions(X=dd.X_combined_std, y=dd.y_combined,
                              classifier=method, test_idx=range(105, 150))
    dd.drawGraph()

    if s == 'lr':
        print(method.predict_proba(dd.X_test_std[0, :].reshape(1, -1)))

    if s == 'tree':
        export_graphviz(method, out_file='tree.dot',
                        feature_names=['petal length', 'petal width'])
class PerceptronWrapper:
    """Wrapper around a ``Perceptron`` offering entropy-based uncertainty.

    ``retrain`` creates and fits ``self.classifier``; every other method
    delegates to that object and therefore requires ``retrain`` to have been
    called first.

    NOTE(review): the uncertainty methods call
    ``self.classifier.predict_proba``.  scikit-learn's ``Perceptron`` does not
    provide ``predict_proba``; if that is the class imported by this file,
    those methods raise ``AttributeError`` -- confirm which ``Perceptron`` is
    in scope.
    """

    def __init__(self, C=1):
        # k is the number of features
        self.k = 4
        # This is not actually used
        self.C = C

    def retrain(self, examples, labels, weights):
        """Replace the classifier with a fresh one fitted on the given data.

        ``weights`` is forwarded to ``fit`` as ``sample_weight``.
        """
        self.classifier = Perceptron()
        self.classifier.fit(examples, labels, sample_weight=weights)

    def predict(self, testExamples):
        """Return the classifier's predictions for ``testExamples``."""
        return self.classifier.predict(testExamples)

    def score(self, testExamples, labels):
        """Return ``self.classifier.score(testExamples, labels)``."""
        return self.classifier.score(testExamples, labels)

    def fscore(self, testExamples, labels):
        """Return the F1 score for label ``1`` on the given examples.

        A small epsilon is added to every denominator so that a missing
        positive class yields a score of 0 instead of a division by zero.
        """
        epsilon = 0.000000001
        true_positives = 0
        predicted_positives = 0
        actual_positives = 0
        for prediction, label in zip(self.predict(testExamples), labels):
            if prediction == 1:
                predicted_positives += 1
                if label == 1:
                    true_positives += 1
            if label == 1:
                actual_positives += 1

        precision = true_positives / (predicted_positives + epsilon)
        recall = true_positives / (actual_positives + epsilon)
        return 2 * ((precision * recall) / (precision + recall + epsilon))

    def _entropy(self, prob):
        """Return the entropy of one probability vector.

        Each probability is offset by 1e-7 inside the logarithm so that a
        zero entry does not produce ``log(0)``.
        """
        return -sum((p * log(p + 0.0000001) for p in prob), 0.0)

    def getUncertainty(self, example):
        """Return the predictive entropy for a single example."""
        return self.getAllUncertainties([example])[0]

    def getAllUncertainties(self, examples):
        """Return a list with the predictive entropy of every example."""
        probs = self.classifier.predict_proba(examples)
        return [self._entropy(prob) for prob in probs]

    def getMostUncertainTask(self, tasks, taskIndices):
        """Find the task with the highest predictive entropy.

        ``tasks`` and ``taskIndices`` are walked in lockstep.  Ties are
        resolved in favour of the earliest task.

        Returns:
            A tuple ``(probabilities, index)``: the ``predict_proba`` row of
            the selected task and its entry from ``taskIndices``.

        Raises:
            IndexError: if no task can be selected (``tasks`` is empty or no
                entropy compares greater than minus infinity, e.g. NaN).
        """
        highestUncertainty = float('-inf')
        mostUncertainTask = None
        mostUncertainTaskIndex = None
        found = False

        entropies = self.getAllUncertainties(tasks)
        for task, index, uncertainty in zip(tasks, taskIndices, entropies):
            # Strict '>' keeps the first of several equally uncertain tasks.
            if uncertainty > highestUncertainty:
                highestUncertainty = uncertainty
                mostUncertainTask = task
                mostUncertainTaskIndex = index
                found = True

        if not found:
            raise IndexError('getMostUncertainTask: no task to choose from')

        return (self.classifier.predict_proba([mostUncertainTask])[0],
                mostUncertainTaskIndex)

    def getTotalUncertainty(self, examples):
        """Return the mean predictive entropy over ``examples``.

        Returns ``0.0`` for an empty collection instead of dividing by zero.
        The probabilities are requested in one batched call rather than one
        call per example.
        """
        count = len(examples)
        if count == 0:
            return 0.0
        return sum(self.getAllUncertainties(examples), 0.0) / count
final_STD_ACC = STD_ACC else: final_AVG_ACC = np.dstack([final_AVG_ACC, AVG_ACC]) final_STD_ACC = np.dstack([final_STD_ACC, STD_ACC]) final_accuracy_mean_list = np.mean(final_AVG_ACC, axis=2) max_ind = np.unravel_index(np.argmax(final_accuracy_mean_list, axis=None), final_accuracy_mean_list.shape) chosen_alpha = alpha_list[max_ind[0]] chosen_penalty = penality_list[max_ind[1]] print "By Cross Validation - Chosen alpha for Perceptron: ", chosen_alpha print "By Cross Validation - Chosen Penalty for Perceptron: ", chosen_penalty perceptron_model_final = Perceptron(penalty=chosen_penalty, alpha=chosen_alpha, class_weight='balanced') perceptron_model_final = CalibratedClassifierCV( base_estimator=perceptron_model_final, cv=10, method='isotonic') perceptron_model_final.fit(df_train_features, df_train_class) predicted_train = perceptron_model_final.predict(df_train_features) predicted_test = perceptron_model_final.predict(df_test_features) predicted_prob_train = perceptron_model_final.predict_proba(df_train_features) predicted_prob_test = perceptron_model_final.predict_proba(df_test_features) evaluate_classifier_performance(df_train_class, predicted_train, predicted_prob_train, df_test_class, predicted_test, predicted_prob_test, 'y')
# XGBoost: record accuracy on the test split, then predict for `word_vector`.
# (`xgboost` and the first `xgboost_prediction` are presumably produced just
# above this excerpt -- verify.)
model_score.append(accuracy_score(y_test, xgboost_prediction))
xgboost_prediction = xgboost.predict(word_vector)
try:
    xgboost_prob = xgboost.predict_proba(word_vector)
except Exception:
    # Best effort: keep model_probas aligned with model_score even when the
    # model cannot provide probabilities.  `except Exception` (rather than a
    # bare `except`) lets KeyboardInterrupt/SystemExit propagate.
    model_probas.append("n/a")
else:
    model_probas.append(xgboost_prob)

# Perceptron
perc = Perceptron()
perc.fit(X_train, y_train)
perc_prediction = perc.predict(X_test)
model_score.append(accuracy_score(y_test, perc_prediction))
perc_prediction = perc.predict(word_vector)
try:
    # NOTE(review): scikit-learn's Perceptron has no predict_proba, so this
    # branch presumably always records "n/a" -- confirm.
    perc_prob = perc.predict_proba(word_vector)
except Exception:
    model_probas.append("n/a")
else:
    model_probas.append(perc_prob)

# K-NearestNeighbor
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
knn_prediction = knn.predict(X_test)
model_score.append(accuracy_score(y_test, knn_prediction))
knn_prediction = knn.predict(word_vector)
try:
    knn_prob = knn.predict_proba(word_vector)
except Exception:
    model_probas.append("n/a")
else:
    model_probas.append(knn_prob)
def predict_perceptron(X_train, y_train, X_test, sample_weight):
    """Fit a weighted ``Perceptron(alpha=0.01)`` and score ``X_test``.

    The model is trained on ``X_train``/``y_train`` with ``sample_weight``
    passed through to ``fit``; the value returned is whatever the fitted
    model's ``predict_proba(X_test)`` yields.

    NOTE(review): scikit-learn's ``Perceptron`` does not implement
    ``predict_proba``; if that is the class imported here, the final call
    raises ``AttributeError`` -- confirm and, if so, decide on a calibrated
    wrapper or ``decision_function``.
    """
    perceptron = Perceptron(alpha=0.01)
    perceptron.fit(X_train, y_train, sample_weight=sample_weight)
    return perceptron.predict_proba(X_test)
# In[67]:
# Fit the model on the training split.
model.fit(X_train, y_train)

# In[68]:
# Evaluate on the held-out split via the model's own score method.
score = model.score(X_test, y_test)

# In[69]:
# Bare expression: displayed by the notebook, a no-op when run as a script.
score

# In[70]:
# Result is not stored; in the notebook it was only displayed.
model.predict_proba(X_test)

# In[73]:
# Columns labelled 0 through 3 of `iris` as an array
# (presumably the feature columns -- verify).
X = iris.loc[:, 0:3].values

# In[76]:
# Column 4 of `iris` as an array (presumably the class labels -- verify).
y = iris[4].values

# In[77]:
# Distinct values present in y (displayed only).
np.unique(y)

# In[78]: