Example #1
# scikit-learn imports for the classifiers used below; ``ir`` (IrisDataSets),
# ``pdr`` (plot_decision_regions) and ``args`` are project-level helpers
# assumed to be imported/defined elsewhere in this project.
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC, export_graphviz
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.decomposition import PCA


def RunSkMethod(s='ppn'):
    isTree = False
    if s == 'ppn':
        # n_iter was renamed to max_iter in scikit-learn >= 0.21
        method = Perceptron(n_iter=40, eta0=0.1, random_state=0, shuffle=True)
    elif s == 'lr':
        method = LogisticRegression(C=100.0, random_state=0)
    elif s == 'svc':
        method = SVC(kernel='linear', C=1.0, random_state=0)
    elif s == 'svm':
        method = SVC(kernel='rbf',
                     random_state=0,
                     gamma=float(args[2]),
                     C=float(args[3]))
    elif s == 'tree':
        method = DTC(criterion='entropy', max_depth=3, random_state=0)
        isTree = True
    elif s == 'forest':
        method = RFC(criterion='entropy',
                     n_estimators=10,
                     random_state=1,
                     n_jobs=2)
    elif s == 'knn':
        method = KNC(n_neighbors=5, p=2, metric='minkowski')
    elif s == 'pca':
        # PCA is unsupervised: there are no decision regions to plot,
        # so return early without fitting anything.
        method = PCA(n_components=2)
        return

    dd = ir.IrisDataSets()
    dd.useFit(method)
    pdr.plot_decision_regions(X=dd.X_combined_std,
                              y=dd.y_combined,
                              classifier=method,
                              test_idx=range(105, 150))
    dd.drawGraph()

    if s == 'lr':
        print(method.predict_proba(dd.X_test_std[0, :].reshape(1, -1)))

    # After this function returns, convert the exported file on the terminal:
    # dot -Tpng tree.dot -o tree.png
    if isTree:
        export_graphviz(method,
                        out_file='tree.dot',
                        feature_names=['petal length', 'petal width'])
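
The tree.dot file exported above still has to be converted by hand; if Graphviz is installed, the same terminal command can be run from Python. A minimal sketch, assuming the dot binary is on the PATH:

import subprocess

# Equivalent to running ``dot -Tpng tree.dot -o tree.png`` in a terminal.
subprocess.run(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png'], check=True)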
Example #2
from math import log

from sklearn.linear_model import Perceptron


class PerceptronWrapper:

    # k is the number of features
    def __init__(self, C=1):
        self.k = 4
        self.C = C  # this is not actually used

    def retrain(self, examples, labels, weights):
        # Re-fit a fresh Perceptron on the (possibly re-weighted) examples.
        self.classifier = Perceptron()
        #self.classifier.penalty = 'l2'
        self.classifier.fit(examples, labels, sample_weight=weights)

    def predict(self, testExamples):
        return self.classifier.predict(testExamples)

    def score(self, testExamples, labels):
        return self.classifier.score(testExamples, labels)

    def fscore(self, testExamples, labels):
        # F1 score computed by hand; the tiny constants guard against
        # division by zero when there are no predicted/actual positives.
        predictions = self.predict(testExamples)
        precision = 0.0
        precisionD = 0.000000001  # count of predicted positives
        recall = 0.0
        recallD = 0.000000001  # count of actual positives
        for (prediction, label) in zip(predictions, labels):
            if prediction == 1:
                if label == 1:
                    precision += 1
                precisionD += 1
            if label == 1:
                if prediction == 1:
                    recall += 1
                recallD += 1

        precision /= precisionD
        recall /= recallD

        return 2 * ((precision * recall) / (precision + recall + 0.000000001))

    # Entropy of the predicted class distribution (uncertainty).
    # NOTE: scikit-learn's Perceptron has no predict_proba, so this assumes
    # a probability-producing classifier (e.g. one wrapped in
    # CalibratedClassifierCV as in Example #3).
    def getUncertainty(self, example):
        probs = self.classifier.predict_proba([example])
        entropy = 0.0
        for p in probs[0]:
            entropy += p * log(p + 0.0000001)
        entropy *= -1

        return entropy

    def getAllUncertainties(self, examples):
        entropies = []
        probs = self.classifier.predict_proba(examples)
        for prob in probs:
            # Shannon entropy of the predicted distribution; the small
            # constant avoids log(0).
            entropy = 0.0
            for p in prob:
                entropy += p * log(p + 0.0000001)
            entropy *= -1
            entropies.append(entropy)

        return entropies

    def getMostUncertainTask(self, tasks, taskIndices):
        # Return the predicted distribution and index of the task whose
        # prediction has the highest entropy.
        highestUncertainty = float('-inf')
        mostUncertainTaskIndices = []
        mostUncertainTasks = []

        entropies = self.getAllUncertainties(tasks)
        for (task, i, uncertainty) in zip(tasks, taskIndices, entropies):
            if uncertainty > highestUncertainty:
                mostUncertainTaskIndices = [i]
                mostUncertainTasks = [task]
                highestUncertainty = uncertainty
            elif uncertainty == highestUncertainty:
                mostUncertainTaskIndices.append(i)
                mostUncertainTasks.append(task)

        #(mostUncertainTaskIndex,
        # mostUncertainTask) = sample(zip(mostUncertainTaskIndices,
        #                               mostUncertainTasks), 1)[0]

        mostUncertainTaskIndex = mostUncertainTaskIndices[0]
        mostUncertainTask = mostUncertainTasks[0]

        return (self.classifier.predict_proba([mostUncertainTask])[0],
                mostUncertainTaskIndex)

    def getTotalUncertainty(self, examples):

        totalUncertainty = 0.0
        for example in examples:
            totalUncertainty += self.getUncertainty(example)

        totalUncertainty /= len(examples)

        #return max(self.getAllUncertainties(examples))
        return totalUncertainty
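
The per-example loops in getUncertainty / getAllUncertainties can also be written as a single vectorized operation. A minimal sketch of the same entropy calculation, assuming probs is the array returned by predict_proba:

import numpy as np


def batch_entropy(probs, eps=1e-7):
    # Shannon entropy of each row of an (n_samples, n_classes) array,
    # matching the p * log(p + eps) convention used above.
    probs = np.asarray(probs)
    return -np.sum(probs * np.log(probs + eps), axis=1)

# e.g. entropies = batch_entropy(classifier.predict_proba(examples))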
Example #3
        final_STD_ACC = STD_ACC
    else:
        final_AVG_ACC = np.dstack([final_AVG_ACC, AVG_ACC])
        final_STD_ACC = np.dstack([final_STD_ACC, STD_ACC])

final_accuracy_mean_list = np.mean(final_AVG_ACC, axis=2)
max_ind = np.unravel_index(np.argmax(final_accuracy_mean_list, axis=None),
                           final_accuracy_mean_list.shape)

chosen_alpha = alpha_list[max_ind[0]]
chosen_penalty = penality_list[max_ind[1]]
print "By Cross Validation - Chosen alpha for Perceptron: ", chosen_alpha
print "By Cross Validation - Chosen Penalty for Perceptron: ", chosen_penalty

perceptron_model_final = Perceptron(penalty=chosen_penalty,
                                    alpha=chosen_alpha,
                                    class_weight='balanced')
# base_estimator was renamed to estimator in scikit-learn >= 1.2
perceptron_model_final = CalibratedClassifierCV(
    base_estimator=perceptron_model_final, cv=10, method='isotonic')
perceptron_model_final.fit(df_train_features, df_train_class)

predicted_train = perceptron_model_final.predict(df_train_features)
predicted_test = perceptron_model_final.predict(df_test_features)

predicted_prob_train = perceptron_model_final.predict_proba(df_train_features)
predicted_prob_test = perceptron_model_final.predict_proba(df_test_features)

evaluate_classifier_performance(df_train_class, predicted_train,
                                predicted_prob_train, df_test_class,
                                predicted_test, predicted_prob_test, 'y')
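
The grid-selection step above relies on np.dstack stacking one accuracy matrix per run along a third axis, so the mean over axis=2 leaves an (alpha, penalty) grid; np.unravel_index then turns the flat argmax back into a row/column pair. A minimal self-contained sketch of that indexing (the grid values and accuracies below are made up for illustration):

import numpy as np

alpha_list = [0.0001, 0.001, 0.01]
penalty_list = ['l1', 'l2']

# Hypothetical mean cross-validation accuracy per (alpha, penalty) pair.
mean_acc = np.array([[0.71, 0.74],
                     [0.78, 0.83],
                     [0.80, 0.79]])

row, col = np.unravel_index(np.argmax(mean_acc), mean_acc.shape)
print(alpha_list[row], penalty_list[col])  # -> 0.001 l2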
Example #4
# XGBoost: accuracy on the test split, then a prediction for the single word_vector
model_score.append(accuracy_score(y_test, xgboost_prediction))
xgboost_prediction = xgboost.predict(word_vector)
try:
    xgboost_prob = xgboost.predict_proba(word_vector)
    model_probas.append(xgboost_prob)
except:
    model_probas.append("n/a")

# Perceptron
perc = Perceptron()
perc.fit(X_train, y_train)
perc_prediction = perc.predict(X_test)
model_score.append(accuracy_score(y_test, perc_prediction))
perc_prediction = perc.predict(word_vector)
try:
    # scikit-learn's Perceptron has no predict_proba, so this normally
    # falls through to the except branch.
    perc_prob = perc.predict_proba(word_vector)
    model_probas.append(perc_prob)
except:
    model_probas.append("n/a")

#K-NearestNeighbor
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
knn_prediction = knn.predict(X_test)
model_score.append(accuracy_score(y_test, knn_prediction))
knn_prediction = knn.predict(word_vector)
try:
    knn_prob = knn.predict_proba(word_vector)
    model_probas.append(knn_prob)
except:
    model_probas.append("n/a")
Example #5
def predict_perceptron(X_train, y_train, X_test, sample_weight):
    # alpha only has an effect when a penalty is set; with the default
    # penalty=None it is ignored.
    clf = Perceptron(alpha=0.01)
    clf.fit(X_train, y_train, sample_weight=sample_weight)

    # NOTE: scikit-learn's Perceptron does not implement predict_proba,
    # so this line raises AttributeError unless the model is calibrated
    # first (see the sketch below).
    predictions = clf.predict_proba(X_test)
    return predictions
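
A minimal sketch of a probability-producing variant, wrapping the Perceptron in CalibratedClassifierCV as Example #3 does (the function name and the cv/method values are illustrative assumptions):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import Perceptron


def predict_perceptron_calibrated(X_train, y_train, X_test, sample_weight=None):
    # Calibration fits a probability model on top of the Perceptron,
    # so predict_proba becomes available.
    clf = CalibratedClassifierCV(Perceptron(), cv=5, method='sigmoid')
    clf.fit(X_train, y_train, sample_weight=sample_weight)
    return clf.predict_proba(X_test)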
Example #6
# In[67]:

model.fit(X_train, y_train)

# In[68]:

score = model.score(X_test, y_test)

# In[69]:

score

# In[70]:

model.predict_proba(X_test)

# In[73]:

# iris is assumed to be a DataFrame with integer column labels (0-4), e.g.
# the UCI iris CSV read with header=None; .loc slicing is label-inclusive,
# so 0:3 selects all four feature columns.
X = iris.loc[:, 0:3].values

# In[76]:

y = iris[4].values

# In[77]:

np.unique(y)

# In[78]:
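
These notebook cells rely on model and iris being created in earlier cells that are not shown. A minimal sketch of a matching setup (the file name, the header=None column handling, and the choice of a calibrated Perceptron as model are assumptions):

import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split

# Hypothetical load of the UCI iris CSV: no header row, so the columns get
# the integer labels 0-4 (four features plus the class name).
iris = pd.read_csv('iris.data', header=None)

X = iris.loc[:, 0:3].values
y = iris[4].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Calibrating the Perceptron gives it the predict_proba used above.
model = CalibratedClassifierCV(Perceptron(), cv=5, method='sigmoid')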