def trainOneSVM(masterK, y, subjects):
    """Fit a precomputed-kernel SVC, picking C by subject-grouped validation.

    Samples are assigned to one of ``NUM_MINI_FOLDS`` folds by
    ``subject index % NUM_MINI_FOLDS``, so all samples of a subject land in
    the same fold. For every candidate C the mean ROC AUC over the usable
    folds is computed, and the first C with the highest mean is used to fit
    the final model on the full kernel.

    Args:
        masterK: Square precomputed kernel matrix over all samples.
        y: Label array, indexable by integer index arrays.
        subjects: Per-sample subject identifiers.

    Returns:
        An ``sklearn.svm.SVC(kernel="precomputed")`` fitted on ``masterK``.
    """
    Cs = 1.0 / np.array([0.1, 0.5, 2.5, 12.5, 62.5, 312.5])
    NUM_MINI_FOLDS = 4
    # Used only when no fold can be scored; 1.0 is SVC's own default.
    DEFAULT_C = 1.0

    _, subjectIdxs = np.unique(subjects, return_inverse=True)
    foldOfSample = subjectIdxs % NUM_MINI_FOLDS

    # The folds do not depend on C, so build them once. A fold is usable only
    # if its test split contains more than one class (ROC AUC is undefined
    # otherwise).
    folds = []
    for i in range(NUM_MINI_FOLDS):
        testIdxs = np.nonzero(foldOfSample == i)[0]
        trainIdxs = np.nonzero(foldOfSample != i)[0]
        if len(np.unique(y[testIdxs])) > 1:
            folds.append((trainIdxs, testIdxs))

    bestC = DEFAULT_C
    highestAccuracy = -float("inf")
    for C in Cs:  # For each regularization value
        accuracies = []
        for trainIdxs, testIdxs in folds:
            trainK = masterK[trainIdxs, :]
            trainK = trainK[:, trainIdxs]
            svm = sklearn.svm.SVC(kernel="precomputed", C=C)
            svm.fit(trainK, y[trainIdxs])

            # Rows are test samples, columns are training samples.
            testK = masterK[testIdxs, :]
            testK = testK[:, trainIdxs]
            accuracies.append(
                sklearn.metrics.roc_auc_score(y[testIdxs], svm.decision_function(testK)))
        if not accuracies:
            # No usable fold: keep the default instead of comparing against NaN.
            continue
        meanAccuracy = np.mean(accuracies)
        if meanAccuracy > highestAccuracy:
            highestAccuracy = meanAccuracy
            bestC = C

    svm = sklearn.svm.SVC(kernel="precomputed", C=bestC)
    svm.fit(masterK, y)
    return svm
def trainSVM(filteredFaces, labels, subjects, e):
    """Leave-one-subject-out evaluation of a linear-kernel SVM for label ``e``.

    For every unique subject, an ``SVC(kernel="precomputed")`` is trained on
    the Gram matrix of all other subjects' rows and scored with ROC AUC on the
    held-out subject. The per-subject AUC is printed, followed by the mean and
    median over subjects whose AUC is finite.

    Args:
        filteredFaces: Feature matrix, one row per sample.
        labels: Per-sample labels; the binary target is ``labels == e``.
        subjects: Per-sample subject identifiers.
        e: Label value treated as the positive class.

    Returns:
        None. Results are printed.
    """
    masterK = filteredFaces.dot(filteredFaces.T)
    accuracies = []
    for testSubject in np.unique(subjects):
        trainIdxs = np.nonzero(subjects != testSubject)[0]
        testIdxs = np.nonzero(subjects == testSubject)[0]

        trainK = masterK[trainIdxs, :]
        trainK = trainK[:, trainIdxs]
        svm = sklearn.svm.SVC(kernel="precomputed")
        svm.fit(trainK, labels[trainIdxs] == e)

        # Kernel between held-out rows and training rows.
        testK = filteredFaces[testIdxs].dot(filteredFaces[trainIdxs].T)
        yhat = svm.decision_function(testK)
        y = labels[testIdxs] == e

        # ROC AUC needs both classes in the held-out data.
        if len(np.unique(y)) > 1:
            auc = sklearn.metrics.roc_auc_score(y, yhat)
        else:
            auc = np.nan
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print("{}: {}".format(testSubject, auc))
        accuracies.append(auc)

    accuracies = np.array(accuracies)
    accuracies = accuracies[np.isfinite(accuracies)]
    print("{} {}".format(np.mean(accuracies), np.median(accuracies)))
def increment_svm(svm, L_ids, baseline_accuracy):
    """Propose an enlarged labeled set using the SVM's most confident predictions.

    Reads the module-level ``X``, ``y`` and ``instance_ids``. The unlabeled
    instances with the 500 lowest and 500 highest decision values are labeled
    with ``svm.predict``. Two stratified shuffle splits each take 10% of them
    (``test_size=0.9``) as an augmentation set, and every augmented labeled
    set is scored by 5-fold cross-validation of a fresh ``LinearSVC``.

    Args:
        svm: Fitted classifier exposing ``decision_function`` and ``predict``.
        L_ids: Ids (row indices into ``X``/``y``) of the labeled instances.
        baseline_accuracy: Not used by this function; kept for callers.

    Returns:
        Tuple ``(best_L_prime_ids, best_accuracy)``: the ids of the augmented
        labeled set with the highest mean CV accuracy, and that accuracy.
    """
    L = X[L_ids]
    y_l = y[L_ids]

    U_ids = np.array(list(set(instance_ids) - set(L_ids)))
    U = X[U_ids]

    # Most negative and most positive decision values = most confident.
    ordered_indices = np.argsort(svm.decision_function(U))
    smallest_indices = ordered_indices[:500]
    largest_indices = ordered_indices[-500:]

    high_confidence_unlabeled = scipy.sparse.vstack([U[smallest_indices], U[largest_indices]])
    high_confidence_ids = np.concatenate([U_ids[smallest_indices], U_ids[largest_indices]])
    high_confidence_predicted_labels = svm.predict(high_confidence_unlabeled)

    # NOTE(review): sklearn.cross_validation is the legacy module (replaced by
    # sklearn.model_selection); left as is to match the sklearn version this
    # code was written against.
    splits = sklearn.cross_validation.StratifiedShuffleSplit(high_confidence_predicted_labels, n_iter=2, test_size=0.9)

    saved_L_prime_ids = []
    saved_cv_accuracies = []

    # Only the small (augment) side of each split is used.
    for augment_indices, _ in splits:
        augment = high_confidence_unlabeled[augment_indices]
        augment_ids = high_confidence_ids[augment_indices]
        augment_labels = high_confidence_predicted_labels[augment_indices]

        L_prime = scipy.sparse.vstack([L, augment])
        y_l_prime = np.concatenate([y_l, augment_labels])
        saved_L_prime_ids.append(np.concatenate([L_ids, augment_ids]))

        svm_prime = sklearn.svm.LinearSVC(penalty='l2', C=10, dual=False)
        accuracy = sklearn.cross_validation.cross_val_score(svm_prime, L_prime, y_l_prime, cv=5, n_jobs=7).mean()
        saved_cv_accuracies.append(accuracy)

    best_index = np.argmax(saved_cv_accuracies)
    return saved_L_prime_ids[best_index], saved_cv_accuracies[best_index]
Esempio n. 4
0
def classify_embedded_attributes(data_mat, svms):
    """
    Evaluate every attribute SVM on every feature vector (=row) of data_mat.

    @return: Matrix of shape (len(svms), number of rows in data_mat). Row i
             holds the decision values of svms[i]; rows whose SVM is None
             stay zero.
    """
    attribute_count = len(svms)
    example_count = data_mat.shape[0]
    scores = np.zeros(shape=(attribute_count, example_count))
    log.d("Classifying {} examples...".format(example_count))
    for step, classifier in enumerate(svms, start=1):
        update_progress(step, attribute_count)
        if classifier is None:
            continue
        decision = classifier.decision_function(data_mat)
        if sklearn.__version__ != '0.14.1':
            # the return format of this function was changed in 0.15...
            decision = decision.T
        scores[step - 1] = decision
    print("")
    return scores
Esempio n. 5
0
def test_svc_ovr_tie_breaking(SVCClass):
    """Check that ``break_ties`` controls tie handling of predict in OVR mode.

    Related issue: https://github.com/scikit-learn/scikit-learn/issues/8277
    """
    X, y = make_blobs(random_state=27)

    # Dense grid spanning the bounding box of the training data.
    xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 1000)
    ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 1000)
    xx, yy = np.meshgrid(xs, ys)
    grid = np.c_[xx.ravel(), yy.ravel()]

    def predict_equals_argmax(break_ties):
        # Fit a fresh linear OVR model and compare predict() with the argmax
        # of the decision values on the grid.
        model = SVCClass(kernel="linear", decision_function_shape='ovr',
                         break_ties=break_ties, random_state=42).fit(X, y)
        pred = model.predict(grid)
        dv = model.decision_function(grid)
        return np.all(pred == np.argmax(dv, axis=1))

    assert not predict_equals_argmax(False)
    assert predict_equals_argmax(True)
Esempio n. 6
0
    def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
        """Run one SVM inference pass over every word in ``data``.

        For each word, the first ``self.dtr`` columns are combined with the
        output of ``self.extend_context`` on the matching rows of
        ``confidence``; the SVM's decision values for those rows (passed
        through a sigmoid when ``norm`` is true) are written to the new
        confidence matrix. The last column of each word is used as its labels.

        Returns:
            (length-weighted mean accuracy, per-word mean accuracy,
            new confidence matrix with the shape of ``confidence``).
        """
        print('\tPerforming SVM inference')
        n_words = len(data)
        print(n_words)

        weighted_acc_sum = 0
        plain_acc_sum = 0
        rows_seen = 0
        words_seen = 0
        conf_new = np.zeros(confidence.shape)

        start = 0
        for idx in range(n_words):
            word = data[idx]
            word_len = word.shape[0]
            stop = start + word_len

            labels = word[:, -1]
            if in_test:
                self.test_labels[start:stop] = labels

            # TODO: implemented iterative context inference
            # Layout of each row: [X | extended context]
            width = self.dtr + self.n_classes * self.window_size * 2
            W_prime = np.zeros((word_len, width))
            W_prime[:, :self.dtr] = word[:, :self.dtr]
            W_prime[:, self.dtr:] = self.extend_context(confidence[start:stop, :])

            # Confidence measures of the predictions
            conf = svm.decision_function(W_prime)
            if norm:
                # Sigmoid function --> Normalization
                conf = (1 + np.exp(-1 * conf)) ** -1

            conf_new[start:stop, :] = conf
            start = stop

            # Accumulate accuracy rates
            rows_seen += word_len
            words_seen += 1
            subtask_acc = svm.score(W_prime, labels)
            plain_acc_sum += subtask_acc
            weighted_acc_sum += subtask_acc * word_len

        return weighted_acc_sum / rows_seen, plain_acc_sum / words_seen, conf_new
Esempio n. 7
0
def create_svm_decision_boundary(svm: sklearn.svm.SVC, N: int, samples: torch.Tensor):
    """Plot the decision function of a precomputed-kernel SVM on a 2-D grid.

    Builds an N x N grid over [-10, 10] x [-10, 10], computes the kernel
    between the grid points and the training points with
    ``implicitDecisionBoundary``, evaluates ``svm.decision_function`` on it and
    draws filled contours plus the -1 / 0 / +1 level lines. It then calls
    ``differentiate_surface`` on the training points, draws the resulting
    sign-corrected gradients as arrows, and shows a histogram of the returned
    surface values. Two figures are shown via ``plt.show()``.

    Args:
        svm: Fitted SVC; it is fed a kernel matrix, so it is presumably
            configured with kernel="precomputed" - TODO confirm with caller.
        N: Number of grid points per axis.
        samples: Tensor from which columns [0, 1] and [0, 2] are stacked to
            form the training points (assumes at least 3 columns).

    Returns:
        (Z_npy, CS): the decision values reshaped to the grid shape, and the
        contour set returned by ``plt.contour``.
    """
    xranges = torch.linspace(-10,10,N)
    yranges = torch.linspace(-10,10,N)
    X, Y = torch.meshgrid(xranges, yranges)
    shapes = X.detach().numpy().shape
    test_samples = torch.stack([X.ravel(), Y.ravel()], dim=-1) #Batch, 2; Tensor
    test_samples_npy = test_samples.detach().numpy() #Batch, 2; numpy
    # Training points: columns (0, 1) of samples followed by columns (0, 2).
    UP = samples[:,:2] #(L,2)
    DOWN = samples[:,[0,2]] #(L,2)
    train_samples = torch.cat([UP, DOWN], dim=0) #(2L,2)
    kernel = implicitDecisionBoundary(test_samples, train_samples, surface=False) #To (Batch, 2L)
    kernel.detach_()
    kernel = kernel.numpy()

    Z = svm.decision_function(kernel) #Batch, ncls; numpy; must use precomputed format; F(x,y,z) = 0/+-1 etc.
    Z_npy = Z.reshape(*shapes)
    X_npy, Y_npy = X.detach().numpy(), Y.detach().numpy()

    plt.contourf(X_npy, Y_npy, Z_npy, alpha=0.8, cmap=plt.cm.get_cmap("gnuplot2"))
    plt.colorbar()
    CS = plt.contour(X_npy, Y_npy, Z_npy, levels=[-1, 0, 1], alpha=1, linestyles=["--", "-", "--"], colors=["k", "k", "k"], )
    # plot_samples(samples, show=False, alpha=.8) #scatter plot
    # plt.scatter(*train_samples[svm.support_].detach().numpy().T, s=100, linewidth=1, facecolors="none", edgecolors="k",)
    # allsegs[1] belongs to the second requested level (0); [0] is its first segment.
    # NOTE(review): this raises IndexError if the 0-level contour has no segment.
    level1 = CS.allsegs[1][0] #Fetch the coordinates of boundary condition! (WITHOUT F(x,y,z)=0 implicit function!!)
    plt.scatter(*level1.T, c="g") #Make sure boundary is drawn
    # level1 = CS.allsegs[1][1] #Fetch the coordinates of boundary condition! (WITHOUT F(x,y,z)=0 implicit function!!)
    # plt.scatter(*level1.T, c="g") #Make sure boundary is drawn

    #INFERENCE
    what_samples = train_samples #train_samples, test_samples, level1
    # grad, surf = differentiate_surface(samples, reference=samples, svm=svm) #(2L,2) ; weighted sum
    grad, surf = differentiate_surface(what_samples, reference=samples, direct=True, svm=svm, sample_len=samples.shape[0]*2) #(2L,2) ; weighted sum
    # signed_grad = - grad.data.div(grad.data.norm(dim=-1, keepdim=True)) * surf.sign() #SIGN should be considered for gradient info!
    signed_grad = - grad.data * surf.sign() #SIGN should be considered for gradient info!
    # what_samples may be a tensor or (when switched to level1 above) a numpy array.
    if isinstance(what_samples, torch.Tensor): plt.quiver(*what_samples.detach().numpy().T, *signed_grad.detach().numpy().T, width=.005)
    else: plt.quiver(*what_samples.T, *signed_grad.detach().numpy().T, width=.005)
    plt.title("Decision Boundary")
    plt.show()

    plt.hist(surf.detach().numpy().reshape(-1,), bins=50)
    plt.axvline(surf.detach().numpy().reshape(-1,).mean(), c="r", linewidth=4)
    plt.title("Distribution of Implicit Function Values")
    plt.show()

    # NOTE(review): Z is evaluated on the grid while surf is evaluated on
    # what_samples (the training points here); if their lengths differ,
    # allclose raises and the exception text is printed instead - confirm intent.
    try: print(np.allclose(Z, surf.detach().numpy().reshape(-1,), atol=5e-4)) #True: LIBSVM decision bounary (i.e. F(x,y,z)=k and manual F(x,y,z)=k are the same!)
    except Exception as e: print(e)
    print("Done!")

    return Z_npy, CS
Esempio n. 8
0
    def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
        """Score every word with ``svm`` and rebuild the confidence matrix.

        Each word contributes ``word.shape[0]`` consecutive rows. Its feature
        block is the first ``self.dtr`` columns of the word followed by
        ``self.extend_context`` applied to the word's rows of ``confidence``.
        Decision values (squashed by a sigmoid when ``norm`` is true) become
        the word's rows in the returned confidence matrix; the word's last
        column supplies the labels used for ``svm.score``.

        Returns:
            Tuple of (accuracy averaged over rows, accuracy averaged over
            words, new confidence matrix).
        """
        print('\tPerforming SVM inference')
        Nt = len(data)
        print(Nt)

        conf_new = np.zeros(confidence.shape)
        acc_by_row, acc_by_word = 0, 0
        row_total, word_total = 0, 0

        offset = 0
        for pos in range(Nt):
            word = data[pos]
            word_len = word.shape[0]
            rows = slice(offset, offset + word_len)

            targets = word[:, -1]
            if in_test:
                self.test_labels[rows] = targets

            # TODO: implemented iterative context inference
            # Feature block: [X | extended context]
            W_prime = np.zeros((word_len, self.dtr + self.n_classes * self.window_size * 2))
            W_prime[:, :self.dtr] = word[:, :self.dtr]
            W_prime[:, self.dtr:] = self.extend_context(confidence[rows, :])

            # Confidence measures of predictions
            conf = svm.decision_function(W_prime)
            if norm:
                # Sigmoid function --> Normalization
                conf = (1 + np.exp(-1 * conf)) ** -1
            conf_new[rows, :] = conf
            offset += word_len

            # Update the running accuracy totals
            row_total += word_len
            word_total += 1
            subtask_acc = svm.score(W_prime, targets)
            acc_by_word += subtask_acc
            acc_by_row += subtask_acc * word_len

        return acc_by_row / row_total, acc_by_word / word_total, conf_new
def outlier_detection_with_SVM(dataframe, kernel, gamma, outlier_percentage):
    """Split the rows of ``dataframe`` into inliers and outliers with a One-Class SVM.

    Note that the SVM parameters are highly sensitive to the dataset, so they
    have to be manually selected for each dataset.

    Args:
        dataframe: pandas DataFrame; its ``.values`` are the points to fit.
        kernel: Kernel name passed to ``OneClassSVM``.
        gamma: Kernel coefficient passed to ``OneClassSVM``.
        outlier_percentage: Used to derive the score threshold (see the
            review note in the body).

    Returns:
        Tuple ``(inliers_idx, outliers_idx)`` of row-index arrays.
    """
    assert isinstance(dataframe, DataFrame), "Expected pandas DataFrame, but got %s."%type(dataframe)
    from scipy.stats import scoreatpercentile
    from sklearn.svm import OneClassSVM

    detector = OneClassSVM(kernel=kernel, gamma=gamma)

    points = dataframe.values
    detector.fit(points)
    # decision_function returned shape (n, 1) in old scikit-learn releases and
    # (n,) in current ones; flattening makes the index extraction below work
    # for both.
    assignment = np.ravel(detector.decision_function(points))

    # NOTE(review): scoreatpercentile expects a percentile in [0, 100], and
    # OneClassSVM decision values are positive for inliers. Both the
    # `1 - outlier_percentage` argument and the `<=` / `>` labelling below look
    # suspicious; behavior is intentionally left unchanged - confirm the
    # intended unit and direction with the callers.
    score = scoreatpercentile(assignment, 1 - outlier_percentage)

    inliers_idx = np.where(assignment <= score)[0]
    outliers_idx = np.where(assignment > score)[0]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("%s inliers and %s outliers" % (len(inliers_idx), len(outliers_idx)))
    return inliers_idx, outliers_idx
Esempio n. 10
0
    def scores_ovr_student(self, X):
        '''
        Compute class scores for OVR.

        Every sample is passed, one at a time, to the decision function of
        each binary SVM stored in self.binary_svm (in the dict's iteration
        order).

        Arguments:
            X: Features to predict.

        Returns:
            scores: a numpy ndarray with one entry per sample, each holding
                the decision values of all binary SVMs for that sample.
        '''
        return np.array([
            np.array([
                classifier.decision_function([sample])
                for classifier in self.binary_svm.values()
            ])
            for sample in X
        ])
Esempio n. 11
0
def main():
    """Run a sliding-window detector over an image pyramid and save the result.

    Loads the pickled classifier from ``model_file``, reads the image given as
    ``sys.argv[1]`` as grayscale, and for 16 successively downscaled versions
    scores every window of the LBP histogram with ``svm.decision_function``.
    Windows scoring above ``THRESHOLD`` are recorded (coordinates divided by
    the current scale), drawn as red rectangles over the original image, and
    the figure is saved under ``out/`` with the input's base name.
    """
    # NOTE: pickle.load can execute arbitrary code; only load model files from
    # a trusted source.
    with open(model_file, 'rb') as model_fh:
        svm = pickle.load(model_fh)
    target = color.rgb2gray(io.imread(sys.argv[1]))
    target_scaled = target + 0  # independent copy that is rescaled below

    # Window size measured in histogram cells.
    window_rows = HEIGHT // CELL_SIZE
    window_cols = WIDTH // CELL_SIZE

    scale_factor = 2.0**(-1.0 / 8.0)
    detections = []
    for s in range(16):
        scale = scale_factor**s
        histogram = lbp.get_histogram(target_scaled)

        # NOTE(review): these ranges stop one window short of the last valid
        # position (no "+ 1"); left unchanged - confirm whether intended.
        for y in range(0, histogram.shape[0] - window_rows):
            for x in range(0, histogram.shape[1] - window_cols):
                features = histogram[y:y + window_rows,
                                     x:x + window_cols].reshape(1, -1)
                score = svm.decision_function(features)

                if score[0] > THRESHOLD:
                    print(score, features)
                    detections.append({
                        'x': x * CELL_SIZE // scale,
                        'y': y * CELL_SIZE // scale,
                        'width': WIDTH // scale,
                        'height': HEIGHT // scale,
                        'score': score[0]
                    })
        target_scaled = transform.rescale(target_scaled, scale_factor)

    print(detections)
    ax = plt.axes()
    ax.imshow(target, cmap=cm.Greys_r)
    ax.set_axis_off()
    for d in detections:
        # NOTE(review): Rectangle takes its anchor as (x, y); here the anchor
        # is passed as (d['y'], d['x']). Left unchanged because the axis
        # layout of lbp.get_histogram is not visible here - confirm.
        ax.add_patch(
            plt.Rectangle((d['y'], d['x']),
                          d['width'],
                          d['height'],
                          edgecolor='r',
                          facecolor='none'))
    #plt.show()
    plt.savefig('out/{}'.format(os.path.basename(sys.argv[1])))
Esempio n. 12
0
def testmodel(path):
    """Return the top-5 accuracy of the saved SVM on the data under ``path``.

    Loads the data with ``loaddata``, extracts five feature groups per image
    (histogram, GLCM, LBP, HOG and a SIFT bag-of-words vector), applies the
    saved PCA transforms to the first four, stacks everything, filters it with
    ``meaningful`` and scores it with the saved SVM. A sample counts as
    correct when its tag is among the classes with the five highest decision
    values; tied values are consumed together in one round.

    Args:
        path: Location handed to ``loaddata``.

    Returns:
        Fraction of samples whose tag is in the top five, as a float.
    """
    data, tags = loaddata(path)
    types = [
        'bear', 'bicycle', 'bird', 'car', 'cow', 'elk', 'fox', 'giraffe',
        'horse', 'koala', 'lion', 'monkey', 'plane', 'puppy', 'sheep',
        'statue', 'tiger', 'tower', 'train', 'whale', 'zebra'
    ]
    svm = joblib.load("SVM.pkl")
    pca1 = joblib.load("pca1.pkl")
    pca2 = joblib.load("pca2.pkl")
    pca3 = joblib.load("pca3.pkl")
    pca4 = joblib.load("pca4.pkl")
    # The vocabulary file holds the cluster centers; load it once.
    centers = joblib.load("voc.pkl")

    hist_features = []
    glcm_features = []
    lbp_features = []
    hog_features = []
    bow_features = np.float32([]).reshape(0, 50)
    ty = []
    for i, img in enumerate(data):
        ty.append(tags[i])
        hist_features.append(hist(img))
        glcm_features.append(glcm_feature(img))
        lbp_features.append(lbp_feature(img))
        hog_features.append(hog(img))
        featVec = calcFeatVec(calcSiftFeature(img), centers)
        bow_features = np.append(bow_features, featVec, axis=0)

    # PCA is applied to the first four groups; the bag-of-words block is
    # used as is.
    all_feature = np.hstack((
        np.array(pca1.transform(hist_features)),
        pca2.transform(glcm_features),
        pca3.transform(lbp_features),
        pca4.transform(hog_features),
        bow_features,
    ))
    all_feature = meaningful(np.array(all_feature))

    dec = svm.decision_function(all_feature)
    c, k = dec.shape

    TOP_K = 5
    num = 0
    for i in range(c):
        row = dec[i]
        for _ in range(TOP_K):
            # Take every class tied for the current maximum, then mask them
            # out so the next round finds the next-best score. -inf cannot
            # collide with a real decision value.
            best = np.nonzero(row == np.max(row))[0]
            row[best] = -np.inf
            if any(types[kk] == ty[i] for kk in best):
                num += 1
                break
    return (num / float(c))
Esempio n. 13
0
# Toy data set: the four corners of the unit square, labelled like a
# logical AND (only the point (1, 1) is in class 1).
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
y = np.array([0, 0, 0, 1])


# Linear SVM with a very large C.
# NOTE(review): `clf` is rebound here from whatever provided `.SVC`
# (presumably an alias of sklearn.svm - confirm against the imports) to the
# estimator instance.
clf = clf.SVC(kernel='linear', C=1e6)
clf.fit(X, y)

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)

# plot the decision function
# The axis limits are read after the scatter call so that the grid below
# spans the plotted area.
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# create grid to evaluate model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)

# plot decision boundary and margins
# (level 0 is drawn solid, levels -1 and +1 dashed)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
# plot support vectors
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, linewidth=1, facecolors='none')
plt.show()
Esempio n. 14
0
import ms.version

ms.version.addpkg('numpy', '1.14.2')
ms.version.addpkg('scipy', '1.0.0')
ms.version.addpkg('sklearn', '0.19.1')

import sys
import time
import numpy as np

from sklearn import svm

import util

# Value passed as `nu` to the One-Class SVM below.
outlier_frac = 0.05
# NOTE: this rebinds the name `svm` from the imported module to the estimator.
svm = svm.OneClassSVM(nu=outlier_frac, kernel='rbf', gamma=0.1)
# Request loop: each iteration reads a training point list and a prediction
# point list from stdin, refits the model, and writes two lists to stdout.
# The loop has no exit condition of its own.
while True:
    X_train = util.receive_point_list_from_stdin()
    X_predict = util.receive_point_list_from_stdin()
    X_train, X_predict = util.nomalize_train_evaluate_data(X_train, X_predict)

    # The same estimator object is refitted on every iteration.
    svm.fit(X_train)
    pred = svm.predict(X_predict)

    # True where the model predicted -1.
    bools = pred == -1
    decisions = np.squeeze(svm.decision_function(X_predict))

    util.send_bool_list_to_stdout(bools)
    util.send_double_list_to_stdout(decisions)
    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Compute and report three metrics on Inception activations.

        Collects feature activations of real and generated images for
        ``self.num_images`` images, then reports via ``self._report_result``:

        * ``-FID``: ``|mu_fake - mu_real|^2 + trace(sigma_fake + sigma_real
          - 2 * sqrtm(sigma_fake . sigma_real))``.
        * ``-U``: one minus the training accuracy of a ``LinearSVC`` fitted to
          separate real (target 1) from fake (target 0) activations.
        * ``-P``: the fraction of index-paired samples where the fake
          activation's SVM decision value exceeds the real one's.

        Args:
            Gs: Generator network; cloned once per GPU.
            Gs_kwargs: Extra keyword arguments for ``Gs.get_output_for``.
            num_gpus: Number of GPUs to build the graph on.
        """
        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl(
            'https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn'
        )  # inception_v3_features.pkl
        # One row of features per image, for real and for generated images.
        real_activations = np.empty(
            [self.num_images, inception.output_shape[1]], dtype=np.float32)
        fake_activations = np.empty(
            [self.num_images, inception.output_shape[1]], dtype=np.float32)

        # Construct TensorFlow graph.
        self._configure(self.minibatch_per_gpu, hole_range=self.hole_range)
        # NOTE: real_img_expr / fake_img_expr are filled below but not read
        # again in this method.
        real_img_expr = []
        fake_img_expr = []
        real_result_expr = []
        fake_result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] +
                                           Gs_clone.input_shape[1:])
                reals, labels = self._get_minibatch_tf()
                reals_tf = tflib.convert_images_from_uint8(reals)
                masks = self._get_random_masks_tf()
                fakes = Gs_clone.get_output_for(latents, labels, reals_tf,
                                                masks, **Gs_kwargs)
                # Only the first three channels are kept for both image sets.
                fakes = tflib.convert_images_to_uint8(fakes[:, :3])
                reals = tflib.convert_images_to_uint8(reals_tf[:, :3])
                real_img_expr.append(reals)
                fake_img_expr.append(fakes)
                real_result_expr.append(inception_clone.get_output_for(reals))
                fake_result_expr.append(inception_clone.get_output_for(fakes))

        for begin in tqdm(range(0, self.num_images, minibatch_size)):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            real_results, fake_results = tflib.run(
                [real_result_expr, fake_result_expr])
            # The last batch may be partial, so the results are truncated to
            # the number of rows still missing.
            real_activations[begin:end] = np.concatenate(real_results,
                                                         axis=0)[:end - begin]
            fake_activations[begin:end] = np.concatenate(fake_results,
                                                         axis=0)[:end - begin]

        # Calculate FID conveniently.
        mu_real = np.mean(real_activations, axis=0)
        sigma_real = np.cov(real_activations, rowvar=False)
        mu_fake = np.mean(fake_activations, axis=0)
        sigma_fake = np.cov(fake_activations, rowvar=False)
        m = np.square(mu_fake - mu_real).sum()
        s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False)
        dist = m + np.trace(sigma_fake + sigma_real - 2 * s)
        # Only the real part of the result is reported.
        self._report_result(np.real(dist), suffix='-FID')

        # Linear SVM separating real (1) from fake (0) activations; it is
        # scored on the same data it was fitted on.
        svm = sklearn.svm.LinearSVC(dual=False)
        svm_inputs = np.concatenate([real_activations, fake_activations])
        svm_targets = np.array([1] * real_activations.shape[0] +
                               [0] * fake_activations.shape[0])
        svm.fit(svm_inputs, svm_targets)
        self._report_result(1 - svm.score(svm_inputs, svm_targets),
                            suffix='-U')
        real_outputs = svm.decision_function(real_activations)
        fake_outputs = svm.decision_function(fake_activations)
        # Element-wise comparison: fake sample i against real sample i.
        self._report_result(np.mean(fake_outputs > real_outputs), suffix='-P')
Esempio n. 16
0
# Drop every sample whose label is 2; X and y must already be defined.
X, y = X[y != 2], y[y != 2]

# Add noisy features to make the problem harder
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=0)

# Learn to predict each class against the other
# NOTE: this rebinds the name `svm` from the object providing `.SVC` to the
# estimator instance.
svm = svm.SVC(kernel='linear', probability=True, random_state=random_state)

### y_score, computed with decision_function(), is what gets passed to roc_curve()
svm.fit(X_train, y_train)
y_score = svm.decision_function(X_test)

# Compute ROC curve and ROC area for each class
fpr, tpr, threshold = roc_curve(y_test, y_score)  ### compute the true positive rate and false positive rate
roc_auc = auc(fpr, tpr)  ### compute the AUC value

# NOTE(review): two figures are created here; the plot calls below draw on
# the second one, so the first stays empty.
plt.figure()
lw = 2
plt.figure(figsize=(10, 10))
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)  ### curve with the false positive rate on the x-axis and the true positive rate on the y-axis
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
Esempio n. 17
0
def increment_svm(svm, L_ids, baseline_accuracy):
    """Propose an enlarged labeled set using the SVM's most confident predictions.

    Reads the module-level ``X``, ``y`` and ``instance_ids``. The unlabeled
    instances with the 500 lowest and 500 highest decision values are labeled
    with ``svm.predict``. Two stratified shuffle splits each take 10% of them
    (``test_size=0.9``) as an augmentation set, and every augmented labeled
    set is scored by 5-fold cross-validation of a fresh ``LinearSVC``.

    Args:
        svm: Fitted classifier exposing ``decision_function`` and ``predict``.
        L_ids: Ids (row indices into ``X``/``y``) of the labeled instances.
        baseline_accuracy: Not used by this function; kept for callers.

    Returns:
        Tuple ``(best_L_prime_ids, best_accuracy)``: the ids of the augmented
        labeled set with the highest mean CV accuracy, and that accuracy.
    """
    L = X[L_ids]
    y_l = y[L_ids]

    U_ids = np.array(list(set(instance_ids) - set(L_ids)))
    U = X[U_ids]

    # Most negative and most positive decision values = most confident.
    ordered_indices = np.argsort(svm.decision_function(U))
    smallest_indices = ordered_indices[:500]
    largest_indices = ordered_indices[-500:]

    high_confidence_unlabeled = scipy.sparse.vstack(
        [U[smallest_indices], U[largest_indices]])
    high_confidence_ids = np.concatenate(
        [U_ids[smallest_indices], U_ids[largest_indices]])
    high_confidence_predicted_labels = svm.predict(high_confidence_unlabeled)

    # NOTE(review): sklearn.cross_validation is the legacy module (replaced by
    # sklearn.model_selection); left as is to match the sklearn version this
    # code was written against.
    splits = sklearn.cross_validation.StratifiedShuffleSplit(
        high_confidence_predicted_labels, n_iter=2, test_size=0.9)

    saved_L_prime_ids = []
    saved_cv_accuracies = []

    # Only the small (augment) side of each split is used.
    for augment_indices, _ in splits:
        augment = high_confidence_unlabeled[augment_indices]
        augment_ids = high_confidence_ids[augment_indices]
        augment_labels = high_confidence_predicted_labels[augment_indices]

        L_prime = scipy.sparse.vstack([L, augment])
        y_l_prime = np.concatenate([y_l, augment_labels])
        saved_L_prime_ids.append(np.concatenate([L_ids, augment_ids]))

        svm_prime = sklearn.svm.LinearSVC(penalty='l2', C=10, dual=False)
        accuracy = sklearn.cross_validation.cross_val_score(svm_prime,
                                                            L_prime,
                                                            y_l_prime,
                                                            cv=5,
                                                            n_jobs=7).mean()
        saved_cv_accuracies.append(accuracy)

    best_index = np.argmax(saved_cv_accuracies)
    return saved_L_prime_ids[best_index], saved_cv_accuracies[best_index]
    [X_outliers,
     np.random.uniform(low=-30, high=30, size=(200, 2))])

# fit the model
# NOTE: this rebinds the name `svm` from the object providing `.OneClassSVM`
# to the estimator instance.
svm = svm.OneClassSVM(kernel="rbf", gamma=0.1, nu=0.1)
svm.fit(X_train)
y_pred_train = svm.predict(X_train)
y_pred_test = svm.predict(X_test)
y_pred_outliers = svm.predict(X_outliers)
# Error counts: train/test points predicted as -1, and points from
# X_outliers predicted as +1.
n_error_train = y_pred_train[y_pred_train == -1].size
n_error_test = y_pred_test[y_pred_test == -1].size
n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

# plot the line, the points, and the nearest vectors to the plane
# The decision function is evaluated on a 500 x 500 grid covering
# x in [-30, 30] and y in [-10, 10].
xx, yy = np.meshgrid(np.linspace(-30, 30, 500), np.linspace(-10, 10, 500))
Z = svm.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.rc('axes', axisbelow=True)
plt.figure(1)
plt.title("Measured Deviation from Nominal")
# Shaded bands for decision values from Z.min() up to 0, the 0 level as a
# dark red line, and a single fill for values from 0 up to Z.max().
plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
a = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred')
plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred')

s = 6  # marker size shared by the three scatter plots
markers = np.array(['.', 'D'])
b1 = plt.scatter(X_train[:, 0], X_train[:, 1], s=s, color='green')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], s=s, color='blue')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], s=s, color='red')
plt.axis('tight')
Esempio n. 19
0
    def printEvaluasi1(self, svm, weight, label):
        """Report training-set and cross-validation metrics through self.cetak.

        Prints the fitted model's coefficients and intercept, then the
        accuracy, confusion matrix and classification report of ``svm`` on
        ``weight`` / ``label``. When there are more than 10 samples, it also
        runs a manual 10-fold split with fresh deep copies of
        ``self.svm_standard`` (reporting per-fold accuracy, confusion matrix
        and classification report) and finally the mean of ``cross_val_score``
        with ``cv=10``.

        Args:
            svm: Fitted classifier exposing ``decision_function``, ``predict``,
                ``coef_`` and ``intercept_``.
            weight: Feature matrix the classifier is evaluated on.
            label: True labels for ``weight``.
        """
        # Only referenced by the disabled ROC-curve code at the end of this
        # method.
        svm_decision_function = svm.decision_function(weight)
        svm_hasil_prediksi = svm.predict(weight)

        #print("hasil perhitungan svm :")
        #print(svm_decision_function)

        self.cetak("Bobot : ")
        self.cetak(svm.coef_)
        self.cetak("Intercept :")
        self.cetak(svm.intercept_)

        cm_akurasi_score = accuracy_score(label, svm_hasil_prediksi)
        cm_confusionmatrix = confusion_matrix(label, svm_hasil_prediksi)
        cm_classification_report = classification_report(
            label, svm_hasil_prediksi)

        self.cetak("Perhitungan Akurasi Training :")
        self.cetak(round(cm_akurasi_score, 4) * 100)
        self.cetak("Confusion Matrix :")
        self.cetak(cm_confusionmatrix)
        self.cetak("Laporan Klasifikasi :")
        self.cetak(cm_classification_report)

        #'''

        if len(weight) > 10:
            X = np.array(weight)
            y = np.array(label)
            kf = KFold(n_splits=10)

            for i, (train_index, test_index) in enumerate(kf.split(X), start=1):
                self.cetak("".join(
                    ["Perhitungan akurasi Fold ",
                     str(i), " : "]))

                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                # Every fold gets its own untouched copy of the base model.
                fold_svm = copy.deepcopy(self.svm_standard)
                fold_svm.fit(X_train, y_train)
                self.cetak(round(fold_svm.score(X_test, y_test), 4) * 100)

                fold_predik = fold_svm.predict(X_test)
                # Both metrics take (y_true, y_pred), matching the
                # training-set calls above; with the arguments swapped the
                # matrix is transposed and precision/recall are exchanged.
                self.cetak(confusion_matrix(y_test, fold_predik))
                self.cetak(classification_report(y_test, fold_predik))

            cross_hasil = cross_val_score(copy.deepcopy(self.svm_standard),
                                          weight,
                                          label,
                                          cv=10,
                                          n_jobs=-1)
            self.cetak("perhitungan akurasi terhadap keseluruhan data : ")
            self.cetak(round(cross_hasil.mean(), 4) * 100)

        #'''
        '''

        pembuatan kurva roc

        '''
        '''
        
        roc_fpr = dict()
        roc_tpr = dict()
        roc_auc = dict()

        label_binari = label_binarize(svm_hasil_prediksi, svm.classes_)

        for i in range(3):
            roc_fpr[i], roc_tpr[i], _ = roc_curve(label_binari[:, i], svm_decision_function [:, i])
            roc_auc[i] = auc(roc_fpr[i], roc_tpr[i])

        #self.cetak(''.join(str(e) + " " for e in svm.predict(weight)))

        self.cetak("perhitungan akurasi terhadap keseluruhan data : ")
        self.cetak(str(round(cm_akurasi_score * 100, 2)))
        '''
        '''

        perhitungan akurasi menggunakan K-10 fold cross validation

        '''
        '''
Esempio n. 20
0
 # Load every stored classifier whose file name ends in 'svm.pkl' after its
 # last underscore.
 for file in os.listdir(train_file_folder):
     if file.split('_')[-1] == 'svm.pkl':
         svms.append(joblib.load(os.path.join(train_file_folder, file)))
 # Nothing was found on disk: train the classifiers instead.
 if len(svms) == 0:
     svms = train_svms(train_file_folder, model)
 print("Done fitting svms")
 features = model.predict(imgs)
 print("predict image:")
 print(np.shape(features))
 results = []
 results_label = []
 # `count` is the index of the current feature vector and is used to look up
 # the matching entry of `verts`.
 count = 0
 for f in features:
     for svm in svms:
         pred = svm.predict([f.tolist()])
         pred_prob = svm.decision_function([f.tolist()])
         # not background
         if pred[0] != 0:
             # print(pred_prob[0])
             rect = list(verts[count])
             # print(verts[count], r_l)
             # The decision value is appended as the last element of the entry.
             rect.append(pred_prob[0])
             # print(rect)
             results.append(rect)
             results_label.append(pred[0])
     count += 1
 # print(results)
 results_np = np.array(results, dtype="float32")
 # print(results_np)
 # print(results_np.shape)
 # presumably non-maximum suppression over the collected entries - confirm
 # against py_cpu_nms
 keep_results_index = py_cpu_nms(results_np)
Esempio n. 21
0
 def plotSupportVectors(self, svm):
     decisionFunction = svm.decision_function(X)