Ejemplo n.º 1
0
class SVMModel(object):
    """Wrapper around a one-vs-one ``SVC`` classifier with pickle persistence.

    Attributes:
        clf: the wrapped ``OneVsOneClassifier(SVC())`` (replaced by ``load``).
        name: fixed display name, ``'SVM'``.
    """

    def __init__(self):
        self.name = 'SVM'
        self.clf = OneVsOneClassifier(SVC())

    def get_params(self):
        """Return whatever the wrapped classifier's ``get_params()`` returns."""
        params = self.clf.get_params()
        return params

    def train(self, dframe):
        """Fit the wrapped classifier on the features/labels taken from *dframe*.

        The extraction is delegated to ``get_featues`` (sic) and ``get_label``,
        both defined outside this snippet.
        """
        features, labels = get_featues(dframe), get_label(dframe)
        self.clf.fit(features, labels)

    def predict(self, X):
        """Return the wrapped classifier's predictions for *X*."""
        return self.clf.predict(X)

    def save(self, fname):
        """Pickle the wrapped classifier to *fname* with the highest protocol."""
        with open(fname, 'wb') as sink:
            pickle.dump(self.clf, sink, pickle.HIGHEST_PROTOCOL)

    def load(self, fname):
        """Replace the wrapped classifier with the object unpickled from *fname*.

        NOTE(review): unpickling executes arbitrary code; only load files
        that come from a trusted source.
        """
        with open(fname, 'rb') as source:
            restored = pickle.load(source)
        self.clf = restored
Ejemplo n.º 2
0
def RVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RVC model and predict on validation data.

    An ``RVC(kernel='rbf', n_iter=1)`` estimator is wrapped in ``GridSearchCV``
    with *Parameters* as its grid, and that search is wrapped in
    ``OneVsOneClassifier``. (``RVC`` is defined outside this snippet.)

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples passed to ``predict``.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets passed to ``fit``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for *XValidate*),
        ``"EstParameters"`` (``clf.get_params()``) and ``"clf"`` (the fitted
        one-vs-one classifier).
    """
    base_estimator = RVC(kernel='rbf', n_iter=1)
    searcher = GridSearchCV(base_estimator, Parameters)
    clf = OneVsOneClassifier(searcher)
    clf.fit(XEstimate, class_labels)

    outcome = {}
    outcome["Yvalidate"] = clf.predict(XValidate)
    outcome["EstParameters"] = clf.get_params()
    outcome["clf"] = clf
    return outcome
def RVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Fit a one-vs-one ensemble of grid-searched RVC models, then predict.

    The base estimator is ``RVC(kernel='rbf', n_iter=2)`` (``RVC`` is defined
    outside this snippet); it is tuned with ``GridSearchCV`` over *Parameters*
    and wrapped in ``OneVsOneClassifier``.

    Args:
        XEstimate: training samples.
        XValidate: samples to predict labels for.
        Parameters: parameter grid for ``GridSearchCV``.
        class_labels: training targets.

    Returns:
        dict with the predictions (``"Yvalidate"``), the classifier's
        ``get_params()`` result (``"EstParameters"``) and the fitted classifier
        itself (``"clf"``).
    """
    tuned_rvc = GridSearchCV(RVC(kernel='rbf', n_iter=2), Parameters)
    one_vs_one = OneVsOneClassifier(tuned_rvc)

    # Train on the estimation set, then label the validation set.
    one_vs_one.fit(XEstimate, class_labels)
    predicted = one_vs_one.predict(XValidate)
    fitted_params = one_vs_one.get_params()

    return {
        "Yvalidate": predicted,
        "EstParameters": fitted_params,
        "clf": one_vs_one,
    }
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RBF SVC and predict on validation data.

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples passed to ``predict``.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets passed to ``fit``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for *XValidate*),
        ``"EstParameters"`` (``clf.get_params()``) and ``"clf"`` (the fitted
        ``OneVsOneClassifier``).
    """
    # SVC(rbf, probability=True) -> grid search -> one-vs-one wrapper.
    clf = OneVsOneClassifier(
        GridSearchCV(SVC(kernel='rbf', probability=True), Parameters))

    clf.fit(XEstimate, class_labels)
    validation_labels = clf.predict(XValidate)

    return {
        "Yvalidate": validation_labels,
        "EstParameters": clf.get_params(),
        "clf": clf,
    }
Ejemplo n.º 5
0
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Fit a one-vs-one ensemble of grid-searched RBF SVCs, then predict.

    The base ``SVC(kernel='rbf', probability=True)`` is tuned by
    ``GridSearchCV`` over *Parameters* and wrapped in ``OneVsOneClassifier``.

    Args:
        XEstimate: training samples.
        XValidate: samples to predict labels for.
        Parameters: parameter grid for ``GridSearchCV``.
        class_labels: training targets.

    Returns:
        dict holding the predictions (``"Yvalidate"``), the classifier's
        ``get_params()`` result (``"EstParameters"``) and the fitted
        classifier (``"clf"``).
    """
    base_svc = SVC(kernel='rbf', probability=True)
    clf = OneVsOneClassifier(GridSearchCV(base_svc, Parameters))
    clf.fit(XEstimate, class_labels)

    report = {}
    report["Yvalidate"] = clf.predict(XValidate)
    report["EstParameters"] = clf.get_params()
    report["clf"] = clf
    return report
Ejemplo n.º 6
0
# Script fragment: train a one-vs-one LinearSVC on svmlight-format features
# and report its accuracy on a held-out test file.

# Where the trained model would be persisted; within this fragment it is only
# referenced by the commented-out joblib.dump / joblib.load calls below.
modelPath = featuresDirPath + 'svmModelsOneVsRest/svmTrainModel.pkl'

# Human-readable start time (printed below) and a numeric timestamp
# (not used within this fragment).
startTime = time.ctime()
start = time.time()

# Load the training and test sets; both paths are defined outside this
# fragment.
X_train, y_train = load_svmlight_file(train_feature_path)
X_test, y_test = load_svmlight_file(test_feature_path)

# Tiny hand-made data set, kept for reference:
#X = np.array([[1,1], [2,2], [-1,2], [-2,3], [-1,-1], [-2,-3], [2,-4], [3,-5]])
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3])
print('start at %s' % startTime)
print('start training...')
clf = OneVsOneClassifier(LinearSVC(random_state=0))
# Alternative one-vs-rest strategy:
#clf = OneVsRestClassifier(LinearSVC(random_state = 0))
# The result of fit() is rebound to clf; every later call uses that object.
clf = clf.fit(X_train, y_train)
print(clf.get_params())
#joblib.dump(clf, modelPath)   # save the trained model

# Matching hand-made test points for the toy data set above:
#lists =[[5, -1], [-2, -6], [2,1], [-2, 5]]
#test = np.array(lists)
#test_label = np.array([3, 2, 0, 1])
print("start predicting...")

#clf = joblib.load(modelPath)   # load the model
# score() result is printed as the accuracy on the test set.
score = clf.score(X_test, y_test)
print('accuracy is {0}'.format(score))
#==============================================================================
# count = 0
# predictions = clf.predict(X_test)
# lens = len(predictions)
# for i in xrange(lens):
Ejemplo n.º 7
0
class Classifier:
    """One-vs-one SVM classifier with on-disk persistence of model and data.

    All files live under ``model/<name>/``:

    * ``model.joblib`` - the fitted ``OneVsOneClassifier``;
    * ``X.joblib`` / ``Y.joblib`` - the feature matrix and labels it was
      fitted on;
    * ``<name>.csv`` - the initial training data (one sample per row, label
      in the last column), read when no stored model is used;
    * ``dataset1.csv`` - written by ``save_model``.

    Attributes:
        model: the current classifier.
        prev_model: shallow copy of the classifier taken by ``train`` before
            refitting (``None`` until ``train`` is called).
        X_temp, Y_temp: the data set used by the last ``train`` call, pending
            ``save_model`` or ``rollback``.
        model_char_size: number of features per sample.
        len_x, len_y: sample count and distinct-label count of the CSV data;
            ``None`` when the model was loaded from disk instead.
        kernel: the kernel name of the underlying SVC.
    """

    def __init__(self,
                 name,
                 load_model=True,
                 c=1,
                 gamma="scale",
                 kernel="linear"):
        """Load the stored model for *name*, or train one from its CSV file.

        Args:
            name: data-set name; selects the ``model/<name>/`` directory and
                the ``<name>.csv`` training file inside it.
            load_model: when true and ``model.joblib`` exists, load it
                instead of training.
            c: ``C`` passed to ``svm.SVC`` when training.
            gamma: ``gamma`` passed to ``svm.SVC`` when training.
            kernel: ``kernel`` passed to ``svm.SVC`` when training.
        """
        self.model = None
        self.model_file = "model.joblib"
        self.path = "model/" + name + "/"
        self.model_char_size = 15
        # Defined up front so the attributes exist on every code path; they
        # are only filled in when the model is trained from the CSV below.
        self.len_x = None
        self.len_y = None

        if load_model and os.path.isfile(self.path + self.model_file):
            self.model = load(self.path + self.model_file)
            self.model_char_size = load(self.path + "X.joblib").shape[1]

        if self.model is None:
            self.model = OneVsOneClassifier(
                svm.SVC(C=c, kernel=kernel, gamma=gamma))
            # train() inspects classes_ to tell whether a fit has happened.
            self.model.classes_ = None
            X = []
            Y = []
            # newline="" is what the csv module expects from its file objects.
            with open(self.path + name + ".csv", 'r', newline="") as cs:
                for row in csv.reader(cs):
                    if not row:
                        # Blank line (e.g. trailing newline): no label to read.
                        continue
                    X.append(row[:-1])
                    Y.append(row[-1])

            X = np.array(X).astype('int')
            # Map the value 255 to 1; every other value is kept as is.
            X = np.where(X == 255, 1, X)
            Y = np.array(Y)
            self.len_x = len(X)
            self.len_y = len(np.unique(Y))

            dump(X, self.path + "X.joblib")
            dump(Y, self.path + "Y.joblib")

            self.model.fit(X, Y)
            # Keep the feature count in sync with the data just fitted, the
            # same way the load path derives it from X.joblib.
            self.model_char_size = X.shape[1]
            dump(self.model, self.path + self.model_file)

        self.kernel = self.model.get_params()['estimator__kernel']
        self.prev_model = None
        self.X_temp = None
        self.Y_temp = None
        np.set_printoptions(precision=3)

    def prediction_test(self, data, label):
        """Return True when the first prediction for *data* equals *label*.

        The prediction is converted with ``str`` and *label* has trailing
        whitespace stripped before the comparison.
        """
        return str(self.model.predict(data)[0]) == label.rstrip()

    def train(self, data, label):
        """Refit the model on the stored data set extended by *data*/*label*.

        The stored ``X.joblib`` / ``Y.joblib`` arrays (when present) are
        extended with the new samples and the model is refitted on the
        result. Nothing is written to disk: the extended data set is kept in
        ``X_temp`` / ``Y_temp`` until ``save_model`` or ``rollback``.

        Args:
            data: new samples; concatenated row-wise to the stored matrix.
            label: label(s) for *data*; appended to the stored labels.

        Returns:
            dict with ``classes_after``, ``classes_len`` and ``count_model``
            for the refitted model and, when the model had been fitted
            before, ``classes_before``, ``classes_len_before`` and
            ``count_model_before``.
        """
        info = {}
        # Previously persisted data set, if any.
        if os.path.isfile(self.path + 'X.joblib'):
            arr = load(self.path + "X.joblib")
        else:
            arr = np.array([])

        if os.path.isfile(self.path + 'Y.joblib'):
            arr_y = load(self.path + "Y.joblib")
        else:
            arr_y = np.array([])

        if arr.size == 0:
            arr = data
        else:
            arr = np.concatenate((arr, data), axis=0)

        arr_y = np.append(arr_y, label)

        self.X_temp = arr
        self.Y_temp = arr_y

        if self.model.classes_ is not None:
            info["classes_before"] = self.model.classes_
            info["classes_len_before"] = len(self.model.classes_)
            info["count_model_before"] = len(self.model.estimators_)

        # Shallow copy kept so that rollback() can restore the old model.
        self.prev_model = copy.copy(self.model)
        self.model.fit(arr, arr_y)
        info["classes_after"] = self.model.classes_
        info["classes_len"] = len(self.model.classes_)
        info["count_model"] = len(self.model.estimators_)

        return info

    def save_model(self):
        """Persist the data set and model produced by the last ``train``.

        Writes ``dataset1.csv`` (features with the label as last column),
        ``X.joblib``, ``Y.joblib`` and the model file, then clears the
        pending data. Only the clearing happens when nothing is pending.
        """
        if self.X_temp is not None and self.Y_temp is not None:
            dtset = np.column_stack((self.X_temp, self.Y_temp))
            # newline="" stops the csv writer from emitting blank lines on
            # platforms that translate "\n" on output.
            with open(self.path + "dataset1.csv", "w", newline="") as f:
                csv.writer(f).writerows(dtset)
            dump(self.X_temp, self.path + "X.joblib")
            dump(self.Y_temp, self.path + "Y.joblib")
            dump(self.model, self.path + self.model_file)
        self.X_temp = None
        self.Y_temp = None

    def rollback(self):
        """Discard the last ``train``: restore the old model, drop pending data.

        When ``train`` has never been called there is no previous model, so
        the current model is left in place instead of being replaced by
        ``None``.
        """
        if self.prev_model is not None:
            self.model = self.prev_model
        self.X_temp = None
        self.Y_temp = None

    def prediction(self, data, verbose=False):
        """Classify one sample by combining the pairwise estimators by hand.

        Args:
            data: one feature vector; converted to an int array of one row.
            verbose: when true, add voting details and the internals of one
                binary estimator to the result.

        Returns:
            dict with ``"prediction"`` (the winning class) and, when
            *verbose* is true, ``"votes"``, ``"n_model"`` and ``"model_ex"``.
        """
        X = np.array([data]).astype("int")
        indices = self.model.pairwise_indices_
        estimators = self.model.estimators_
        pjg = len(estimators)

        # Every estimator sees the same sample unless the model recorded
        # per-estimator column indices.
        if indices is None:
            Xs = [X] * pjg
        else:
            Xs = [X[:, idx] for idx in indices]

        predictions = np.vstack(
            [est.predict(Xi) for est, Xi in zip(estimators, Xs)]).T

        confidences = np.vstack(
            [self.predict_binary(est, Xi)
             for est, Xi in zip(estimators, Xs)]).T
        Y = self.votes_count(predictions, confidences)
        winner = Y["sum_conf"].argmax()

        info = {}
        info["prediction"] = self.model.classes_[winner]
        if verbose:
            info["votes"] = Y['votes']
            info["n_model"] = pjg
            info["model_ex"] = {}
            info["model_ex"]["negative_class"] = self.model.classes_[0]
            # NOTE(review): `winner` is a class index but is used (minus one)
            # to pick a pairwise estimator; for winner == 0 this selects the
            # last estimator. Confirm this is the intended example model.
            est = estimators[winner - 1]
            info["model_ex"]["no"] = winner - 1
            info["model_ex"]["bias"] = est.intercept_
            info["model_ex"]["df"] = est.decision_function(X)
            info["model_ex"]["n_support"] = est.n_support_
            info["model_ex"]["kernel_type"] = est.get_params()['kernel']
            if info["model_ex"]["kernel_type"] == "linear":
                info["model_ex"]["kernel"] = linear_kernel(
                    est.support_vectors_, X)
                info["model_ex"]["w"] = np.around(est.coef_, 2)

            else:
                info["model_ex"]["gamma"] = est._gamma
                info["model_ex"]["kernel"] = rbf_kernel(est.support_vectors_,
                                                        X,
                                                        gamma=est._gamma)
                info["model_ex"]["w"] = est.dual_coef_

        return info

    def prediction_bulk(self, X):
        """Predict every row of *X* and join the predicted labels into a string."""
        return "".join(self.model.predict(X))

    def predict_binary(self, estimator, X):
        """Make predictions using a single binary estimator.

        Delegates to scikit-learn's private ``_predict_binary`` helper.
        """
        return sklearn.multiclass._predict_binary(estimator, X)

    def votes_count(self, pred, conf):
        """Turn pairwise predictions and confidences into per-class scores.

        Column ``k`` of *pred* / *conf* belongs to the k-th class pair
        ``(i, j)`` with ``i < j``, enumerated in nested-loop order. A
        prediction of 0 is a vote for class ``i``, 1 a vote for class ``j``.

        Args:
            pred: array (n_samples, n_pairs) of 0/1 predictions.
            conf: array (n_samples, n_pairs) of confidences.

        Returns:
            dict with ``"votes"`` (vote counts, n_samples x n_classes) and
            ``"sum_conf"`` (votes plus the summed confidences squashed into
            the open interval (-1/3, 1/3), so they can only break ties
            between classes with equal vote counts).
        """
        n_samples = pred.shape[0]
        info = {}

        n_classes = len(self.model.classes_)
        votes = np.zeros((n_samples, n_classes))
        sum_of_confidences = np.zeros((n_samples, n_classes))

        k = 0
        for i in range(n_classes):
            for j in range(i + 1, n_classes):
                sum_of_confidences[:, i] -= conf[:, k]
                sum_of_confidences[:, j] += conf[:, k]
                votes[pred[:, k] == 0, i] += 1
                votes[pred[:, k] == 1, j] += 1

                k += 1
        transformed_confidences = (sum_of_confidences /
                                   (3 * (np.abs(sum_of_confidences) + 1)))

        info["votes"] = votes
        info["sum_conf"] = votes + transformed_confidences
        return info

    def accuracy(self, filename, answ=None):
        """Compute case-sensitive and case-insensitive accuracy percentages.

        Two modes:

        * *answ* given (non-empty): *filename* is treated as a text that is
          compared character by character with *answ*, after newlines and
          spaces have been removed from both. The percentage is relative to
          the length of the cleaned *answ*.
        * *answ* omitted/empty: *filename* names a file under
          ``static/image/character_test/`` with one comma-separated sample
          per line and the label in the last column. Each sample is
          predicted with the model; samples whose length differs from
          ``model_char_size`` are reshaped to a square image and resized
          first.

        Returns:
            dict with ``"sensitive"`` and ``"insensitive"`` percentages; both
            are ``0.0`` when there is nothing to score.
        """
        score = 0
        score1 = 0
        if answ:
            filename = filename.replace("\n", "").replace(" ", "")
            answ = answ.replace("\n", "").replace(" ", "")

            # zip() stops at the shorter text, so surplus characters on
            # either side are never compared.
            for got, expected in zip(filename, answ):
                if got.lower() == expected.lower():
                    score1 += 1
                if got == expected:
                    score += 1
            count = len(answ)

        else:
            # Context manager so the file handle is always released.
            with open("static/image/character_test/" + filename, 'r') as test:
                lines = test.readlines()
            count = len(lines)
            ch = int(math.sqrt(self.model_char_size))
            rs = self.model_char_size
            for i in lines:
                a = i.split(",")
                data = np.array(a[0:-1]).astype('uint8')
                label = a[-1].rstrip()
                im = [data]
                ori_size = int(math.sqrt(len(data)))

                if len(data) != rs:
                    # Resize the square image to the size the model expects.
                    im = np.array(
                        Image.fromarray(data.reshape(
                            (ori_size, ori_size))).resize(
                                (ch, ch))).reshape(rs).reshape(1, -1)

                asd = self.model.predict(im)

                if str(asd[0]).lower() == label.lower():
                    score1 += 1
                if str(asd[0]) == label:
                    score += 1

        if count == 0:
            # Nothing to score: report 0 % instead of dividing by zero.
            return {"sensitive": 0.0, "insensitive": 0.0}

        return {
            "sensitive": score / count * 100,
            "insensitive": score1 / count * 100
        }
Ejemplo n.º 8
0
# Script fragment: train a one-vs-one LinearSVC on svmlight-format features
# and report its accuracy on a held-out test file.

# Human-readable start time (printed below) and a numeric timestamp
# (not used within this fragment).
startTime = time.ctime()
start = time.time()


# Load the training and test sets; both paths are defined outside this
# fragment.
X_train, y_train = load_svmlight_file(train_feature_path)
X_test, y_test = load_svmlight_file(test_feature_path)


# Tiny hand-made data set, kept for reference:
#X = np.array([[1,1], [2,2], [-1,2], [-2,3], [-1,-1], [-2,-3], [2,-4], [3,-5]])
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3])
print('start at %s' % startTime)
print('start training...')
clf = OneVsOneClassifier(LinearSVC(random_state = 0))
# Alternative one-vs-rest strategy:
#clf = OneVsRestClassifier(LinearSVC(random_state = 0))
# The result of fit() is rebound to clf; every later call uses that object.
clf = clf.fit(X_train, y_train)
print(clf.get_params())
#joblib.dump(clf, modelPath)   # save the trained model

# Matching hand-made test points for the toy data set above:
#lists =[[5, -1], [-2, -6], [2,1], [-2, 5]]
#test = np.array(lists)
#test_label = np.array([3, 2, 0, 1])
print("start predicting...")

#clf = joblib.load(modelPath)   # load the model
# score() result is printed as the accuracy on the test set.
score = clf.score(X_test, y_test)
print('accuracy is {0}'.format(score))
#==============================================================================
# count = 0
# predictions = clf.predict(X_test)
# lens = len(predictions)
# for i in xrange(lens):
Ejemplo n.º 9
0
    svm_model_linear = OneVsOneClassifier(
        SVC(max_iter=-1, C=10, gamma=100, kernel='rbf', degree=15))
    svm_model_linear.fit(X_train, y_train)  # learn
    svm_predictions_train = svm_model_linear.predict(X_train)
    svm_predictions = svm_model_linear.predict(X_test)  # predict
    print(svm_predictions_train)
    print(X_train)
    print(y_train)
    #print(svm_predictions)
    #print(y_test)
    # model accuracy for X_test.
    accuracy_train = svm_model_linear.score(X_train, y_train)
    accuracy_test = svm_model_linear.score(X_test, y_test)
    print("Training accuracy:", accuracy_train)
    print("Test accuracy:", accuracy_test)
    print("Params:", svm_model_linear.get_params())

    # creating a confusion matrix. We should have a nice diagonal line.
    cm = confusion_matrix(y_test, svm_predictions)
    print("Confusion matrix:\n", cm)

    if args.quiet: sys.exit(0)  # pass in with -q flag to skip graphing

    # These are settings for drawing. Do whatever with these.
    widths = {2 + ix: 9.9 for ix, col in enumerate(colnamesX[2:])}
    values = {2 + ix: 10 for ix, col in enumerate(colnamesX[2:])}
    figs, ax = plt.subplots(2, 1, figsize=(6, 8))
    print("Features shown in graph:", colnamesX[:2])
    print("Features flattened in graph:", colnamesX[2:])
    #figs, ax = plt.subplots(len(colnamesX) + 1, len(colnamesX), figsize=(3, 4))